diff --git a/asmjit b/asmjit index 5ac69447dc..316812daf0 160000 --- a/asmjit +++ b/asmjit @@ -1 +1 @@ -Subproject commit 5ac69447dc2b7bca332be552cbe747051641f9e9 +Subproject commit 316812daf0d734f1e3dc3abb05785737513274f0 diff --git a/asmjit.vcxproj b/asmjit.vcxproj new file mode 100644 index 0000000000..98dbfb40b4 --- /dev/null +++ b/asmjit.vcxproj @@ -0,0 +1,169 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {AC40FF01-426E-4838-A317-66354CEFAE88} + asmjit + + + + StaticLibrary + true + v120 + Unicode + + + StaticLibrary + true + v120 + Unicode + + + StaticLibrary + false + v120 + true + Unicode + + + StaticLibrary + false + v120 + true + Unicode + + + + + + + + + + + + + + + + + + + .\libs\$(Configuration)_x86\ + + + + + .\libs\$(Configuration)_x86\ + + + + + .\libs\$(Configuration)\ + + + + + .\libs\$(Configuration)\ + + + + + + Level3 + Disabled + false + ASMJIT_STATIC;_MBCS;%(PreprocessorDefinitions) + + + true + + + + + Level3 + Disabled + false + ASMJIT_STATIC;_MBCS;%(PreprocessorDefinitions) + + + true + + + + + Level3 + MaxSpeed + true + true + false + ASMJIT_STATIC;_UNICODE;UNICODE;%(PreprocessorDefinitions) + + + true + true + true + + + + + Level3 + MaxSpeed + true + true + false + ASMJIT_STATIC;_UNICODE;UNICODE;%(PreprocessorDefinitions) + + + true + true + true + + + + + + \ No newline at end of file diff --git a/asmjit.vcxproj.filters b/asmjit.vcxproj.filters new file mode 100644 index 0000000000..bddd91cf68 --- /dev/null +++ b/asmjit.vcxproj.filters @@ -0,0 +1,29 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/asmjit.vcxproj.user b/asmjit.vcxproj.user new file mode 100644 index 0000000000..ef5ff2a1fa --- /dev/null +++ b/asmjit.vcxproj.user @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/rpcs3.sln b/rpcs3.sln index 3eda99f54f..2a23107f36 100644 --- a/rpcs3.sln +++ b/rpcs3.sln @@ 
-1,6 +1,6 @@ Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio 2013 -VisualStudioVersion = 12.0.21005.1 +VisualStudioVersion = 12.0.30110.0 MinimumVisualStudioVersion = 10.0.40219.1 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "rpcs3", "rpcs3\rpcs3.vcxproj", "{70CD65B0-91D6-4FAE-9A7B-4AF55D0D1B12}" ProjectSection(ProjectDependencies) = postProject @@ -80,6 +80,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "stc", "wxWidgets\build\msw\ EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "wxscintilla", "wxWidgets\build\msw\wx_vc10_wxscintilla.vcxproj", "{74827EBD-93DC-5110-BA95-3F2AB029B6B0}" EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "asmjit", "asmjit.vcxproj", "{AC40FF01-426E-4838-A317-66354CEFAE88}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug - MemLeak|Win32 = Debug - MemLeak|Win32 @@ -102,10 +104,10 @@ Global {70CD65B0-91D6-4FAE-9A7B-4AF55D0D1B12}.Release|Win32.Build.0 = Release|Win32 {70CD65B0-91D6-4FAE-9A7B-4AF55D0D1B12}.Release|x64.ActiveCfg = Release|x64 {70CD65B0-91D6-4FAE-9A7B-4AF55D0D1B12}.Release|x64.Build.0 = Release|x64 - {6FCB55A5-563F-4039-1D79-1EB6ED8AAB82}.Debug - MemLeak|Win32.ActiveCfg = Debug - MemLeak|Win32 - {6FCB55A5-563F-4039-1D79-1EB6ED8AAB82}.Debug - MemLeak|Win32.Build.0 = Debug - MemLeak|Win32 - {6FCB55A5-563F-4039-1D79-1EB6ED8AAB82}.Debug - MemLeak|x64.ActiveCfg = Debug - MemLeak|x64 - {6FCB55A5-563F-4039-1D79-1EB6ED8AAB82}.Debug - MemLeak|x64.Build.0 = Debug - MemLeak|x64 + {6FCB55A5-563F-4039-1D79-1EB6ED8AAB82}.Debug - MemLeak|Win32.ActiveCfg = Debug|Win32 + {6FCB55A5-563F-4039-1D79-1EB6ED8AAB82}.Debug - MemLeak|Win32.Build.0 = Debug|Win32 + {6FCB55A5-563F-4039-1D79-1EB6ED8AAB82}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 + {6FCB55A5-563F-4039-1D79-1EB6ED8AAB82}.Debug - MemLeak|x64.Build.0 = Debug|x64 {6FCB55A5-563F-4039-1D79-1EB6ED8AAB82}.Debug|Win32.ActiveCfg = Debug|Win32 
{6FCB55A5-563F-4039-1D79-1EB6ED8AAB82}.Debug|Win32.Build.0 = Debug|Win32 {6FCB55A5-563F-4039-1D79-1EB6ED8AAB82}.Debug|x64.ActiveCfg = Debug|x64 @@ -114,10 +116,10 @@ Global {6FCB55A5-563F-4039-1D79-1EB6ED8AAB82}.Release|Win32.Build.0 = Release|Win32 {6FCB55A5-563F-4039-1D79-1EB6ED8AAB82}.Release|x64.ActiveCfg = Release|x64 {6FCB55A5-563F-4039-1D79-1EB6ED8AAB82}.Release|x64.Build.0 = Release|x64 - {7047EE97-7F80-A70D-6147-BC11102DB6F4}.Debug - MemLeak|Win32.ActiveCfg = Debug - MemLeak|Win32 - {7047EE97-7F80-A70D-6147-BC11102DB6F4}.Debug - MemLeak|Win32.Build.0 = Debug - MemLeak|Win32 - {7047EE97-7F80-A70D-6147-BC11102DB6F4}.Debug - MemLeak|x64.ActiveCfg = Debug - MemLeak|x64 - {7047EE97-7F80-A70D-6147-BC11102DB6F4}.Debug - MemLeak|x64.Build.0 = Debug - MemLeak|x64 + {7047EE97-7F80-A70D-6147-BC11102DB6F4}.Debug - MemLeak|Win32.ActiveCfg = Debug|Win32 + {7047EE97-7F80-A70D-6147-BC11102DB6F4}.Debug - MemLeak|Win32.Build.0 = Debug|Win32 + {7047EE97-7F80-A70D-6147-BC11102DB6F4}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 + {7047EE97-7F80-A70D-6147-BC11102DB6F4}.Debug - MemLeak|x64.Build.0 = Debug|x64 {7047EE97-7F80-A70D-6147-BC11102DB6F4}.Debug|Win32.ActiveCfg = Debug|Win32 {7047EE97-7F80-A70D-6147-BC11102DB6F4}.Debug|Win32.Build.0 = Debug|Win32 {7047EE97-7F80-A70D-6147-BC11102DB6F4}.Debug|x64.ActiveCfg = Debug|x64 @@ -126,10 +128,10 @@ Global {7047EE97-7F80-A70D-6147-BC11102DB6F4}.Release|Win32.Build.0 = Release|Win32 {7047EE97-7F80-A70D-6147-BC11102DB6F4}.Release|x64.ActiveCfg = Release|x64 {7047EE97-7F80-A70D-6147-BC11102DB6F4}.Release|x64.Build.0 = Release|x64 - {3111D679-7796-23C4-BA0C-271F1145DA24}.Debug - MemLeak|Win32.ActiveCfg = Debug - MemLeak|Win32 - {3111D679-7796-23C4-BA0C-271F1145DA24}.Debug - MemLeak|Win32.Build.0 = Debug - MemLeak|Win32 - {3111D679-7796-23C4-BA0C-271F1145DA24}.Debug - MemLeak|x64.ActiveCfg = Debug - MemLeak|x64 - {3111D679-7796-23C4-BA0C-271F1145DA24}.Debug - MemLeak|x64.Build.0 = Debug - MemLeak|x64 + 
{3111D679-7796-23C4-BA0C-271F1145DA24}.Debug - MemLeak|Win32.ActiveCfg = Debug|Win32 + {3111D679-7796-23C4-BA0C-271F1145DA24}.Debug - MemLeak|Win32.Build.0 = Debug|Win32 + {3111D679-7796-23C4-BA0C-271F1145DA24}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 + {3111D679-7796-23C4-BA0C-271F1145DA24}.Debug - MemLeak|x64.Build.0 = Debug|x64 {3111D679-7796-23C4-BA0C-271F1145DA24}.Debug|Win32.ActiveCfg = Debug|Win32 {3111D679-7796-23C4-BA0C-271F1145DA24}.Debug|Win32.Build.0 = Debug|Win32 {3111D679-7796-23C4-BA0C-271F1145DA24}.Debug|x64.ActiveCfg = Debug|x64 @@ -138,10 +140,10 @@ Global {3111D679-7796-23C4-BA0C-271F1145DA24}.Release|Win32.Build.0 = Release|Win32 {3111D679-7796-23C4-BA0C-271F1145DA24}.Release|x64.ActiveCfg = Release|x64 {3111D679-7796-23C4-BA0C-271F1145DA24}.Release|x64.Build.0 = Release|x64 - {067D9406-2A93-DACA-9449-93A2D356357D}.Debug - MemLeak|Win32.ActiveCfg = Debug - MemLeak|Win32 - {067D9406-2A93-DACA-9449-93A2D356357D}.Debug - MemLeak|Win32.Build.0 = Debug - MemLeak|Win32 - {067D9406-2A93-DACA-9449-93A2D356357D}.Debug - MemLeak|x64.ActiveCfg = Debug - MemLeak|x64 - {067D9406-2A93-DACA-9449-93A2D356357D}.Debug - MemLeak|x64.Build.0 = Debug - MemLeak|x64 + {067D9406-2A93-DACA-9449-93A2D356357D}.Debug - MemLeak|Win32.ActiveCfg = Debug|Win32 + {067D9406-2A93-DACA-9449-93A2D356357D}.Debug - MemLeak|Win32.Build.0 = Debug|Win32 + {067D9406-2A93-DACA-9449-93A2D356357D}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 + {067D9406-2A93-DACA-9449-93A2D356357D}.Debug - MemLeak|x64.Build.0 = Debug|x64 {067D9406-2A93-DACA-9449-93A2D356357D}.Debug|Win32.ActiveCfg = Debug|Win32 {067D9406-2A93-DACA-9449-93A2D356357D}.Debug|Win32.Build.0 = Debug|Win32 {067D9406-2A93-DACA-9449-93A2D356357D}.Debug|x64.ActiveCfg = Debug|x64 @@ -150,10 +152,10 @@ Global {067D9406-2A93-DACA-9449-93A2D356357D}.Release|Win32.Build.0 = Release|Win32 {067D9406-2A93-DACA-9449-93A2D356357D}.Release|x64.ActiveCfg = Release|x64 {067D9406-2A93-DACA-9449-93A2D356357D}.Release|x64.Build.0 = Release|x64 - 
{9ED1866B-D4AE-3440-24E4-7A9475B163B2}.Debug - MemLeak|Win32.ActiveCfg = Debug - MemLeak|Win32 - {9ED1866B-D4AE-3440-24E4-7A9475B163B2}.Debug - MemLeak|Win32.Build.0 = Debug - MemLeak|Win32 - {9ED1866B-D4AE-3440-24E4-7A9475B163B2}.Debug - MemLeak|x64.ActiveCfg = Debug - MemLeak|x64 - {9ED1866B-D4AE-3440-24E4-7A9475B163B2}.Debug - MemLeak|x64.Build.0 = Debug - MemLeak|x64 + {9ED1866B-D4AE-3440-24E4-7A9475B163B2}.Debug - MemLeak|Win32.ActiveCfg = Debug|Win32 + {9ED1866B-D4AE-3440-24E4-7A9475B163B2}.Debug - MemLeak|Win32.Build.0 = Debug|Win32 + {9ED1866B-D4AE-3440-24E4-7A9475B163B2}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 + {9ED1866B-D4AE-3440-24E4-7A9475B163B2}.Debug - MemLeak|x64.Build.0 = Debug|x64 {9ED1866B-D4AE-3440-24E4-7A9475B163B2}.Debug|Win32.ActiveCfg = Debug|Win32 {9ED1866B-D4AE-3440-24E4-7A9475B163B2}.Debug|Win32.Build.0 = Debug|Win32 {9ED1866B-D4AE-3440-24E4-7A9475B163B2}.Debug|x64.ActiveCfg = Debug|x64 @@ -162,10 +164,10 @@ Global {9ED1866B-D4AE-3440-24E4-7A9475B163B2}.Release|Win32.Build.0 = Release|Win32 {9ED1866B-D4AE-3440-24E4-7A9475B163B2}.Release|x64.ActiveCfg = Release|x64 {9ED1866B-D4AE-3440-24E4-7A9475B163B2}.Release|x64.Build.0 = Release|x64 - {99C9EB95-DB4C-1996-490E-5212EFBF07C3}.Debug - MemLeak|Win32.ActiveCfg = Debug - MemLeak|Win32 - {99C9EB95-DB4C-1996-490E-5212EFBF07C3}.Debug - MemLeak|Win32.Build.0 = Debug - MemLeak|Win32 - {99C9EB95-DB4C-1996-490E-5212EFBF07C3}.Debug - MemLeak|x64.ActiveCfg = Debug - MemLeak|x64 - {99C9EB95-DB4C-1996-490E-5212EFBF07C3}.Debug - MemLeak|x64.Build.0 = Debug - MemLeak|x64 + {99C9EB95-DB4C-1996-490E-5212EFBF07C3}.Debug - MemLeak|Win32.ActiveCfg = Debug|Win32 + {99C9EB95-DB4C-1996-490E-5212EFBF07C3}.Debug - MemLeak|Win32.Build.0 = Debug|Win32 + {99C9EB95-DB4C-1996-490E-5212EFBF07C3}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 + {99C9EB95-DB4C-1996-490E-5212EFBF07C3}.Debug - MemLeak|x64.Build.0 = Debug|x64 {99C9EB95-DB4C-1996-490E-5212EFBF07C3}.Debug|Win32.ActiveCfg = Debug|Win32 
{99C9EB95-DB4C-1996-490E-5212EFBF07C3}.Debug|Win32.Build.0 = Debug|Win32 {99C9EB95-DB4C-1996-490E-5212EFBF07C3}.Debug|x64.ActiveCfg = Debug|x64 @@ -174,10 +176,10 @@ Global {99C9EB95-DB4C-1996-490E-5212EFBF07C3}.Release|Win32.Build.0 = Release|Win32 {99C9EB95-DB4C-1996-490E-5212EFBF07C3}.Release|x64.ActiveCfg = Release|x64 {99C9EB95-DB4C-1996-490E-5212EFBF07C3}.Release|x64.Build.0 = Release|x64 - {6EDC3B79-D217-F11A-406F-F11D856493F9}.Debug - MemLeak|Win32.ActiveCfg = Debug - MemLeak|Win32 - {6EDC3B79-D217-F11A-406F-F11D856493F9}.Debug - MemLeak|Win32.Build.0 = Debug - MemLeak|Win32 - {6EDC3B79-D217-F11A-406F-F11D856493F9}.Debug - MemLeak|x64.ActiveCfg = Debug - MemLeak|x64 - {6EDC3B79-D217-F11A-406F-F11D856493F9}.Debug - MemLeak|x64.Build.0 = Debug - MemLeak|x64 + {6EDC3B79-D217-F11A-406F-F11D856493F9}.Debug - MemLeak|Win32.ActiveCfg = Debug|Win32 + {6EDC3B79-D217-F11A-406F-F11D856493F9}.Debug - MemLeak|Win32.Build.0 = Debug|Win32 + {6EDC3B79-D217-F11A-406F-F11D856493F9}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 + {6EDC3B79-D217-F11A-406F-F11D856493F9}.Debug - MemLeak|x64.Build.0 = Debug|x64 {6EDC3B79-D217-F11A-406F-F11D856493F9}.Debug|Win32.ActiveCfg = Debug|Win32 {6EDC3B79-D217-F11A-406F-F11D856493F9}.Debug|Win32.Build.0 = Debug|Win32 {6EDC3B79-D217-F11A-406F-F11D856493F9}.Debug|x64.ActiveCfg = Debug|x64 @@ -186,10 +188,10 @@ Global {6EDC3B79-D217-F11A-406F-F11D856493F9}.Release|Win32.Build.0 = Release|Win32 {6EDC3B79-D217-F11A-406F-F11D856493F9}.Release|x64.ActiveCfg = Release|x64 {6EDC3B79-D217-F11A-406F-F11D856493F9}.Release|x64.Build.0 = Release|x64 - {A9AC9CF5-8E6C-0BA2-0769-6E42EDB88E25}.Debug - MemLeak|Win32.ActiveCfg = Debug - MemLeak|Win32 - {A9AC9CF5-8E6C-0BA2-0769-6E42EDB88E25}.Debug - MemLeak|Win32.Build.0 = Debug - MemLeak|Win32 - {A9AC9CF5-8E6C-0BA2-0769-6E42EDB88E25}.Debug - MemLeak|x64.ActiveCfg = Debug - MemLeak|x64 - {A9AC9CF5-8E6C-0BA2-0769-6E42EDB88E25}.Debug - MemLeak|x64.Build.0 = Debug - MemLeak|x64 + 
{A9AC9CF5-8E6C-0BA2-0769-6E42EDB88E25}.Debug - MemLeak|Win32.ActiveCfg = Debug|Win32 + {A9AC9CF5-8E6C-0BA2-0769-6E42EDB88E25}.Debug - MemLeak|Win32.Build.0 = Debug|Win32 + {A9AC9CF5-8E6C-0BA2-0769-6E42EDB88E25}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 + {A9AC9CF5-8E6C-0BA2-0769-6E42EDB88E25}.Debug - MemLeak|x64.Build.0 = Debug|x64 {A9AC9CF5-8E6C-0BA2-0769-6E42EDB88E25}.Debug|Win32.ActiveCfg = Debug|Win32 {A9AC9CF5-8E6C-0BA2-0769-6E42EDB88E25}.Debug|Win32.Build.0 = Debug|Win32 {A9AC9CF5-8E6C-0BA2-0769-6E42EDB88E25}.Debug|x64.ActiveCfg = Debug|x64 @@ -198,10 +200,10 @@ Global {A9AC9CF5-8E6C-0BA2-0769-6E42EDB88E25}.Release|Win32.Build.0 = Release|Win32 {A9AC9CF5-8E6C-0BA2-0769-6E42EDB88E25}.Release|x64.ActiveCfg = Release|x64 {A9AC9CF5-8E6C-0BA2-0769-6E42EDB88E25}.Release|x64.Build.0 = Release|x64 - {CD478F02-7550-58A5-E085-CE4BC0C0AD23}.Debug - MemLeak|Win32.ActiveCfg = Debug - MemLeak|Win32 - {CD478F02-7550-58A5-E085-CE4BC0C0AD23}.Debug - MemLeak|Win32.Build.0 = Debug - MemLeak|Win32 - {CD478F02-7550-58A5-E085-CE4BC0C0AD23}.Debug - MemLeak|x64.ActiveCfg = Debug - MemLeak|x64 - {CD478F02-7550-58A5-E085-CE4BC0C0AD23}.Debug - MemLeak|x64.Build.0 = Debug - MemLeak|x64 + {CD478F02-7550-58A5-E085-CE4BC0C0AD23}.Debug - MemLeak|Win32.ActiveCfg = Debug|Win32 + {CD478F02-7550-58A5-E085-CE4BC0C0AD23}.Debug - MemLeak|Win32.Build.0 = Debug|Win32 + {CD478F02-7550-58A5-E085-CE4BC0C0AD23}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 + {CD478F02-7550-58A5-E085-CE4BC0C0AD23}.Debug - MemLeak|x64.Build.0 = Debug|x64 {CD478F02-7550-58A5-E085-CE4BC0C0AD23}.Debug|Win32.ActiveCfg = Debug|Win32 {CD478F02-7550-58A5-E085-CE4BC0C0AD23}.Debug|Win32.Build.0 = Debug|Win32 {CD478F02-7550-58A5-E085-CE4BC0C0AD23}.Debug|x64.ActiveCfg = Debug|x64 @@ -210,10 +212,10 @@ Global {CD478F02-7550-58A5-E085-CE4BC0C0AD23}.Release|Win32.Build.0 = Release|Win32 {CD478F02-7550-58A5-E085-CE4BC0C0AD23}.Release|x64.ActiveCfg = Release|x64 {CD478F02-7550-58A5-E085-CE4BC0C0AD23}.Release|x64.Build.0 = Release|x64 - 
{22B14659-C5B6-B775-868D-A49198FEAD4A}.Debug - MemLeak|Win32.ActiveCfg = Debug - MemLeak|Win32 - {22B14659-C5B6-B775-868D-A49198FEAD4A}.Debug - MemLeak|Win32.Build.0 = Debug - MemLeak|Win32 - {22B14659-C5B6-B775-868D-A49198FEAD4A}.Debug - MemLeak|x64.ActiveCfg = Debug - MemLeak|x64 - {22B14659-C5B6-B775-868D-A49198FEAD4A}.Debug - MemLeak|x64.Build.0 = Debug - MemLeak|x64 + {22B14659-C5B6-B775-868D-A49198FEAD4A}.Debug - MemLeak|Win32.ActiveCfg = Debug|Win32 + {22B14659-C5B6-B775-868D-A49198FEAD4A}.Debug - MemLeak|Win32.Build.0 = Debug|Win32 + {22B14659-C5B6-B775-868D-A49198FEAD4A}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 + {22B14659-C5B6-B775-868D-A49198FEAD4A}.Debug - MemLeak|x64.Build.0 = Debug|x64 {22B14659-C5B6-B775-868D-A49198FEAD4A}.Debug|Win32.ActiveCfg = Debug|Win32 {22B14659-C5B6-B775-868D-A49198FEAD4A}.Debug|Win32.Build.0 = Debug|Win32 {22B14659-C5B6-B775-868D-A49198FEAD4A}.Debug|x64.ActiveCfg = Debug|x64 @@ -222,10 +224,10 @@ Global {22B14659-C5B6-B775-868D-A49198FEAD4A}.Release|Win32.Build.0 = Release|Win32 {22B14659-C5B6-B775-868D-A49198FEAD4A}.Release|x64.ActiveCfg = Release|x64 {22B14659-C5B6-B775-868D-A49198FEAD4A}.Release|x64.Build.0 = Release|x64 - {FAF0CB93-F7CE-A6B8-8342-19CE99BAF774}.Debug - MemLeak|Win32.ActiveCfg = Debug - MemLeak|Win32 - {FAF0CB93-F7CE-A6B8-8342-19CE99BAF774}.Debug - MemLeak|Win32.Build.0 = Debug - MemLeak|Win32 - {FAF0CB93-F7CE-A6B8-8342-19CE99BAF774}.Debug - MemLeak|x64.ActiveCfg = Debug - MemLeak|x64 - {FAF0CB93-F7CE-A6B8-8342-19CE99BAF774}.Debug - MemLeak|x64.Build.0 = Debug - MemLeak|x64 + {FAF0CB93-F7CE-A6B8-8342-19CE99BAF774}.Debug - MemLeak|Win32.ActiveCfg = Debug|Win32 + {FAF0CB93-F7CE-A6B8-8342-19CE99BAF774}.Debug - MemLeak|Win32.Build.0 = Debug|Win32 + {FAF0CB93-F7CE-A6B8-8342-19CE99BAF774}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 + {FAF0CB93-F7CE-A6B8-8342-19CE99BAF774}.Debug - MemLeak|x64.Build.0 = Debug|x64 {FAF0CB93-F7CE-A6B8-8342-19CE99BAF774}.Debug|Win32.ActiveCfg = Debug|Win32 
{FAF0CB93-F7CE-A6B8-8342-19CE99BAF774}.Debug|Win32.Build.0 = Debug|Win32 {FAF0CB93-F7CE-A6B8-8342-19CE99BAF774}.Debug|x64.ActiveCfg = Debug|x64 @@ -234,10 +236,10 @@ Global {FAF0CB93-F7CE-A6B8-8342-19CE99BAF774}.Release|Win32.Build.0 = Release|Win32 {FAF0CB93-F7CE-A6B8-8342-19CE99BAF774}.Release|x64.ActiveCfg = Release|x64 {FAF0CB93-F7CE-A6B8-8342-19CE99BAF774}.Release|x64.Build.0 = Release|x64 - {46333DC3-B4A5-3DCC-E8BF-A3F20ADC56D2}.Debug - MemLeak|Win32.ActiveCfg = Debug - MemLeak|Win32 - {46333DC3-B4A5-3DCC-E8BF-A3F20ADC56D2}.Debug - MemLeak|Win32.Build.0 = Debug - MemLeak|Win32 - {46333DC3-B4A5-3DCC-E8BF-A3F20ADC56D2}.Debug - MemLeak|x64.ActiveCfg = Debug - MemLeak|x64 - {46333DC3-B4A5-3DCC-E8BF-A3F20ADC56D2}.Debug - MemLeak|x64.Build.0 = Debug - MemLeak|x64 + {46333DC3-B4A5-3DCC-E8BF-A3F20ADC56D2}.Debug - MemLeak|Win32.ActiveCfg = Debug|Win32 + {46333DC3-B4A5-3DCC-E8BF-A3F20ADC56D2}.Debug - MemLeak|Win32.Build.0 = Debug|Win32 + {46333DC3-B4A5-3DCC-E8BF-A3F20ADC56D2}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 + {46333DC3-B4A5-3DCC-E8BF-A3F20ADC56D2}.Debug - MemLeak|x64.Build.0 = Debug|x64 {46333DC3-B4A5-3DCC-E8BF-A3F20ADC56D2}.Debug|Win32.ActiveCfg = Debug|Win32 {46333DC3-B4A5-3DCC-E8BF-A3F20ADC56D2}.Debug|Win32.Build.0 = Debug|Win32 {46333DC3-B4A5-3DCC-E8BF-A3F20ADC56D2}.Debug|x64.ActiveCfg = Debug|x64 @@ -246,10 +248,10 @@ Global {46333DC3-B4A5-3DCC-E8BF-A3F20ADC56D2}.Release|Win32.Build.0 = Release|Win32 {46333DC3-B4A5-3DCC-E8BF-A3F20ADC56D2}.Release|x64.ActiveCfg = Release|x64 {46333DC3-B4A5-3DCC-E8BF-A3F20ADC56D2}.Release|x64.Build.0 = Release|x64 - {5C363C34-4741-7036-861C-2E2279CF552E}.Debug - MemLeak|Win32.ActiveCfg = Debug - MemLeak|Win32 - {5C363C34-4741-7036-861C-2E2279CF552E}.Debug - MemLeak|Win32.Build.0 = Debug - MemLeak|Win32 - {5C363C34-4741-7036-861C-2E2279CF552E}.Debug - MemLeak|x64.ActiveCfg = Debug - MemLeak|x64 - {5C363C34-4741-7036-861C-2E2279CF552E}.Debug - MemLeak|x64.Build.0 = Debug - MemLeak|x64 + 
{5C363C34-4741-7036-861C-2E2279CF552E}.Debug - MemLeak|Win32.ActiveCfg = Debug|Win32 + {5C363C34-4741-7036-861C-2E2279CF552E}.Debug - MemLeak|Win32.Build.0 = Debug|Win32 + {5C363C34-4741-7036-861C-2E2279CF552E}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 + {5C363C34-4741-7036-861C-2E2279CF552E}.Debug - MemLeak|x64.Build.0 = Debug|x64 {5C363C34-4741-7036-861C-2E2279CF552E}.Debug|Win32.ActiveCfg = Debug|Win32 {5C363C34-4741-7036-861C-2E2279CF552E}.Debug|Win32.Build.0 = Debug|Win32 {5C363C34-4741-7036-861C-2E2279CF552E}.Debug|x64.ActiveCfg = Debug|x64 @@ -258,10 +260,10 @@ Global {5C363C34-4741-7036-861C-2E2279CF552E}.Release|Win32.Build.0 = Release|Win32 {5C363C34-4741-7036-861C-2E2279CF552E}.Release|x64.ActiveCfg = Release|x64 {5C363C34-4741-7036-861C-2E2279CF552E}.Release|x64.Build.0 = Release|x64 - {76169FE8-0814-4F36-6409-699EF1A23001}.Debug - MemLeak|Win32.ActiveCfg = Debug - MemLeak|Win32 - {76169FE8-0814-4F36-6409-699EF1A23001}.Debug - MemLeak|Win32.Build.0 = Debug - MemLeak|Win32 - {76169FE8-0814-4F36-6409-699EF1A23001}.Debug - MemLeak|x64.ActiveCfg = Debug - MemLeak|x64 - {76169FE8-0814-4F36-6409-699EF1A23001}.Debug - MemLeak|x64.Build.0 = Debug - MemLeak|x64 + {76169FE8-0814-4F36-6409-699EF1A23001}.Debug - MemLeak|Win32.ActiveCfg = Debug|Win32 + {76169FE8-0814-4F36-6409-699EF1A23001}.Debug - MemLeak|Win32.Build.0 = Debug|Win32 + {76169FE8-0814-4F36-6409-699EF1A23001}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 + {76169FE8-0814-4F36-6409-699EF1A23001}.Debug - MemLeak|x64.Build.0 = Debug|x64 {76169FE8-0814-4F36-6409-699EF1A23001}.Debug|Win32.ActiveCfg = Debug|Win32 {76169FE8-0814-4F36-6409-699EF1A23001}.Debug|Win32.Build.0 = Debug|Win32 {76169FE8-0814-4F36-6409-699EF1A23001}.Debug|x64.ActiveCfg = Debug|x64 @@ -270,10 +272,10 @@ Global {76169FE8-0814-4F36-6409-699EF1A23001}.Release|Win32.Build.0 = Release|Win32 {76169FE8-0814-4F36-6409-699EF1A23001}.Release|x64.ActiveCfg = Release|x64 {76169FE8-0814-4F36-6409-699EF1A23001}.Release|x64.Build.0 = Release|x64 - 
{949C6DB8-E638-6EC6-AB31-BCCFD1379E01}.Debug - MemLeak|Win32.ActiveCfg = Debug - MemLeak|Win32 - {949C6DB8-E638-6EC6-AB31-BCCFD1379E01}.Debug - MemLeak|Win32.Build.0 = Debug - MemLeak|Win32 - {949C6DB8-E638-6EC6-AB31-BCCFD1379E01}.Debug - MemLeak|x64.ActiveCfg = Debug - MemLeak|x64 - {949C6DB8-E638-6EC6-AB31-BCCFD1379E01}.Debug - MemLeak|x64.Build.0 = Debug - MemLeak|x64 + {949C6DB8-E638-6EC6-AB31-BCCFD1379E01}.Debug - MemLeak|Win32.ActiveCfg = Debug|Win32 + {949C6DB8-E638-6EC6-AB31-BCCFD1379E01}.Debug - MemLeak|Win32.Build.0 = Debug|Win32 + {949C6DB8-E638-6EC6-AB31-BCCFD1379E01}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 + {949C6DB8-E638-6EC6-AB31-BCCFD1379E01}.Debug - MemLeak|x64.Build.0 = Debug|x64 {949C6DB8-E638-6EC6-AB31-BCCFD1379E01}.Debug|Win32.ActiveCfg = Debug|Win32 {949C6DB8-E638-6EC6-AB31-BCCFD1379E01}.Debug|Win32.Build.0 = Debug|Win32 {949C6DB8-E638-6EC6-AB31-BCCFD1379E01}.Debug|x64.ActiveCfg = Debug|x64 @@ -282,10 +284,10 @@ Global {949C6DB8-E638-6EC6-AB31-BCCFD1379E01}.Release|Win32.Build.0 = Release|Win32 {949C6DB8-E638-6EC6-AB31-BCCFD1379E01}.Release|x64.ActiveCfg = Release|x64 {949C6DB8-E638-6EC6-AB31-BCCFD1379E01}.Release|x64.Build.0 = Release|x64 - {B87216CD-6C64-1DB0-D900-BC6E745C1DF9}.Debug - MemLeak|Win32.ActiveCfg = Debug - MemLeak|Win32 - {B87216CD-6C64-1DB0-D900-BC6E745C1DF9}.Debug - MemLeak|Win32.Build.0 = Debug - MemLeak|Win32 - {B87216CD-6C64-1DB0-D900-BC6E745C1DF9}.Debug - MemLeak|x64.ActiveCfg = Debug - MemLeak|x64 - {B87216CD-6C64-1DB0-D900-BC6E745C1DF9}.Debug - MemLeak|x64.Build.0 = Debug - MemLeak|x64 + {B87216CD-6C64-1DB0-D900-BC6E745C1DF9}.Debug - MemLeak|Win32.ActiveCfg = Debug|Win32 + {B87216CD-6C64-1DB0-D900-BC6E745C1DF9}.Debug - MemLeak|Win32.Build.0 = Debug|Win32 + {B87216CD-6C64-1DB0-D900-BC6E745C1DF9}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 + {B87216CD-6C64-1DB0-D900-BC6E745C1DF9}.Debug - MemLeak|x64.Build.0 = Debug|x64 {B87216CD-6C64-1DB0-D900-BC6E745C1DF9}.Debug|Win32.ActiveCfg = Debug|Win32 
{B87216CD-6C64-1DB0-D900-BC6E745C1DF9}.Debug|Win32.Build.0 = Debug|Win32 {B87216CD-6C64-1DB0-D900-BC6E745C1DF9}.Debug|x64.ActiveCfg = Debug|x64 @@ -294,10 +296,10 @@ Global {B87216CD-6C64-1DB0-D900-BC6E745C1DF9}.Release|Win32.Build.0 = Release|Win32 {B87216CD-6C64-1DB0-D900-BC6E745C1DF9}.Release|x64.ActiveCfg = Release|x64 {B87216CD-6C64-1DB0-D900-BC6E745C1DF9}.Release|x64.Build.0 = Release|x64 - {AFF2C68B-B867-DD50-6AC5-74B09D41F8EA}.Debug - MemLeak|Win32.ActiveCfg = Debug - MemLeak|Win32 - {AFF2C68B-B867-DD50-6AC5-74B09D41F8EA}.Debug - MemLeak|Win32.Build.0 = Debug - MemLeak|Win32 - {AFF2C68B-B867-DD50-6AC5-74B09D41F8EA}.Debug - MemLeak|x64.ActiveCfg = Debug - MemLeak|x64 - {AFF2C68B-B867-DD50-6AC5-74B09D41F8EA}.Debug - MemLeak|x64.Build.0 = Debug - MemLeak|x64 + {AFF2C68B-B867-DD50-6AC5-74B09D41F8EA}.Debug - MemLeak|Win32.ActiveCfg = Debug|Win32 + {AFF2C68B-B867-DD50-6AC5-74B09D41F8EA}.Debug - MemLeak|Win32.Build.0 = Debug|Win32 + {AFF2C68B-B867-DD50-6AC5-74B09D41F8EA}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 + {AFF2C68B-B867-DD50-6AC5-74B09D41F8EA}.Debug - MemLeak|x64.Build.0 = Debug|x64 {AFF2C68B-B867-DD50-6AC5-74B09D41F8EA}.Debug|Win32.ActiveCfg = Debug|Win32 {AFF2C68B-B867-DD50-6AC5-74B09D41F8EA}.Debug|Win32.Build.0 = Debug|Win32 {AFF2C68B-B867-DD50-6AC5-74B09D41F8EA}.Debug|x64.ActiveCfg = Debug|x64 @@ -306,10 +308,10 @@ Global {AFF2C68B-B867-DD50-6AC5-74B09D41F8EA}.Release|Win32.Build.0 = Release|Win32 {AFF2C68B-B867-DD50-6AC5-74B09D41F8EA}.Release|x64.ActiveCfg = Release|x64 {AFF2C68B-B867-DD50-6AC5-74B09D41F8EA}.Release|x64.Build.0 = Release|x64 - {6FDC76D5-CB44-B9F8-5EF6-C59B020719DF}.Debug - MemLeak|Win32.ActiveCfg = Debug - MemLeak|Win32 - {6FDC76D5-CB44-B9F8-5EF6-C59B020719DF}.Debug - MemLeak|Win32.Build.0 = Debug - MemLeak|Win32 - {6FDC76D5-CB44-B9F8-5EF6-C59B020719DF}.Debug - MemLeak|x64.ActiveCfg = Debug - MemLeak|x64 - {6FDC76D5-CB44-B9F8-5EF6-C59B020719DF}.Debug - MemLeak|x64.Build.0 = Debug - MemLeak|x64 + 
{6FDC76D5-CB44-B9F8-5EF6-C59B020719DF}.Debug - MemLeak|Win32.ActiveCfg = Debug|Win32 + {6FDC76D5-CB44-B9F8-5EF6-C59B020719DF}.Debug - MemLeak|Win32.Build.0 = Debug|Win32 + {6FDC76D5-CB44-B9F8-5EF6-C59B020719DF}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 + {6FDC76D5-CB44-B9F8-5EF6-C59B020719DF}.Debug - MemLeak|x64.Build.0 = Debug|x64 {6FDC76D5-CB44-B9F8-5EF6-C59B020719DF}.Debug|Win32.ActiveCfg = Debug|Win32 {6FDC76D5-CB44-B9F8-5EF6-C59B020719DF}.Debug|Win32.Build.0 = Debug|Win32 {6FDC76D5-CB44-B9F8-5EF6-C59B020719DF}.Debug|x64.ActiveCfg = Debug|x64 @@ -318,10 +320,10 @@ Global {6FDC76D5-CB44-B9F8-5EF6-C59B020719DF}.Release|Win32.Build.0 = Release|Win32 {6FDC76D5-CB44-B9F8-5EF6-C59B020719DF}.Release|x64.ActiveCfg = Release|x64 {6FDC76D5-CB44-B9F8-5EF6-C59B020719DF}.Release|x64.Build.0 = Release|x64 - {8BECCA95-C7D7-CFF8-FDB1-4950E9F8E8E6}.Debug - MemLeak|Win32.ActiveCfg = Debug - MemLeak|Win32 - {8BECCA95-C7D7-CFF8-FDB1-4950E9F8E8E6}.Debug - MemLeak|Win32.Build.0 = Debug - MemLeak|Win32 - {8BECCA95-C7D7-CFF8-FDB1-4950E9F8E8E6}.Debug - MemLeak|x64.ActiveCfg = Debug - MemLeak|x64 - {8BECCA95-C7D7-CFF8-FDB1-4950E9F8E8E6}.Debug - MemLeak|x64.Build.0 = Debug - MemLeak|x64 + {8BECCA95-C7D7-CFF8-FDB1-4950E9F8E8E6}.Debug - MemLeak|Win32.ActiveCfg = Debug|Win32 + {8BECCA95-C7D7-CFF8-FDB1-4950E9F8E8E6}.Debug - MemLeak|Win32.Build.0 = Debug|Win32 + {8BECCA95-C7D7-CFF8-FDB1-4950E9F8E8E6}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 + {8BECCA95-C7D7-CFF8-FDB1-4950E9F8E8E6}.Debug - MemLeak|x64.Build.0 = Debug|x64 {8BECCA95-C7D7-CFF8-FDB1-4950E9F8E8E6}.Debug|Win32.ActiveCfg = Debug|Win32 {8BECCA95-C7D7-CFF8-FDB1-4950E9F8E8E6}.Debug|Win32.Build.0 = Debug|Win32 {8BECCA95-C7D7-CFF8-FDB1-4950E9F8E8E6}.Debug|x64.ActiveCfg = Debug|x64 @@ -330,10 +332,10 @@ Global {8BECCA95-C7D7-CFF8-FDB1-4950E9F8E8E6}.Release|Win32.Build.0 = Release|Win32 {8BECCA95-C7D7-CFF8-FDB1-4950E9F8E8E6}.Release|x64.ActiveCfg = Release|x64 {8BECCA95-C7D7-CFF8-FDB1-4950E9F8E8E6}.Release|x64.Build.0 = Release|x64 - 
{87B42A9C-3F5C-53D7-9017-2B1CAE39457D}.Debug - MemLeak|Win32.ActiveCfg = Debug - MemLeak|Win32 - {87B42A9C-3F5C-53D7-9017-2B1CAE39457D}.Debug - MemLeak|Win32.Build.0 = Debug - MemLeak|Win32 - {87B42A9C-3F5C-53D7-9017-2B1CAE39457D}.Debug - MemLeak|x64.ActiveCfg = Debug - MemLeak|x64 - {87B42A9C-3F5C-53D7-9017-2B1CAE39457D}.Debug - MemLeak|x64.Build.0 = Debug - MemLeak|x64 + {87B42A9C-3F5C-53D7-9017-2B1CAE39457D}.Debug - MemLeak|Win32.ActiveCfg = Debug|Win32 + {87B42A9C-3F5C-53D7-9017-2B1CAE39457D}.Debug - MemLeak|Win32.Build.0 = Debug|Win32 + {87B42A9C-3F5C-53D7-9017-2B1CAE39457D}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 + {87B42A9C-3F5C-53D7-9017-2B1CAE39457D}.Debug - MemLeak|x64.Build.0 = Debug|x64 {87B42A9C-3F5C-53D7-9017-2B1CAE39457D}.Debug|Win32.ActiveCfg = Debug|Win32 {87B42A9C-3F5C-53D7-9017-2B1CAE39457D}.Debug|Win32.Build.0 = Debug|Win32 {87B42A9C-3F5C-53D7-9017-2B1CAE39457D}.Debug|x64.ActiveCfg = Debug|x64 @@ -342,10 +344,10 @@ Global {87B42A9C-3F5C-53D7-9017-2B1CAE39457D}.Release|Win32.Build.0 = Release|Win32 {87B42A9C-3F5C-53D7-9017-2B1CAE39457D}.Release|x64.ActiveCfg = Release|x64 {87B42A9C-3F5C-53D7-9017-2B1CAE39457D}.Release|x64.Build.0 = Release|x64 - {23E1C437-A951-5943-8639-A17F3CF2E606}.Debug - MemLeak|Win32.ActiveCfg = Debug - MemLeak|Win32 - {23E1C437-A951-5943-8639-A17F3CF2E606}.Debug - MemLeak|Win32.Build.0 = Debug - MemLeak|Win32 - {23E1C437-A951-5943-8639-A17F3CF2E606}.Debug - MemLeak|x64.ActiveCfg = Debug - MemLeak|x64 - {23E1C437-A951-5943-8639-A17F3CF2E606}.Debug - MemLeak|x64.Build.0 = Debug - MemLeak|x64 + {23E1C437-A951-5943-8639-A17F3CF2E606}.Debug - MemLeak|Win32.ActiveCfg = Debug|Win32 + {23E1C437-A951-5943-8639-A17F3CF2E606}.Debug - MemLeak|Win32.Build.0 = Debug|Win32 + {23E1C437-A951-5943-8639-A17F3CF2E606}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 + {23E1C437-A951-5943-8639-A17F3CF2E606}.Debug - MemLeak|x64.Build.0 = Debug|x64 {23E1C437-A951-5943-8639-A17F3CF2E606}.Debug|Win32.ActiveCfg = Debug|Win32 
{23E1C437-A951-5943-8639-A17F3CF2E606}.Debug|Win32.Build.0 = Debug|Win32 {23E1C437-A951-5943-8639-A17F3CF2E606}.Debug|x64.ActiveCfg = Debug|x64 @@ -354,10 +356,10 @@ Global {23E1C437-A951-5943-8639-A17F3CF2E606}.Release|Win32.Build.0 = Release|Win32 {23E1C437-A951-5943-8639-A17F3CF2E606}.Release|x64.ActiveCfg = Release|x64 {23E1C437-A951-5943-8639-A17F3CF2E606}.Release|x64.Build.0 = Release|x64 - {74827EBD-93DC-5110-BA95-3F2AB029B6B0}.Debug - MemLeak|Win32.ActiveCfg = Debug - MemLeak|Win32 - {74827EBD-93DC-5110-BA95-3F2AB029B6B0}.Debug - MemLeak|Win32.Build.0 = Debug - MemLeak|Win32 - {74827EBD-93DC-5110-BA95-3F2AB029B6B0}.Debug - MemLeak|x64.ActiveCfg = Debug - MemLeak|x64 - {74827EBD-93DC-5110-BA95-3F2AB029B6B0}.Debug - MemLeak|x64.Build.0 = Debug - MemLeak|x64 + {74827EBD-93DC-5110-BA95-3F2AB029B6B0}.Debug - MemLeak|Win32.ActiveCfg = Debug|Win32 + {74827EBD-93DC-5110-BA95-3F2AB029B6B0}.Debug - MemLeak|Win32.Build.0 = Debug|Win32 + {74827EBD-93DC-5110-BA95-3F2AB029B6B0}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 + {74827EBD-93DC-5110-BA95-3F2AB029B6B0}.Debug - MemLeak|x64.Build.0 = Debug|x64 {74827EBD-93DC-5110-BA95-3F2AB029B6B0}.Debug|Win32.ActiveCfg = Debug|Win32 {74827EBD-93DC-5110-BA95-3F2AB029B6B0}.Debug|Win32.Build.0 = Debug|Win32 {74827EBD-93DC-5110-BA95-3F2AB029B6B0}.Debug|x64.ActiveCfg = Debug|x64 @@ -366,6 +368,16 @@ Global {74827EBD-93DC-5110-BA95-3F2AB029B6B0}.Release|Win32.Build.0 = Release|Win32 {74827EBD-93DC-5110-BA95-3F2AB029B6B0}.Release|x64.ActiveCfg = Release|x64 {74827EBD-93DC-5110-BA95-3F2AB029B6B0}.Release|x64.Build.0 = Release|x64 + {AC40FF01-426E-4838-A317-66354CEFAE88}.Debug - MemLeak|Win32.ActiveCfg = Debug|Win32 + {AC40FF01-426E-4838-A317-66354CEFAE88}.Debug - MemLeak|x64.ActiveCfg = Debug|x64 + {AC40FF01-426E-4838-A317-66354CEFAE88}.Debug|Win32.ActiveCfg = Debug|Win32 + {AC40FF01-426E-4838-A317-66354CEFAE88}.Debug|Win32.Build.0 = Debug|Win32 + {AC40FF01-426E-4838-A317-66354CEFAE88}.Debug|x64.ActiveCfg = Debug|x64 + 
{AC40FF01-426E-4838-A317-66354CEFAE88}.Debug|x64.Build.0 = Debug|x64 + {AC40FF01-426E-4838-A317-66354CEFAE88}.Release|Win32.ActiveCfg = Release|Win32 + {AC40FF01-426E-4838-A317-66354CEFAE88}.Release|Win32.Build.0 = Release|Win32 + {AC40FF01-426E-4838-A317-66354CEFAE88}.Release|x64.ActiveCfg = Release|x64 + {AC40FF01-426E-4838-A317-66354CEFAE88}.Release|x64.Build.0 = Release|x64 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/rpcs3/Emu/ARMv7/ARMv7Decoder.h b/rpcs3/Emu/ARMv7/ARMv7Decoder.h index 71f37e3b38..b19bf94cd3 100644 --- a/rpcs3/Emu/ARMv7/ARMv7Decoder.h +++ b/rpcs3/Emu/ARMv7/ARMv7Decoder.h @@ -59,7 +59,7 @@ public: u8 I1 = 1 - (J1 ^ S); u8 I2 = 1 - (J2 ^ S); u16 imm11 = code1 & 0x7ff; - u32 imm32; + u32 imm32 = 0; switch(code1 >> 14) { diff --git a/rpcs3/Emu/ARMv7/ARMv7Interpreter.h b/rpcs3/Emu/ARMv7/ARMv7Interpreter.h index a11144e9b6..9aabb473fc 100644 --- a/rpcs3/Emu/ARMv7/ARMv7Interpreter.h +++ b/rpcs3/Emu/ARMv7/ARMv7Interpreter.h @@ -95,7 +95,7 @@ public: SRType DecodeImmShift(u8 type, u8 imm5, uint* shift_n) { - SRType shift_t; + SRType shift_t = SRType_None; switch(type) { @@ -119,7 +119,7 @@ public: SRType DecodeRegShift(u8 type) { - SRType shift_t; + SRType shift_t = SRType_None; switch(type) { @@ -235,7 +235,7 @@ public: bool ConditionPassed(u8 cond) { - bool result; + bool result = false; switch(cond >> 1) { diff --git a/rpcs3/Emu/Cell/PPCDecoder.h b/rpcs3/Emu/Cell/PPCDecoder.h index 866d16a990..8c1d55f405 100644 --- a/rpcs3/Emu/Cell/PPCDecoder.h +++ b/rpcs3/Emu/Cell/PPCDecoder.h @@ -14,19 +14,19 @@ public: template -static InstrList<1 << CodeField::size, TO>* new_list(const CodeField& func, InstrCaller* error_func = nullptr) +static InstrList<(1 << (CodeField::size)), TO>* new_list(const CodeField& func, InstrCaller* error_func = nullptr) { - return new InstrList<1 << CodeField::size, TO>(func, error_func); + return new InstrList<(1 << (CodeField::size)), TO>(func, error_func); } template 
-static InstrList<1 << CodeField::size, TO>* new_list(InstrList* parent, int opcode, const CodeField& func, InstrCaller* error_func = nullptr) +static InstrList<(1 << (CodeField::size)), TO>* new_list(InstrList* parent, int opcode, const CodeField& func, InstrCaller* error_func = nullptr) { - return connect_list(parent, new InstrList<1 << CodeField::size, TO>(func, error_func), opcode); + return connect_list(parent, new InstrList<(1 << (CodeField::size)), TO>(func, error_func), opcode); } template -static InstrList<1 << CodeField::size, TO>* new_list(InstrList* parent, const CodeField& func, InstrCaller* error_func = nullptr) +static InstrList<(1 << (CodeField::size)), TO>* new_list(InstrList* parent, const CodeField& func, InstrCaller* error_func = nullptr) { - return connect_list(parent, new InstrList<1 << CodeField::size, TO>(func, error_func)); + return connect_list(parent, new InstrList<(1 << (CodeField::size)), TO>(func, error_func)); } \ No newline at end of file diff --git a/rpcs3/Emu/Cell/PPUInterpreter.h b/rpcs3/Emu/Cell/PPUInterpreter.h index 6613a70109..c049a7e95a 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.h +++ b/rpcs3/Emu/Cell/PPUInterpreter.h @@ -181,7 +181,7 @@ private: CPU.VSCR.VSCR = CPU.VPR[vb]._u32[0]; CPU.VSCR.X = CPU.VSCR.Y = 0; } - void VADDCUW(u32 vd, u32 va, u32 vb) + void VADDCUW(u32 vd, u32 va, u32 vb) //nf { for (uint w = 0; w < 4; w++) { @@ -195,7 +195,7 @@ private: CPU.VPR[vd]._f[w] = CPU.VPR[va]._f[w] + CPU.VPR[vb]._f[w]; } } - void VADDSBS(u32 vd, u32 va, u32 vb) + void VADDSBS(u32 vd, u32 va, u32 vb) //nf { for(u32 b=0; b<16; ++b) { @@ -235,7 +235,7 @@ private: CPU.VPR[vd]._s16[h] = result; } } - void VADDSWS(u32 vd, u32 va, u32 vb) + void VADDSWS(u32 vd, u32 va, u32 vb) //nf { for (uint w = 0; w < 4; w++) { @@ -335,21 +335,21 @@ private: CPU.VPR[vd]._u32[w] = CPU.VPR[va]._u32[w] & (~CPU.VPR[vb]._u32[w]); } } - void VAVGSB(u32 vd, u32 va, u32 vb) + void VAVGSB(u32 vd, u32 va, u32 vb) //nf { for (uint b = 0; b < 16; b++) { 
CPU.VPR[vd]._s8[b] = (CPU.VPR[va]._s8[b] + CPU.VPR[vb]._s8[b] + 1) >> 1; } } - void VAVGSH(u32 vd, u32 va, u32 vb) + void VAVGSH(u32 vd, u32 va, u32 vb) //nf { for (uint h = 0; h < 8; h++) { CPU.VPR[vd]._s16[h] = (CPU.VPR[va]._s16[h] + CPU.VPR[vb]._s16[h] + 1) >> 1; } } - void VAVGSW(u32 vd, u32 va, u32 vb) + void VAVGSW(u32 vd, u32 va, u32 vb) //nf { for (uint w = 0; w < 4; w++) { @@ -361,14 +361,14 @@ private: for (uint b = 0; b < 16; b++) CPU.VPR[vd]._u8[b] = (CPU.VPR[va]._u8[b] + CPU.VPR[vb]._u8[b] + 1) >> 1; } - void VAVGUH(u32 vd, u32 va, u32 vb) + void VAVGUH(u32 vd, u32 va, u32 vb) //nf { for (uint h = 0; h < 8; h++) { CPU.VPR[vd]._u16[h] = (CPU.VPR[va]._u16[h] + CPU.VPR[vb]._u16[h] + 1) >> 1; } } - void VAVGUW(u32 vd, u32 va, u32 vb) + void VAVGUW(u32 vd, u32 va, u32 vb) //nf { for (uint w = 0; w < 4; w++) { @@ -487,14 +487,14 @@ private: CPU.CR.cr6 = all_equal | none_equal; } - void VCMPEQUH(u32 vd, u32 va, u32 vb) + void VCMPEQUH(u32 vd, u32 va, u32 vb) //nf { for (uint h = 0; h < 8; h++) { CPU.VPR[vd]._u16[h] = CPU.VPR[va]._u16[h] == CPU.VPR[vb]._u16[h] ? 
0xffff : 0; } } - void VCMPEQUH_(u32 vd, u32 va, u32 vb) + void VCMPEQUH_(u32 vd, u32 va, u32 vb) //nf { int all_equal = 0x8; int none_equal = 0x2; @@ -599,7 +599,7 @@ private: CPU.CR.cr6 = all_ge | none_ge; } - void VCMPGTSB(u32 vd, u32 va, u32 vb) + void VCMPGTSB(u32 vd, u32 va, u32 vb) //nf { for (uint b = 0; b < 16; b++) { @@ -833,7 +833,7 @@ private: CPU.VPR[vd]._f[w] = max(CPU.VPR[va]._f[w], CPU.VPR[vb]._f[w]); } } - void VMAXSB(u32 vd, u32 va, u32 vb) + void VMAXSB(u32 vd, u32 va, u32 vb) //nf { for (uint b = 0; b < 16; b++) CPU.VPR[vd]._s8[b] = max(CPU.VPR[va]._s8[b], CPU.VPR[vb]._s8[b]); @@ -918,7 +918,7 @@ private: CPU.VPR[vd]._f[w] = min(CPU.VPR[va]._f[w], CPU.VPR[vb]._f[w]); } } - void VMINSB(u32 vd, u32 va, u32 vb) + void VMINSB(u32 vd, u32 va, u32 vb) //nf { for (uint b = 0; b < 16; b++) { @@ -1021,7 +1021,7 @@ private: CPU.VPR[vd]._u32[3 - d*2 - 1] = CPU.VPR[vb]._u32[1 - d]; } } - void VMSUMMBM(u32 vd, u32 va, u32 vb, u32 vc) + void VMSUMMBM(u32 vd, u32 va, u32 vb, u32 vc) //nf { for (uint w = 0; w < 4; w++) { @@ -1036,7 +1036,7 @@ private: CPU.VPR[vd]._s32[w] = result; } } - void VMSUMSHM(u32 vd, u32 va, u32 vb, u32 vc) + void VMSUMSHM(u32 vd, u32 va, u32 vb, u32 vc) //nf { for (uint w = 0; w < 4; w++) { @@ -1051,7 +1051,7 @@ private: CPU.VPR[vd]._s32[w] = result; } } - void VMSUMSHS(u32 vd, u32 va, u32 vb, u32 vc) + void VMSUMSHS(u32 vd, u32 va, u32 vb, u32 vc) //nf { for (uint w = 0; w < 4; w++) { @@ -1096,7 +1096,7 @@ private: CPU.VPR[vd]._u32[w] = result; } } - void VMSUMUHM(u32 vd, u32 va, u32 vb, u32 vc) + void VMSUMUHM(u32 vd, u32 va, u32 vb, u32 vc) //nf { for (uint w = 0; w < 4; w++) { @@ -1111,7 +1111,7 @@ private: CPU.VPR[vd]._u32[w] = result; } } - void VMSUMUHS(u32 vd, u32 va, u32 vb, u32 vc) + void VMSUMUHS(u32 vd, u32 va, u32 vb, u32 vc) //nf { for (uint w = 0; w < 4; w++) { @@ -1136,7 +1136,7 @@ private: CPU.VPR[vd]._u32[w] = saturated; } } - void VMULESB(u32 vd, u32 va, u32 vb) + void VMULESB(u32 vd, u32 va, u32 vb) //nf { for (uint 
h = 0; h < 8; h++) { @@ -1164,7 +1164,7 @@ private: CPU.VPR[vd]._u32[w] = (u32)CPU.VPR[va]._u16[w*2+1] * (u32)CPU.VPR[vb]._u16[w*2+1]; } } - void VMULOSB(u32 vd, u32 va, u32 vb) + void VMULOSB(u32 vd, u32 va, u32 vb) //nf { for (uint h = 0; h < 8; h++) { @@ -1243,7 +1243,7 @@ private: CPU.VPR[vd]._u16[4 + (3 - h)] = (ab7 << 15) | (ab8 << 10) | (ab16 << 5) | ab24; } } - void VPKSHSS(u32 vd, u32 va, u32 vb) + void VPKSHSS(u32 vd, u32 va, u32 vb) //nf { for (uint b = 0; b < 8; b++) { @@ -1348,7 +1348,7 @@ private: CPU.VPR[vd]._s16[h] = result; } } - void VPKSWUS(u32 vd, u32 va, u32 vb) + void VPKSWUS(u32 vd, u32 va, u32 vb) //nf { for (uint h = 0; h < 4; h++) { @@ -1383,7 +1383,7 @@ private: CPU.VPR[vd]._u16[h] = result; } } - void VPKUHUM(u32 vd, u32 va, u32 vb) + void VPKUHUM(u32 vd, u32 va, u32 vb) //nf { for (uint b = 0; b < 8; b++) { @@ -1424,7 +1424,7 @@ private: CPU.VPR[vd]._u16[h ] = CPU.VPR[vb]._u16[h*2]; } } - void VPKUWUS(u32 vd, u32 va, u32 vb) + void VPKUWUS(u32 vd, u32 va, u32 vb) //nf { for (uint h = 0; h < 4; h++) { @@ -1486,7 +1486,7 @@ private: CPU.VPR[vd]._f[w] = f; } } - void VRLB(u32 vd, u32 va, u32 vb) + void VRLB(u32 vd, u32 va, u32 vb) //nf { for (uint b = 0; b < 16; b++) { @@ -1495,7 +1495,7 @@ private: CPU.VPR[vd]._u8[b] = (CPU.VPR[va]._u8[b] << nRot) | (CPU.VPR[va]._u8[b] >> (8 - nRot)); } } - void VRLH(u32 vd, u32 va, u32 vb) + void VRLH(u32 vd, u32 va, u32 vb) //nf { for (uint h = 0; h < 8; h++) { @@ -1524,7 +1524,7 @@ private: CPU.VPR[vd]._u8[b] = (CPU.VPR[vb]._u8[b] & CPU.VPR[vc]._u8[b]) | (CPU.VPR[va]._u8[b] & (~CPU.VPR[vc]._u8[b])); } } - void VSL(u32 vd, u32 va, u32 vb) + void VSL(u32 vd, u32 va, u32 vb) //nf { u8 sh = CPU.VPR[vb]._u8[0] & 0x7; @@ -1648,7 +1648,7 @@ private: CPU.VPR[vd]._u32[w] = word; } } - void VSR(u32 vd, u32 va, u32 vb) + void VSR(u32 vd, u32 va, u32 vb) //nf { u8 sh = CPU.VPR[vb]._u8[0] & 0x7; u32 t = 1; @@ -1676,7 +1676,7 @@ private: CPU.VPR[vd]._u32[3] = 0xCDCDCDCD; } } - void VSRAB(u32 vd, u32 va, u32 vb) + 
void VSRAB(u32 vd, u32 va, u32 vb) //nf { for (uint b = 0; b < 16; b++) { @@ -1729,7 +1729,7 @@ private: CPU.VPR[vd]._u32[w] = CPU.VPR[va]._u32[w] >> (CPU.VPR[vb]._u8[w*4] & 0x1f); } } - void VSUBCUW(u32 vd, u32 va, u32 vb) + void VSUBCUW(u32 vd, u32 va, u32 vb) //nf { for (uint w = 0; w < 4; w++) { @@ -1743,7 +1743,7 @@ private: CPU.VPR[vd]._f[w] = CPU.VPR[va]._f[w] - CPU.VPR[vb]._f[w]; } } - void VSUBSBS(u32 vd, u32 va, u32 vb) + void VSUBSBS(u32 vd, u32 va, u32 vb) //nf { for (uint b = 0; b < 16; b++) { @@ -1832,7 +1832,7 @@ private: CPU.VPR[vd]._u16[h] = CPU.VPR[va]._u16[h] - CPU.VPR[vb]._u16[h]; } } - void VSUBUHS(u32 vd, u32 va, u32 vb) + void VSUBUHS(u32 vd, u32 va, u32 vb) //nf { for (uint h = 0; h < 8; h++) { @@ -1915,7 +1915,7 @@ private: CPU.VPR[vd]._s32[1] = 0; CPU.VPR[vd]._s32[3] = 0; } - void VSUM4SBS(u32 vd, u32 va, u32 vb) + void VSUM4SBS(u32 vd, u32 va, u32 vb) //nf { for (uint w = 0; w < 4; w++) { @@ -2019,7 +2019,7 @@ private: CPU.VPR[vd]._u8[(3 - w)*4 + 0] = CPU.VPR[vb]._u8[8 + w*2 + 1] & 0x1f; } } - void VUPKLSB(u32 vd, u32 vb) + void VUPKLSB(u32 vd, u32 vb) //nf { for (uint h = 0; h < 8; h++) { diff --git a/rpcs3/Emu/Cell/SPUInterpreter.h b/rpcs3/Emu/Cell/SPUInterpreter.h index dfca767c9c..8522a34889 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.h +++ b/rpcs3/Emu/Cell/SPUInterpreter.h @@ -4,6 +4,7 @@ #include "Emu/Memory/Memory.h" #include "Emu/Cell/SPUThread.h" #include "Emu/SysCalls/SysCalls.h" +#include "Crypto/sha1.h" #define UNIMPLEMENTED() UNK(__FUNCTION__) @@ -14,6 +15,15 @@ __m128d m128d; } __u32x4; */ +#define MEM_AND_REG_HASH() \ + unsigned char mem_h[20]; sha1(&Memory[CPU.dmac.ls_offset], 256*1024, mem_h); \ + unsigned char reg_h[20]; sha1((const unsigned char*)CPU.GPR, sizeof(CPU.GPR), reg_h); \ + ConLog.Write("Mem hash: 0x%llx, reg hash: 0x%llx", *(u64*)mem_h, *(u64*)reg_h); + +#define LOG2_OPCODE(...) //MEM_AND_REG_HASH(); ConLog.Write(__FUNCTION__ "(): " __VA_ARGS__) + +#define LOG5_OPCODE(...) 
/// + class SPUInterpreter : public SPUOpcodes { private: @@ -32,125 +42,25 @@ private: //0 - 10 void STOP(u32 code) { - CPU.SetExitStatus(code); // exit code (not status) - - switch (code) - { - case 0x110: /* ===== sys_spu_thread_receive_event ===== */ - { - u32 spuq = 0; - if (!CPU.SPU.Out_MBox.Pop(spuq)) - { - ConLog.Error("sys_spu_thread_receive_event: cannot read Out_MBox"); - CPU.SPU.In_MBox.PushUncond(CELL_EINVAL); // ??? - return; - } - - if (CPU.SPU.In_MBox.GetCount()) - { - ConLog.Error("sys_spu_thread_receive_event(spuq=0x%x): In_MBox is not empty", spuq); - CPU.SPU.In_MBox.PushUncond(CELL_EBUSY); // ??? - return; - } - - if (Ini.HLELogging.GetValue()) - { - ConLog.Write("sys_spu_thread_receive_event(spuq=0x%x)", spuq); - } - - EventQueue* eq; - if (!CPU.SPUQs.GetEventQueue(FIX_SPUQ(spuq), eq)) - { - CPU.SPU.In_MBox.PushUncond(CELL_EINVAL); // TODO: check error value - return; - } - - u32 tid = GetCurrentSPUThread().GetId(); - - eq->sq.push(tid); // add thread to sleep queue - - while (true) - { - switch (eq->owner.trylock(tid)) - { - case SMR_OK: - if (!eq->events.count()) - { - eq->owner.unlock(tid); - break; - } - else - { - u32 next = (eq->protocol == SYS_SYNC_FIFO) ? 
eq->sq.pop() : eq->sq.pop_prio(); - if (next != tid) - { - eq->owner.unlock(tid, next); - break; - } - } - case SMR_SIGNAL: - { - sys_event_data event; - eq->events.pop(event); - eq->owner.unlock(tid); - CPU.SPU.In_MBox.PushUncond(CELL_OK); - CPU.SPU.In_MBox.PushUncond(event.data1); - CPU.SPU.In_MBox.PushUncond(event.data2); - CPU.SPU.In_MBox.PushUncond(event.data3); - return; - } - case SMR_FAILED: break; - default: eq->sq.invalidate(tid); CPU.SPU.In_MBox.PushUncond(CELL_ECANCELED); return; - } - - Sleep(1); - if (Emu.IsStopped()) - { - ConLog.Warning("sys_spu_thread_receive_event(spuq=0x%x) aborted", spuq); - eq->sq.invalidate(tid); - return; - } - } - } - break; - case 0x102: - if (!CPU.SPU.Out_MBox.GetCount()) - { - ConLog.Error("sys_spu_thread_exit (no status, code 0x102)"); - } - else if (Ini.HLELogging.GetValue()) - { - // the real exit status - ConLog.Write("sys_spu_thread_exit (status=0x%x)", CPU.SPU.Out_MBox.GetValue()); - } - CPU.Stop(); - break; - default: - if (!CPU.SPU.Out_MBox.GetCount()) - { - ConLog.Error("Unknown STOP code: 0x%x (no message)", code); - } - else - { - ConLog.Error("Unknown STOP code: 0x%x (message=0x%x)", code, CPU.SPU.Out_MBox.GetValue()); - } - CPU.Stop(); - break; - } + CPU.DoStop(code); + LOG2_OPCODE(); } void LNOP() { } void SYNC(u32 Cbit) { + // This instruction must be used following a store instruction that modifies the instruction stream. _mm_mfence(); } void DSYNC() { + // This instruction forces all earlier load, store, and channel instructions to complete before proceeding. _mm_mfence(); } void MFSPR(u32 rt, u32 sa) { + UNIMPLEMENTED(); //If register is a dummy register (register labeled 0x0) if(sa == 0x0) { @@ -219,17 +129,17 @@ private: } void ROTM(u32 rt, u32 ra, u32 rb) { - CPU.GPR[rt]._u32[0] = ((0 - CPU.GPR[rb]._u32[0]) % 64) < 32 ? CPU.GPR[ra]._u32[0] >> ((0 - CPU.GPR[rb]._u32[0]) % 64) : 0; - CPU.GPR[rt]._u32[1] = ((0 - CPU.GPR[rb]._u32[1]) % 64) < 32 ? 
CPU.GPR[ra]._u32[1] >> ((0 - CPU.GPR[rb]._u32[1]) % 64) : 0; - CPU.GPR[rt]._u32[2] = ((0 - CPU.GPR[rb]._u32[2]) % 64) < 32 ? CPU.GPR[ra]._u32[2] >> ((0 - CPU.GPR[rb]._u32[2]) % 64) : 0; - CPU.GPR[rt]._u32[3] = ((0 - CPU.GPR[rb]._u32[3]) % 64) < 32 ? CPU.GPR[ra]._u32[3] >> ((0 - CPU.GPR[rb]._u32[3]) % 64) : 0; + CPU.GPR[rt]._u32[0] = ((0 - CPU.GPR[rb]._u32[0]) & 0x3f) < 32 ? CPU.GPR[ra]._u32[0] >> ((0 - CPU.GPR[rb]._u32[0]) & 0x3f) : 0; + CPU.GPR[rt]._u32[1] = ((0 - CPU.GPR[rb]._u32[1]) & 0x3f) < 32 ? CPU.GPR[ra]._u32[1] >> ((0 - CPU.GPR[rb]._u32[1]) & 0x3f) : 0; + CPU.GPR[rt]._u32[2] = ((0 - CPU.GPR[rb]._u32[2]) & 0x3f) < 32 ? CPU.GPR[ra]._u32[2] >> ((0 - CPU.GPR[rb]._u32[2]) & 0x3f) : 0; + CPU.GPR[rt]._u32[3] = ((0 - CPU.GPR[rb]._u32[3]) & 0x3f) < 32 ? CPU.GPR[ra]._u32[3] >> ((0 - CPU.GPR[rb]._u32[3]) & 0x3f) : 0; } void ROTMA(u32 rt, u32 ra, u32 rb) { - CPU.GPR[rt]._i32[0] = ((0 - CPU.GPR[rb]._i32[0]) % 64) < 32 ? CPU.GPR[ra]._i32[0] >> ((0 - CPU.GPR[rb]._i32[0]) % 64) : CPU.GPR[ra]._i32[0] >> 31; - CPU.GPR[rt]._i32[1] = ((0 - CPU.GPR[rb]._i32[1]) % 64) < 32 ? CPU.GPR[ra]._i32[1] >> ((0 - CPU.GPR[rb]._i32[1]) % 64) : CPU.GPR[ra]._i32[1] >> 31; - CPU.GPR[rt]._i32[2] = ((0 - CPU.GPR[rb]._i32[2]) % 64) < 32 ? CPU.GPR[ra]._i32[2] >> ((0 - CPU.GPR[rb]._i32[2]) % 64) : CPU.GPR[ra]._i32[2] >> 31; - CPU.GPR[rt]._i32[3] = ((0 - CPU.GPR[rb]._i32[3]) % 64) < 32 ? CPU.GPR[ra]._i32[3] >> ((0 - CPU.GPR[rb]._i32[3]) % 64) : CPU.GPR[ra]._i32[3] >> 31; + CPU.GPR[rt]._i32[0] = ((0 - CPU.GPR[rb]._u32[0]) & 0x3f) < 32 ? CPU.GPR[ra]._i32[0] >> ((0 - CPU.GPR[rb]._u32[0]) & 0x3f) : CPU.GPR[ra]._i32[0] >> 31; + CPU.GPR[rt]._i32[1] = ((0 - CPU.GPR[rb]._u32[1]) & 0x3f) < 32 ? CPU.GPR[ra]._i32[1] >> ((0 - CPU.GPR[rb]._u32[1]) & 0x3f) : CPU.GPR[ra]._i32[1] >> 31; + CPU.GPR[rt]._i32[2] = ((0 - CPU.GPR[rb]._u32[2]) & 0x3f) < 32 ? CPU.GPR[ra]._i32[2] >> ((0 - CPU.GPR[rb]._u32[2]) & 0x3f) : CPU.GPR[ra]._i32[2] >> 31; + CPU.GPR[rt]._i32[3] = ((0 - CPU.GPR[rb]._u32[3]) & 0x3f) < 32 ? 
CPU.GPR[ra]._i32[3] >> ((0 - CPU.GPR[rb]._u32[3]) & 0x3f) : CPU.GPR[ra]._i32[3] >> 31; } void SHL(u32 rt, u32 ra, u32 rb) { @@ -246,12 +156,12 @@ private: void ROTHM(u32 rt, u32 ra, u32 rb) { for (int h = 0; h < 8; h++) - CPU.GPR[rt]._u16[h] = ((0 - CPU.GPR[rb]._u16[h]) % 32) < 16 ? CPU.GPR[ra]._u16[h] >> ((0 - CPU.GPR[rb]._u16[h]) % 32) : 0; + CPU.GPR[rt]._u16[h] = ((0 - CPU.GPR[rb]._u16[h]) & 0x1f) < 16 ? CPU.GPR[ra]._u16[h] >> ((0 - CPU.GPR[rb]._u16[h]) & 0x1f) : 0; } void ROTMAH(u32 rt, u32 ra, u32 rb) { for (int h = 0; h < 8; h++) - CPU.GPR[rt]._i16[h] = ((0 - CPU.GPR[rb]._i16[h]) % 32) < 16 ? CPU.GPR[ra]._i16[h] >> ((0 - CPU.GPR[rb]._i16[h]) % 32) : CPU.GPR[ra]._i16[h] >> 15; + CPU.GPR[rt]._i16[h] = ((0 - CPU.GPR[rb]._u16[h]) & 0x1f) < 16 ? CPU.GPR[ra]._i16[h] >> ((0 - CPU.GPR[rb]._u16[h]) & 0x1f) : CPU.GPR[ra]._i16[h] >> 15; } void SHLH(u32 rt, u32 ra, u32 rb) { @@ -268,7 +178,7 @@ private: } void ROTMI(u32 rt, u32 ra, s32 i7) { - const int nRot = (0 - i7) % 64; + const int nRot = (0 - i7) & 0x3f; CPU.GPR[rt]._u32[0] = nRot < 32 ? CPU.GPR[ra]._u32[0] >> nRot : 0; CPU.GPR[rt]._u32[1] = nRot < 32 ? CPU.GPR[ra]._u32[1] >> nRot : 0; CPU.GPR[rt]._u32[2] = nRot < 32 ? CPU.GPR[ra]._u32[2] >> nRot : 0; @@ -276,7 +186,7 @@ private: } void ROTMAI(u32 rt, u32 ra, s32 i7) { - const int nRot = (0 - i7) % 64; + const int nRot = (0 - i7) & 0x3f; CPU.GPR[rt]._i32[0] = nRot < 32 ? CPU.GPR[ra]._i32[0] >> nRot : CPU.GPR[ra]._i32[0] >> 31; CPU.GPR[rt]._i32[1] = nRot < 32 ? CPU.GPR[ra]._i32[1] >> nRot : CPU.GPR[ra]._i32[1] >> 31; CPU.GPR[rt]._i32[2] = nRot < 32 ? CPU.GPR[ra]._i32[2] >> nRot : CPU.GPR[ra]._i32[2] >> 31; @@ -287,7 +197,7 @@ private: const u32 s = i7 & 0x3f; for (u32 j = 0; j < 4; ++j) - CPU.GPR[rt]._u32[j] = CPU.GPR[ra]._u32[j] << s; + CPU.GPR[rt]._u32[j] = (s >= 32) ? 
0 : CPU.GPR[ra]._u32[j] << s; } void ROTHI(u32 rt, u32 ra, s32 i7) { @@ -298,14 +208,14 @@ private: } void ROTHMI(u32 rt, u32 ra, s32 i7) { - const int nRot = (0 - i7) % 32; + const int nRot = (0 - i7) & 0x1f; for (int h = 0; h < 8; h++) CPU.GPR[rt]._u16[h] = nRot < 16 ? CPU.GPR[ra]._u16[h] >> nRot : 0; } void ROTMAHI(u32 rt, u32 ra, s32 i7) { - const int nRot = (0 - i7) % 32; + const int nRot = (0 - i7) & 0x1f; for (int h = 0; h < 8; h++) CPU.GPR[rt]._i16[h] = nRot < 16 ? CPU.GPR[ra]._i16[h] >> nRot : CPU.GPR[ra]._i16[h] >> 15; @@ -315,7 +225,7 @@ private: const int nRot = i7 & 0x1f; for (int h = 0; h < 8; h++) - CPU.GPR[rt]._u16[0] = nRot > 15 ? 0 : CPU.GPR[ra]._u16[0] << nRot; + CPU.GPR[rt]._u16[h] = nRot > 15 ? 0 : CPU.GPR[ra]._u16[h] << nRot; } void A(u32 rt, u32 ra, u32 rb) { @@ -369,26 +279,59 @@ private: } void BIZ(u32 rt, u32 ra) { - if(CPU.GPR[rt]._u32[3] == 0) - CPU.SetBranch(branchTarget(CPU.GPR[ra]._u32[3], 0)); + u64 target = branchTarget(CPU.GPR[ra]._u32[3], 0); + if (CPU.GPR[rt]._u32[3] == 0) + { + LOG5_OPCODE("taken (0x%llx)", target); + CPU.SetBranch(target); + } + else + { + LOG5_OPCODE("not taken (0x%llx)", target); + } } void BINZ(u32 rt, u32 ra) { - if(CPU.GPR[rt]._u32[3] != 0) - CPU.SetBranch(branchTarget(CPU.GPR[ra]._u32[3], 0)); + u64 target = branchTarget(CPU.GPR[ra]._u32[3], 0); + if (CPU.GPR[rt]._u32[3] != 0) + { + LOG5_OPCODE("taken (0x%llx)", target); + CPU.SetBranch(target); + } + else + { + LOG5_OPCODE("not taken (0x%llx)", target); + } } void BIHZ(u32 rt, u32 ra) { - if(CPU.GPR[rt]._u16[6] == 0) - CPU.SetBranch(branchTarget(CPU.GPR[ra]._u32[3], 0)); + u64 target = branchTarget(CPU.GPR[ra]._u32[3], 0); + if (CPU.GPR[rt]._u16[6] == 0) + { + LOG5_OPCODE("taken (0x%llx)", target); + CPU.SetBranch(target); + } + else + { + LOG5_OPCODE("not taken (0x%llx)", target); + } } void BIHNZ(u32 rt, u32 ra) { - if(CPU.GPR[rt]._u16[6] != 0) - CPU.SetBranch(branchTarget(CPU.GPR[ra]._u32[3], 0)); + u64 target = branchTarget(CPU.GPR[ra]._u32[3], 0); + 
if (CPU.GPR[rt]._u16[6] != 0) + { + LOG5_OPCODE("taken (0x%llx)", target); + CPU.SetBranch(target); + } + else + { + LOG5_OPCODE("not taken (0x%llx)", target); + } } void STOPD(u32 rc, u32 ra, u32 rb) { + UNIMPLEMENTED(); Emu.Pause(); } void STQX(u32 rt, u32 ra, u32 rb) @@ -405,14 +348,17 @@ private: } void BI(u32 ra) { - CPU.SetBranch(branchTarget(CPU.GPR[ra]._u32[3], 0)); + u64 target = branchTarget(CPU.GPR[ra]._u32[3], 0); + LOG5_OPCODE("branch (0x%llx)", target); + CPU.SetBranch(target); } void BISL(u32 rt, u32 ra) { - const u32 NewPC = CPU.GPR[ra]._u32[3]; + u64 target = branchTarget(CPU.GPR[ra]._u32[3], 0); CPU.GPR[rt].Reset(); CPU.GPR[rt]._u32[3] = CPU.PC + 4; - CPU.SetBranch(branchTarget(NewPC, 0)); + LOG5_OPCODE("branch (0x%llx)", target); + CPU.SetBranch(target); } void IRET(u32 ra) { @@ -1088,9 +1034,13 @@ private: CPU.GPR[rt]._u32[i] = (CPU.GPR[ra]._u32[i] & 0x807fffff) | (exp << 23); - CPU.GPR[rt]._u32[i] = (u32)CPU.GPR[rt]._f[i]; //trunc + if (CPU.GPR[rt]._f[i] > 0x7fffffff) + CPU.GPR[rt]._u32[i] = 0x7fffffff; + else if (CPU.GPR[rt]._f[i] < -pow(2, 31)) + CPU.GPR[rt]._u32[i] = 0x80000000; + else + CPU.GPR[rt]._i32[i] = (s32)CPU.GPR[rt]._f[i]; //trunc } - //CPU.GPR[rt]._m128i = _mm_cvttps_epi32(CPU.GPR[rt]._m128); } void CFLTU(u32 rt, u32 ra, s32 i8) { @@ -1117,7 +1067,6 @@ private: } void CSFLT(u32 rt, u32 ra, s32 i8) { - //CPU.GPR[rt]._m128 = _mm_cvtepi32_ps(CPU.GPR[ra]._m128i); const u32 scale = 155 - (i8 & 0xff); //unsigned immediate for (int i = 0; i < 4; i++) { @@ -1149,8 +1098,16 @@ private: //0 - 8 void BRZ(u32 rt, s32 i16) { + u64 target = branchTarget(CPU.PC, i16); if (CPU.GPR[rt]._u32[3] == 0) - CPU.SetBranch(branchTarget(CPU.PC, i16)); + { + LOG5_OPCODE("taken (0x%llx)", target); + CPU.SetBranch(target); + } + else + { + LOG5_OPCODE("not taken (0x%llx)", target); + } } void STQA(u32 rt, s32 i16) { @@ -1166,18 +1123,42 @@ private: } void BRNZ(u32 rt, s32 i16) { + u64 target = branchTarget(CPU.PC, i16); if (CPU.GPR[rt]._u32[3] != 0) - 
CPU.SetBranch(branchTarget(CPU.PC, i16)); + { + LOG5_OPCODE("taken (0x%llx)", target); + CPU.SetBranch(target); + } + else + { + LOG5_OPCODE("not taken (0x%llx)", target); + } } void BRHZ(u32 rt, s32 i16) { - if (CPU.GPR[rt]._u16[6] == 0) - CPU.SetBranch(branchTarget(CPU.PC, i16)); + u64 target = branchTarget(CPU.PC, i16); + if (CPU.GPR[rt]._u16[6] == 0) + { + LOG5_OPCODE("taken (0x%llx)", target); + CPU.SetBranch(target); + } + else + { + LOG5_OPCODE("not taken (0x%llx)", target); + } } void BRHNZ(u32 rt, s32 i16) { - if (CPU.GPR[rt]._u16[6] != 0) - CPU.SetBranch(branchTarget(CPU.PC, i16)); + u64 target = branchTarget(CPU.PC, i16); + if (CPU.GPR[rt]._u16[6] != 0) + { + LOG5_OPCODE("taken (0x%llx)", target); + CPU.SetBranch(target); + } + else + { + LOG5_OPCODE("not taken (0x%llx)", target); + } } void STQR(u32 rt, s32 i16) { @@ -1193,7 +1174,9 @@ private: } void BRA(s32 i16) { - CPU.SetBranch(branchTarget(0, i16)); + u64 target = branchTarget(0, i16); + LOG5_OPCODE("branch (0x%llx)", target); + CPU.SetBranch(target); } void LQA(u32 rt, s32 i16) { @@ -1209,13 +1192,17 @@ private: } void BRASL(u32 rt, s32 i16) { + u64 target = branchTarget(0, i16); CPU.GPR[rt].Reset(); CPU.GPR[rt]._u32[3] = CPU.PC + 4; - CPU.SetBranch(branchTarget(0, i16)); + LOG5_OPCODE("branch (0x%llx)", target); + CPU.SetBranch(target); } void BR(s32 i16) { - CPU.SetBranch(branchTarget(CPU.PC, i16)); + u64 target = branchTarget(CPU.PC, i16); + LOG5_OPCODE("branch (0x%llx)", target); + CPU.SetBranch(target); } void FSMBI(u32 rt, s32 i16) { @@ -1235,9 +1222,11 @@ private: } void BRSL(u32 rt, s32 i16) { + u64 target = branchTarget(CPU.PC, i16); CPU.GPR[rt].Reset(); CPU.GPR[rt]._u32[3] = CPU.PC + 4; - CPU.SetBranch(branchTarget(CPU.PC, i16)); + LOG5_OPCODE("branch (0x%llx)", target); + CPU.SetBranch(target); } void LQR(u32 rt, s32 i16) { @@ -1337,6 +1326,7 @@ private: Emu.Pause(); return; } + //ConLog.Write("STQD(lsa=0x%x): GPR[%d] (0x%llx%llx)", lsa, rt, CPU.GPR[rt]._u64[1], CPU.GPR[rt]._u64[0]); 
CPU.WriteLS128(lsa, CPU.GPR[rt]._u128); } void LQD(u32 rt, s32 i10, u32 ra) //i10 is shifted left by 4 while decoding diff --git a/rpcs3/Emu/Cell/SPURecompiler.h b/rpcs3/Emu/Cell/SPURecompiler.h new file mode 100644 index 0000000000..4d07456c0c --- /dev/null +++ b/rpcs3/Emu/Cell/SPURecompiler.h @@ -0,0 +1,3770 @@ +#pragma once + +#include "Emu/Cell/SPUOpcodes.h" +#include "Emu/Memory/Memory.h" +#include "Emu/Cell/SPUThread.h" +#include "Emu/SysCalls/SysCalls.h" + +#define ASMJIT_STATIC + +#include "asmjit.h" + +using namespace asmjit; +using namespace asmjit::host; + +#define UNIMPLEMENTED() UNK(__FUNCTION__) + +struct g_imm_table_struct +{ + u16 cntb_table[65536]; + + __m128i fsmb_table[65536]; + __m128i fsmh_table[256]; + __m128i fsm_table[16]; + + __m128i sldq_pshufb[32]; + __m128i srdq_pshufb[32]; + __m128i rldq_pshufb[16]; + + g_imm_table_struct() + { + static_assert(offsetof(g_imm_table_struct, cntb_table) == 0, "offsetof(cntb_table) != 0"); + for (u32 i = 0; i < sizeof(cntb_table) / sizeof(cntb_table[0]); i++) + { + u32 cnt_low = 0, cnt_high = 0; + for (u32 j = 0; j < 8; j++) + { + cnt_low += (i >> j) & 1; + cnt_high += (i >> (j + 8)) & 1; + } + cntb_table[i] = (cnt_high << 8) | cnt_low; + } + for (u32 i = 0; i < sizeof(fsm_table) / sizeof(fsm_table[0]); i++) + { + for (u32 j = 0; j < 4; j++) fsm_table[i].m128i_u32[j] = (i & (1 << j)) ? ~0 : 0; + } + for (u32 i = 0; i < sizeof(fsmh_table) / sizeof(fsmh_table[0]); i++) + { + for (u32 j = 0; j < 8; j++) fsmh_table[i].m128i_u16[j] = (i & (1 << j)) ? ~0 : 0; + } + for (u32 i = 0; i < sizeof(fsmb_table) / sizeof(fsmb_table[0]); i++) + { + for (u32 j = 0; j < 16; j++) fsmb_table[i].m128i_u8[j] = (i & (1 << j)) ? 
~0 : 0; + } + for (u32 i = 0; i < sizeof(sldq_pshufb) / sizeof(sldq_pshufb[0]); i++) + { + for (u32 j = 0; j < 16; j++) sldq_pshufb[i].m128i_u8[j] = (u8)(j - i); + } + for (u32 i = 0; i < sizeof(srdq_pshufb) / sizeof(srdq_pshufb[0]); i++) + { + for (u32 j = 0; j < 16; j++) srdq_pshufb[i].m128i_u8[j] = (j + i > 15) ? 0xff : (u8)(j + i); + } + for (u32 i = 0; i < sizeof(rldq_pshufb) / sizeof(rldq_pshufb[0]); i++) + { + for (u32 j = 0; j < 16; j++) rldq_pshufb[i].m128i_u8[j] = (u8)(j - i) & 0xf; + } + } +}; + +class SPURecompiler; + +class SPURecompilerCore : public CPUDecoder +{ + SPURecompiler* m_enc; + SPUThread& CPU; + +public: + SPUInterpreter* inter; + JitRuntime runtime; + bool first; + + struct SPURecEntry + { + //u16 host; // absolute position of first instruction of current block (not used now) + u16 count; // count of instructions compiled from current point (and to be checked) + u32 valid; // copy of valid opcode for validation + void* pointer; // pointer to executable memory object + }; + + SPURecEntry entry[0x10000]; + + std::vector<__m128i> imm_table; + + SPURecompilerCore(SPUThread& cpu); + + ~SPURecompilerCore(); + + void Compile(u16 pos); + + virtual void Decode(const u32 code); + + virtual u8 DecodeMemory(const u64 address); +}; + +#define c (*compiler) + +#define cpu_xmm(x) oword_ptr(*cpu_var, (sizeof((*(SPUThread*)nullptr).x) == 16) ? offsetof(SPUThread, x) : throw "sizeof("#x") != 16") +#define cpu_qword(x) qword_ptr(*cpu_var, (sizeof((*(SPUThread*)nullptr).x) == 8) ? offsetof(SPUThread, x) : throw "sizeof("#x") != 8") +#define cpu_dword(x) dword_ptr(*cpu_var, (sizeof((*(SPUThread*)nullptr).x) == 4) ? offsetof(SPUThread, x) : throw "sizeof("#x") != 4") +#define cpu_word(x) word_ptr(*cpu_var, (sizeof((*(SPUThread*)nullptr).x) == 2) ? offsetof(SPUThread, x) : throw "sizeof("#x") != 2") +#define cpu_byte(x) byte_ptr(*cpu_var, (sizeof((*(SPUThread*)nullptr).x) == 1) ? 
offsetof(SPUThread, x) : throw "sizeof("#x") != 1") + +#define g_imm_xmm(x) oword_ptr(*g_imm_var, offsetof(g_imm_table_struct, x)) +#define g_imm2_xmm(x, y) oword_ptr(*g_imm_var, y, 0, offsetof(g_imm_table_struct, x)) + +#define LOG_OPCODE(...) //ConLog.Write("Compiled "__FUNCTION__"(): "__VA_ARGS__) + +#define LOG3_OPCODE(...) //ConLog.Write("Linked "__FUNCTION__"(): "__VA_ARGS__) + +#define LOG4_OPCODE(...) //c.addComment(fmt::Format("SPU info: "__FUNCTION__"(): "__VA_ARGS__).c_str()) + +#define WRAPPER_BEGIN(a0, a1, a2, a3) struct opwr_##a0 \ +{ \ + static void opcode(u32 a0, u32 a1, u32 a2, u32 a3) \ +{ \ + SPUThread& CPU = *(SPUThread*)GetCurrentCPUThread(); + +#define WRAPPER_END(a0, a1, a2, a3) /*LOG2_OPCODE();*/ } \ +}; \ + /*XmmRelease();*/ \ + if (#a0[0] == 'r') XmmInvalidate(a0); \ + if (#a1[0] == 'r') XmmInvalidate(a1); \ + if (#a2[0] == 'r') XmmInvalidate(a2); \ + if (#a3[0] == 'r') XmmInvalidate(a3); \ + X86X64CallNode* call##a0 = c.call(imm_ptr(&opwr_##a0::opcode), kFuncConvHost, FuncBuilder4()); \ + call##a0->setArg(0, imm_u(a0)); \ + call##a0->setArg(1, imm_u(a1)); \ + call##a0->setArg(2, imm_u(a2)); \ + call##a0->setArg(3, imm_u(a3)); \ + LOG3_OPCODE(/*#a0"=%d, "#a1"=%d, "#a2"=%d, "#a3"=%d", a0, a1, a2, a3*/); + + +class SPURecompiler : public SPUOpcodes +{ +private: + SPUThread& CPU; + SPURecompilerCore& rec; + +public: + Compiler* compiler; + bool do_finalize; + // input: + GpVar* cpu_var; + GpVar* ls_var; + GpVar* imm_var; + GpVar* g_imm_var; + // output: + GpVar* pos_var; + // temporary: + GpVar* addr; + GpVar* qw0; + GpVar* qw1; + GpVar* qw2; + + struct XmmLink + { + XmmVar* data; + s8 reg; + bool taken; + mutable bool got; + mutable u32 access; + + XmmLink() + : data(nullptr) + , reg(-1) + , taken(false) + , got(false) + , access(0) + { + } + + const XmmVar& get() const + { + assert(data); + assert(taken); + if (!taken) throw "XmmLink::get(): wrong use"; + got = true; + return *data; + } + + const XmmVar& read() const + { + assert(data); + 
return *data; + } + } xmm_var[16]; + + SPURecompiler(SPUThread& cpu, SPURecompilerCore& rec) + : CPU(cpu) + , rec(rec) + , compiler(nullptr) + { + } + + const XmmLink& XmmAlloc(s8 pref = -1) // get empty xmm register + { + if (pref >= 0) for (u32 i = 0; i < 16; i++) + { + if ((xmm_var[i].reg == pref) && !xmm_var[i].taken) + { + xmm_var[i].taken = true; + xmm_var[i].got = false; + xmm_var[i].access = 0; + LOG4_OPCODE("pref(%d) reg taken (i=%d)", pref, i); + return xmm_var[i]; + } + } + for (u32 i = 0; i < 16; i++) + { + if ((xmm_var[i].reg == -1) && !xmm_var[i].taken) + { + xmm_var[i].taken = true; + xmm_var[i].got = false; + xmm_var[i].access = 0; + LOG4_OPCODE("free reg taken (i=%d)", i); + return xmm_var[i]; + } + } + int last = -1, max = -1; + for (u32 i = 0; i < 16; i++) + { + if (!xmm_var[i].taken) + { + if ((int)xmm_var[i].access > max) + { + last = i; + max = xmm_var[i].access; + } + } + } + if (last >= 0) + { + // (saving cached data?) + //c.movdqa(cpu_xmm(GPR[xmm_var[last].reg]), *xmm_var[last].data); + xmm_var[last].taken = true; + xmm_var[last].got = false; + LOG4_OPCODE("cached reg taken (i=%d): GPR[%d] lost", last, xmm_var[last].reg); + xmm_var[last].reg = -1; // ??? 
+ xmm_var[last].access = 0; + return xmm_var[last]; + } + throw "XmmAlloc() failed"; + } + + const XmmLink* XmmRead(const s8 reg) const // get xmm register with specific SPU reg or nullptr + { + assert(reg >= 0); + for (u32 i = 0; i < 16; i++) + { + if (xmm_var[i].reg == reg) + { + assert(!xmm_var[i].got); + if (xmm_var[i].got) throw "XmmRead(): wrong reuse"; + LOG4_OPCODE("GPR[%d] has been read (i=%d)", reg, i); + xmm_var[i].access++; + return &xmm_var[i]; + } + } + LOG4_OPCODE("GPR[%d] not found", reg); + return nullptr; + } + + const XmmLink& XmmGet(s8 reg, s8 target = -1) // get xmm register with specific SPU reg + { + assert(reg >= 0); + XmmLink* res = nullptr; + if (reg == target) + { + for (u32 i = 0; i < 16; i++) + { + if (xmm_var[i].reg == reg) + { + res = &xmm_var[i]; + if (xmm_var[i].taken) throw "XmmGet(): xmm_var is taken"; + xmm_var[i].taken = true; + xmm_var[i].got = false; + //xmm_var[i].reg = -1; + for (u32 j = i + 1; j < 16; j++) + { + if (xmm_var[j].reg == reg) throw "XmmGet(): xmm_var duplicate"; + } + LOG4_OPCODE("cached GPR[%d] used (i=%d)", reg, i); + break; + } + } + } + if (!res) + { + res = &(XmmLink&)XmmAlloc(target); + /*if (target != res->reg) + { + c.movdqa(*res->data, cpu_xmm(GPR[reg])); + } + else*/ + { + if (const XmmLink* source = XmmRead(reg)) + { + c.movdqa(*res->data, source->read()); + } + else + { + c.movdqa(*res->data, cpu_xmm(GPR[reg])); + } + } + res->reg = -1; // ??? + LOG4_OPCODE("* cached GPR[%d] not found", reg); + } + return *res; + } + + const XmmLink& XmmCopy(const XmmLink& from, s8 pref = -1) // XmmAlloc + mov + { + XmmLink* res = &(XmmLink&)XmmAlloc(pref); + c.movdqa(*res->data, *from.data); + res->reg = -1; // ??? 
+ LOG4_OPCODE("*"); + return *res; + } + + void XmmInvalidate(const s8 reg) // invalidate cached register + { + assert(reg >= 0); + for (u32 i = 0; i < 16; i++) + { + if (xmm_var[i].reg == reg) + { + if (xmm_var[i].taken) throw "XmmInvalidate(): xmm_var is taken"; + LOG4_OPCODE("GPR[%d] invalidated (i=%d)", reg, i); + xmm_var[i].reg = -1; + xmm_var[i].access = 0; + } + } + } + + void XmmFinalize(const XmmLink& var, s8 reg = -1) + { + // invalidation + if (reg >= 0) for (u32 i = 0; i < 16; i++) + { + if (xmm_var[i].reg == reg) + { + LOG4_OPCODE("GPR[%d] invalidated (i=%d)", reg, i); + xmm_var[i].reg = -1; + xmm_var[i].access = 0; + } + } + for (u32 i = 0; i < 16; i++) + { + if (xmm_var[i].data == var.data) + { + assert(xmm_var[i].taken); + // save immediately: + if (reg >= 0) + { + c.movdqa(cpu_xmm(GPR[reg]), *xmm_var[i].data); + } + else + { + } + LOG4_OPCODE("GPR[%d] finalized (i=%d), GPR[%d] replaced", reg, i, xmm_var[i].reg); + // (to disable caching:) + //reg = -1; + xmm_var[i].reg = reg; + xmm_var[i].taken = false; + xmm_var[i].got = false; + xmm_var[i].access = 0; + return; + } + } + assert(false); + } + + void XmmRelease() + { + for (u32 i = 0; i < 16; i++) + { + if (xmm_var[i].reg >= 0) + { + //c.movdqa(cpu_xmm(GPR[xmm_var[i].reg]), *xmm_var[i].data); + LOG4_OPCODE("GPR[%d] released (i=%d)", xmm_var[i].reg, i); + xmm_var[i].reg = -1; + xmm_var[i].access = 0; + } + } + } + + Mem XmmConst(const __m128i data) + { + for (u32 i = 0; i < rec.imm_table.size(); i++) + { + if (rec.imm_table[i].m128i_u64[0] == data.m128i_u64[0] && rec.imm_table[i].m128i_u64[1] == data.m128i_u64[1]) + { + return oword_ptr(*imm_var, i * sizeof(__m128i)); + } + } + const int shift = rec.imm_table.size() * sizeof(__m128i); + rec.imm_table.push_back(data); + return oword_ptr(*imm_var, shift); + } + + Mem XmmConst(const __m128 data) + { + return XmmConst((__m128i&)data); + } + +private: + //0 - 10 + void STOP(u32 code) + { + struct STOP_wrapper + { + static void STOP(u32 code) + { + 
SPUThread& CPU = *(SPUThread*)GetCurrentCPUThread(); + CPU.DoStop(code); + LOG2_OPCODE(); + } + }; + c.mov(cpu_qword(PC), (u32)CPU.PC); + X86X64CallNode* call = c.call(imm_ptr(&STOP_wrapper::STOP), kFuncConvHost, FuncBuilder1()); + call->setArg(0, imm_u(code)); + c.mov(*pos_var, (CPU.PC >> 2) + 1); + do_finalize = true; + LOG_OPCODE(); + } + void LNOP() + { + LOG_OPCODE(); + } + void SYNC(u32 Cbit) + { + c.mov(cpu_qword(PC), (u32)CPU.PC); + // This instruction must be used following a store instruction that modifies the instruction stream. + c.mfence(); + c.mov(*pos_var, (CPU.PC >> 2) + 1); + do_finalize = true; + LOG_OPCODE(); + } + void DSYNC() + { + // This instruction forces all earlier load, store, and channel instructions to complete before proceeding. + c.mfence(); + LOG_OPCODE(); + } + void MFSPR(u32 rt, u32 sa) + { + UNIMPLEMENTED(); + //If register is a dummy register (register labeled 0x0) + if(sa == 0x0) + { + CPU.GPR[rt]._u128.hi = 0x0; + CPU.GPR[rt]._u128.lo = 0x0; + } + else + { + CPU.GPR[rt]._u128.hi = CPU.SPR[sa]._u128.hi; + CPU.GPR[rt]._u128.lo = CPU.SPR[sa]._u128.lo; + } + } + void RDCH(u32 rt, u32 ra) + { + c.mov(cpu_qword(PC), (u32)CPU.PC); + WRAPPER_BEGIN(rt, ra, yy, zz); + CPU.ReadChannel(CPU.GPR[rt], ra); + WRAPPER_END(rt, ra, 0, 0); + // TODO + } + void RCHCNT(u32 rt, u32 ra) + { + c.mov(cpu_qword(PC), (u32)CPU.PC); + WRAPPER_BEGIN(rt, ra, yy, zz); + CPU.GPR[rt].Reset(); + CPU.GPR[rt]._u32[3] = CPU.GetChannelCount(ra); + WRAPPER_END(rt, ra, 0, 0); + // TODO + } + void SF(u32 rt, u32 ra, u32 rb) + { + if (ra == rb) + { + // zero + const XmmLink& v0 = XmmAlloc(rt); + c.pxor(v0.get(), v0.get()); + XmmFinalize(v0, rt); + } + else + { + // sub from + const XmmLink& vb = XmmGet(rb, rt); + if (const XmmLink* va = XmmRead(ra)) + { + c.psubd(vb.get(), va->read()); + } + else + { + c.psubd(vb.get(), cpu_xmm(GPR[ra])); + } + XmmFinalize(vb, rt); + } + LOG_OPCODE(); + } + void OR(u32 rt, u32 ra, u32 rb) + { + if (ra == rb) + { + // mov + if (ra != rt) 
+ { + const XmmLink& va = XmmGet(ra, rt); + XmmFinalize(va, rt); + } + // else nop + } + else + { + // or + const XmmLink& vb = XmmGet(rb, rt); + if (const XmmLink* va = XmmRead(ra)) + { + c.por(vb.get(), va->read()); + } + else + { + c.por(vb.get(), cpu_xmm(GPR[ra])); + } + XmmFinalize(vb, rt); + } + LOG_OPCODE(); + } + void BG(u32 rt, u32 ra, u32 rb) + { + if (ra == rb) + { + const XmmLink& v1 = XmmAlloc(rt); + c.movdqa(v1.get(), XmmConst(_mm_set1_epi32(1))); + XmmFinalize(v1, rt); + } + else + { + // compare if-greater-than + const XmmLink& va = XmmGet(ra, rt); + const XmmLink& vb = XmmGet(rb); + c.psubd(va.get(), XmmConst(_mm_set1_epi32(0x80000000))); + c.psubd(vb.get(), XmmConst(_mm_set1_epi32(0x80000000))); + c.pcmpgtd(va.get(), vb.get()); + c.paddd(va.get(), XmmConst(_mm_set1_epi32(1))); + XmmFinalize(va, rt); + XmmFinalize(vb); + // sign bits: + // a b (b-a) -> (result of BG) + // 0 0 0 -> 1 + // 0 0 1 -> 0 + // 0 1 0 -> 1 + // 0 1 1 -> 1 + // 1 0 0 -> 0 + // 1 0 1 -> 0 + // 1 1 0 -> 0 + // 1 1 1 -> 1 + } + LOG_OPCODE(); + } + void SFH(u32 rt, u32 ra, u32 rb) + { + if (ra == rb) + { + // zero + const XmmLink& v0 = XmmAlloc(rt); + c.pxor(v0.get(), v0.get()); + XmmFinalize(v0, rt); + } + else + { + const XmmLink& vb = XmmGet(rb, rt); + if (const XmmLink* va = XmmRead(ra)) + { + c.psubw(vb.get(), va->read()); + } + else + { + c.psubw(vb.get(), cpu_xmm(GPR[ra])); + } + XmmFinalize(vb, rt); + } + LOG_OPCODE(); + } + void NOR(u32 rt, u32 ra, u32 rb) + { + const XmmLink& va = XmmGet(ra, rt); + if (ra != rb) + { + if (const XmmLink* vb = XmmRead(rb)) + { + c.por(va.get(), vb->read()); + } + else + { + c.por(va.get(), cpu_xmm(GPR[rb])); + } + } + c.pxor(va.get(), XmmConst(_mm_set1_epi32(-1))); + XmmFinalize(va, rt); + LOG_OPCODE(); + } + void ABSDB(u32 rt, u32 ra, u32 rb) + { + if (ra == rb) + { + // zero + const XmmLink& v0 = XmmAlloc(rt); + c.pxor(v0.get(), v0.get()); + XmmFinalize(v0, rt); + } + else + { + const XmmLink& va = XmmGet(ra, rt); + const XmmLink& vb = 
XmmGet(rb); + const XmmLink& vm = XmmCopy(va); + c.pmaxub(va.get(), vb.get()); + c.pminub(vb.get(), vm.get()); + c.psubb(va.get(), vb.get()); + XmmFinalize(va, rt); + XmmFinalize(vb); + XmmFinalize(vm); + } + LOG_OPCODE(); + } + void ROT(u32 rt, u32 ra, u32 rb) + { + XmmInvalidate(rt); + for (u32 i = 0; i < 4; i++) + { + c.mov(qw0->r32(), cpu_dword(GPR[ra]._u32[i])); + c.mov(*addr, cpu_dword(GPR[rb]._u32[i])); + c.rol(qw0->r32(), *addr); + c.mov(cpu_dword(GPR[rt]._u32[i]), qw0->r32()); + } + LOG_OPCODE(); + } + void ROTM(u32 rt, u32 ra, u32 rb) + { + XmmInvalidate(rt); + for (u32 i = 0; i < 4; i++) + { + c.mov(qw0->r32(), cpu_dword(GPR[ra]._u32[i])); + c.mov(*addr, cpu_dword(GPR[rb]._u32[i])); + c.neg(*addr); + c.shr(*qw0, *addr); + c.mov(cpu_dword(GPR[rt]._u32[i]), qw0->r32()); + } + LOG_OPCODE(); + } + void ROTMA(u32 rt, u32 ra, u32 rb) + { + XmmInvalidate(rt); + for (u32 i = 0; i < 4; i++) + { + c.movsxd(*qw0, cpu_dword(GPR[ra]._u32[i])); + c.mov(*addr, cpu_dword(GPR[rb]._u32[i])); + c.neg(*addr); + c.sar(*qw0, *addr); + c.mov(cpu_dword(GPR[rt]._u32[i]), qw0->r32()); + } + LOG_OPCODE(); + } + void SHL(u32 rt, u32 ra, u32 rb) + { + XmmInvalidate(rt); + for (u32 i = 0; i < 4; i++) + { + c.mov(qw0->r32(), cpu_dword(GPR[ra]._u32[i])); + c.mov(*addr, cpu_dword(GPR[rb]._u32[i])); + c.shl(*qw0, *addr); + c.mov(cpu_dword(GPR[rt]._u32[i]), qw0->r32()); + } + LOG_OPCODE(); + } + void ROTH(u32 rt, u32 ra, u32 rb) + { + XmmInvalidate(rt); + for (u32 i = 0; i < 8; i++) + { + c.movzx(qw0->r32(), cpu_word(GPR[ra]._u16[i])); + c.movzx(*addr, cpu_word(GPR[rb]._u16[i])); + c.rol(qw0->r16(), *addr); + c.mov(cpu_word(GPR[rt]._u16[i]), qw0->r16()); + } + LOG_OPCODE(); + } + void ROTHM(u32 rt, u32 ra, u32 rb) + { + XmmInvalidate(rt); + for (u32 i = 0; i < 8; i++) + { + c.movzx(qw0->r32(), cpu_word(GPR[ra]._u16[i])); + c.movzx(*addr, cpu_word(GPR[rb]._u16[i])); + c.neg(*addr); + c.shr(qw0->r32(), *addr); + c.mov(cpu_word(GPR[rt]._u16[i]), qw0->r16()); + } + LOG_OPCODE(); + } + void 
ROTMAH(u32 rt, u32 ra, u32 rb) + { + XmmInvalidate(rt); + for (u32 i = 0; i < 8; i++) + { + c.movsx(qw0->r32(), cpu_word(GPR[ra]._u16[i])); + c.movzx(*addr, cpu_word(GPR[rb]._u16[i])); + c.neg(*addr); + c.sar(qw0->r32(), *addr); + c.mov(cpu_word(GPR[rt]._u16[i]), qw0->r16()); + } + LOG_OPCODE(); + } + void SHLH(u32 rt, u32 ra, u32 rb) + { + XmmInvalidate(rt); + for (u32 i = 0; i < 8; i++) + { + c.movzx(qw0->r32(), cpu_word(GPR[ra]._u16[i])); + c.movzx(*addr, cpu_word(GPR[rb]._u16[i])); + c.shl(qw0->r32(), *addr); + c.mov(cpu_word(GPR[rt]._u16[i]), qw0->r16()); + } + LOG_OPCODE(); + } + void ROTI(u32 rt, u32 ra, s32 i7) + { + const int s = i7 & 0x1f; + if (s == 0) + { + // mov + if (ra != rt) + { + const XmmLink& va = XmmGet(ra, rt); + XmmFinalize(va, rt); + } + // else nop + } + else + { + const XmmLink& va = XmmGet(ra, rt); + const XmmLink& v1 = XmmCopy(va); + c.pslld(va.get(), s); + c.psrld(v1.get(), 32 - s); + c.por(va.get(), v1.get()); + XmmFinalize(va, rt); + XmmFinalize(v1); + } + LOG_OPCODE(); + } + void ROTMI(u32 rt, u32 ra, s32 i7) + { + const int s = (0 - i7) & 0x3f; + if (s > 31) + { + // zero + const XmmLink& v0 = XmmAlloc(rt); + c.pxor(v0.get(), v0.get()); + XmmFinalize(v0, rt); + } + else if (s == 0) + { + // mov + if (ra != rt) + { + const XmmLink& va = XmmGet(ra, rt); + XmmFinalize(va, rt); + } + // else nop + } + else + { + // shift right logical + const XmmLink& va = XmmGet(ra, rt); + c.psrld(va.get(), s); + XmmFinalize(va, rt); + } + LOG_OPCODE(); + } + void ROTMAI(u32 rt, u32 ra, s32 i7) + { + const int s = (0 - i7) & 0x3f; + if (s == 0) + { + // mov + if (ra != rt) + { + const XmmLink& va = XmmGet(ra, rt); + XmmFinalize(va, rt); + } + // else nop + } + else + { + // shift right arithmetical + const XmmLink& va = XmmGet(ra, rt); + c.psrad(va.get(), s); + XmmFinalize(va, rt); + } + LOG_OPCODE(); + } + void SHLI(u32 rt, u32 ra, s32 i7) + { + const int s = i7 & 0x3f; + if (s > 31) + { + // zero + const XmmLink& v0 = XmmAlloc(rt); + c.pxor(v0.get(), 
v0.get()); + XmmFinalize(v0, rt); + } + else if (s == 0) + { + // mov + if (ra != rt) + { + const XmmLink& va = XmmGet(ra, rt); + XmmFinalize(va, rt); + } + // else nop + } + else + { + // shift left + const XmmLink& va = XmmGet(ra, rt); + c.pslld(va.get(), s); + XmmFinalize(va, rt); + } + LOG_OPCODE(); + } + void ROTHI(u32 rt, u32 ra, s32 i7) + { + const int s = i7 & 0xf; + if (s == 0) + { + // mov + if (ra != rt) + { + const XmmLink& va = XmmGet(ra, rt); + XmmFinalize(va, rt); + } + // else nop + } + else + { + const XmmLink& va = XmmGet(ra, rt); + const XmmLink& v1 = XmmCopy(va); + c.psllw(va.get(), s); + c.psrlw(v1.get(), 16 - s); + c.por(va.get(), v1.get()); + XmmFinalize(va, rt); + XmmFinalize(v1); + } + LOG_OPCODE(); + } + void ROTHMI(u32 rt, u32 ra, s32 i7) + { + const int s = (0 - i7) & 0x1f; + if (s > 15) + { + // zero + const XmmLink& v0 = XmmAlloc(rt); + c.pxor(v0.get(), v0.get()); + XmmFinalize(v0, rt); + } + else if (s == 0) + { + // mov + if (ra != rt) + { + const XmmLink& va = XmmGet(ra, rt); + XmmFinalize(va, rt); + } + // else nop + } + else + { + // shift right logical + const XmmLink& va = XmmGet(ra, rt); + c.psrlw(va.get(), s); + XmmFinalize(va, rt); + } + LOG_OPCODE(); + } + void ROTMAHI(u32 rt, u32 ra, s32 i7) + { + const int s = (0 - i7) & 0x1f; + if (s == 0) + { + // mov + if (ra != rt) + { + const XmmLink& va = XmmGet(ra, rt); + XmmFinalize(va, rt); + } + // else nop + } + else + { + // shift right arithmetical + const XmmLink& va = XmmGet(ra, rt); + c.psraw(va.get(), s); + XmmFinalize(va, rt); + } + LOG_OPCODE(); + } + void SHLHI(u32 rt, u32 ra, s32 i7) + { + const int s = i7 & 0x1f; + if (s > 15) + { + // zero + const XmmLink& v0 = XmmAlloc(rt); + c.pxor(v0.get(), v0.get()); + XmmFinalize(v0, rt); + } + else if (s == 0) + { + // mov + if (ra != rt) + { + const XmmLink& va = XmmGet(ra, rt); + XmmFinalize(va, rt); + } + // else nop + } + else + { + // shift left + const XmmLink& va = XmmGet(ra, rt); + c.psllw(va.get(), s); + 
XmmFinalize(va, rt); + } + LOG_OPCODE(); + } + void A(u32 rt, u32 ra, u32 rb) + { + if (ra == rb) + { + const XmmLink& vb = XmmGet(rb, rt); + c.paddd(vb.get(), vb.get()); + XmmFinalize(vb, rt); + } + else + { + const XmmLink& vb = XmmGet(rb, rt); + if (const XmmLink* va = XmmRead(ra)) + { + c.paddd(vb.get(), va->read()); + } + else + { + c.paddd(vb.get(), cpu_xmm(GPR[ra])); + } + XmmFinalize(vb, rt); + } + LOG_OPCODE(); + } + void AND(u32 rt, u32 ra, u32 rb) + { + if (ra == rb) + { + if (rt != ra) + { + // mov + const XmmLink& va = XmmGet(ra, rt); + XmmFinalize(va, rt); + } + // else nop + } + else + { + // and + const XmmLink& vb = XmmGet(rb, rt); + if (const XmmLink* va = XmmRead(ra)) + { + c.pand(vb.get(), va->read()); + } + else + { + c.pand(vb.get(), cpu_xmm(GPR[ra])); + } + XmmFinalize(vb, rt); + } + LOG_OPCODE(); + } + void CG(u32 rt, u32 ra, u32 rb) + { + if (ra == rb) + { + const XmmLink& va = XmmGet(ra, rt); + c.psrld(va.get(), 31); + XmmFinalize(va, rt); + } + else + { + const XmmLink& va = XmmGet(ra, rt); + const XmmLink& vb = XmmGet(rb); + c.paddd(vb.get(), va.get()); + c.psubd(va.get(), XmmConst(_mm_set1_epi32(0x80000000))); + c.psubd(vb.get(), XmmConst(_mm_set1_epi32(0x80000000))); + c.pcmpgtd(va.get(), vb.get()); + c.psrld(va.get(), 31); + XmmFinalize(va, rt); + XmmFinalize(vb); + } + LOG_OPCODE(); + } + void AH(u32 rt, u32 ra, u32 rb) + { + if (ra == rb) + { + const XmmLink& va = XmmGet(ra, rt); + c.paddw(va.get(), va.get()); + XmmFinalize(va, rt); + } + else + { + const XmmLink& va = XmmGet(ra, rt); + if (const XmmLink* vb = XmmRead(rb)) + { + c.paddw(va.get(), vb->read()); + } + else + { + c.paddw(va.get(), cpu_xmm(GPR[rb])); + } + XmmFinalize(va, rt); + } + LOG_OPCODE(); + } + void NAND(u32 rt, u32 ra, u32 rb) + { + if (ra == rb) + { + // not + const XmmLink& va = XmmGet(ra, rt); + c.pxor(va.get(), XmmConst(_mm_set1_epi32(-1))); + XmmFinalize(va, rt); + } + else + { + // nand + const XmmLink& va = XmmGet(ra, rt); + if (const XmmLink* vb = 
XmmRead(rb)) + { + c.pand(va.get(), vb->read()); + } + else + { + c.pand(va.get(), cpu_xmm(GPR[rb])); + } + c.pxor(va.get(), XmmConst(_mm_set1_epi32(-1))); + XmmFinalize(va, rt); + } + LOG_OPCODE(); + } + void AVGB(u32 rt, u32 ra, u32 rb) + { + const XmmLink& vb = XmmGet(rb); + if (const XmmLink* va = XmmRead(ra)) + { + c.pavgb(vb.get(), va->read()); + } + else + { + c.pavgb(vb.get(), cpu_xmm(GPR[ra])); + } + XmmFinalize(vb, rt); + } + void MTSPR(u32 rt, u32 sa) + { + UNIMPLEMENTED(); + if(sa != 0) + { + CPU.SPR[sa]._u128.hi = CPU.GPR[rt]._u128.hi; + CPU.SPR[sa]._u128.lo = CPU.GPR[rt]._u128.lo; + } + } + void WRCH(u32 ra, u32 rt) + { + c.mov(cpu_qword(PC), (u32)CPU.PC); + WRAPPER_BEGIN(ra, rt, yy, zz); + CPU.WriteChannel(ra, CPU.GPR[rt]); + WRAPPER_END(ra, rt, 0, 0); + // TODO + + /*XmmInvalidate(rt); + + GpVar v(c, kVarTypeUInt32); + c.mov(v, cpu_dword(GPR[rt]._u32[3])); + switch (ra) + { + case MFC_LSA: + c.mov(cpu_dword(MFC1.LSA.m_value[0]), v); + break; + + case MFC_EAH: + c.mov(cpu_dword(MFC1.EAH.m_value[0]), v); + break; + + case MFC_EAL: + c.mov(cpu_dword(MFC1.EAL.m_value[0]), v); + break; + + case MFC_Size: + c.mov(cpu_word(MFC1.Size_Tag.m_val16[1]), v); + break; + + case MFC_TagID: + c.mov(cpu_word(MFC1.Size_Tag.m_val16[0]), v); + break; + + default: + { + X86X64CallNode* call = c.call(imm_ptr(&WRCH_wrapper::WRCH), kFuncConvHost, FuncBuilder2()); + call->setArg(0, imm_u(ra)); + call->setArg(1, v); + } + }*/ + } + void BIZ(u32 rt, u32 ra) + { + c.mov(cpu_qword(PC), (u32)CPU.PC); + do_finalize = true; + + c.mov(*addr, (u32)CPU.PC + 4); + c.mov(*pos_var, cpu_dword(GPR[ra]._u32[3])); + c.cmp(cpu_dword(GPR[rt]._u32[3]), 0); + c.cmovne(*pos_var, *addr); + c.shr(*pos_var, 2); + LOG_OPCODE(); + } + void BINZ(u32 rt, u32 ra) + { + c.mov(cpu_qword(PC), (u32)CPU.PC); + do_finalize = true; + + c.mov(*addr, (u32)CPU.PC + 4); + c.mov(*pos_var, cpu_dword(GPR[ra]._u32[3])); + c.cmp(cpu_dword(GPR[rt]._u32[3]), 0); + c.cmove(*pos_var, *addr); + c.shr(*pos_var, 2); + 
LOG_OPCODE(); + } + void BIHZ(u32 rt, u32 ra) + { + c.mov(cpu_qword(PC), (u32)CPU.PC); + do_finalize = true; + + c.mov(*addr, (u32)CPU.PC + 4); + c.mov(*pos_var, cpu_dword(GPR[ra]._u32[3])); + c.cmp(cpu_word(GPR[rt]._u16[6]), 0); + c.cmovne(*pos_var, *addr); + c.shr(*pos_var, 2); + LOG_OPCODE(); + } + void BIHNZ(u32 rt, u32 ra) + { + c.mov(cpu_qword(PC), (u32)CPU.PC); + do_finalize = true; + + c.mov(*addr, (u32)CPU.PC + 4); + c.mov(*pos_var, cpu_dword(GPR[ra]._u32[3])); + c.cmp(cpu_word(GPR[rt]._u16[6]), 0); + c.cmove(*pos_var, *addr); + c.shr(*pos_var, 2); + LOG_OPCODE(); + } + void STOPD(u32 rc, u32 ra, u32 rb) + { + UNIMPLEMENTED(); + Emu.Pause(); + } + void STQX(u32 rt, u32 ra, u32 rb) + { + c.mov(*addr, cpu_dword(GPR[ra]._u32[3])); + if (ra == rb) + { + c.add(*addr, *addr); + } + else + { + c.add(*addr, cpu_dword(GPR[rb]._u32[3])); + } + c.and_(*addr, 0x3fff0); + c.mov(*qw0, cpu_qword(GPR[rt]._u64[0])); + c.mov(*qw1, cpu_qword(GPR[rt]._u64[1])); + c.bswap(*qw0); + c.bswap(*qw1); + c.mov(qword_ptr(*ls_var, *addr, 0, 0), *qw1); + c.mov(qword_ptr(*ls_var, *addr, 0, 8), *qw0); + LOG_OPCODE(); + } + void BI(u32 ra) + { + c.mov(cpu_qword(PC), (u32)CPU.PC); + do_finalize = true; + + c.mov(*pos_var, cpu_dword(GPR[ra]._u32[3])); + c.shr(*pos_var, 2); + LOG_OPCODE(); + } + void BISL(u32 rt, u32 ra) + { + XmmInvalidate(rt); + + c.mov(cpu_qword(PC), (u32)CPU.PC); + do_finalize = true; + + c.xor_(*pos_var, *pos_var); + c.mov(cpu_dword(GPR[rt]._u32[0]), *pos_var); + c.mov(cpu_dword(GPR[rt]._u32[1]), *pos_var); + c.mov(cpu_dword(GPR[rt]._u32[2]), *pos_var); + c.mov(*pos_var, cpu_dword(GPR[ra]._u32[3])); + c.mov(cpu_dword(GPR[rt]._u32[3]), (u32)CPU.PC + 4); + c.shr(*pos_var, 2); + LOG_OPCODE(); + } + void IRET(u32 ra) + { + UNIMPLEMENTED(); + //SetBranch(SRR0); + } + void BISLED(u32 rt, u32 ra) + { + UNIMPLEMENTED(); + } + void HBR(u32 p, u32 ro, u32 ra) + { + LOG_OPCODE(); + } + void GB(u32 rt, u32 ra) + { + const XmmLink& va = XmmGet(ra, rt); + c.pand(va.get(), 
XmmConst(_mm_set1_epi32(1))); + c.pmullw(va.get(), XmmConst(_mm_set_epi32(8, 4, 2, 1))); + c.phaddd(va.get(), va.get()); + c.phaddd(va.get(), va.get()); + c.pand(va.get(), XmmConst(_mm_set_epi32(0xffffffff, 0, 0, 0))); + XmmFinalize(va, rt); + LOG_OPCODE(); + } + void GBH(u32 rt, u32 ra) + { + const XmmLink& va = XmmGet(ra, rt); + c.pand(va.get(), XmmConst(_mm_set1_epi16(1))); + c.pmullw(va.get(), XmmConst(_mm_set_epi16(128, 64, 32, 16, 8, 4, 2, 1))); + c.phaddw(va.get(), va.get()); + c.phaddw(va.get(), va.get()); + c.phaddw(va.get(), va.get()); + c.pand(va.get(), XmmConst(_mm_set_epi32(0xffff, 0, 0, 0))); + XmmFinalize(va, rt); + LOG_OPCODE(); + } + void GBB(u32 rt, u32 ra) + { + const XmmLink& va = XmmGet(ra, rt); + //c.pand(va.get(), XmmConst(_mm_set1_epi8(1))); // ??? + c.pslld(va.get(), 7); + c.pmovmskb(*addr, va.get()); + c.pxor(va.get(), va.get()); + c.pinsrw(va.get(), *addr, 6); + XmmFinalize(va, rt); + LOG_OPCODE(); + } + void FSM(u32 rt, u32 ra) + { + const XmmLink& vr = XmmAlloc(rt); + c.mov(*addr, cpu_dword(GPR[ra]._u32[3])); + c.and_(*addr, 0xf); + c.shl(*addr, 4); + c.movdqa(vr.get(), g_imm2_xmm(fsm_table[0], *addr)); + XmmFinalize(vr, rt); + LOG_OPCODE(); + } + void FSMH(u32 rt, u32 ra) + { + const XmmLink& vr = XmmAlloc(rt); + c.mov(*addr, cpu_dword(GPR[ra]._u32[3])); + c.and_(*addr, 0xff); + c.shl(*addr, 4); + c.movdqa(vr.get(), g_imm2_xmm(fsmh_table[0], *addr)); + XmmFinalize(vr, rt); + LOG_OPCODE(); + } + void FSMB(u32 rt, u32 ra) + { + const XmmLink& vr = XmmAlloc(rt); + c.mov(*addr, cpu_dword(GPR[ra]._u32[3])); + c.and_(*addr, 0xffff); + c.shl(*addr, 4); + c.movdqa(vr.get(), g_imm2_xmm(fsmb_table[0], *addr)); + XmmFinalize(vr, rt); + LOG_OPCODE(); + } + void FREST(u32 rt, u32 ra) + { + const XmmLink& va = XmmGet(ra, rt); + c.rcpps(va.get(), va.get()); + XmmFinalize(va, rt); + LOG_OPCODE(); + } + void FRSQEST(u32 rt, u32 ra) + { + const XmmLink& va = XmmGet(ra, rt); + c.andps(va.get(), XmmConst(_mm_set1_epi32(0x7fffffff))); // abs + 
c.rsqrtps(va.get(), va.get()); + XmmFinalize(va, rt); + LOG_OPCODE(); + } + void LQX(u32 rt, u32 ra, u32 rb) + { + XmmInvalidate(rt); + + c.mov(*addr, cpu_dword(GPR[ra]._u32[3])); + if (ra == rb) + { + c.add(*addr, *addr); + } + else + { + c.add(*addr, cpu_dword(GPR[rb]._u32[3])); + } + c.and_(*addr, 0x3fff0); + c.mov(*qw0, qword_ptr(*ls_var, *addr, 0, 0)); + c.mov(*qw1, qword_ptr(*ls_var, *addr, 0, 8)); + c.bswap(*qw0); + c.bswap(*qw1); + c.mov(cpu_qword(GPR[rt]._u64[0]), *qw1); + c.mov(cpu_qword(GPR[rt]._u64[1]), *qw0); + LOG_OPCODE(); + } + void ROTQBYBI(u32 rt, u32 ra, u32 rb) + { + const XmmLink& va = XmmGet(ra, rt); + c.mov(*addr, cpu_dword(GPR[rb]._u32[3])); + c.and_(*addr, 0xf << 3); + c.shl(*addr, 1); + c.pshufb(va.get(), g_imm2_xmm(rldq_pshufb[0], *addr)); + XmmFinalize(va, rt); + LOG_OPCODE(); + } + void ROTQMBYBI(u32 rt, u32 ra, u32 rb) + { + const XmmLink& va = XmmGet(ra, rt); + c.mov(*addr, cpu_dword(GPR[rb]._u32[3])); + c.shr(*addr, 3); + c.neg(*addr); + c.and_(*addr, 0x1f); + c.shl(*addr, 4); + c.pshufb(va.get(), g_imm2_xmm(srdq_pshufb[0], *addr)); + XmmFinalize(va, rt); + LOG_OPCODE(); + } + void SHLQBYBI(u32 rt, u32 ra, u32 rb) + { + const XmmLink& va = XmmGet(ra, rt); + c.mov(*addr, cpu_dword(GPR[rb]._u32[3])); + c.and_(*addr, 0x1f << 3); + c.shl(*addr, 1); + c.pshufb(va.get(), g_imm2_xmm(sldq_pshufb[0], *addr)); + XmmFinalize(va, rt); + LOG_OPCODE(); + } + void CBX(u32 rt, u32 ra, u32 rb) + { + c.mov(*addr, cpu_dword(GPR[ra]._u32[3])); + if (ra == rb) + { + c.add(*addr, *addr); + } + else + { + c.add(*addr, cpu_dword(GPR[rb]._u32[3])); + } + c.and_(*addr, 0xf); + c.neg(*addr); + c.add(*addr, 0xf); + const XmmLink& vr = XmmAlloc(rt); + c.movdqa(vr.get(), XmmConst(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f))); + XmmFinalize(vr, rt); + XmmInvalidate(rt); + c.mov(byte_ptr(*cpu_var, *addr, 0, offsetof(SPUThread, GPR[rt]._u8[0])), 0x03); + LOG_OPCODE(); + } + void CHX(u32 rt, u32 ra, u32 rb) + { + c.mov(*addr, 
cpu_dword(GPR[ra]._u32[3])); + if (ra == rb) + { + c.add(*addr, *addr); + } + else + { + c.add(*addr, cpu_dword(GPR[rb]._u32[3])); + } + c.and_(*addr, 0xe); + c.neg(*addr); + c.add(*addr, 0xe); + const XmmLink& vr = XmmAlloc(rt); + c.movdqa(vr.get(), XmmConst(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f))); + XmmFinalize(vr, rt); + XmmInvalidate(rt); + c.mov(word_ptr(*cpu_var, *addr, 0, offsetof(SPUThread, GPR[rt]._u16[0])), 0x0203); + LOG_OPCODE(); + } + /* CWX: addr = 0xc - ((GPR[ra]._u32[3] + GPR[rb]._u32[3]) & 0xc). GPR[rt] is filled with the 0x10..0x1f byte pattern, then the dword at byte offset addr is overwritten with 0x00010203. */ + void CWX(u32 rt, u32 ra, u32 rb) + { + c.mov(*addr, cpu_dword(GPR[ra]._u32[3])); + if (ra == rb) + { + c.add(*addr, *addr); + } + else + { + c.add(*addr, cpu_dword(GPR[rb]._u32[3])); + } + c.and_(*addr, 0xc); + c.neg(*addr); + c.add(*addr, 0xc); + const XmmLink& vr = XmmAlloc(rt); + c.movdqa(vr.get(), XmmConst(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f))); + XmmFinalize(vr, rt); + XmmInvalidate(rt); + c.mov(dword_ptr(*cpu_var, *addr, 0, offsetof(SPUThread, GPR[rt]._u32[0])), 0x00010203); + LOG_OPCODE(); + } + /* CDX: addr = 0x8 - ((GPR[ra]._u32[3] + GPR[rb]._u32[3]) & 0x8), i.e. byte offset 0 or 8 inside GPR[rt]. GPR[rt] is filled with the 0x10..0x1f byte pattern, then the 8 bytes at that offset are overwritten with the 64-bit value 0x0001020304050607. */ + void CDX(u32 rt, u32 ra, u32 rb) + { + c.mov(*addr, cpu_dword(GPR[ra]._u32[3])); + if (ra == rb) + { + c.add(*addr, *addr); + } + else + { + c.add(*addr, cpu_dword(GPR[rb]._u32[3])); + } + c.and_(*addr, 0x8); + c.neg(*addr); + c.add(*addr, 0x8); + const XmmLink& vr = XmmAlloc(rt); + c.movdqa(vr.get(), XmmConst(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f))); + XmmFinalize(vr, rt); + XmmInvalidate(rt); + /* The register image is little-endian (the constant above keeps 0x10111213 in the highest dword), so the low dword of the pair must receive 0x04050607 and the high dword 0x00010203. The halves were previously stored the other way round. */ + c.mov(dword_ptr(*cpu_var, *addr, 0, offsetof(SPUThread, GPR[rt]._u32[0])), 0x04050607); + c.mov(dword_ptr(*cpu_var, *addr, 0, offsetof(SPUThread, GPR[rt]._u32[1])), 0x00010203); + LOG_OPCODE(); + } + void ROTQBI(u32 rt, u32 ra, u32 rb) + { + XmmInvalidate(rt); + c.mov(*qw0, cpu_qword(GPR[ra]._u64[0])); + c.mov(*qw1, cpu_qword(GPR[ra]._u64[1])); + c.mov(*qw2, *qw0); + c.mov(*addr, cpu_dword(GPR[rb]._u32[3])); + c.and_(*addr, 7); + c.shld(*qw0, *qw1, *addr); + c.shld(*qw1, *qw2, *addr); + c.mov(cpu_qword(GPR[rt]._u64[0]), *qw0); +
c.mov(cpu_qword(GPR[rt]._u64[1]), *qw1); + LOG_OPCODE(); + } + void ROTQMBI(u32 rt, u32 ra, u32 rb) + { + XmmInvalidate(rt); + c.mov(*qw0, cpu_qword(GPR[ra]._u64[0])); + c.mov(*qw1, cpu_qword(GPR[ra]._u64[1])); + c.mov(*addr, cpu_dword(GPR[rb]._u32[3])); + c.neg(*addr); + c.and_(*addr, 7); + c.shrd(*qw0, *qw1, *addr); + c.shr(*qw1, *addr); + c.mov(cpu_qword(GPR[rt]._u64[0]), *qw0); + c.mov(cpu_qword(GPR[rt]._u64[1]), *qw1); + LOG_OPCODE(); + } + void SHLQBI(u32 rt, u32 ra, u32 rb) + { + XmmInvalidate(rt); + c.mov(*qw0, cpu_qword(GPR[ra]._u64[0])); + c.mov(*qw1, cpu_qword(GPR[ra]._u64[1])); + c.mov(*addr, cpu_dword(GPR[rb]._u32[3])); + c.and_(*addr, 7); + c.shld(*qw1, *qw0, *addr); + c.shl(*qw0, *addr); + c.mov(cpu_qword(GPR[rt]._u64[0]), *qw0); + c.mov(cpu_qword(GPR[rt]._u64[1]), *qw1); + LOG_OPCODE(); + } + void ROTQBY(u32 rt, u32 ra, u32 rb) + { + const XmmLink& va = XmmGet(ra, rt); + c.mov(*addr, cpu_dword(GPR[rb]._u32[3])); + c.and_(*addr, 0xf); + c.shl(*addr, 4); + c.pshufb(va.get(), g_imm2_xmm(rldq_pshufb[0], *addr)); + XmmFinalize(va, rt); + LOG_OPCODE(); + } + void ROTQMBY(u32 rt, u32 ra, u32 rb) + { + const XmmLink& va = XmmGet(ra, rt); + c.mov(*addr, cpu_dword(GPR[rb]._u32[3])); + c.neg(*addr); + c.and_(*addr, 0x1f); + c.shl(*addr, 4); + c.pshufb(va.get(), g_imm2_xmm(srdq_pshufb[0], *addr)); + XmmFinalize(va, rt); + LOG_OPCODE(); + } + void SHLQBY(u32 rt, u32 ra, u32 rb) + { + const XmmLink& va = XmmGet(ra, rt); + c.mov(*addr, cpu_dword(GPR[rb]._u32[3])); + c.and_(*addr, 0x1f); + c.shl(*addr, 4); + c.pshufb(va.get(), g_imm2_xmm(sldq_pshufb[0], *addr)); + XmmFinalize(va, rt); + LOG_OPCODE(); + } + void ORX(u32 rt, u32 ra) + { + XmmInvalidate(rt); + c.mov(*addr, cpu_dword(GPR[ra]._u32[0])); + c.or_(*addr, cpu_dword(GPR[ra]._u32[1])); + c.or_(*addr, cpu_dword(GPR[ra]._u32[2])); + c.or_(*addr, cpu_dword(GPR[ra]._u32[3])); + c.mov(cpu_dword(GPR[rt]._u32[3]), *addr); + c.xor_(*addr, *addr); + c.mov(cpu_dword(GPR[rt]._u32[0]), *addr); + 
c.mov(cpu_dword(GPR[rt]._u32[1]), *addr); + c.mov(cpu_dword(GPR[rt]._u32[2]), *addr); + LOG_OPCODE(); + } + /* CBD: addr = 0xf - ((GPR[ra]._u32[3] + i7) & 0xf). GPR[rt] is filled with the 0x10..0x1f byte pattern, then the byte at offset addr is overwritten with 0x03. */ + void CBD(u32 rt, u32 ra, s32 i7) + { + c.mov(*addr, cpu_dword(GPR[ra]._u32[3])); + c.add(*addr, i7); + c.and_(*addr, 0xf); + c.neg(*addr); + c.add(*addr, 0xf); + const XmmLink& vr = XmmAlloc(rt); + c.movdqa(vr.get(), XmmConst(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f))); + XmmFinalize(vr, rt); + XmmInvalidate(rt); + c.mov(byte_ptr(*cpu_var, *addr, 0, offsetof(SPUThread, GPR[rt]._u8[0])), 0x03); + LOG_OPCODE(); + } + /* CHD: same scheme as CBD with a 2-byte slot (mask 0xe) and the pattern 0x0203. */ + void CHD(u32 rt, u32 ra, s32 i7) + { + c.mov(*addr, cpu_dword(GPR[ra]._u32[3])); + c.add(*addr, i7); + c.and_(*addr, 0xe); + c.neg(*addr); + c.add(*addr, 0xe); + const XmmLink& vr = XmmAlloc(rt); + c.movdqa(vr.get(), XmmConst(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f))); + XmmFinalize(vr, rt); + XmmInvalidate(rt); + c.mov(word_ptr(*cpu_var, *addr, 0, offsetof(SPUThread, GPR[rt]._u16[0])), 0x0203); + LOG_OPCODE(); + } + /* CWD: same scheme as CBD with a 4-byte slot (mask 0xc) and the pattern 0x00010203. */ + void CWD(u32 rt, u32 ra, s32 i7) + { + c.mov(*addr, cpu_dword(GPR[ra]._u32[3])); + c.add(*addr, i7); + c.and_(*addr, 0xc); + c.neg(*addr); + c.add(*addr, 0xc); + const XmmLink& vr = XmmAlloc(rt); + c.movdqa(vr.get(), XmmConst(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f))); + XmmFinalize(vr, rt); + XmmInvalidate(rt); + c.mov(dword_ptr(*cpu_var, *addr, 0, offsetof(SPUThread, GPR[rt]._u32[0])), 0x00010203); + LOG_OPCODE(); + } + /* CDD: addr = 0x8 - ((GPR[ra]._u32[3] + i7) & 0x8), i.e. byte offset 0 or 8 inside GPR[rt]. GPR[rt] is filled with the 0x10..0x1f byte pattern, then the 8 bytes at that offset are overwritten with the 64-bit value 0x0001020304050607. */ + void CDD(u32 rt, u32 ra, s32 i7) + { + c.mov(*addr, cpu_dword(GPR[ra]._u32[3])); + c.add(*addr, i7); + c.and_(*addr, 0x8); + c.neg(*addr); + c.add(*addr, 0x8); + const XmmLink& vr = XmmAlloc(rt); + c.movdqa(vr.get(), XmmConst(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f))); + XmmFinalize(vr, rt); + XmmInvalidate(rt); + /* The register image is little-endian (the constant above keeps 0x10111213 in the highest dword), so the low dword of the pair must receive 0x04050607 and the high dword 0x00010203. The halves were previously stored the other way round. */ + c.mov(dword_ptr(*cpu_var, *addr, 0, offsetof(SPUThread, GPR[rt]._u32[0])), 0x04050607); + c.mov(dword_ptr(*cpu_var, *addr, 0, offsetof(SPUThread, GPR[rt]._u32[1])), 0x00010203); + LOG_OPCODE(); + } + void ROTQBII(u32 rt, u32
ra, s32 i7) + { + XmmInvalidate(rt); + c.mov(*qw0, cpu_qword(GPR[ra]._u64[0])); + c.mov(*qw1, cpu_qword(GPR[ra]._u64[1])); + c.mov(*qw2, *qw0); + c.shld(*qw0, *qw1, i7 & 0x7); + c.shld(*qw1, *qw2, i7 & 0x7); + c.mov(cpu_qword(GPR[rt]._u64[0]), *qw0); + c.mov(cpu_qword(GPR[rt]._u64[1]), *qw1); + LOG_OPCODE(); + } + void ROTQMBII(u32 rt, u32 ra, s32 i7) + { + XmmInvalidate(rt); + c.mov(*qw0, cpu_qword(GPR[ra]._u64[0])); + c.mov(*qw1, cpu_qword(GPR[ra]._u64[1])); + c.shrd(*qw0, *qw1, (0 - i7) & 0x7); + c.shr(*qw1, (0 - i7) & 0x7); + c.mov(cpu_qword(GPR[rt]._u64[0]), *qw0); + c.mov(cpu_qword(GPR[rt]._u64[1]), *qw1); + LOG_OPCODE(); + } + void SHLQBII(u32 rt, u32 ra, s32 i7) + { + XmmInvalidate(rt); + c.mov(*qw0, cpu_qword(GPR[ra]._u64[0])); + c.mov(*qw1, cpu_qword(GPR[ra]._u64[1])); + c.shld(*qw1, *qw0, i7 & 0x7); + c.shl(*qw0, i7 & 0x7); + c.mov(cpu_qword(GPR[rt]._u64[0]), *qw0); + c.mov(cpu_qword(GPR[rt]._u64[1]), *qw1); + LOG_OPCODE(); + } + void ROTQBYI(u32 rt, u32 ra, s32 i7) + { + const int s = i7 & 0xf; + if (s == 0) + { + // mov + if (ra != rt) + { + const XmmLink& va = XmmGet(ra, rt); + XmmFinalize(va, rt); + } + // else nop + } + else + { + const XmmLink& va = XmmGet(ra, rt); + const XmmLink& v1 = XmmCopy(va); + c.pslldq(va.get(), s); + c.psrldq(v1.get(), 16 - s); + c.por(va.get(), v1.get()); + XmmFinalize(va, rt); + XmmFinalize(v1); + } + LOG_OPCODE(); + } + void ROTQMBYI(u32 rt, u32 ra, s32 i7) + { + const int s = (0 - i7) & 0x1f; + if (s == 0) + { + if (ra != rt) + { + // mov + const XmmLink& va = XmmGet(ra, rt); + XmmFinalize(va, rt); + } + // else nop + } + else if (s > 15) + { + // zero + const XmmLink& v0 = XmmAlloc(rt); + c.pxor(v0.get(), v0.get()); + XmmFinalize(v0, rt); + } + else + { + // shift right + const XmmLink& va = XmmGet(ra, rt); + c.psrldq(va.get(), s); + XmmFinalize(va, rt); + } + LOG_OPCODE(); + } + void SHLQBYI(u32 rt, u32 ra, s32 i7) + { + const int s = i7 & 0x1f; + if (s == 0) + { + if (ra != rt) + { + // mov + const XmmLink& va = 
XmmGet(ra, rt); + XmmFinalize(va, rt); + } + // else nop + } + else if (s > 15) + { + // zero + const XmmLink& v0 = XmmAlloc(rt); + c.pxor(v0.get(), v0.get()); + XmmFinalize(v0, rt); + } + else + { + // shift left + const XmmLink& va = XmmGet(ra, rt); + c.pslldq(va.get(), s); + XmmFinalize(va, rt); + } + LOG_OPCODE(); + } + void NOP(u32 rt) + { + LOG_OPCODE(); + } + void CGT(u32 rt, u32 ra, u32 rb) + { + if (ra == rb) + { + // zero + const XmmLink& v0 = XmmAlloc(rt); + c.pxor(v0.get(), v0.get()); + XmmFinalize(v0, rt); + } + else + { + const XmmLink& va = XmmGet(ra, rt); + if (const XmmLink* vb = XmmRead(rb)) + { + c.pcmpgtd(va.get(), vb->read()); + } + else + { + c.pcmpgtd(va.get(), cpu_xmm(GPR[rb])); + } + XmmFinalize(va, rt); + } + LOG_OPCODE(); + } + void XOR(u32 rt, u32 ra, u32 rb) + { + if (ra == rb) + { + // zero + const XmmLink& v0 = XmmAlloc(rt); + c.pxor(v0.get(), v0.get()); + XmmFinalize(v0, rt); + } + else + { + // xor + const XmmLink& va = XmmGet(ra, rt); + if (const XmmLink* vb = XmmRead(rb)) + { + c.pxor(va.get(), vb->read()); + } + else + { + c.pxor(va.get(), cpu_xmm(GPR[rb])); + } + XmmFinalize(va, rt); + } + LOG_OPCODE(); + } + void CGTH(u32 rt, u32 ra, u32 rb) + { + if (ra == rb) + { + // zero + const XmmLink& v0 = XmmAlloc(rt); + c.pxor(v0.get(), v0.get()); + XmmFinalize(v0, rt); + } + else + { + const XmmLink& va = XmmGet(ra, rt); + if (const XmmLink* vb = XmmRead(rb)) + { + c.pcmpgtw(va.get(), vb->read()); + } + else + { + c.pcmpgtw(va.get(), cpu_xmm(GPR[rb])); + } + XmmFinalize(va, rt); + } + LOG_OPCODE(); + } + void EQV(u32 rt, u32 ra, u32 rb) + { + if (ra == rb) + { + const XmmLink& v1 = XmmAlloc(rt); + c.pcmpeqd(v1.get(), v1.get()); + XmmFinalize(v1, rt); + } + else + { + const XmmLink& vb = XmmGet(rb, rt); + c.pxor(vb.get(), XmmConst(_mm_set1_epi32(-1))); + if (const XmmLink* va = XmmRead(ra)) + { + c.pxor(vb.get(), va->read()); + } + else + { + c.pxor(vb.get(), cpu_xmm(GPR[ra])); + } + XmmFinalize(vb, rt); + } + LOG_OPCODE(); + } + void 
CGTB(u32 rt, u32 ra, u32 rb) + { + if (ra == rb) + { + // zero + const XmmLink& v0 = XmmAlloc(rt); + c.pxor(v0.get(), v0.get()); + XmmFinalize(v0, rt); + } + else + { + const XmmLink& va = XmmGet(ra, rt); + if (const XmmLink* vb = XmmRead(rb)) + { + c.pcmpgtb(va.get(), vb->read()); + } + else + { + c.pcmpgtb(va.get(), cpu_xmm(GPR[rb])); + } + XmmFinalize(va, rt); + } + LOG_OPCODE(); + } + void SUMB(u32 rt, u32 ra, u32 rb) + { + /*WRAPPER_BEGIN(rt, ra, rb, zz); + const SPU_GPR_hdr _a = CPU.GPR[ra]; + const SPU_GPR_hdr _b = CPU.GPR[rb]; + for (int w = 0; w < 4; w++) + { + CPU.GPR[rt]._u16[w*2] = _a._u8[w*4] + _a._u8[w*4 + 1] + _a._u8[w*4 + 2] + _a._u8[w*4 + 3]; + CPU.GPR[rt]._u16[w*2 + 1] = _b._u8[w*4] + _b._u8[w*4 + 1] + _b._u8[w*4 + 2] + _b._u8[w*4 + 3]; + } + WRAPPER_END(rt, ra, rb, 0);*/ + + const XmmLink& va = XmmGet(ra); + const XmmLink& vb = (ra == rb) ? XmmCopy(va) : XmmGet(rb); + const XmmLink& v1 = XmmCopy(vb, rt); + const XmmLink& v2 = XmmCopy(vb); + const XmmLink& vFF = XmmAlloc(); + c.movdqa(vFF.get(), XmmConst(_mm_set1_epi32(0xff))); + c.pand(v1.get(), vFF.get()); + c.psrld(v2.get(), 8); + c.pand(v2.get(), vFF.get()); + c.paddd(v1.get(), v2.get()); + c.movdqa(v2.get(), vb.get()); + c.psrld(v2.get(), 16); + c.pand(v2.get(), vFF.get()); + c.paddd(v1.get(), v2.get()); + c.movdqa(v2.get(), vb.get()); + c.psrld(v2.get(), 24); + c.paddd(v1.get(), v2.get()); + c.pslld(v1.get(), 16); + c.movdqa(v2.get(), va.get()); + c.pand(v2.get(), vFF.get()); + c.por(v1.get(), v2.get()); + c.movdqa(v2.get(), va.get()); + c.psrld(v2.get(), 8); + c.pand(v2.get(), vFF.get()); + c.paddd(v1.get(), v2.get()); + c.movdqa(v2.get(), va.get()); + c.psrld(v2.get(), 16); + c.pand(v2.get(), vFF.get()); + c.paddd(v1.get(), v2.get()); + c.movdqa(v2.get(), va.get()); + c.psrld(v2.get(), 24); + c.paddd(v1.get(), v2.get()); + XmmFinalize(vb); + XmmFinalize(va); + XmmFinalize(v1, rt); + XmmFinalize(v2); + XmmFinalize(vFF); + LOG_OPCODE(); + } + //HGT uses signed values. 
HLGT uses unsigned values + void HGT(u32 rt, s32 ra, s32 rb) + { + c.mov(*addr, cpu_dword(GPR[ra]._i32[3])); + c.cmp(*addr, cpu_dword(GPR[rb]._i32[3])); + c.mov(*addr, 0); + c.setg(*addr); + c.neg(*addr); + c.mov(*pos_var, (CPU.PC >> 2) + 1); + c.xor_(*pos_var, *addr); + do_finalize = true; + LOG_OPCODE(); + } + void CLZ(u32 rt, u32 ra) + { + XmmInvalidate(rt); + for (u32 i = 0; i < 4; i++) + { + c.bsr(*addr, cpu_dword(GPR[ra]._u32[i])); + c.cmovz(*addr, dword_ptr(*g_imm_var, offsetof(g_imm_table_struct, fsmb_table[0xffff]))); // load 0xffffffff + c.neg(*addr); + c.add(*addr, 31); + c.mov(cpu_dword(GPR[rt]._u32[i]), *addr); + } + LOG_OPCODE(); + } + void XSWD(u32 rt, u32 ra) + { + c.movsxd(*qw0, cpu_dword(GPR[ra]._i32[0])); + c.movsxd(*qw1, cpu_dword(GPR[ra]._i32[2])); + c.mov(cpu_qword(GPR[rt]._i64[0]), *qw0); + c.mov(cpu_qword(GPR[rt]._i64[1]), *qw1); + XmmInvalidate(rt); + LOG_OPCODE(); + } + void XSHW(u32 rt, u32 ra) + { + const XmmLink& va = XmmGet(ra, rt); + c.pslld(va.get(), 16); + c.psrad(va.get(), 16); + XmmFinalize(va, rt); + LOG_OPCODE(); + } + void CNTB(u32 rt, u32 ra) + { + XmmInvalidate(rt); + for (u32 i = 0; i < 8; i++) + { + c.movzx(*addr, cpu_word(GPR[ra]._u16[i])); + c.movzx(*addr, word_ptr(*g_imm_var, *addr, 1, offsetof(g_imm_table_struct, cntb_table[0]))); + c.mov(cpu_word(GPR[rt]._u16[i]), addr->r16()); + } + LOG_OPCODE(); + } + void XSBH(u32 rt, u32 ra) + { + const XmmLink& va = XmmGet(ra, rt); + c.psllw(va.get(), 8); + c.psraw(va.get(), 8); + XmmFinalize(va, rt); + LOG_OPCODE(); + } + void CLGT(u32 rt, u32 ra, u32 rb) + { + if (ra == rb) + { + // zero + const XmmLink& v0 = XmmAlloc(rt); + c.pxor(v0.get(), v0.get()); + XmmFinalize(v0, rt); + } + else + { + // compare if-greater-than + const XmmLink& va = XmmGet(ra, rt); + const XmmLink& vb = XmmGet(rb); + c.psubd(va.get(), XmmConst(_mm_set1_epi32(0x80000000))); + c.psubd(vb.get(), XmmConst(_mm_set1_epi32(0x80000000))); + c.pcmpgtd(va.get(), vb.get()); + XmmFinalize(va, rt); + XmmFinalize(vb); 
+ } + LOG_OPCODE(); + } + void ANDC(u32 rt, u32 ra, u32 rb) + { + if (ra == rb) + { + // zero + const XmmLink& v0 = XmmAlloc(rt); + c.pxor(v0.get(), v0.get()); + XmmFinalize(v0, rt); + } + else + { + // and not + const XmmLink& vb = XmmGet(rb, rt); + if (const XmmLink* va = XmmRead(ra)) + { + c.pandn(vb.get(), va->read()); + } + else + { + c.pandn(vb.get(), cpu_xmm(GPR[ra])); + } + XmmFinalize(vb, rt); + } + LOG_OPCODE(); + } + void FCGT(u32 rt, u32 ra, u32 rb) + { + // reverted less-than + const XmmLink& vb = XmmGet(rb, rt); + if (const XmmLink* va = XmmRead(ra)) + { + c.cmpps(vb.get(), va->read(), 1); + } + else + { + c.cmpps(vb.get(), cpu_xmm(GPR[ra]), 1); + } + XmmFinalize(vb, rt); + LOG_OPCODE(); + } + void DFCGT(u32 rt, u32 ra, u32 rb) + { + // reverted less-than + const XmmLink& vb = XmmGet(rb, rt); + if (const XmmLink* va = XmmRead(ra)) + { + c.cmppd(vb.get(), va->read(), 1); + } + else + { + c.cmppd(vb.get(), cpu_xmm(GPR[ra]), 1); + } + XmmFinalize(vb, rt); + LOG_OPCODE(); + } + void FA(u32 rt, u32 ra, u32 rb) + { + const XmmLink& va = XmmGet(ra, rt); + if (ra == rb) + { + c.addps(va.get(), va.get()); + } + else + { + if (const XmmLink* vb = XmmRead(rb)) + { + c.addps(va.get(), vb->read()); + } + else + { + c.addps(va.get(), cpu_xmm(GPR[rb])); + } + } + XmmFinalize(va, rt); + LOG_OPCODE(); + } + void FS(u32 rt, u32 ra, u32 rb) + { + if (ra == rb) + { + // zero (?) 
+ const XmmLink& v0 = XmmAlloc(rt); + /* v0 is a freshly allocated register that is never loaded here, so its contents are whatever was left in it. The former subps(v0, v0) yields NaN whenever those stale bits form a NaN/Inf pattern (e.g. an all-ones compare mask). pxor gives an unconditional zero, matching how the other methods in this file clear a register. */ + c.pxor(v0.get(), v0.get()); + XmmFinalize(v0, rt); + } + else + { + const XmmLink& va = XmmGet(ra, rt); + if (const XmmLink* vb = XmmRead(rb)) + { + c.subps(va.get(), vb->read()); + } + else + { + c.subps(va.get(), cpu_xmm(GPR[rb])); + } + XmmFinalize(va, rt); + } + LOG_OPCODE(); + } + /* FM: packed single-precision multiply, GPR[rt] = GPR[ra] * GPR[rb]; when ra == rb the value is multiplied by itself in one register. */ + void FM(u32 rt, u32 ra, u32 rb) + { + if (ra == rb) + { + const XmmLink& va = XmmGet(ra, rt); + c.mulps(va.get(), va.get()); + XmmFinalize(va, rt); + } + else + { + const XmmLink& va = XmmGet(ra, rt); + if (const XmmLink* vb = XmmRead(rb)) + { + c.mulps(va.get(), vb->read()); + } + else + { + c.mulps(va.get(), cpu_xmm(GPR[rb])); + } + XmmFinalize(va, rt); + } + LOG_OPCODE(); + } + /* CLGTH: per-halfword unsigned greater-than. Both operands are biased by 0x8000 so that the signed pcmpgtw produces the unsigned ordering. */ + void CLGTH(u32 rt, u32 ra, u32 rb) + { + if (ra == rb) + { + // zero + const XmmLink& v0 = XmmAlloc(rt); + c.pxor(v0.get(), v0.get()); + XmmFinalize(v0, rt); + } + else + { + // compare if-greater-than + const XmmLink& va = XmmGet(ra, rt); + const XmmLink& vb = XmmGet(rb); + c.psubw(va.get(), XmmConst(_mm_set1_epi32(0x80008000))); + c.psubw(vb.get(), XmmConst(_mm_set1_epi32(0x80008000))); + c.pcmpgtw(va.get(), vb.get()); + XmmFinalize(va, rt); + XmmFinalize(vb); + } + LOG_OPCODE(); + } + /* ORC: GPR[rt] = GPR[ra] | ~GPR[rb]; with ra == rb the result is all ones (pcmpeqd of a register with itself). */ + void ORC(u32 rt, u32 ra, u32 rb) + { + if (ra == rb) + { + const XmmLink& v1 = XmmAlloc(rt); + c.pcmpeqd(v1.get(), v1.get()); + XmmFinalize(v1, rt); + } + else + { + const XmmLink& vb = XmmGet(rb, rt); + c.pxor(vb.get(), XmmConst(_mm_set1_epi32(-1))); + if (const XmmLink* va = XmmRead(ra)) + { + c.por(vb.get(), va->read()); + } + else + { + c.por(vb.get(), cpu_xmm(GPR[ra])); + } + XmmFinalize(vb, rt); + } + LOG_OPCODE(); + } + /* FCMGT: compares magnitudes; both operands have their sign bit cleared, then |rb| < |ra| is evaluated with cmpps predicate 1 (less-than). */ + void FCMGT(u32 rt, u32 ra, u32 rb) + { + // reverted less-than + const XmmLink& vb = XmmGet(rb, rt); + const XmmLink& va = XmmGet(ra); + c.andps(vb.get(), XmmConst(_mm_set1_epi32(0x7fffffff))); // abs + c.andps(va.get(), XmmConst(_mm_set1_epi32(0x7fffffff))); // abs + c.cmpps(vb.get(), va.get(), 1); + XmmFinalize(vb, rt); + XmmFinalize(va); + LOG_OPCODE(); + } +
void DFCMGT(u32 rt, u32 ra, u32 rb) + { + // reverted less-than + const XmmLink& vb = XmmGet(rb, rt); + const XmmLink& va = XmmGet(ra); + c.andpd(vb.get(), XmmConst(_mm_set_epi32(0x7fffffff, 0xffffffff, 0x7fffffff, 0xffffffff))); // abs + c.andpd(va.get(), XmmConst(_mm_set_epi32(0x7fffffff, 0xffffffff, 0x7fffffff, 0xffffffff))); // abs + c.cmppd(vb.get(), va.get(), 1); + XmmFinalize(vb, rt); + XmmFinalize(va); + LOG_OPCODE(); + } + void DFA(u32 rt, u32 ra, u32 rb) + { + const XmmLink& va = XmmGet(ra, rt); + if (ra == rb) + { + c.addpd(va.get(), va.get()); + } + else + { + if (const XmmLink* vb = XmmRead(rb)) + { + c.addpd(va.get(), vb->read()); + } + else + { + c.addpd(va.get(), cpu_xmm(GPR[rb])); + } + } + XmmFinalize(va, rt); + LOG_OPCODE(); + } + void DFS(u32 rt, u32 ra, u32 rb) + { + if (ra == rb) + { + // zero (?) + const XmmLink& v0 = XmmAlloc(rt); + c.subpd(v0.get(), v0.get()); + XmmFinalize(v0, rt); + } + else + { + const XmmLink& va = XmmGet(ra, rt); + if (const XmmLink* vb = XmmRead(rb)) + { + c.subpd(va.get(), vb->read()); + } + else + { + c.subpd(va.get(), cpu_xmm(GPR[rb])); + } + XmmFinalize(va, rt); + } + LOG_OPCODE(); + } + void DFM(u32 rt, u32 ra, u32 rb) + { + if (ra == rb) + { + const XmmLink& va = XmmGet(ra, rt); + c.mulpd(va.get(), va.get()); + XmmFinalize(va, rt); + } + else + { + const XmmLink& va = XmmGet(ra, rt); + if (const XmmLink* vb = XmmRead(rb)) + { + c.mulpd(va.get(), vb->read()); + } + else + { + c.mulpd(va.get(), cpu_xmm(GPR[rb])); + } + XmmFinalize(va, rt); + } + LOG_OPCODE(); + } + void CLGTB(u32 rt, u32 ra, u32 rb) + { + if (ra == rb) + { + // zero + const XmmLink& v0 = XmmAlloc(rt); + c.pxor(v0.get(), v0.get()); + XmmFinalize(v0, rt); + } + else + { + // compare if-greater-than + const XmmLink& va = XmmGet(ra, rt); + const XmmLink& vb = XmmGet(rb); + c.psubb(va.get(), XmmConst(_mm_set1_epi32(0x80808080))); + c.psubb(vb.get(), XmmConst(_mm_set1_epi32(0x80808080))); + c.pcmpgtb(va.get(), vb.get()); + XmmFinalize(va, rt); + 
XmmFinalize(vb);
+        }
+        LOG_OPCODE();
+    }
+    void HLGT(u32 rt, u32 ra, u32 rb) // unsigned compare of ra._u32[3] against rb._u32[3]; when greater, the position value written to pos_var is bit-inverted
+    {
+        c.mov(*addr, cpu_dword(GPR[ra]._u32[3]));
+        c.cmp(*addr, cpu_dword(GPR[rb]._u32[3]));
+        c.mov(*addr, 0); // mov leaves the flags from cmp intact
+        c.seta(*addr); // 1 when above (unsigned greater)
+        c.neg(*addr); // 1 -> all ones, 0 stays 0
+        c.mov(*pos_var, (CPU.PC >> 2) + 1); // index of the following instruction
+        c.xor_(*pos_var, *addr); // unchanged when the condition is false, inverted when true (presumably recognised as a halt by the dispatcher - confirm)
+        do_finalize = true;
+        LOG_OPCODE();
+    }
+    void DFMA(u32 rt, u32 ra, u32 rb) // packed double: rt = rt + ra * rb
+    {
+        const XmmLink& vr = XmmGet(rt, rt);
+        const XmmLink& va = XmmGet(ra);
+        c.mulpd(va.get(), cpu_xmm(GPR[rb]));
+        c.addpd(vr.get(), va.get());
+        XmmFinalize(vr, rt);
+        XmmFinalize(va);
+        LOG_OPCODE();
+    }
+    void DFMS(u32 rt, u32 ra, u32 rb) // packed double: rt = ra * rb - rt
+    {
+        const XmmLink& vr = XmmGet(rt, rt);
+        const XmmLink& va = XmmGet(ra);
+        c.mulpd(va.get(), cpu_xmm(GPR[rb]));
+        c.xorpd(vr.get(), XmmConst(_mm_set_epi32(0x80000000, 0, 0x80000000, 0))); // neg
+        c.addpd(vr.get(), va.get()); // (-rt) + ra * rb
+        XmmFinalize(vr, rt);
+        XmmFinalize(va);
+        LOG_OPCODE();
+    }
+    void DFNMS(u32 rt, u32 ra, u32 rb) // packed double: rt = rt - ra * rb
+    {
+        const XmmLink& vr = XmmGet(rt, rt);
+        const XmmLink& va = XmmGet(ra);
+        c.mulpd(va.get(), cpu_xmm(GPR[rb]));
+        c.subpd(vr.get(), va.get());
+        XmmFinalize(vr, rt);
+        XmmFinalize(va);
+        LOG_OPCODE();
+    }
+    void DFNMA(u32 rt, u32 ra, u32 rb) // packed double: rt = -(rt + ra * rb)
+    {
+        const XmmLink& vr = XmmGet(rt, rt);
+        const XmmLink& va = XmmGet(ra);
+        c.mulpd(va.get(), cpu_xmm(GPR[rb]));
+        c.addpd(vr.get(), va.get());
+        c.xorpd(vr.get(), XmmConst(_mm_set_epi32(0x80000000, 0, 0x80000000, 0))); // neg
+        XmmFinalize(vr, rt);
+        XmmFinalize(va);
+        LOG_OPCODE();
+    }
+    void CEQ(u32 rt, u32 ra, u32 rb) // per-32-bit-lane equality mask of ra and rb (pcmpeqd)
+    {
+        if (ra == rb)
+        {
+            const XmmLink& v1 = XmmAlloc(rt);
+            c.pcmpeqd(v1.get(), v1.get()); // a register always equals itself: all ones
+            XmmFinalize(v1, rt);
+        }
+        else
+        {
+            const XmmLink& va = XmmGet(ra, rt);
+            if (const XmmLink* vb = XmmRead(rb)) // non-null link: use its read() operand, otherwise fall back to cpu_xmm(GPR[rb])
+            {
+                c.pcmpeqd(va.get(), vb->read());
+            }
+            else
+            {
+                c.pcmpeqd(va.get(), cpu_xmm(GPR[rb]));
+            }
+            XmmFinalize(va, rt);
+        }
+        LOG_OPCODE();
+    }
+    void MPYHHU(u32 rt, u32 ra, u32 rb) // multiplies the upper 16 bits of each 32-bit lane of ra and rb (logical shifts, so the halves are zero-extended)
+    {
+        const XmmLink& va = XmmGet(ra, rt);
+        const XmmLink& vb = (ra == rb) ?
XmmCopy(va) : XmmGet(rb);
+        c.psrld(va.get(), 16);
+        c.psrld(vb.get(), 16);
+        c.pmulld(va.get(), vb.get());
+        XmmFinalize(va, rt);
+        XmmFinalize(vb);
+        LOG_OPCODE();
+    }
+    void ADDX(u32 rt, u32 ra, u32 rb) // per 32-bit lane: rt = ra + rb + (rt & 1)
+    {
+        const XmmLink& vt = XmmGet(rt);
+        c.pand(vt.get(), XmmConst(_mm_set1_epi32(1))); // keep only bit 0 of the old rt
+        c.paddd(vt.get(), cpu_xmm(GPR[ra]));
+        c.paddd(vt.get(), cpu_xmm(GPR[rb]));
+        XmmFinalize(vt, rt);
+        LOG_OPCODE();
+    }
+    void SFX(u32 rt, u32 ra, u32 rb) // per 32-bit lane: rt = rb - ra - (~rt & 1)
+    {
+        const XmmLink& vt = XmmGet(rt);
+        if (ra == rb)
+        {
+            // load zero
+            const XmmLink& v0 = XmmAlloc(rt);
+            c.pandn(vt.get(), XmmConst(_mm_set1_epi32(1))); // vt = ~rt & 1
+            c.pxor(v0.get(), v0.get()); // rb - ra is 0 when both are the same register
+            c.psubd(v0.get(), vt.get());
+            XmmFinalize(v0, rt);
+        }
+        else
+        {
+            // sub
+            const XmmLink& vb = XmmGet(rb, rt);
+            c.pandn(vt.get(), XmmConst(_mm_set1_epi32(1))); // vt = ~rt & 1
+            c.psubd(vb.get(), cpu_xmm(GPR[ra]));
+            c.psubd(vb.get(), vt.get());
+            XmmFinalize(vb, rt);
+        }
+        XmmFinalize(vt);
+        LOG_OPCODE();
+    }
+    void CGX(u32 rt, u32 ra, u32 rb) //nf - not JIT-emitted: C++ body between WRAPPER_BEGIN/WRAPPER_END; stores bit 32 of ra + rb + (rt & 1) per word
+    {
+        WRAPPER_BEGIN(rt, ra, rb, zz);
+        for (int w = 0; w < 4; w++)
+            CPU.GPR[rt]._u32[w] = ((u64)CPU.GPR[ra]._u32[w] + (u64)CPU.GPR[rb]._u32[w] + (u64)(CPU.GPR[rt]._u32[w] & 1)) >> 32;
+        WRAPPER_END(rt, ra, rb, 0);
+    }
+    void BGX(u32 rt, u32 ra, u32 rb) //nf - C++ body between WRAPPER_BEGIN/WRAPPER_END; per word stores 0 when rb - ra - (1 - (rt & 1)) is negative, else 1
+    {
+        WRAPPER_BEGIN(rt, ra, rb, zz);
+        s64 nResult;
+
+        for (int w = 0; w < 4; w++)
+        {
+            nResult = (u64)CPU.GPR[rb]._u32[w] - (u64)CPU.GPR[ra]._u32[w] - (u64)(1 - (CPU.GPR[rt]._u32[w] & 1)); // 64-bit arithmetic so the sign of the 32-bit difference is visible
+            CPU.GPR[rt]._u32[w] = nResult < 0 ? 0 : 1;
+        }
+        WRAPPER_END(rt, ra, rb, 0);
+    }
+    void MPYHHA(u32 rt, u32 ra, u32 rb) // adds to rt the product of the upper 16 bits of each lane of ra and rb (arithmetic shifts, so the halves are sign-extended)
+    {
+        const XmmLink& vt = XmmGet(rt, rt);
+        const XmmLink& va = XmmGet(ra);
+        const XmmLink& vb = (ra == rb) ?
XmmCopy(va) : XmmGet(rb);
+        c.psrad(va.get(), 16);
+        c.psrad(vb.get(), 16);
+        c.pmulld(va.get(), vb.get());
+        c.paddd(vt.get(), va.get());
+        XmmFinalize(vt, rt);
+        XmmFinalize(va);
+        XmmFinalize(vb);
+        LOG_OPCODE();
+    }
+    void MPYHHAU(u32 rt, u32 ra, u32 rb) // adds to rt the product of the zero-extended upper 16 bits of each lane of ra and rb
+    {
+        const XmmLink& vt = XmmGet(rt, rt);
+        const XmmLink& va = XmmGet(ra);
+        const XmmLink& vb = (ra == rb) ? XmmCopy(va) : XmmGet(rb); // a copy is taken when both operands are the same register, so each can be shifted on its own
+        c.psrld(va.get(), 16);
+        c.psrld(vb.get(), 16);
+        c.pmulld(va.get(), vb.get());
+        c.paddd(vt.get(), va.get());
+        XmmFinalize(vt, rt);
+        XmmFinalize(va);
+        XmmFinalize(vb);
+        LOG_OPCODE();
+    }
+    void FSCRRD(u32 rt) // no code is generated; only UNIMPLEMENTED() is invoked
+    {
+        UNIMPLEMENTED();
+    }
+    void FESD(u32 rt, u32 ra) // widens floats 1 and 3 of ra into the two doubles of rt
+    {
+        const XmmLink& va = XmmGet(ra, rt);
+        c.shufps(va.get(), va.get(), 0x8d); // _f[0] = _f[1]; _f[1] = _f[3];
+        c.cvtps2pd(va.get(), va.get()); // converts the two low floats to doubles
+        XmmFinalize(va, rt);
+        LOG_OPCODE();
+    }
+    void FRDS(u32 rt, u32 ra) // narrows the two doubles of ra into floats 1 and 3 of rt, zeroing floats 0 and 2
+    {
+        const XmmLink& va = XmmGet(ra, rt);
+        c.cvtpd2ps(va.get(), va.get());
+        c.shufps(va.get(), va.get(), 0x72); // _f[1] = _f[0]; _f[3] = _f[1]; _f[0] = _f[2] = 0;
+        XmmFinalize(va, rt);
+        LOG_OPCODE();
+    }
+    void FSCRWR(u32 rt, u32 ra) // no code is generated; only UNIMPLEMENTED() is invoked
+    {
+        UNIMPLEMENTED();
+    }
+    void DFTSV(u32 rt, u32 ra, s32 i7) //nf - C++ body between WRAPPER_BEGIN/WRAPPER_END; each i7 bit selects a class of double, and a lane of rt becomes all ones when ra's lane belongs to any selected class
+    {
+        WRAPPER_BEGIN(rt, ra, i7, zz);
+        const u64 DoubleExpMask = 0x7ff0000000000000;
+        const u64 DoubleFracMask = 0x000fffffffffffff;
+        const u64 DoubleSignMask = 0x8000000000000000;
+        const SPU_GPR_hdr temp = CPU.GPR[ra]; // copied first because rt is reset next and may be the same register as ra
+        CPU.GPR[rt].Reset();
+        if (i7 & 1) //Negative Denorm Check (-, exp is zero, frac is non-zero)
+            for (int i = 0; i < 2; i++)
+            {
+                if (temp._u64[i] & DoubleFracMask)
+                    if ((temp._u64[i] & (DoubleSignMask | DoubleExpMask)) == DoubleSignMask)
+                        CPU.GPR[rt]._u64[i] = 0xffffffffffffffff;
+            }
+        if (i7 & 2) //Positive Denorm Check (+, exp is zero, frac is non-zero)
+            for (int i = 0; i < 2; i++)
+            {
+                if (temp._u64[i] & DoubleFracMask)
+                    if ((temp._u64[i] & (DoubleSignMask | DoubleExpMask)) == 0)
+                        CPU.GPR[rt]._u64[i] = 0xffffffffffffffff;
+            }
+        if (i7 & 4) //Negative Zero Check (-, exp is zero, frac is zero)
+            for (int i = 0; i < 2; i++)
+            {
+                if (temp._u64[i] == DoubleSignMask)
+                    CPU.GPR[rt]._u64[i] = 0xffffffffffffffff;
+            }
+        if (i7 & 8) //Positive Zero Check (+, exp is zero, frac is zero)
+            for (int i = 0; i < 2; i++)
+            {
+                if (temp._u64[i] == 0)
+                    CPU.GPR[rt]._u64[i] = 0xffffffffffffffff;
+            }
+        if (i7 & 16) //Negative Infinity Check (-, exp is 0x7ff, frac is zero)
+            for (int i = 0; i < 2; i++)
+            {
+                if (temp._u64[i] == (DoubleSignMask | DoubleExpMask))
+                    CPU.GPR[rt]._u64[i] = 0xffffffffffffffff;
+            }
+        if (i7 & 32) //Positive Infinity Check (+, exp is 0x7ff, frac is zero)
+            for (int i = 0; i < 2; i++)
+            {
+                if (temp._u64[i] == DoubleExpMask)
+                    CPU.GPR[rt]._u64[i] = 0xffffffffffffffff;
+            }
+        if (i7 & 64) //Not-a-Number Check (any sign, exp is 0x7ff, frac is non-zero)
+            for (int i = 0; i < 2; i++)
+            {
+                if (temp._u64[i] & DoubleFracMask)
+                    if ((temp._u64[i] & DoubleExpMask) == DoubleExpMask)
+                        CPU.GPR[rt]._u64[i] = 0xffffffffffffffff;
+            }
+        WRAPPER_END(rt, ra, i7, 0);
+    }
+    void FCEQ(u32 rt, u32 ra, u32 rb) // per-float equality mask of rb and ra
+    {
+        // compare equal
+        const XmmLink& vb = XmmGet(rb, rt);
+        if (const XmmLink* va = XmmRead(ra)) // non-null link: use its read() operand, otherwise fall back to cpu_xmm(GPR[ra])
+        {
+            c.cmpps(vb.get(), va->read(), 0); // predicate 0 = equal
+        }
+        else
+        {
+            c.cmpps(vb.get(), cpu_xmm(GPR[ra]), 0);
+        }
+        XmmFinalize(vb, rt);
+        LOG_OPCODE();
+    }
+    void DFCEQ(u32 rt, u32 ra, u32 rb) // per-double equality mask of rb and ra
+    {
+        // compare equal
+        const XmmLink& vb = XmmGet(rb, rt);
+        if (const XmmLink* va = XmmRead(ra))
+        {
+            c.cmppd(vb.get(), va->read(), 0); // predicate 0 = equal
+        }
+        else
+        {
+            c.cmppd(vb.get(), cpu_xmm(GPR[ra]), 0);
+        }
+        XmmFinalize(vb, rt);
+        LOG_OPCODE();
+    }
+    void MPY(u32 rt, u32 ra, u32 rb) // multiplies the sign-extended lower 16 bits of each 32-bit lane of ra and rb
+    {
+        const XmmLink& va = XmmGet(ra, rt);
+        const XmmLink& vb = (ra == rb) ?
XmmCopy(va) : XmmGet(rb);
+        c.pslld(va.get(), 16);
+        c.pslld(vb.get(), 16);
+        c.psrad(va.get(), 16);
+        c.psrad(vb.get(), 16);
+        c.pmulld(va.get(), vb.get());
+        XmmFinalize(va, rt);
+        XmmFinalize(vb);
+        LOG_OPCODE();
+    }
+    void MPYH(u32 rt, u32 ra, u32 rb) // per 32-bit lane: (upper half of ra * lower half of rb), low 16 bits of the product, shifted left by 16
+    {
+        const XmmLink& va = XmmGet(ra, rt);
+        const XmmLink& vb = (ra == rb) ? XmmCopy(va) : XmmGet(rb); // copy taken before va is shifted, so vb keeps the original value when ra == rb
+        c.psrld(va.get(), 16); // upper half of ra moves to the lower halfword, upper halfword becomes 0
+        c.pmullw(va.get(), vb.get()); // 16-bit multiply keeping the low 16 bits of each product
+        c.pslld(va.get(), 16);
+        XmmFinalize(va, rt);
+        XmmFinalize(vb);
+        LOG_OPCODE();
+    }
+    void MPYHH(u32 rt, u32 ra, u32 rb) // multiplies the sign-extended upper 16 bits of each 32-bit lane of ra and rb
+    {
+        const XmmLink& va = XmmGet(ra, rt);
+        const XmmLink& vb = (ra == rb) ? XmmCopy(va) : XmmGet(rb);
+        c.psrad(va.get(), 16);
+        c.psrad(vb.get(), 16);
+        c.pmulld(va.get(), vb.get());
+        XmmFinalize(va, rt);
+        XmmFinalize(vb);
+        LOG_OPCODE();
+    }
+    void MPYS(u32 rt, u32 ra, u32 rb) // per 32-bit lane: upper 16 bits of the signed 16x16 product of the lower halves, sign-extended to 32 bits
+    {
+        const XmmLink& va = XmmGet(ra, rt);
+        const XmmLink& vb = (ra == rb) ? XmmCopy(va) : XmmGet(rb);
+        c.pmulhw(va.get(), vb.get()); // high 16 bits of each signed 16-bit product
+        c.pslld(va.get(), 16); // drop the upper halfword's product ...
+        c.psrad(va.get(), 16); // ... and sign-extend the lower one
+        XmmFinalize(va, rt);
+        XmmFinalize(vb);
+        LOG_OPCODE();
+    }
+    void CEQH(u32 rt, u32 ra, u32 rb) // per-halfword equality mask of ra and rb (pcmpeqw)
+    {
+        if (ra == rb)
+        {
+            const XmmLink& v1 = XmmAlloc(rt);
+            c.pcmpeqw(v1.get(), v1.get()); // a register always equals itself: all ones
+            XmmFinalize(v1, rt);
+        }
+        else
+        {
+            const XmmLink& va = XmmGet(ra, rt);
+            if (const XmmLink* vb = XmmRead(rb)) // non-null link: use its read() operand, otherwise fall back to cpu_xmm(GPR[rb])
+            {
+                c.pcmpeqw(va.get(), vb->read());
+            }
+            else
+            {
+                c.pcmpeqw(va.get(), cpu_xmm(GPR[rb]));
+            }
+            XmmFinalize(va, rt);
+        }
+        LOG_OPCODE();
+    }
+    void FCMEQ(u32 rt, u32 ra, u32 rb) // per-float mask of |ra| == |rb|
+    {
+        const XmmLink& vb = XmmGet(rb, rt);
+        const XmmLink& va = XmmGet(ra);
+        c.andps(vb.get(), XmmConst(_mm_set1_epi32(0x7fffffff))); // abs
+        c.andps(va.get(), XmmConst(_mm_set1_epi32(0x7fffffff))); // abs
+        c.cmpps(vb.get(), va.get(), 0); // ==
+        XmmFinalize(vb, rt);
+        XmmFinalize(va);
+        LOG_OPCODE();
+    }
+    void DFCMEQ(u32 rt, u32 ra, u32 rb) // per-double mask of |ra| == |rb|
+    {
+        const XmmLink& vb = XmmGet(rb, rt);
+        const XmmLink& va = XmmGet(ra);
+        c.andpd(vb.get(), XmmConst(_mm_set_epi32(0x7fffffff, 0xffffffff, 0x7fffffff,
0xffffffff))); // abs + c.andpd(va.get(), XmmConst(_mm_set_epi32(0x7fffffff, 0xffffffff, 0x7fffffff, 0xffffffff))); // abs + c.cmppd(vb.get(), va.get(), 0); // == + XmmFinalize(vb, rt); + XmmFinalize(va); + LOG_OPCODE(); + } + void MPYU(u32 rt, u32 ra, u32 rb) + { + const XmmLink& va = XmmGet(ra, rt); + if (ra == rb) + { + c.pslld(va.get(), 16); + c.psrld(va.get(), 16); + c.pmulld(va.get(), va.get()); + } + else + { + const XmmLink& v1 = XmmAlloc(); + c.movdqa(v1.get(), XmmConst(_mm_set1_epi32(0xffff))); // load mask + c.pand(va.get(), v1.get()); // clear high words of each dword + c.pand(v1.get(), cpu_xmm(GPR[rb])); + c.pmulld(va.get(), v1.get()); + XmmFinalize(v1); + } + XmmFinalize(va, rt); + LOG_OPCODE(); + } + void CEQB(u32 rt, u32 ra, u32 rb) + { + if (ra == rb) + { + const XmmLink& v1 = XmmAlloc(rt); + c.pcmpeqb(v1.get(), v1.get()); + XmmFinalize(v1, rt); + } + else + { + const XmmLink& va = XmmGet(ra, rt); + if (const XmmLink* vb = XmmRead(rb)) + { + c.pcmpeqb(va.get(), vb->read()); + } + else + { + c.pcmpeqb(va.get(), cpu_xmm(GPR[rb])); + } + XmmFinalize(va, rt); + } + LOG_OPCODE(); + } + void FI(u32 rt, u32 ra, u32 rb) + { + const XmmLink& vb = XmmGet(rb); + XmmFinalize(vb, rt); + LOG_OPCODE(); + } + void HEQ(u32 rt, u32 ra, u32 rb) + { + c.mov(*addr, cpu_dword(GPR[ra]._i32[3])); + c.cmp(*addr, cpu_dword(GPR[rb]._i32[3])); + c.mov(*addr, 0); + c.sete(*addr); + c.neg(*addr); + c.mov(*pos_var, (CPU.PC >> 2) + 1); + c.xor_(*pos_var, *addr); + do_finalize = true; + LOG_OPCODE(); + } + + //0 - 9 + void CFLTS(u32 rt, u32 ra, s32 i8) + { + const XmmLink& va = XmmGet(ra, rt); + if (i8 != 173) + { + c.mulps(va.get(), XmmConst(_mm_set1_ps(pow(2, 173 - (i8 & 0xff))))); // scale + } + c.maxps(va.get(), XmmConst(_mm_set1_ps(-pow(2, 31)))); // saturate + c.minps(va.get(), XmmConst(_mm_set1_ps((float)0x7fffffff))); + c.cvttps2dq(va.get(), va.get()); // convert to ints with truncation + XmmFinalize(va, rt); + LOG_OPCODE(); + } + void CFLTU(u32 rt, u32 ra, s32 i8) + { + 
const XmmLink& va = XmmGet(ra, rt); + if (i8 != 173) + { + c.mulps(va.get(), XmmConst(_mm_set1_ps(pow(2, 173 - (i8 & 0xff))))); // scale + } + c.maxps(va.get(), XmmConst(_mm_set1_ps(0.0f))); // saturate + c.minps(va.get(), XmmConst(_mm_set1_ps((float)0xffffffff))); + const XmmLink& v1 = XmmCopy(va); + c.cmpps(v1.get(), XmmConst(_mm_set1_ps(pow(2, 31))), 5); // generate mask of big values + c.andps(v1.get(), XmmConst(_mm_set1_ps(pow(2, 32)))); // generate correction component + c.subps(va.get(), v1.get()); // subtract correction component + c.cvttps2dq(va.get(), va.get()); // convert to ints with truncation + XmmFinalize(va, rt); + XmmFinalize(v1); + LOG_OPCODE(); + } + void CSFLT(u32 rt, u32 ra, s32 i8) + { + const XmmLink& va = XmmGet(ra, rt); + c.cvtdq2ps(va.get(), va.get()); // convert to floats + if (i8 != 155) + { + c.mulps(va.get(), XmmConst(_mm_set1_ps(pow(2, (i8 & 0xff) - 155)))); // scale + } + XmmFinalize(va, rt); + LOG_OPCODE(); + } + void CUFLT(u32 rt, u32 ra, s32 i8) + { + const XmmLink& va = XmmGet(ra, rt); + const XmmLink& v1 = XmmCopy(va); + c.cvtdq2ps(va.get(), va.get()); // convert to floats + c.psrad(v1.get(), 32); // generate mask from sign bit + c.andps(v1.get(), XmmConst(_mm_set1_ps(pow(2, 32)))); // generate correction component + c.addps(va.get(), v1.get()); // add correction component + if (i8 != 155) + { + c.mulps(va.get(), XmmConst(_mm_set1_ps(pow(2, (i8 & 0xff) - 155)))); // scale + } + XmmFinalize(va, rt); + XmmFinalize(v1); + LOG_OPCODE(); + } + + //0 - 8 + void BRZ(u32 rt, s32 i16) + { + c.mov(cpu_qword(PC), (u32)CPU.PC); + do_finalize = true; + + c.mov(*addr, (CPU.PC >> 2) + 1); + c.mov(*pos_var, branchTarget(CPU.PC, i16) >> 2); + c.cmp(cpu_dword(GPR[rt]._u32[3]), 0); + c.cmovne(*pos_var, *addr); + LOG_OPCODE(); + } + void STQA(u32 rt, s32 i16) + { + const u32 lsa = (i16 << 2) & 0x3fff0; + c.mov(*qw0, cpu_qword(GPR[rt]._u64[0])); + c.mov(*qw1, cpu_qword(GPR[rt]._u64[1])); + c.bswap(*qw0); + c.bswap(*qw1); + c.mov(qword_ptr(*ls_var, 
lsa), *qw1);
+        c.mov(qword_ptr(*ls_var, lsa + 8), *qw0);
+        LOG_OPCODE();
+    }
+    void BRNZ(u32 rt, s32 i16) // pos_var = branch target index when GPR[rt]._u32[3] is non-zero, else the following instruction's index
+    {
+        c.mov(cpu_qword(PC), (u32)CPU.PC);
+        do_finalize = true;
+
+        c.mov(*addr, (CPU.PC >> 2) + 1); // fall-through index
+        c.mov(*pos_var, branchTarget(CPU.PC, i16) >> 2); // taken index
+        c.cmp(cpu_dword(GPR[rt]._u32[3]), 0);
+        c.cmove(*pos_var, *addr); // zero: replace the target with the fall-through
+        LOG_OPCODE();
+    }
+    void BRHZ(u32 rt, s32 i16) // as BRZ, but tests the 16-bit value GPR[rt]._u16[6]: target when it is zero
+    {
+        c.mov(cpu_qword(PC), (u32)CPU.PC);
+        do_finalize = true;
+
+        c.mov(*addr, (CPU.PC >> 2) + 1);
+        c.mov(*pos_var, branchTarget(CPU.PC, i16) >> 2);
+        c.cmp(cpu_word(GPR[rt]._u16[6]), 0);
+        c.cmovnz(*pos_var, *addr); // non-zero: fall through
+        LOG_OPCODE();
+    }
+    void BRHNZ(u32 rt, s32 i16) // target when the 16-bit value GPR[rt]._u16[6] is non-zero
+    {
+        c.mov(cpu_qword(PC), (u32)CPU.PC);
+        do_finalize = true;
+
+        c.mov(*addr, (CPU.PC >> 2) + 1);
+        c.mov(*pos_var, branchTarget(CPU.PC, i16) >> 2);
+        c.cmp(cpu_word(GPR[rt]._u16[6]), 0);
+        c.cmovz(*pos_var, *addr); // zero: fall through
+        LOG_OPCODE();
+    }
+    void STQR(u32 rt, s32 i16) // stores rt, byte-reversed, at the local-storage offset branchTarget(CPU.PC, i16) & 0x3fff0
+    {
+        const u32 lsa = branchTarget(CPU.PC, i16) & 0x3fff0; // the mask clears the low 4 bits, giving a 16-byte aligned offset
+        c.mov(*qw0, cpu_qword(GPR[rt]._u64[0]));
+        c.mov(*qw1, cpu_qword(GPR[rt]._u64[1]));
+        c.bswap(*qw0);
+        c.bswap(*qw1);
+        c.mov(qword_ptr(*ls_var, lsa), *qw1); // qwords are written in swapped order after the per-qword byte swap
+        c.mov(qword_ptr(*ls_var, lsa + 8), *qw0);
+        LOG_OPCODE();
+    }
+    void BRA(s32 i16) // unconditional branch; target computed with base 0 instead of CPU.PC
+    {
+        c.mov(cpu_qword(PC), (u32)CPU.PC);
+        do_finalize = true;
+
+        c.mov(*pos_var, branchTarget(0, i16) >> 2);
+        LOG_OPCODE();
+    }
+    void LQA(u32 rt, s32 i16) // loads 16 bytes from local-storage offset (i16 << 2) & 0x3fff0 into rt, byte-reversed
+    {
+        XmmInvalidate(rt); // rt is written straight to the GPR slot below (presumably this drops any XMM-cached copy - confirm)
+
+        const u32 lsa = (i16 << 2) & 0x3fff0;
+        c.mov(*qw0, qword_ptr(*ls_var, lsa));
+        c.mov(*qw1, qword_ptr(*ls_var, lsa + 8));
+        c.bswap(*qw0);
+        c.bswap(*qw1);
+        c.mov(cpu_qword(GPR[rt]._u64[0]), *qw1);
+        c.mov(cpu_qword(GPR[rt]._u64[1]), *qw0);
+        LOG_OPCODE();
+    }
+    void BRASL(u32 rt, s32 i16) // branch to a target computed with base 0, writing CPU.PC + 4 to rt._u32[3] and zero to the other three words
+    {
+        XmmInvalidate(rt);
+
+        c.mov(cpu_qword(PC), (u32)CPU.PC);
+        do_finalize = true;
+
+        c.xor_(*addr, *addr); // zero
+        c.mov(cpu_dword(GPR[rt]._u32[0]), *addr);
+        c.mov(cpu_dword(GPR[rt]._u32[1]), *addr);
+        c.mov(cpu_dword(GPR[rt]._u32[2]), *addr);
+        c.mov(cpu_dword(GPR[rt]._u32[3]), (u32)CPU.PC + 4);
+        c.mov(*pos_var, branchTarget(0, i16) >> 2);
+        LOG_OPCODE();
+    }
+    void BR(s32 i16) // unconditional branch: pos_var = branchTarget(CPU.PC, i16) >> 2
+    {
+        c.mov(cpu_qword(PC), (u32)CPU.PC);
+        do_finalize = true;
+
+        c.mov(*pos_var, branchTarget(CPU.PC, i16) >> 2);
+        LOG_OPCODE();
+    }
+    void FSMBI(u32 rt, s32 i16) // loads rt with the fsmb_table entry selected by the low 16 bits of i16
+    {
+        if (i16 == 0)
+        {
+            // zero
+            const XmmLink& v0 = XmmAlloc(rt);
+            c.pxor(v0.get(), v0.get());
+            XmmFinalize(v0, rt);
+        }
+        else
+        {
+            const XmmLink& vr = XmmAlloc(rt);
+            c.movdqa(vr.get(), g_imm_xmm(fsmb_table[i16 & 0xffff]));
+            XmmFinalize(vr, rt);
+        }
+        LOG_OPCODE();
+    }
+    void BRSL(u32 rt, s32 i16) // branch relative to CPU.PC, writing CPU.PC + 4 to rt._u32[3] and zero to the other three words
+    {
+        XmmInvalidate(rt); // rt is written straight to the GPR slot below (presumably this drops any XMM-cached copy - confirm)
+
+        c.mov(cpu_qword(PC), (u32)CPU.PC);
+        do_finalize = true;
+
+        c.xor_(*addr, *addr); // zero
+        c.mov(cpu_dword(GPR[rt]._u32[0]), *addr);
+        c.mov(cpu_dword(GPR[rt]._u32[1]), *addr);
+        c.mov(cpu_dword(GPR[rt]._u32[2]), *addr);
+        c.mov(cpu_dword(GPR[rt]._u32[3]), (u32)CPU.PC + 4);
+        c.mov(*pos_var, branchTarget(CPU.PC, i16) >> 2);
+        LOG_OPCODE();
+    }
+    void LQR(u32 rt, s32 i16) // loads 16 bytes from local-storage offset branchTarget(CPU.PC, i16) & 0x3fff0 into rt, byte-reversed
+    {
+        XmmInvalidate(rt);
+
+        const u32 lsa = branchTarget(CPU.PC, i16) & 0x3fff0;
+        c.mov(*qw0, qword_ptr(*ls_var, lsa));
+        c.mov(*qw1, qword_ptr(*ls_var, lsa + 8));
+        c.bswap(*qw0);
+        c.bswap(*qw1);
+        c.mov(cpu_qword(GPR[rt]._u64[0]), *qw1); // qwords are stored in swapped order after the per-qword byte swap
+        c.mov(cpu_qword(GPR[rt]._u64[1]), *qw0);
+        LOG_OPCODE();
+    }
+    void IL(u32 rt, s32 i16) // fills every 32-bit lane of rt with i16
+    {
+        const XmmLink& vr = XmmAlloc(rt);
+        if (i16 == 0)
+        {
+            c.pxor(vr.get(), vr.get()); // zero without a constant load
+        }
+        else if (i16 == -1)
+        {
+            c.pcmpeqd(vr.get(), vr.get()); // all ones without a constant load
+        }
+        else
+        {
+            c.movdqa(vr.get(), XmmConst(_mm_set1_epi32(i16)));
+        }
+        XmmFinalize(vr, rt);
+        LOG_OPCODE();
+    }
+    void ILHU(u32 rt, s32 i16) // fills every 32-bit lane of rt with i16 << 16
+    {
+        const XmmLink& vr = XmmAlloc(rt);
+        if (i16 == 0)
+        {
+            c.pxor(vr.get(), vr.get());
+        }
+        else
+        {
+            c.movdqa(vr.get(), XmmConst(_mm_set1_epi32(i16 << 16)));
+        }
+        XmmFinalize(vr, rt);
+        LOG_OPCODE();
+    }
+    void ILH(u32 rt, s32 i16) // fills every 16-bit lane of rt with i16
+    {
+        const XmmLink& vr = XmmAlloc(rt);
+        if (i16 == 0)
+        {
+            c.pxor(vr.get(), vr.get());
+        }
+        else
+        {
+            c.movdqa(vr.get(), XmmConst(_mm_set1_epi16(i16)));
+        }
+
XmmFinalize(vr, rt);
+        LOG_OPCODE();
+    }
+    void IOHL(u32 rt, s32 i16) // ORs the low 16 bits of i16 into every 32-bit lane of rt
+    {
+        if (i16 == 0)
+        {
+            // nop
+        }
+        else
+        {
+            const XmmLink& vt = XmmGet(rt, rt);
+            c.por(vt.get(), XmmConst(_mm_set1_epi32(i16 & 0xffff)));
+            XmmFinalize(vt, rt);
+        }
+        LOG_OPCODE();
+    }
+
+
+    //0 - 7
+    void ORI(u32 rt, u32 ra, s32 i10) // rt = ra | i10 per 32-bit lane, with shortcuts for i10 == -1 and i10 == 0
+    {
+        if (i10 == -1)
+        {
+            // fill with 1
+            const XmmLink& v1 = XmmAlloc(rt);
+            c.pcmpeqd(v1.get(), v1.get()); // x | all-ones is all ones; ra is not read
+            XmmFinalize(v1, rt);
+        }
+        else if (i10 == 0)
+        {
+            if (rt != ra)
+            {
+                // mov
+                const XmmLink& va = XmmGet(ra, rt); // no instruction is emitted here; the value obtained for ra is just finalized for rt
+                XmmFinalize(va, rt);
+            }
+            // else nop
+        }
+        else
+        {
+            const XmmLink& va = XmmGet(ra, rt);
+            c.por(va.get(), XmmConst(_mm_set1_epi32(i10)));
+            XmmFinalize(va, rt);
+        }
+        LOG_OPCODE();
+    }
+    void ORHI(u32 rt, u32 ra, s32 i10) // rt = ra | i10 per 16-bit lane, with the same shortcuts as ORI
+    {
+        if (i10 == -1)
+        {
+            // fill with 1
+            const XmmLink& v1 = XmmAlloc(rt);
+            c.pcmpeqd(v1.get(), v1.get());
+            XmmFinalize(v1, rt);
+        }
+        else if (i10 == 0)
+        {
+            if (rt != ra)
+            {
+                // mov
+                const XmmLink& va = XmmGet(ra, rt);
+                XmmFinalize(va, rt);
+            }
+            // else nop
+        }
+        else
+        {
+            const XmmLink& va = XmmGet(ra, rt);
+            c.por(va.get(), XmmConst(_mm_set1_epi16(i10)));
+            XmmFinalize(va, rt);
+        }
+        LOG_OPCODE();
+    }
+    void ORBI(u32 rt, u32 ra, s32 i10) // rt = ra | i10 per byte, with the same shortcuts as ORI
+    {
+        if (i10 == -1)
+        {
+            // fill with 1
+            const XmmLink& v1 = XmmAlloc(rt);
+            c.pcmpeqd(v1.get(), v1.get());
+            XmmFinalize(v1, rt);
+        }
+        else if (i10 == 0)
+        {
+            if (rt != ra)
+            {
+                // mov
+                const XmmLink& va = XmmGet(ra, rt);
+                XmmFinalize(va, rt);
+            }
+            // else nop
+        }
+        else
+        {
+            const XmmLink& va = XmmGet(ra, rt);
+            c.por(va.get(), XmmConst(_mm_set1_epi8(i10)));
+            XmmFinalize(va, rt);
+        }
+        LOG_OPCODE();
+    }
+    void SFI(u32 rt, u32 ra, s32 i10) // rt = i10 - ra per 32-bit lane; the immediate is materialized first, then ra is subtracted from it
+    {
+        if (i10 == 0)
+        {
+            // zero
+            const XmmLink& v0 = XmmAlloc(rt);
+            c.pxor(v0.get(), v0.get());
+            c.psubd(v0.get(), cpu_xmm(GPR[ra]));
+            XmmFinalize(v0, rt);
+        }
+        else if (i10 == -1)
+        {
+            // fill with 1
+            const XmmLink& v1 = XmmAlloc(rt);
+            c.pcmpeqd(v1.get(), v1.get());
+            c.psubd(v1.get(),
cpu_xmm(GPR[ra]));
+            XmmFinalize(v1, rt);
+        }
+        else
+        {
+            const XmmLink& vr = XmmAlloc(rt);
+            c.movdqa(vr.get(), XmmConst(_mm_set1_epi32(i10)));
+            c.psubd(vr.get(), cpu_xmm(GPR[ra]));
+            XmmFinalize(vr, rt);
+        }
+        LOG_OPCODE();
+    }
+    void SFHI(u32 rt, u32 ra, s32 i10) // rt = i10 - ra per 16-bit lane; the immediate is materialized first, then ra is subtracted from it
+    {
+        if (i10 == 0)
+        {
+            // zero
+            const XmmLink& v0 = XmmAlloc(rt);
+            c.pxor(v0.get(), v0.get());
+            c.psubw(v0.get(), cpu_xmm(GPR[ra]));
+            XmmFinalize(v0, rt);
+        }
+        else if (i10 == -1)
+        {
+            // fill with 1
+            const XmmLink& v1 = XmmAlloc(rt);
+            c.pcmpeqw(v1.get(), v1.get()); // all ones without a constant load
+            c.psubw(v1.get(), cpu_xmm(GPR[ra]));
+            XmmFinalize(v1, rt);
+        }
+        else
+        {
+            const XmmLink& vr = XmmAlloc(rt);
+            c.movdqa(vr.get(), XmmConst(_mm_set1_epi16(i10)));
+            c.psubw(vr.get(), cpu_xmm(GPR[ra]));
+            XmmFinalize(vr, rt);
+        }
+        LOG_OPCODE();
+    }
+    void ANDI(u32 rt, u32 ra, s32 i10) // rt = ra & i10 per 32-bit lane, with shortcuts for i10 == 0 and i10 == -1
+    {
+        if (i10 == 0)
+        {
+            // zero
+            const XmmLink& v0 = XmmAlloc(rt);
+            c.pxor(v0.get(), v0.get()); // x & 0 is 0; ra is not read
+            XmmFinalize(v0, rt);
+        }
+        else if (i10 == -1)
+        {
+            // mov
+            if (ra != rt)
+            {
+                const XmmLink& va = XmmGet(ra, rt); // no instruction is emitted here; the value obtained for ra is just finalized for rt
+                XmmFinalize(va, rt);
+            }
+            // else nop
+        }
+        else
+        {
+            const XmmLink& va = XmmGet(ra, rt);
+            c.pand(va.get(), XmmConst(_mm_set1_epi32(i10)));
+            XmmFinalize(va, rt);
+        }
+        LOG_OPCODE();
+    }
+    void ANDHI(u32 rt, u32 ra, s32 i10) // rt = ra & i10 per 16-bit lane, with the same shortcuts as ANDI
+    {
+        if (i10 == 0)
+        {
+            // zero
+            const XmmLink& v0 = XmmAlloc(rt);
+            c.pxor(v0.get(), v0.get());
+            XmmFinalize(v0, rt);
+        }
+        else if (i10 == -1)
+        {
+            // mov
+            if (ra != rt)
+            {
+                const XmmLink& va = XmmGet(ra, rt);
+                XmmFinalize(va, rt);
+            }
+            // else nop
+        }
+        else
+        {
+            const XmmLink& va = XmmGet(ra, rt);
+            c.pand(va.get(), XmmConst(_mm_set1_epi16(i10)));
+            XmmFinalize(va, rt);
+        }
+        LOG_OPCODE();
+    }
+    void ANDBI(u32 rt, u32 ra, s32 i10) // rt = ra & i10 per byte, with the same shortcuts as ANDI
+    {
+        if (i10 == 0)
+        {
+            // zero
+            const XmmLink& v0 = XmmAlloc(rt);
+            c.pxor(v0.get(), v0.get());
+            XmmFinalize(v0, rt);
+        }
+        else if (i10 == -1)
+        {
+            // mov
+            if (ra != rt)
+            {
+                const XmmLink& va = XmmGet(ra, rt);
+                XmmFinalize(va, rt);
+            }
+            // else nop
+
}
+        else
+        {
+            const XmmLink& va = XmmGet(ra, rt);
+            c.pand(va.get(), XmmConst(_mm_set1_epi8(i10)));
+            XmmFinalize(va, rt);
+        }
+        LOG_OPCODE();
+    }
+    void AI(u32 rt, u32 ra, s32 i10) // rt = ra + i10 per 32-bit lane
+    {
+        if (i10 == 0)
+        {
+            if (rt != ra)
+            {
+                // mov
+                const XmmLink& va = XmmGet(ra, rt); // no instruction is emitted here; the value obtained for ra is just finalized for rt
+                XmmFinalize(va, rt);
+            }
+            // else nop
+        }
+        else
+        {
+            // add
+            const XmmLink& va = XmmGet(ra, rt);
+            c.paddd(va.get(), XmmConst(_mm_set1_epi32(i10)));
+            XmmFinalize(va, rt);
+        }
+        LOG_OPCODE();
+    }
+    void AHI(u32 rt, u32 ra, s32 i10) // rt = ra + i10 per 16-bit lane
+    {
+        if (i10 == 0)
+        {
+            if (rt != ra)
+            {
+                // mov
+                const XmmLink& va = XmmGet(ra, rt);
+                XmmFinalize(va, rt);
+            }
+            // else nop
+        }
+        else
+        {
+            // add
+            const XmmLink& va = XmmGet(ra, rt);
+            c.paddw(va.get(), XmmConst(_mm_set1_epi16(i10)));
+            XmmFinalize(va, rt);
+        }
+        LOG_OPCODE();
+    }
+    void STQD(u32 rt, s32 i10, u32 ra) // i10 is shifted left by 4 while decoding; stores rt, byte-reversed, at local-storage offset (ra._u32[3] + i10) & 0x3fff0
+    {
+        c.mov(*addr, cpu_dword(GPR[ra]._u32[3]));
+        if (i10) c.add(*addr, i10); // the add is skipped for a zero displacement
+        c.and_(*addr, 0x3fff0); // clears the low 4 bits and everything above bit 17
+        c.mov(*qw0, cpu_qword(GPR[rt]._u64[0]));
+        c.mov(*qw1, cpu_qword(GPR[rt]._u64[1]));
+        c.bswap(*qw0);
+        c.bswap(*qw1);
+        c.mov(qword_ptr(*ls_var, *addr, 0, 0), *qw1); // qwords are written in swapped order after the per-qword byte swap
+        c.mov(qword_ptr(*ls_var, *addr, 0, 8), *qw0);
+        LOG_OPCODE();
+    }
+    void LQD(u32 rt, s32 i10, u32 ra) // i10 is shifted left by 4 while decoding; loads rt, byte-reversed, from local-storage offset (ra._u32[3] + i10) & 0x3fff0
+    {
+        XmmInvalidate(rt); // rt is written straight to the GPR slot below (presumably this drops any XMM-cached copy - confirm)
+
+        c.mov(*addr, cpu_dword(GPR[ra]._u32[3]));
+        if (i10) c.add(*addr, i10);
+        c.and_(*addr, 0x3fff0);
+        c.mov(*qw0, qword_ptr(*ls_var, *addr, 0, 0));
+        c.mov(*qw1, qword_ptr(*ls_var, *addr, 0, 8));
+        c.bswap(*qw0);
+        c.bswap(*qw1);
+        c.mov(cpu_qword(GPR[rt]._u64[0]), *qw1);
+        c.mov(cpu_qword(GPR[rt]._u64[1]), *qw0);
+        LOG_OPCODE();
+    }
+    void XORI(u32 rt, u32 ra, s32 i10) // rt = ra ^ i10 per 32-bit lane
+    {
+        const XmmLink& va = XmmGet(ra);
+        c.pxor(va.get(), XmmConst(_mm_set1_epi32(i10)));
+        XmmFinalize(va, rt);
+        LOG_OPCODE();
+    }
+    void XORHI(u32 rt, u32 ra, s32 i10) // rt = ra ^ i10 per 16-bit lane
+    {
+        const XmmLink& va = XmmGet(ra);
+        c.pxor(va.get(), XmmConst(_mm_set1_epi16(i10)));
+        XmmFinalize(va, rt);
+
LOG_OPCODE();
+    }
+    void XORBI(u32 rt, u32 ra, s32 i10) // rt = ra ^ i10 per byte
+    {
+        const XmmLink& va = XmmGet(ra);
+        c.pxor(va.get(), XmmConst(_mm_set1_epi8(i10)));
+        XmmFinalize(va, rt);
+        LOG_OPCODE();
+    }
+    void CGTI(u32 rt, u32 ra, s32 i10) // signed per-32-bit-lane mask of ra > i10
+    {
+        const XmmLink& va = XmmGet(ra);
+        c.pcmpgtd(va.get(), XmmConst(_mm_set1_epi32(i10)));
+        XmmFinalize(va, rt);
+        LOG_OPCODE();
+    }
+    void CGTHI(u32 rt, u32 ra, s32 i10) // signed per-16-bit-lane mask of ra > i10
+    {
+        const XmmLink& va = XmmGet(ra);
+        c.pcmpgtw(va.get(), XmmConst(_mm_set1_epi16(i10)));
+        XmmFinalize(va, rt);
+        LOG_OPCODE();
+    }
+    void CGTBI(u32 rt, u32 ra, s32 i10) // signed per-byte mask of ra > i10
+    {
+        const XmmLink& va = XmmGet(ra);
+        c.pcmpgtb(va.get(), XmmConst(_mm_set1_epi8(i10)));
+        XmmFinalize(va, rt);
+        LOG_OPCODE();
+    }
+    void HGTI(u32 rt, u32 ra, s32 i10) // signed compare of ra._i32[3] against i10; when greater, the position value written to pos_var is bit-inverted
+    {
+        c.mov(*addr, cpu_dword(GPR[ra]._i32[3]));
+        c.cmp(*addr, i10);
+        c.mov(*addr, 0); // mov leaves the flags from cmp intact
+        c.setg(*addr); // 1 when greater (signed)
+        c.neg(*addr); // 1 -> all ones, 0 stays 0
+        c.mov(*pos_var, (CPU.PC >> 2) + 1); // index of the following instruction
+        c.xor_(*pos_var, *addr); // unchanged when the condition is false, inverted when true (presumably recognised as a halt by the dispatcher - confirm)
+        do_finalize = true;
+        LOG_OPCODE();
+    }
+    void CLGTI(u32 rt, u32 ra, s32 i10) // unsigned per-32-bit-lane mask of ra > i10, built from the signed pcmpgtd after biasing both sides by 0x80000000
+    {
+        if (i10 == -1)
+        {
+            // zero
+            const XmmLink& v0 = XmmAlloc(rt);
+            c.pxor(v0.get(), v0.get()); // nothing is above 0xffffffff
+            XmmFinalize(v0, rt);
+        }
+        else
+        {
+            const XmmLink& va = XmmGet(ra);
+            c.psubd(va.get(), XmmConst(_mm_set1_epi32(0x80000000)));
+            c.pcmpgtd(va.get(), XmmConst(_mm_set1_epi32((u32)i10 - 0x80000000))); // the immediate is biased at generation time
+            XmmFinalize(va, rt);
+        }
+        LOG_OPCODE();
+    }
+    void CLGTHI(u32 rt, u32 ra, s32 i10) // unsigned per-16-bit-lane mask of ra > i10, using the same bias technique with 0x8000
+    {
+        if (i10 == -1)
+        {
+            // zero
+            const XmmLink& v0 = XmmAlloc(rt);
+            c.pxor(v0.get(), v0.get());
+            XmmFinalize(v0, rt);
+        }
+        else
+        {
+            const XmmLink& va = XmmGet(ra);
+            c.psubw(va.get(), XmmConst(_mm_set1_epi16((u16)0x8000)));
+            c.pcmpgtw(va.get(), XmmConst(_mm_set1_epi16((u16)i10 - 0x8000)));
+            XmmFinalize(va, rt);
+        }
+        LOG_OPCODE();
+    }
+    void CLGTBI(u32 rt, u32 ra, s32 i10) // unsigned per-byte mask of ra > i10, using the same bias technique with 0x80
+    {
+        if (i10 == -1)
+        {
+            // zero
+            const XmmLink& v0 = XmmAlloc(rt);
+            c.pxor(v0.get(), v0.get());
+            XmmFinalize(v0, rt);
+        }
+        else
+        {
+            const XmmLink& va = XmmGet(ra);
+            c.psubb(va.get(),
XmmConst(_mm_set1_epi8((s8)0x80)));
+            c.pcmpgtb(va.get(), XmmConst(_mm_set1_epi8((s8)i10 - 0x80)));
+            XmmFinalize(va, rt);
+        }
+        LOG_OPCODE();
+    }
+    void HLGTI(u32 rt, u32 ra, s32 i10) // unsigned compare of ra._u32[3] against i10; when above, the position value written to pos_var is bit-inverted
+    {
+        c.mov(*addr, cpu_dword(GPR[ra]._u32[3]));
+        c.cmp(*addr, i10);
+        c.mov(*addr, 0); // mov leaves the flags from cmp intact
+        c.seta(*addr); // 1 when above (unsigned greater)
+        c.neg(*addr); // 1 -> all ones, 0 stays 0
+        c.mov(*pos_var, (CPU.PC >> 2) + 1); // index of the following instruction
+        c.xor_(*pos_var, *addr); // unchanged when the condition is false, inverted when true (presumably recognised as a halt by the dispatcher - confirm)
+        do_finalize = true;
+        LOG_OPCODE();
+    }
+    void MPYI(u32 rt, u32 ra, s32 i10) // per 32-bit lane: sign-extended lower 16 bits of ra times i10
+    {
+        const XmmLink& va = XmmGet(ra, rt);
+        c.pslld(va.get(), 16); // shift up, then arithmetic shift down: sign-extends the lower halfword
+        c.psrad(va.get(), 16);
+        c.pmulld(va.get(), XmmConst(_mm_set1_epi32(i10)));
+        XmmFinalize(va, rt);
+        LOG_OPCODE();
+    }
+    void MPYUI(u32 rt, u32 ra, s32 i10) // per 32-bit lane: zero-extended lower 16 bits of ra times the low 16 bits of i10
+    {
+        const XmmLink& va = XmmGet(ra, rt);
+        c.pslld(va.get(), 16); // shift up, then logical shift down: clears the upper halfword
+        c.psrld(va.get(), 16);
+        c.pmulld(va.get(), XmmConst(_mm_set1_epi32(i10 & 0xffff)));
+        XmmFinalize(va, rt);
+        LOG_OPCODE();
+    }
+    void CEQI(u32 rt, u32 ra, s32 i10) // per-32-bit-lane mask of ra == i10
+    {
+        const XmmLink& va = XmmGet(ra);
+        c.pcmpeqd(va.get(), XmmConst(_mm_set1_epi32(i10)));
+        XmmFinalize(va, rt);
+        LOG_OPCODE();
+    }
+    void CEQHI(u32 rt, u32 ra, s32 i10) // per-16-bit-lane mask of ra == (s16)i10
+    {
+        const XmmLink& va = XmmGet(ra);
+        c.pcmpeqw(va.get(), XmmConst(_mm_set1_epi16((s16)i10)));
+        XmmFinalize(va, rt);
+        LOG_OPCODE();
+    }
+    void CEQBI(u32 rt, u32 ra, s32 i10) // per-byte mask of ra == (s8)i10
+    {
+        const XmmLink& va = XmmGet(ra);
+        c.pcmpeqb(va.get(), XmmConst(_mm_set1_epi8((s8)i10)));
+        XmmFinalize(va, rt);
+        LOG_OPCODE();
+    }
+    void HEQI(u32 rt, u32 ra, s32 i10) // compares ra._u32[3] with i10; when equal, the position value written to pos_var is bit-inverted
+    {
+        c.mov(*addr, cpu_dword(GPR[ra]._u32[3]));
+        c.cmp(*addr, i10);
+        c.mov(*addr, 0);
+        c.sete(*addr);
+        c.neg(*addr);
+        c.mov(*pos_var, (CPU.PC >> 2) + 1);
+        c.xor_(*pos_var, *addr);
+        do_finalize = true;
+        LOG_OPCODE();
+    }
+
+
+    //0 - 6
+    void HBRA(s32 ro, s32 i16) // emits no code; only LOG_OPCODE() runs
+    { //i16 is shifted left by 2 while decoding
+        LOG_OPCODE();
+    }
+    void HBRR(s32 ro, s32 i16) // emits no code; only LOG_OPCODE() runs
+    {
+        LOG_OPCODE();
+    }
+    void ILA(u32 rt, u32 i18) // fills every 32-bit lane of rt with the low 18 bits of i18
+    {
+        const XmmLink& vr = XmmAlloc(rt);
+        if (i18 == 0)
+        {
+            c.pxor(vr.get(), vr.get());
+        }
+        else
+        {
+            c.movdqa(vr.get(),
XmmConst(_mm_set1_epi32(i18 & 0x3ffff)));
+        }
+        XmmFinalize(vr, rt);
+        LOG_OPCODE();
+    }
+
+    //0 - 3
+    void SELB(u32 rt, u32 ra, u32 rb, u32 rc) // bit select: rt = (rb & rc) | (ra & ~rc)
+    {
+        const XmmLink& vb = XmmGet(rb);
+        const XmmLink& vc = XmmGet(rc);
+        c.pand(vb.get(), vc.get()); // bits of rb where rc is 1
+        c.pandn(vc.get(), cpu_xmm(GPR[ra])); // ~rc & ra: bits of ra where rc is 0
+        c.por(vb.get(), vc.get());
+        XmmFinalize(vb, rt);
+        XmmFinalize(vc);
+        LOG_OPCODE();
+    }
+    void SHUFB(u32 rt, u32 ra, u32 rb, u32 rc) // byte shuffle of ra/rb controlled by rc; the commented-out wrapper below is the per-byte reference for the SSE sequence that follows
+    {
+        /*WRAPPER_BEGIN(rc, rt, ra, rb);
+        const SPU_GPR_hdr _a = CPU.GPR[ra];
+        const SPU_GPR_hdr _b = CPU.GPR[rb];
+        for (int i = 0; i < 16; i++)
+        {
+            u8 b = CPU.GPR[rc]._u8[i];
+            if (b & 0x80)
+            {
+                if (b & 0x40)
+                {
+                    if (b & 0x20)
+                        CPU.GPR[rt]._u8[i] = 0x80;
+                    else
+                        CPU.GPR[rt]._u8[i] = 0xFF;
+                }
+                else
+                    CPU.GPR[rt]._u8[i] = 0x00;
+            }
+            else
+            {
+                if (b & 0x10)
+                    CPU.GPR[rt]._u8[i] = _b._u8[15 - (b & 0x0F)];
+                else
+                    CPU.GPR[rt]._u8[i] = _a._u8[15 - (b & 0x0F)];
+            }
+        }
+        WRAPPER_END(rc, rt, ra, rb);*/
+
+        const XmmLink& v0 = XmmGet(rc); // v0 = mask
+        const XmmLink& v1 = XmmAlloc();
+        const XmmLink& v2 = XmmCopy(v0); // v2 = mask
+        const XmmLink& v3 = XmmAlloc();
+        const XmmLink& v4 = XmmAlloc();
+        const XmmLink& vFF = XmmAlloc(rt);
+        // generate specific values:
+        c.movdqa(v1.get(), XmmConst(_mm_set1_epi32(0xe0e0e0e0))); // v1 = 11100000
+        c.movdqa(v3.get(), XmmConst(_mm_set1_epi32(0x80808080))); // v3 = 10000000
+        c.pand(v2.get(), v1.get()); // filter mask v2 = mask & 11100000
+        c.movdqa(vFF.get(), v2.get()); // and copy vFF = mask & 11100000
+        c.movdqa(v4.get(), XmmConst(_mm_set1_epi32(0xc0c0c0c0))); // v4 = 11000000
+        c.pcmpeqb(vFF.get(), v4.get()); // gen 0xff vFF = (mask & 11100000 == 11000000) ? 0xff : 0
+        c.movdqa(v4.get(), v2.get()); // copy again v4 = mask & 11100000
+        c.pand(v4.get(), v3.get()); // filter mask v4 = mask & 10000000
+        c.pcmpeqb(v2.get(), v1.get()); // v2 = (mask & 11100000 == 11100000) ? 0xff : 0
+        c.pcmpeqb(v4.get(), v3.get()); // v4 = (mask & 10000000 == 10000000) ? 0xff : 0
+        c.pand(v2.get(), v3.get()); // generate 0x80 v2 = (mask & 11100000 == 11100000) ? 0x80 : 0
+        c.por(vFF.get(), v2.get()); // merge 0xff, 0x80 vFF = (mask & 11100000 == 11000000) ? 0xff : (mask & 11100000 == 11100000) ? 0x80 : 0
+        c.pandn(v1.get(), v0.get()); // filter mask v1 = mask & 00011111
+        // select bytes from [rb]:
+        c.movdqa(v2.get(), XmmConst(_mm_set1_epi8(15))); // v2 = 00001111
+        c.pxor(v1.get(), XmmConst(_mm_set1_epi8(0x10))); // v1 = (mask & 00011111) ^ 00010000
+        c.psubb(v2.get(), v1.get()); // v2 = 00001111 - ((mask & 00011111) ^ 00010000)
+        c.movdqa(v1.get(), cpu_xmm(GPR[rb])); // v1 = rb
+        c.pshufb(v1.get(), v2.get()); // v1 = select(rb, 00001111 - ((mask & 00011111) ^ 00010000))
+        // select bytes from [ra]:
+        c.pxor(v2.get(), XmmConst(_mm_set1_epi32(0xf0f0f0f0))); // v2 = (00001111 - ((mask & 00011111) ^ 00010000)) ^ 11110000
+        c.movdqa(v3.get(), cpu_xmm(GPR[ra])); // v3 = ra
+        c.pshufb(v3.get(), v2.get()); // v3 = select(ra, (00001111 - ((mask & 00011111) ^ 00010000)) ^ 11110000)
+        c.por(v1.get(), v3.get()); // v1 = select(rb, 00001111 - ((mask & 00011111) ^ 00010000)) | (v3)
+        c.pandn(v4.get(), v1.get()); // filter result v4 = v1 & ((mask & 10000000 == 10000000) ? 0 : 0xff)
+        c.por(vFF.get(), v4.get()); // final merge vFF = (mask & 10000000 == 10000000) ? ((mask & 11100000 == 11000000) ? 0xff : (mask & 11100000 == 11100000) ? 0x80 : 0) : (v1)
+        XmmFinalize(vFF, rt);
+        XmmFinalize(v4);
+        XmmFinalize(v3);
+        XmmFinalize(v2);
+        XmmFinalize(v1);
+        XmmFinalize(v0);
+        LOG_OPCODE();
+    }
+    void MPYA(u32 rt, u32 ra, u32 rb, u32 rc) // per 32-bit lane: product of the sign-extended lower 16 bits of ra and rb, plus rc
+    {
+        const XmmLink& va = XmmGet(ra, rt);
+        const XmmLink& vb = XmmGet(rb);
+        c.pslld(va.get(), 16); // shift up, then arithmetic shift down: sign-extends the lower halfwords
+        c.pslld(vb.get(), 16);
+        c.psrad(va.get(), 16);
+        c.psrad(vb.get(), 16);
+        c.pmulld(va.get(), vb.get());
+        c.paddd(va.get(), cpu_xmm(GPR[rc]));
+        XmmFinalize(va, rt);
+        XmmFinalize(vb);
+        LOG_OPCODE();
+    }
+    void FNMS(u32 rt, u32 ra, u32 rb, u32 rc) // packed single: rt = rc - ra * rb
+    {
+        const XmmLink& va = XmmGet(ra);
+        const XmmLink& vc = (ra == rc) ? XmmCopy(va, rt) : XmmGet(rc, rt); // a copy is taken when ra and rc are the same register, because va is overwritten by the product
+
+        if (ra == rb)
+        {
+            c.mulps(va.get(), va.get());
+        }
+        else if (rb == rc)
+        {
+            c.mulps(va.get(), vc.get()); // vc still holds the unmodified rc value at this point
+        }
+        else
+        {
+            if (const XmmLink* vb = XmmRead(rb)) // non-null link: use its read() operand, otherwise fall back to cpu_xmm(GPR[rb])
+            {
+                c.mulps(va.get(), vb->read());
+            }
+            else
+            {
+                c.mulps(va.get(), cpu_xmm(GPR[rb]));
+            }
+        }
+        c.subps(vc.get(), va.get());
+        XmmFinalize(vc, rt);
+        XmmFinalize(va);
+        LOG_OPCODE();
+    }
+    void FMA(u32 rt, u32 ra, u32 rb, u32 rc)
+    {
+        if (ra != rb && rb != rc && rc != ra)
+        {
+            const XmmLink& va = XmmGet(ra, rt);
+            if (const XmmLink* vb = XmmRead(rb))
+            {
+                c.mulps(va.get(), vb->read());
+            }
+            else
+            {
+                c.mulps(va.get(), cpu_xmm(GPR[rb]));
+            }
+            if (const XmmLink* vc = XmmRead(rc))
+            {
+                c.addps(va.get(), vc->read());
+            }
+            else
+            {
+                c.addps(va.get(), cpu_xmm(GPR[rc]));
+            }
+            XmmFinalize(va, rt);
+        }
+        else if (ra == rb && rb == rc)
+        {
+            const XmmLink& va = XmmGet(ra, rt);
+            const XmmLink& vc = XmmCopy(va);
+            c.mulps(va.get(), va.get());
+            c.addps(va.get(), vc.get());
+            XmmFinalize(va, rt);
+            XmmFinalize(vc);
+        }
+        else if (ra == rb)
+        {
+            const XmmLink& va = XmmGet(ra, rt);
+            c.mulps(va.get(), va.get());
+            if (const XmmLink* vc = XmmRead(rc))
+            {
+                c.addps(va.get(), vc->read());
+            }
+            else
+            {
+                c.addps(va.get(), cpu_xmm(GPR[rc]));
+            }
+            XmmFinalize(va, rt);
+        }
+        else if (rb == rc)
+        {
+            const XmmLink& va = XmmGet(ra, rt);
+            if (const XmmLink*
vc = XmmRead(rc)) + { + c.mulps(va.get(), vc->read()); + c.addps(va.get(), vc->read()); + } + else + { + const XmmLink& vb = XmmGet(rb, rb); + c.mulps(va.get(), vb.get()); + c.addps(va.get(), vb.get()); + XmmFinalize(vb, rb); + } + XmmFinalize(va, rt); + } + else if (ra == rc) + { + const XmmLink& va = XmmGet(ra, rt); + const XmmLink& vc = XmmCopy(va); + if (const XmmLink* vb = XmmRead(rb)) + { + c.mulps(va.get(), vb->read()); + } + else + { + c.mulps(va.get(), cpu_xmm(GPR[rb])); + } + c.addps(va.get(), vc.get()); + XmmFinalize(va, rt); + XmmFinalize(vc); + } + else + { + throw __FUNCTION__"(): invalid case"; + } + LOG_OPCODE(); + } + void FMS(u32 rt, u32 ra, u32 rb, u32 rc) + { + if (ra != rb && rb != rc && rc != ra) + { + const XmmLink& va = XmmGet(ra, rt); + if (const XmmLink* vb = XmmRead(rb)) + { + c.mulps(va.get(), vb->read()); + } + else + { + c.mulps(va.get(), cpu_xmm(GPR[rb])); + } + if (const XmmLink* vc = XmmRead(rc)) + { + c.subps(va.get(), vc->read()); + } + else + { + c.subps(va.get(), cpu_xmm(GPR[rc])); + } + XmmFinalize(va, rt); + } + else if (ra == rb && rb == rc) + { + const XmmLink& va = XmmGet(ra, rt); + const XmmLink& vc = XmmCopy(va); + c.mulps(va.get(), va.get()); + c.subps(va.get(), vc.get()); + XmmFinalize(va, rt); + XmmFinalize(vc); + } + else if (ra == rb) + { + const XmmLink& va = XmmGet(ra, rt); + c.mulps(va.get(), va.get()); + if (const XmmLink* vc = XmmRead(rc)) + { + c.subps(va.get(), vc->read()); + } + else + { + c.subps(va.get(), cpu_xmm(GPR[rc])); + } + XmmFinalize(va, rt); + } + else if (rb == rc) + { + const XmmLink& va = XmmGet(ra, rt); + if (const XmmLink* vc = XmmRead(rc)) + { + c.mulps(va.get(), vc->read()); + c.subps(va.get(), vc->read()); + } + else + { + const XmmLink& vb = XmmGet(rb, rb); + c.mulps(va.get(), vb.get()); + c.subps(va.get(), vb.get()); + XmmFinalize(vb, rb); + } + XmmFinalize(va, rt); + } + else if (ra == rc) + { + const XmmLink& va = XmmGet(ra, rt); + const XmmLink& vc = XmmCopy(va); + if (const XmmLink* 
vb = XmmRead(rb)) + { + c.mulps(va.get(), vb->read()); + } + else + { + c.mulps(va.get(), cpu_xmm(GPR[rb])); + } + c.subps(va.get(), vc.get()); + XmmFinalize(va, rt); + XmmFinalize(vc); + } + else + { + throw __FUNCTION__"(): invalid case"; + } + LOG_OPCODE(); + } + + void UNK(u32 code, u32 opcode, u32 gcode) + { + UNK(fmt::Format("(SPURecompiler) Unimplemented opcode! (0x%08x, 0x%x, 0x%x)", code, opcode, gcode)); + } + + void UNK(const std::string& err) + { + ConLog.Error(err + fmt::Format(" #pc: 0x%x", CPU.PC)); + c.mov(cpu_qword(PC), (u32)CPU.PC); + do_finalize = true; + Emu.Pause(); + } + + +}; + +#undef c \ No newline at end of file diff --git a/rpcs3/Emu/Cell/SPURecompilerCore.cpp b/rpcs3/Emu/Cell/SPURecompilerCore.cpp new file mode 100644 index 0000000000..56d7e65d38 --- /dev/null +++ b/rpcs3/Emu/Cell/SPURecompilerCore.cpp @@ -0,0 +1,252 @@ +#include "stdafx.h" +#include "SPUInstrTable.h" +#include "SPUDisAsm.h" +#include "SPUInterpreter.h" +#include "SPURecompiler.h" + +static const g_imm_table_struct g_imm_table; + +SPURecompilerCore::SPURecompilerCore(SPUThread& cpu) +: m_enc(new SPURecompiler(cpu, *this)) +, inter(new SPUInterpreter(cpu)) +, CPU(cpu) +, first(true) +{ + memset(entry, 0, sizeof(entry)); +} + +SPURecompilerCore::~SPURecompilerCore() +{ + delete m_enc; + delete inter; +} + +void SPURecompilerCore::Decode(const u32 code) // decode instruction and run with interpreter +{ + (*SPU_instr::rrr_list)(inter, code); +} + +void SPURecompilerCore::Compile(u16 pos) +{ + const u64 stamp0 = get_system_time(); + u64 time0 = 0; + + SPUDisAsm dis_asm(CPUDisAsm_InterpreterMode); + + StringLogger stringLogger; + stringLogger.setOption(kLoggerOptionBinaryForm, true); + + Compiler compiler(&runtime); + m_enc->compiler = &compiler; + compiler.setLogger(&stringLogger); + + compiler.addFunc(kFuncConvHost, FuncBuilder4()); + const u16 start = pos; + u32 excess = 0; + entry[start].count = 0; + + GpVar cpu_var(compiler, kVarTypeIntPtr, "cpu"); + compiler.setArg(0, 
cpu_var); + compiler.alloc(cpu_var); + m_enc->cpu_var = &cpu_var; + + GpVar ls_var(compiler, kVarTypeIntPtr, "ls"); + compiler.setArg(1, ls_var); + compiler.alloc(ls_var); + m_enc->ls_var = &ls_var; + + GpVar imm_var(compiler, kVarTypeIntPtr, "imm"); + compiler.setArg(2, imm_var); + compiler.alloc(imm_var); + m_enc->imm_var = &imm_var; + + GpVar g_imm_var(compiler, kVarTypeIntPtr, "g_imm"); + compiler.setArg(3, g_imm_var); + compiler.alloc(g_imm_var); + m_enc->g_imm_var = &g_imm_var; + + GpVar pos_var(compiler, kVarTypeUInt32, "pos"); + m_enc->pos_var = &pos_var; + GpVar addr_var(compiler, kVarTypeUInt32, "addr"); + m_enc->addr = &addr_var; + GpVar qw0_var(compiler, kVarTypeUInt64, "qw0"); + m_enc->qw0 = &qw0_var; + GpVar qw1_var(compiler, kVarTypeUInt64, "qw1"); + m_enc->qw1 = &qw1_var; + GpVar qw2_var(compiler, kVarTypeUInt64, "qw2"); + m_enc->qw2 = &qw2_var; + + for (u32 i = 0; i < 16; i++) + { + m_enc->xmm_var[i].data = new XmmVar(compiler, kVarTypeXmm, fmt::Format("reg_%d", i).c_str()); + } + + compiler.xor_(pos_var, pos_var); + + while (true) + { + const u32 opcode = Memory.Read32(CPU.dmac.ls_offset + pos * 4); + m_enc->do_finalize = false; + if (opcode) + { + const u64 stamp1 = get_system_time(); + // disasm for logging: + dis_asm.dump_pc = CPU.dmac.ls_offset + pos * 4; + (*SPU_instr::rrr_list)(&dis_asm, opcode); + compiler.addComment(fmt::Format("SPU data: PC=0x%05x %s", pos * 4, dis_asm.last_opcode.c_str()).c_str()); + // compile single opcode: + (*SPU_instr::rrr_list)(m_enc, opcode); + // force finalization between every slice using absolute alignment + /*if ((pos % 128 == 127) && !m_enc->do_finalize) + { + compiler.mov(pos_var, pos + 1); + m_enc->do_finalize = true; + }*/ + entry[start].count++; + time0 += get_system_time() - stamp1; + } + else + { + m_enc->do_finalize = true; + } + bool fin = m_enc->do_finalize; + if (entry[pos].valid == re(opcode)) + { + excess++; + } + entry[pos].valid = re(opcode); + + if (fin) break; + CPU.PC += 4; + pos++; + } + + 
m_enc->XmmRelease(); + + for (u32 i = 0; i < 16; i++) + { + assert(!m_enc->xmm_var[i].taken); + delete m_enc->xmm_var[i].data; + m_enc->xmm_var[i].data = nullptr; + } + + const u64 stamp1 = get_system_time(); + compiler.ret(pos_var); + compiler.endFunc(); + entry[start].pointer = compiler.make(); + compiler.setLogger(nullptr); // crashes without it + + wxFile log; + log.Open(wxString::Format("SPUjit_%d.log", GetCurrentSPUThread().GetId()), first ? wxFile::write : wxFile::write_append); + log.Write(wxString::Format("========== START POSITION 0x%x ==========\n\n", start * 4)); + log.Write(wxString(stringLogger.getString())); + log.Write(wxString::Format("========== COMPILED %d (excess %d), time: [start=%lld (decoding=%lld), finalize=%lld]\n\n", + entry[start].count, excess, stamp1 - stamp0, time0, get_system_time() - stamp1)); + log.Close(); + m_enc->compiler = nullptr; + first = false; +} + +u8 SPURecompilerCore::DecodeMemory(const u64 address) +{ + assert(CPU.dmac.ls_offset == address - CPU.PC); + const u64 m_offset = CPU.dmac.ls_offset; + const u16 pos = (CPU.PC >> 2); + + //ConLog.Write("DecodeMemory: pos=%d", pos); + u32* ls = (u32*)&Memory[m_offset]; + + if (!pos) + { + ConLog.Error("SPURecompilerCore::DecodeMemory(): ls_addr = 0"); + Emu.Pause(); + return 0; + } + + if (entry[pos].pointer) + { + // check data (hard way) + bool is_valid = true; + /*for (u32 i = pos; i < (u32)(entry[pos].count + pos); i++) + { + if (entry[i].valid != ls[i]) + { + is_valid = false; + break; + } + }*/ + // invalidate if necessary + if (!is_valid) + { + // TODO + ConLog.Error("SPURecompilerCore::DecodeMemory(ls_addr=0x%x): code has changed", pos * sizeof(u32)); + Emu.Pause(); + return 0; + } + } + + bool did_compile = false; + if (!entry[pos].pointer) + { + Compile(pos); + did_compile = true; + if (entry[pos].valid == 0) + { + ConLog.Error("SPURecompilerCore::Compile(ls_addr=0x%x): branch to 0x0 opcode", pos * sizeof(u32)); + Emu.Pause(); + return 0; + } + } + + if 
(!entry[pos].pointer) + { + ConLog.Error("SPURecompilerCore::DecodeMemory(ls_addr=0x%x): compilation failed", pos * sizeof(u32)); + Emu.Pause(); + return 0; + } + + typedef u32(*Func)(const void* _cpu, const void* _ls, const void* _imm, const void* _g_imm); + + Func func = asmjit_cast(entry[pos].pointer); + + void* cpu = (u8*)&CPU.GPR[0] - offsetof(SPUThread, GPR[0]); // ugly cpu base offset detection + + //if (did_compile) + { + //LOG2_OPCODE("SPURecompilerCore::DecodeMemory(ls_addr=0x%x): NewPC = 0x%llx", address, (u64)res << 2); + //if (pos == 0x19c >> 2) + { + //Emu.Pause(); + //for (uint i = 0; i < 128; ++i) ConLog.Write("r%d = 0x%s", i, CPU.GPR[i].ToString().c_str()); + } + } + + u32 res = pos; + res = func(cpu, &Memory[m_offset], imm_table.data(), &g_imm_table); + + if (res > 0xffff) + { + CPU.Stop(); + res = ~res; + } + + if (did_compile) + { + //LOG2_OPCODE("SPURecompilerCore::DecodeMemory(ls_addr=0x%x): NewPC = 0x%llx", address, (u64)res << 2); + //if (pos == 0x340 >> 2) + { + //Emu.Pause(); + //for (uint i = 0; i < 128; ++i) ConLog.Write("r%d = 0x%s", i, CPU.GPR[i].ToString().c_str()); + } + } + + if ((res - 1) == (CPU.PC >> 2)) + { + return 4; + } + else + { + CPU.SetBranch((u64)res << 2); + return 0; + } +} \ No newline at end of file diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index e3cc2b8db0..f93e39f4d5 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -3,6 +3,7 @@ #include "Emu/Cell/SPUDecoder.h" #include "Emu/Cell/SPUInterpreter.h" #include "Emu/Cell/SPUDisAsm.h" +#include "Emu/Cell/SPURecompiler.h" SPUThread& GetCurrentSPUThread() { @@ -75,6 +76,8 @@ void SPUThread::DoRun() break; case 1: + m_dec = new SPURecompilerCore(*this); + break; case 2: m_dec = new SPUDecoder(*new SPUInterpreter(*this)); break; diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h index b4b9807f45..07894259b8 100644 --- a/rpcs3/Emu/Cell/SPUThread.h +++ b/rpcs3/Emu/Cell/SPUThread.h @@ -213,20 +213,21 
@@ public: union SPU_GPR_hdr { + u32 _u32[4]; + float _f[4]; u128 _u128; s128 _i128; __m128 _m128; __m128i _m128i; u64 _u64[2]; s64 _i64[2]; - u32 _u32[4]; s32 _i32[4]; u16 _u16[8]; s16 _i16[8]; u8 _u8[16]; s8 _i8[16]; double _d[2]; - float _f[4]; + SPU_GPR_hdr() {} @@ -243,9 +244,9 @@ union SPU_GPR_hdr union SPU_SPR_hdr { + u32 _u32[4]; u128 _u128; s128 _i128; - u32 _u32[4]; SPU_SPR_hdr() {} @@ -299,7 +300,6 @@ public: #else static const bool x86 = true; #endif - private: union _CRT_ALIGN(8) { struct { @@ -311,7 +311,6 @@ public: std::mutex m_lock; public: - Channel() { Init(); @@ -332,7 +331,7 @@ public: return false; } res = m_value[0]; - for (u32 i = 1; i < max_count; i++) // FIFO + if (max_count > 1) for (u32 i = 1; i < max_count; i++) // FIFO { m_value[i-1] = m_value[i]; } @@ -586,7 +585,7 @@ public: } } - Sleep(1); // hack + //Sleep(1); // hack switch(cmd & ~(MFC_BARRIER_MASK | MFC_FENCE_MASK | MFC_LIST_MASK | MFC_RESULT_MASK)) { @@ -1125,6 +1124,115 @@ public: if (Emu.IsStopped()) ConLog.Warning("%s(%s) aborted", __FUNCTION__, spu_ch_name[ch]); } + void DoStop(u32 code) + { + SetExitStatus(code); // exit code (not status) + + switch (code) + { + case 0x110: /* ===== sys_spu_thread_receive_event ===== */ + { + u32 spuq = 0; + if (!SPU.Out_MBox.Pop(spuq)) + { + ConLog.Error("sys_spu_thread_receive_event: cannot read Out_MBox"); + SPU.In_MBox.PushUncond(CELL_EINVAL); // ??? + return; + } + + if (SPU.In_MBox.GetCount()) + { + ConLog.Error("sys_spu_thread_receive_event(spuq=0x%x): In_MBox is not empty", spuq); + SPU.In_MBox.PushUncond(CELL_EBUSY); // ??? 
+ return; + } + + if (Ini.HLELogging.GetValue()) + { + ConLog.Write("sys_spu_thread_receive_event(spuq=0x%x)", spuq); + } + + EventQueue* eq; + if (!SPUQs.GetEventQueue(FIX_SPUQ(spuq), eq)) + { + SPU.In_MBox.PushUncond(CELL_EINVAL); // TODO: check error value + return; + } + + u32 tid = GetId(); + + eq->sq.push(tid); // add thread to sleep queue + + while (true) + { + switch (eq->owner.trylock(tid)) + { + case SMR_OK: + if (!eq->events.count()) + { + eq->owner.unlock(tid); + break; + } + else + { + u32 next = (eq->protocol == SYS_SYNC_FIFO) ? eq->sq.pop() : eq->sq.pop_prio(); + if (next != tid) + { + eq->owner.unlock(tid, next); + break; + } + } + case SMR_SIGNAL: + { + sys_event_data event; + eq->events.pop(event); + eq->owner.unlock(tid); + SPU.In_MBox.PushUncond(CELL_OK); + SPU.In_MBox.PushUncond(event.data1); + SPU.In_MBox.PushUncond(event.data2); + SPU.In_MBox.PushUncond(event.data3); + return; + } + case SMR_FAILED: break; + default: eq->sq.invalidate(tid); SPU.In_MBox.PushUncond(CELL_ECANCELED); return; + } + + Sleep(1); + if (Emu.IsStopped()) + { + ConLog.Warning("sys_spu_thread_receive_event(spuq=0x%x) aborted", spuq); + eq->sq.invalidate(tid); + return; + } + } + } + break; + case 0x102: + if (!SPU.Out_MBox.GetCount()) + { + ConLog.Error("sys_spu_thread_exit (no status, code 0x102)"); + } + else if (Ini.HLELogging.GetValue()) + { + // the real exit status + ConLog.Write("sys_spu_thread_exit (status=0x%x)", SPU.Out_MBox.GetValue()); + } + Stop(); + break; + default: + if (!SPU.Out_MBox.GetCount()) + { + ConLog.Error("Unknown STOP code: 0x%x (no message)", code); + } + else + { + ConLog.Error("Unknown STOP code: 0x%x (message=0x%x)", code, SPU.Out_MBox.GetValue()); + } + Stop(); + break; + } + } + bool IsGoodLSA(const u32 lsa) const { return Memory.IsGoodAddr(lsa + m_offset) && lsa < 0x40000; } virtual u8 ReadLS8 (const u32 lsa) const { return Memory.Read8 (lsa + m_offset); } // m_offset & 0x3fffc ????? 
virtual u16 ReadLS16 (const u32 lsa) const { return Memory.Read16 (lsa + m_offset); } diff --git a/rpcs3/Emu/SysCalls/Modules/cellSysutil.cpp b/rpcs3/Emu/SysCalls/Modules/cellSysutil.cpp index 87b5c68e01..e8e92d9fb4 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSysutil.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellSysutil.cpp @@ -532,7 +532,7 @@ int cellAudioOutGetSoundAvailability(u32 audioOut, u32 type, u32 fs, u32 option) option = 0; - int available = 8; // should be at least 2 + int available = 2; // should be at least 2 switch(fs) { @@ -573,7 +573,7 @@ int cellAudioOutGetSoundAvailability2(u32 audioOut, u32 type, u32 fs, u32 ch, u3 option = 0; - int available = 8; // should be at least 2 + int available = 2; // should be at least 2 switch(fs) { diff --git a/rpcs3/Emu/SysCalls/lv2/SC_SPU_Thread.cpp b/rpcs3/Emu/SysCalls/lv2/SC_SPU_Thread.cpp index 8fd8550215..92f2f174d9 100644 --- a/rpcs3/Emu/SysCalls/lv2/SC_SPU_Thread.cpp +++ b/rpcs3/Emu/SysCalls/lv2/SC_SPU_Thread.cpp @@ -397,7 +397,7 @@ int sys_spu_initialize(u32 max_usable_spu, u32 max_raw_spu) //181 int sys_spu_thread_write_ls(u32 id, u32 address, u64 value, u32 type) { - sc_spu.Warning("sys_spu_thread_write_ls(id=%d, address=0x%x, value=0x%llx, type=0x%x)", + sc_spu.Log("sys_spu_thread_write_ls(id=%d, address=0x%x, value=0x%llx, type=0x%x)", id, address, value, type); CPUThread* thr = Emu.GetCPU().GetThread(id); @@ -430,7 +430,7 @@ int sys_spu_thread_write_ls(u32 id, u32 address, u64 value, u32 type) //182 int sys_spu_thread_read_ls(u32 id, u32 address, mem64_t value, u32 type) { - sc_spu.Warning("sys_spu_thread_read_ls(id=%d, address=0x%x, value_addr=0x%x, type=0x%x)", + sc_spu.Log("sys_spu_thread_read_ls(id=%d, address=0x%x, value_addr=0x%x, type=0x%x)", id, address, value.GetAddr(), type); CPUThread* thr = Emu.GetCPU().GetThread(id); @@ -468,7 +468,7 @@ int sys_spu_thread_read_ls(u32 id, u32 address, mem64_t value, u32 type) //190 int sys_spu_thread_write_spu_mb(u32 id, u32 value) { - 
sc_spu.Warning("sys_spu_thread_write_spu_mb(id=%d, value=0x%x)", id, value); + sc_spu.Log("sys_spu_thread_write_spu_mb(id=%d, value=0x%x)", id, value); CPUThread* thr = Emu.GetCPU().GetThread(id); diff --git a/rpcs3/rpcs3.vcxproj b/rpcs3/rpcs3.vcxproj index 8f754b11cd..6d4a66dbff 100644 --- a/rpcs3/rpcs3.vcxproj +++ b/rpcs3/rpcs3.vcxproj @@ -95,7 +95,7 @@ - .\;..\wxWidgets\include;..\SDL-1.3.0-5538\include;..\SDL_image-1.2.10;..\pthreads-2.8.0;..\;..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\x86\Include;.\OpenAL\include;$(IncludePath) + .\;..\wxWidgets\include;..\SDL-1.3.0-5538\include;..\SDL_image-1.2.10;..\pthreads-2.8.0;..\;..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\x86\Include;.\OpenAL\include;$(IncludePath);..\asmjit\src\asmjit $(SolutionDir)bin\ ..\libs\$(Configuration)\;$(LibraryPath) $(ProjectName)-$(PlatformShortName)-dbg @@ -107,7 +107,7 @@ $(ProjectName)-$(PlatformShortName)-dbg - .\;..\wxWidgets\include;..\SDL-1.3.0-5538\include;..\SDL_image-1.2.10;..\pthreads-2.8.0;..\;..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\x86_64\Include;.\OpenAL\include;$(IncludePath) + .\;..\wxWidgets\include;..\SDL-1.3.0-5538\include;..\SDL_image-1.2.10;..\pthreads-2.8.0;..\;..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\x86_64\Include;.\OpenAL\include;$(IncludePath);..\asmjit\src\asmjit $(SolutionDir)bin\ ..\libs\$(Configuration)\;$(LibraryPath) $(ProjectName)-$(PlatformShortName)-dbg @@ -120,7 +120,7 @@ false - .\;..\wxWidgets\include;..\SDL-1.3.0-5538\include;..\SDL_image-1.2.10;..\pthreads-2.8.0;..\;..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\x86\Include;.\OpenAL\include;$(IncludePath) + .\;..\wxWidgets\include;..\SDL-1.3.0-5538\include;..\SDL_image-1.2.10;..\pthreads-2.8.0;..\;..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\x86\Include;.\OpenAL\include;$(IncludePath);..\asmjit\src\asmjit $(SolutionDir)bin\ ..\libs\$(Configuration)\;$(LibraryPath) false @@ -129,7 +129,7 @@ false - 
.\;..\wxWidgets\include;..\SDL-1.3.0-5538\include;..\SDL_image-1.2.10;..\pthreads-2.8.0;..\;..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\x86_64\Include;.\OpenAL\include;$(IncludePath) + .\;..\wxWidgets\include;..\SDL-1.3.0-5538\include;..\SDL_image-1.2.10;..\pthreads-2.8.0;..\;..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\x86_64\Include;.\OpenAL\include;$(IncludePath);..\asmjit\src\asmjit $(SolutionDir)bin\ ..\libs\$(Configuration)\;$(LibraryPath) false @@ -147,7 +147,7 @@ true - wxmsw31ud_adv.lib;wxbase31ud.lib;wxmsw31ud_core.lib;wxmsw31ud_aui.lib;wxtiffd.lib;wxjpegd.lib;wxpngd.lib;wxzlibd.lib;odbc32.lib;odbccp32.lib;comctl32.lib;ws2_32.lib;shlwapi.lib;winmm.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;rpcrt4.lib;avcodec.lib;avformat.lib;avutil.lib;swresample.lib;swscale.lib;OpenAL32.lib;EFX-Util.lib;%(AdditionalDependencies) + wxmsw31ud_adv.lib;wxbase31ud.lib;wxmsw31ud_core.lib;wxmsw31ud_aui.lib;wxtiffd.lib;wxjpegd.lib;wxpngd.lib;wxzlibd.lib;odbc32.lib;odbccp32.lib;comctl32.lib;ws2_32.lib;shlwapi.lib;winmm.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;rpcrt4.lib;avcodec.lib;avformat.lib;avutil.lib;swresample.lib;swscale.lib;OpenAL32.lib;EFX-Util.lib;asmjit.lib;%(AdditionalDependencies) %(IgnoreSpecificDefaultLibraries) false ..\wxWidgets\lib\vc_lib;..\ffmpeg\Windows\x86\lib;..\OpenAL\Win32 @@ -168,7 +168,7 @@ true - wxmsw31ud_adv.lib;wxbase31ud.lib;wxmsw31ud_core.lib;wxmsw31ud_aui.lib;wxtiffd.lib;wxjpegd.lib;wxpngd.lib;wxzlibd.lib;odbc32.lib;odbccp32.lib;comctl32.lib;ws2_32.lib;shlwapi.lib;winmm.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;rpcrt4.lib;avcodec.lib;avformat.lib;avutil.lib;swresample.lib;swscale.lib;OpenAL32.lib;EFX-Util.lib;%(AdditionalDependencies) + 
wxmsw31ud_adv.lib;wxbase31ud.lib;wxmsw31ud_core.lib;wxmsw31ud_aui.lib;wxtiffd.lib;wxjpegd.lib;wxpngd.lib;wxzlibd.lib;odbc32.lib;odbccp32.lib;comctl32.lib;ws2_32.lib;shlwapi.lib;winmm.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;rpcrt4.lib;avcodec.lib;avformat.lib;avutil.lib;swresample.lib;swscale.lib;OpenAL32.lib;EFX-Util.lib;asmjit.lib;%(AdditionalDependencies) %(IgnoreSpecificDefaultLibraries) false ..\wxWidgets\lib\vc_lib;..\ffmpeg\Windows\x86\lib;..\OpenAL\Win32 @@ -188,7 +188,7 @@ true - wxmsw31ud_adv.lib;wxbase31ud.lib;wxmsw31ud_core.lib;wxmsw31ud_aui.lib;wxtiffd.lib;wxjpegd.lib;wxpngd.lib;wxzlibd.lib;odbc32.lib;odbccp32.lib;comctl32.lib;ws2_32.lib;shlwapi.lib;winmm.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;rpcrt4.lib;avcodec.lib;avformat.lib;avutil.lib;swresample.lib;swscale.lib;OpenAL32.lib;EFX-Util.lib;%(AdditionalDependencies) + wxmsw31ud_adv.lib;wxbase31ud.lib;wxmsw31ud_core.lib;wxmsw31ud_aui.lib;wxtiffd.lib;wxjpegd.lib;wxpngd.lib;wxzlibd.lib;odbc32.lib;odbccp32.lib;comctl32.lib;ws2_32.lib;shlwapi.lib;winmm.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;rpcrt4.lib;avcodec.lib;avformat.lib;avutil.lib;swresample.lib;swscale.lib;OpenAL32.lib;EFX-Util.lib;asmjit.lib;%(AdditionalDependencies) %(IgnoreSpecificDefaultLibraries) false ..\wxWidgets\lib\vc_x64_lib;..\ffmpeg\Windows\x86_64\lib;..\OpenAL\Win64 @@ -211,7 +211,7 @@ true - 
wxmsw31ud_adv.lib;wxbase31ud.lib;wxmsw31ud_core.lib;wxmsw31ud_aui.lib;wxtiffd.lib;wxjpegd.lib;wxpngd.lib;wxzlibd.lib;odbc32.lib;odbccp32.lib;comctl32.lib;ws2_32.lib;shlwapi.lib;winmm.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;rpcrt4.lib;avcodec.lib;avformat.lib;avutil.lib;swresample.lib;swscale.lib;OpenAL32.lib;EFX-Util.lib;%(AdditionalDependencies) + wxmsw31ud_adv.lib;wxbase31ud.lib;wxmsw31ud_core.lib;wxmsw31ud_aui.lib;wxtiffd.lib;wxjpegd.lib;wxpngd.lib;wxzlibd.lib;odbc32.lib;odbccp32.lib;comctl32.lib;ws2_32.lib;shlwapi.lib;winmm.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;rpcrt4.lib;avcodec.lib;avformat.lib;avutil.lib;swresample.lib;swscale.lib;OpenAL32.lib;EFX-Util.lib;asmjit.lib;%(AdditionalDependencies) %(IgnoreSpecificDefaultLibraries) false ..\wxWidgets\lib\vc_x64_lib;..\ffmpeg\Windows\x86_64\lib;..\OpenAL\Win64 @@ -243,7 +243,7 @@ true true true - wxmsw31u_adv.lib;wxbase31u.lib;wxmsw31u_core.lib;wxmsw31u_aui.lib;odbc32.lib;odbccp32.lib;comctl32.lib;ws2_32.lib;shlwapi.lib;winmm.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;rpcrt4.lib;wxtiff.lib;wxjpeg.lib;wxpng.lib;wxzlib.lib;wxregexu.lib;wxexpat.lib;wsock32.lib;wininet.lib;avcodec.lib;avformat.lib;avutil.lib;swresample.lib;swscale.lib;OpenAL32.lib;EFX-Util.lib + wxmsw31u_adv.lib;wxbase31u.lib;wxmsw31u_core.lib;wxmsw31u_aui.lib;odbc32.lib;odbccp32.lib;comctl32.lib;ws2_32.lib;shlwapi.lib;winmm.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;rpcrt4.lib;wxtiff.lib;wxjpeg.lib;wxpng.lib;wxzlib.lib;wxregexu.lib;wxexpat.lib;wsock32.lib;wininet.lib;avcodec.lib;avformat.lib;avutil.lib;swresample.lib;swscale.lib;OpenAL32.lib;EFX-Util.lib;asmjit.lib;%(AdditionalDependencies) %(IgnoreSpecificDefaultLibraries) @@ -276,7 
+276,7 @@ true true true - wxmsw31u_adv.lib;wxbase31u.lib;wxmsw31u_core.lib;wxmsw31u_aui.lib;odbc32.lib;odbccp32.lib;comctl32.lib;ws2_32.lib;shlwapi.lib;winmm.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;rpcrt4.lib;wxtiff.lib;wxjpeg.lib;wxpng.lib;wxzlib.lib;wxregexu.lib;wxexpat.lib;wsock32.lib;wininet.lib;avcodec.lib;avformat.lib;avutil.lib;swresample.lib;swscale.lib;OpenAL32.lib;EFX-Util.lib;%(AdditionalDependencies) + wxmsw31u_adv.lib;wxbase31u.lib;wxmsw31u_core.lib;wxmsw31u_aui.lib;odbc32.lib;odbccp32.lib;comctl32.lib;ws2_32.lib;shlwapi.lib;winmm.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;rpcrt4.lib;wxtiff.lib;wxjpeg.lib;wxpng.lib;wxzlib.lib;wxregexu.lib;wxexpat.lib;wsock32.lib;wininet.lib;avcodec.lib;avformat.lib;avutil.lib;swresample.lib;swscale.lib;OpenAL32.lib;EFX-Util.lib;asmjit.lib;%(AdditionalDependencies) %(IgnoreSpecificDefaultLibraries) @@ -310,6 +310,7 @@ + @@ -488,6 +489,7 @@ + diff --git a/rpcs3/rpcs3.vcxproj.filters b/rpcs3/rpcs3.vcxproj.filters index 9ff78c9308..e5adadf067 100644 --- a/rpcs3/rpcs3.vcxproj.filters +++ b/rpcs3/rpcs3.vcxproj.filters @@ -499,6 +499,9 @@ Utilities + + Emu\Cell + Emu\Memory @@ -978,6 +981,9 @@ Emu\SysCalls\Modules + + Emu\Cell + Emu\Cell