mirror of https://github.com/PCSX2/pcsx2.git
Core: ARM64 compatibility
This commit is contained in:
parent
7d098674f2
commit
71036c95a4
|
@ -954,7 +954,6 @@ set(pcsx2x86Sources
|
|||
x86/ix86-32/iR5900Templates.cpp
|
||||
x86/ix86-32/recVTLB.cpp
|
||||
x86/newVif_Dynarec.cpp
|
||||
x86/newVif_Unpack.cpp
|
||||
x86/newVif_UnpackSSE.cpp
|
||||
)
|
||||
|
||||
|
@ -995,7 +994,6 @@ set(pcsx2x86Headers
|
|||
x86/microVU_Tables.inl
|
||||
x86/microVU_Upper.inl
|
||||
x86/newVif.h
|
||||
x86/newVif_HashBucket.h
|
||||
x86/newVif_UnpackSSE.h
|
||||
x86/R5900_Profiler.h
|
||||
)
|
||||
|
|
|
@ -5,7 +5,6 @@
|
|||
#include "GS.h"
|
||||
#include "Gif_Unit.h"
|
||||
#include "Vif_Dma.h"
|
||||
#include "x86/iR5900.h"
|
||||
|
||||
// A three-way toggle used to determine if the GIF is stalling (transferring) or done (finished).
|
||||
// Should be a gifstate_t rather then int, but I don't feel like possibly interfering with savestates right now.
|
||||
|
|
|
@ -118,6 +118,22 @@ struct Gif_Tag
|
|||
|
||||
// write out unpacked registers
|
||||
_mm_storeu_si128(reinterpret_cast<__m128i*>(regs), vregs);
|
||||
#elif defined(_M_ARM64)
|
||||
// zero out bits for registers which shouldn't be tested
|
||||
u64 REGS64;
|
||||
std::memcpy(®S64, tag.REGS, sizeof(u64));
|
||||
REGS64 &= (0xFFFFFFFFFFFFFFFFULL >> (64 - nRegs * 4));
|
||||
uint8x16_t vregs = vsetq_lane_u64(REGS64, vdupq_n_u64(0), 0);
|
||||
|
||||
// get upper nibbles, interleave with lower nibbles, clear upper bits from low nibbles
|
||||
vregs = vandq_u8(vzip1q_u8(vregs, vshrq_n_u8(vregs, 4)), vdupq_n_u8(0x0F));
|
||||
|
||||
// compare with GIF_REG_A_D, set hasAD if any lanes passed
|
||||
const uint8x16_t comp = vceqq_u8(vregs, vdupq_n_u8(GIF_REG_A_D));
|
||||
hasAD = vmaxvq_u8(comp) & 1;
|
||||
|
||||
// write out unpacked registers
|
||||
vst1q_u8(regs, vregs);
|
||||
#else
|
||||
// Reference C implementation.
|
||||
hasAD = false;
|
||||
|
|
|
@ -8,7 +8,6 @@
|
|||
#include "R3000A.h"
|
||||
#include "R5900.h"
|
||||
#include "ps2/BiosTools.h"
|
||||
#include "x86/iR3000A.h"
|
||||
#include "VMManager.h"
|
||||
|
||||
#include <ctype.h>
|
||||
|
|
|
@ -11,7 +11,6 @@
|
|||
#include "IopHw.h"
|
||||
#include "Mdec.h"
|
||||
#include "R3000A.h"
|
||||
#include "x86/iR5900.h"
|
||||
|
||||
// NOTE: Any modifications to read/write fns should also go into their const counterparts
|
||||
// found in iPsxHw.cpp.
|
||||
|
|
|
@ -14,7 +14,7 @@ const uptr *psxMemRLUT = nullptr;
|
|||
|
||||
IopVM_MemoryAllocMess* iopMem = nullptr;
|
||||
|
||||
alignas(__pagesize) u8 iopHw[Ps2MemSize::IopHardware];
|
||||
alignas(__pagealignsize) u8 iopHw[Ps2MemSize::IopHardware];
|
||||
|
||||
void iopMemAlloc()
|
||||
{
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
#include "Gif_Unit.h"
|
||||
#include "MTVU.h"
|
||||
#include "VMManager.h"
|
||||
#include "x86/newVif.h"
|
||||
#include "Vif_Dynarec.h"
|
||||
|
||||
#include <thread>
|
||||
|
||||
|
|
|
@ -98,7 +98,7 @@ u8* SysMemory::TryAllocateVirtualMemory(const char* name, void* file_handle, upt
|
|||
if (!baseptr)
|
||||
return nullptr;
|
||||
|
||||
if ((uptr)baseptr != base)
|
||||
if (base != 0 && (uptr)baseptr != base)
|
||||
{
|
||||
if (file_handle)
|
||||
{
|
||||
|
@ -122,6 +122,8 @@ u8* SysMemory::TryAllocateVirtualMemory(const char* name, void* file_handle, upt
|
|||
|
||||
u8* SysMemory::AllocateVirtualMemory(const char* name, void* file_handle, size_t size, size_t offset_from_base)
|
||||
{
|
||||
// ARM64 does not need the rec areas to be in +/- 2GB.
|
||||
#ifdef _M_X86
|
||||
pxAssertRel(Common::IsAlignedPow2(size, __pagesize), "Virtual memory size is page aligned");
|
||||
|
||||
// Everything looks nicer when the start of all the sections is a nice round looking number.
|
||||
|
@ -148,6 +150,9 @@ u8* SysMemory::AllocateVirtualMemory(const char* name, void* file_handle, size_t
|
|||
DevCon.Warning("%s: host memory @ 0x%016" PRIXPTR " -> 0x%016" PRIXPTR " is unavailable; attempting to map elsewhere...", name,
|
||||
base, base + size);
|
||||
}
|
||||
#else
|
||||
return TryAllocateVirtualMemory(name, file_handle, 0, size);
|
||||
#endif
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
@ -986,8 +991,8 @@ void memClearPageAddr(u32 vaddr)
|
|||
///////////////////////////////////////////////////////////////////////////
|
||||
// PS2 Memory Init / Reset / Shutdown
|
||||
|
||||
EEVM_MemoryAllocMess* eeMem = NULL;
|
||||
alignas(__pagesize) u8 eeHw[Ps2MemSize::Hardware];
|
||||
EEVM_MemoryAllocMess* eeMem = nullptr;
|
||||
alignas(__pagealignsize) u8 eeHw[Ps2MemSize::Hardware];
|
||||
|
||||
|
||||
void memBindConditionalHandlers()
|
||||
|
|
|
@ -59,8 +59,8 @@ struct IopVM_MemoryAllocMess
|
|||
// order to allow for simpler macros and reference handles to be defined (we can safely use
|
||||
// compile-time references to registers instead of having to use instance variables).
|
||||
|
||||
alignas(__pagesize) extern u8 eeHw[Ps2MemSize::Hardware];
|
||||
alignas(__pagesize) extern u8 iopHw[Ps2MemSize::IopHardware];
|
||||
alignas(__pagealignsize) extern u8 eeHw[Ps2MemSize::Hardware];
|
||||
alignas(__pagealignsize) extern u8 iopHw[Ps2MemSize::IopHardware];
|
||||
|
||||
|
||||
extern EEVM_MemoryAllocMess* eeMem;
|
||||
|
|
|
@ -34,8 +34,7 @@ using namespace R5900; // for R5900 disasm tools
|
|||
s32 EEsCycle; // used to sync the IOP to the EE
|
||||
u32 EEoCycle;
|
||||
|
||||
alignas(16) cpuRegisters cpuRegs;
|
||||
alignas(16) fpuRegisters fpuRegs;
|
||||
alignas(16) cpuRegistersPack _cpuRegistersPack;
|
||||
alignas(16) tlbs tlb[48];
|
||||
R5900cpu *Cpu = NULL;
|
||||
|
||||
|
|
|
@ -202,10 +202,18 @@ struct tlbs
|
|||
|
||||
#endif
|
||||
|
||||
alignas(16) extern cpuRegisters cpuRegs;
|
||||
alignas(16) extern fpuRegisters fpuRegs;
|
||||
struct cpuRegistersPack
|
||||
{
|
||||
alignas(16) cpuRegisters cpuRegs;
|
||||
alignas(16) fpuRegisters fpuRegs;
|
||||
};
|
||||
|
||||
alignas(16) extern cpuRegistersPack _cpuRegistersPack;
|
||||
alignas(16) extern tlbs tlb[48];
|
||||
|
||||
static cpuRegisters& cpuRegs = _cpuRegistersPack.cpuRegs;
|
||||
static fpuRegisters& fpuRegs = _cpuRegistersPack.fpuRegs;
|
||||
|
||||
extern bool eeEventTestIsActive;
|
||||
|
||||
void intUpdateCPUCycles();
|
||||
|
|
|
@ -11,7 +11,7 @@
|
|||
|
||||
#include "DebugTools/Debug.h"
|
||||
#include "R3000A.h"
|
||||
#include "x86/iR5900.h"
|
||||
#include "R5900.h"
|
||||
|
||||
#include "fmt/core.h"
|
||||
|
||||
|
|
|
@ -38,6 +38,7 @@
|
|||
#include "SIO/Sio2.h"
|
||||
#include "SPU2/spu2.h"
|
||||
#include "USB/USB.h"
|
||||
#include "Vif_Dynarec.h"
|
||||
#include "VMManager.h"
|
||||
#include "ps2/BiosTools.h"
|
||||
#include "svnrev.h"
|
||||
|
@ -76,10 +77,6 @@
|
|||
#include "common/Darwin/DarwinMisc.h"
|
||||
#endif
|
||||
|
||||
#ifdef _M_X86
|
||||
#include "x86/newVif.h"
|
||||
#endif
|
||||
|
||||
namespace VMManager
|
||||
{
|
||||
static void SetDefaultLoggingSettings(SettingsInterface& si);
|
||||
|
@ -230,6 +227,14 @@ bool VMManager::PerformEarlyHardwareChecks(const char** error)
|
|||
return false;
|
||||
}
|
||||
#endif
|
||||
#elif defined(_M_ARM64)
|
||||
// Check page size. If it doesn't match, it is a fatal error.
|
||||
const size_t runtime_host_page_size = HostSys::GetRuntimePageSize();
|
||||
if (__pagesize != runtime_host_page_size)
|
||||
{
|
||||
*error = "Page size mismatch. This build cannot run on your Mac.\n\n" COMMON_DOWNLOAD_MESSAGE;
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
#undef COMMON_DOWNLOAD_MESSAGE
|
||||
|
@ -2502,6 +2507,7 @@ void VMManager::LogCPUCapabilities()
|
|||
LogUserPowerPlan();
|
||||
#endif
|
||||
|
||||
#ifdef _M_X86
|
||||
std::string features;
|
||||
if (cpuinfo_has_x86_avx())
|
||||
features += "AVX ";
|
||||
|
@ -2513,6 +2519,18 @@ void VMManager::LogCPUCapabilities()
|
|||
Console.WriteLn(Color_StrongBlack, "x86 Features Detected:");
|
||||
Console.WriteLnFmt(" {}", features);
|
||||
Console.WriteLn();
|
||||
#endif
|
||||
|
||||
#ifdef _M_ARM64
|
||||
const size_t runtime_cache_line_size = HostSys::GetRuntimeCacheLineSize();
|
||||
if (__cachelinesize != runtime_cache_line_size)
|
||||
{
|
||||
// Not fatal, but does have performance implications.
|
||||
WARNING_LOG(
|
||||
"Cache line size mismatch. This build was compiled with {} byte lines, but the system has {} byte lines.",
|
||||
__cachelinesize, runtime_cache_line_size);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
LogGPUCapabilities();
|
||||
|
@ -3197,6 +3215,8 @@ void VMManager::WarnAboutUnsafeSettings()
|
|||
append(ICON_FA_EXCLAMATION_CIRCLE,
|
||||
TRANSLATE_SV("VMManager", "INTC Spin Detection is not enabled, this may reduce performance."));
|
||||
}
|
||||
if (!EmuConfig.Cpu.Recompiler.EnableFastmem)
|
||||
append(ICON_FA_EXCLAMATION_CIRCLE, TRANSLATE_SV("VMManager", "Fastmem is not enabled, this will reduce performance."));
|
||||
if (!EmuConfig.Speedhacks.vu1Instant)
|
||||
{
|
||||
append(ICON_FA_EXCLAMATION_CIRCLE,
|
||||
|
@ -3322,6 +3342,12 @@ static u32 GetProcessorIdForProcessor(const cpuinfo_processor* proc)
|
|||
|
||||
static void InitializeProcessorList()
|
||||
{
|
||||
if (!cpuinfo_initialize())
|
||||
{
|
||||
Console.Error("cpuinfo_initialize() failed");
|
||||
return;
|
||||
}
|
||||
|
||||
const u32 cluster_count = cpuinfo_get_clusters_count();
|
||||
if (cluster_count == 0)
|
||||
{
|
||||
|
@ -3448,6 +3474,10 @@ static void InitializeProcessorList()
|
|||
|
||||
static void SetMTVUAndAffinityControlDefault(SettingsInterface& si)
|
||||
{
|
||||
#ifdef __APPLE__
|
||||
// Everything we support Mac-wise has enough cores for MTVU.
|
||||
si.SetBoolValue("EmuCore/Speedhacks", "vuThread", true);
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -29,6 +29,8 @@ static __fi void vu0SetMicroFlags(u32* flags, u32 value)
|
|||
{
|
||||
#ifdef _M_X86
|
||||
_mm_store_si128(reinterpret_cast<__m128i*>(flags), _mm_set1_epi32(value));
|
||||
#elif defined(_M_ARM64)
|
||||
vst1q_u32(flags, vdupq_n_u32(value));
|
||||
#else
|
||||
flags[0] = flags[1] = flags[2] = flags[3] = value;
|
||||
#endif
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
#include "MTVU.h"
|
||||
#include "Vif.h"
|
||||
#include "Vif_Dma.h"
|
||||
#include "x86/newVif.h"
|
||||
#include "Vif_Dynarec.h"
|
||||
|
||||
alignas(16) vifStruct vif0, vif1;
|
||||
|
||||
|
|
|
@ -3,8 +3,8 @@
|
|||
|
||||
#include "Common.h"
|
||||
#include "Vif_Dma.h"
|
||||
#include "Vif_Dynarec.h"
|
||||
#include "VUmicro.h"
|
||||
#include "x86/newVif.h"
|
||||
|
||||
u32 g_vif0Cycles = 0;
|
||||
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
#include "MTVU.h"
|
||||
#include "VUmicro.h"
|
||||
#include "Vif_Dma.h"
|
||||
#include "x86/newVif.h"
|
||||
#include "Vif_Dynarec.h"
|
||||
|
||||
u32 g_vif1Cycles = 0;
|
||||
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
#include "MTVU.h"
|
||||
#include "VUmicro.h"
|
||||
#include "Vif_Dma.h"
|
||||
#include "x86/newVif.h"
|
||||
#include "Vif_Dynarec.h"
|
||||
|
||||
#define vifOp(vifCodeName) _vifT int vifCodeName(int pass, const u32* data)
|
||||
#define pass1 if (pass == 0)
|
||||
|
|
|
@ -0,0 +1,46 @@
|
|||
// SPDX-FileCopyrightText: 2002-2023 PCSX2 Dev Team
|
||||
// SPDX-License-Identifier: LGPL-3.0+
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "Vif.h"
|
||||
#include "Vif_HashBucket.h"
|
||||
#include "VU.h"
|
||||
|
||||
typedef u32 (*nVifCall)(void*, const void*);
|
||||
typedef void (*nVifrecCall)(uptr dest, uptr src);
|
||||
|
||||
extern void _nVifUnpack(int idx, const u8* data, uint mode, bool isFill);
|
||||
extern void dVifReset(int idx);
|
||||
extern void dVifRelease(int idx);
|
||||
extern void VifUnpackSSE_Init();
|
||||
|
||||
_vifT extern void dVifUnpack(const u8* data, bool isFill);
|
||||
|
||||
struct nVifStruct
|
||||
{
|
||||
// Buffer for partial transfers (should always be first to ensure alignment)
|
||||
// Maximum buffer size is 256 (vifRegs.Num max range) * 16 (quadword)
|
||||
alignas(16) u8 buffer[256*16];
|
||||
u32 bSize; // Size of 'buffer'
|
||||
|
||||
// VIF0 or VIF1 - provided for debugging helpfulness only, and is generally unused.
|
||||
// (templates are used for most or all VIF indexing)
|
||||
u32 idx;
|
||||
|
||||
u8* recWritePtr; // current write pos into the reserve
|
||||
u8* recEndPtr;
|
||||
|
||||
HashBucket vifBlocks; // Vif Blocks
|
||||
|
||||
|
||||
nVifStruct() = default;
|
||||
};
|
||||
|
||||
extern void resetNewVif(int idx);
|
||||
|
||||
alignas(16) extern nVifStruct nVif[2];
|
||||
alignas(16) extern nVifCall nVifUpk[(2 * 2 * 16) * 4]; // ([USN][Masking][Unpack Type]) [curCycle]
|
||||
alignas(16) extern u32 nVifMask[3][4][4]; // [MaskNumber][CycleNumber][Vector]
|
||||
|
||||
static constexpr bool newVifDynaRec = 1; // Use code in newVif_Dynarec.inl
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
#include "Common.h"
|
||||
#include "Vif_Dma.h"
|
||||
#include "x86/newVif.h"
|
||||
#include "Vif_Dynarec.h"
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// VifCode Transfer Interpreter (Vif0/Vif1)
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
#include "Common.h"
|
||||
#include "Vif.h"
|
||||
#include "Vif_Dma.h"
|
||||
#include "Vif_Dynarec.h"
|
||||
#include "MTVU.h"
|
||||
|
||||
enum UnpackOffset {
|
||||
|
@ -244,3 +245,277 @@ _vifT void vifUnpackSetup(const u32 *data) {
|
|||
|
||||
template void vifUnpackSetup<0>(const u32 *data);
|
||||
template void vifUnpackSetup<1>(const u32 *data);
|
||||
|
||||
alignas(16) nVifStruct nVif[2];
|
||||
|
||||
// Interpreter-style SSE unpacks. Array layout matches the interpreter C unpacks.
|
||||
// ([USN][Masking][Unpack Type]) [curCycle]
|
||||
alignas(16) nVifCall nVifUpk[(2 * 2 * 16) * 4];
|
||||
|
||||
// This is used by the interpreted SSE unpacks only. Recompiled SSE unpacks
|
||||
// and the interpreted C unpacks use the vif.MaskRow/MaskCol members directly.
|
||||
// [MaskNumber][CycleNumber][Vector]
|
||||
alignas(16) u32 nVifMask[3][4][4] = {};
|
||||
|
||||
// Number of bytes of data in the source stream needed for each vector.
|
||||
// [equivalent to ((32 >> VL) * (VN+1)) / 8]
|
||||
alignas(16) const u8 nVifT[16] = {
|
||||
4, // S-32
|
||||
2, // S-16
|
||||
1, // S-8
|
||||
0, // ----
|
||||
8, // V2-32
|
||||
4, // V2-16
|
||||
2, // V2-8
|
||||
0, // ----
|
||||
12,// V3-32
|
||||
6, // V3-16
|
||||
3, // V3-8
|
||||
0, // ----
|
||||
16,// V4-32
|
||||
8, // V4-16
|
||||
4, // V4-8
|
||||
2, // V4-5
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
template <int idx, bool doMode, bool isFill>
|
||||
__ri void _nVifUnpackLoop(const u8* data);
|
||||
|
||||
typedef void FnType_VifUnpackLoop(const u8* data);
|
||||
typedef FnType_VifUnpackLoop* Fnptr_VifUnpackLoop;
|
||||
|
||||
// Unpacks Until 'Num' is 0
|
||||
alignas(16) static const Fnptr_VifUnpackLoop UnpackLoopTable[2][2][2] = {
|
||||
{
|
||||
{_nVifUnpackLoop<0, 0, 0>, _nVifUnpackLoop<0, 0, 1>},
|
||||
{_nVifUnpackLoop<0, 1, 0>, _nVifUnpackLoop<0, 1, 1>},
|
||||
},
|
||||
{
|
||||
{_nVifUnpackLoop<1, 0, 0>, _nVifUnpackLoop<1, 0, 1>},
|
||||
{_nVifUnpackLoop<1, 1, 0>, _nVifUnpackLoop<1, 1, 1>},
|
||||
},
|
||||
};
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
void resetNewVif(int idx)
|
||||
{
|
||||
// Safety Reset : Reassign all VIF structure info, just in case the VU1 pointers have
|
||||
// changed for some reason.
|
||||
|
||||
nVif[idx].idx = idx;
|
||||
nVif[idx].bSize = 0;
|
||||
std::memset(nVif[idx].buffer, 0, sizeof(nVif[idx].buffer));
|
||||
|
||||
if (newVifDynaRec)
|
||||
dVifReset(idx);
|
||||
}
|
||||
|
||||
void releaseNewVif(int idx)
|
||||
{
|
||||
}
|
||||
|
||||
static __fi u8* getVUptr(uint idx, int offset)
|
||||
{
|
||||
return (u8*)(vuRegs[idx].Mem + (offset & (idx ? 0x3ff0 : 0xff0)));
|
||||
}
|
||||
|
||||
|
||||
_vifT int nVifUnpack(const u8* data)
|
||||
{
|
||||
nVifStruct& v = nVif[idx];
|
||||
vifStruct& vif = GetVifX;
|
||||
VIFregisters& vifRegs = vifXRegs;
|
||||
|
||||
const uint wl = vifRegs.cycle.wl ? vifRegs.cycle.wl : 256;
|
||||
const uint ret = std::min(vif.vifpacketsize, vif.tag.size);
|
||||
const bool isFill = (vifRegs.cycle.cl < wl);
|
||||
s32 size = ret << 2;
|
||||
|
||||
if (ret == vif.tag.size) // Full Transfer
|
||||
{
|
||||
if (v.bSize) // Last transfer was partial
|
||||
{
|
||||
memcpy(&v.buffer[v.bSize], data, size);
|
||||
v.bSize += size;
|
||||
size = v.bSize;
|
||||
data = v.buffer;
|
||||
|
||||
vif.cl = 0;
|
||||
vifRegs.num = (vifXRegs.code >> 16) & 0xff; // grab NUM form the original VIFcode input.
|
||||
if (!vifRegs.num)
|
||||
vifRegs.num = 256;
|
||||
}
|
||||
|
||||
if (!idx || !THREAD_VU1)
|
||||
{
|
||||
if (newVifDynaRec)
|
||||
dVifUnpack<idx>(data, isFill);
|
||||
else
|
||||
_nVifUnpack(idx, data, vifRegs.mode, isFill);
|
||||
}
|
||||
else
|
||||
vu1Thread.VifUnpack(vif, vifRegs, (u8*)data, (size + 4) & ~0x3);
|
||||
|
||||
vif.pass = 0;
|
||||
vif.tag.size = 0;
|
||||
vif.cmd = 0;
|
||||
vifRegs.num = 0;
|
||||
v.bSize = 0;
|
||||
}
|
||||
else // Partial Transfer
|
||||
{
|
||||
memcpy(&v.buffer[v.bSize], data, size);
|
||||
v.bSize += size;
|
||||
vif.tag.size -= ret;
|
||||
|
||||
const u8& vSize = nVifT[vif.cmd & 0x0f];
|
||||
|
||||
// We need to provide accurate accounting of the NUM register, in case games decided
|
||||
// to read back from it mid-transfer. Since so few games actually use partial transfers
|
||||
// of VIF unpacks, this code should not be any bottleneck.
|
||||
|
||||
if (!isFill)
|
||||
{
|
||||
vifRegs.num -= (size / vSize);
|
||||
}
|
||||
else
|
||||
{
|
||||
int dataSize = (size / vSize);
|
||||
vifRegs.num = vifRegs.num - (((dataSize / vifRegs.cycle.cl) * (vifRegs.cycle.wl - vifRegs.cycle.cl)) + dataSize);
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
template int nVifUnpack<0>(const u8* data);
|
||||
template int nVifUnpack<1>(const u8* data);
|
||||
|
||||
// This is used by the interpreted SSE unpacks only. Recompiled SSE unpacks
|
||||
// and the interpreted C unpacks use the vif.MaskRow/MaskCol members directly.
|
||||
static void setMasks(const vifStruct& vif, const VIFregisters& v)
|
||||
{
|
||||
for (int i = 0; i < 16; i++)
|
||||
{
|
||||
int m = (v.mask >> (i * 2)) & 3;
|
||||
switch (m)
|
||||
{
|
||||
case 0: // Data
|
||||
nVifMask[0][i / 4][i % 4] = 0xffffffff;
|
||||
nVifMask[1][i / 4][i % 4] = 0;
|
||||
nVifMask[2][i / 4][i % 4] = 0;
|
||||
break;
|
||||
case 1: // MaskRow
|
||||
nVifMask[0][i / 4][i % 4] = 0;
|
||||
nVifMask[1][i / 4][i % 4] = 0;
|
||||
nVifMask[2][i / 4][i % 4] = vif.MaskRow._u32[i % 4];
|
||||
break;
|
||||
case 2: // MaskCol
|
||||
nVifMask[0][i / 4][i % 4] = 0;
|
||||
nVifMask[1][i / 4][i % 4] = 0;
|
||||
nVifMask[2][i / 4][i % 4] = vif.MaskCol._u32[i / 4];
|
||||
break;
|
||||
case 3: // Write Protect
|
||||
nVifMask[0][i / 4][i % 4] = 0;
|
||||
nVifMask[1][i / 4][i % 4] = 0xffffffff;
|
||||
nVifMask[2][i / 4][i % 4] = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Unpacking Optimization notes:
|
||||
// ----------------------------------------------------------------------------
|
||||
// Some games send a LOT of single-cycle packets (God of War, SotC, TriAce games, etc),
|
||||
// so we always need to be weary of keeping loop setup code optimized. It's not always
|
||||
// a "win" to move code outside the loop, like normally in most other loop scenarios.
|
||||
//
|
||||
// The biggest bottleneck of the current code is the call/ret needed to invoke the SSE
|
||||
// unpackers. A better option is to generate the entire vifRegs.num loop code as part
|
||||
// of the SSE template, and inline the SSE code into the heart of it. This both avoids
|
||||
// the call/ret and opens the door for resolving some register dependency chains in the
|
||||
// current emitted functions. (this is what zero's SSE does to get it's final bit of
|
||||
// speed advantage over the new vif). --air
|
||||
//
|
||||
// The BEST optimizatin strategy here is to use data available to us from the UNPACK dispatch
|
||||
// -- namely the unpack type and mask flag -- in combination mode and usn values -- to
|
||||
// generate ~600 special versions of this function. But since it's an interpreter, who gives
|
||||
// a crap? Really? :p
|
||||
//
|
||||
|
||||
// size - size of the packet fragment incoming from DMAC.
|
||||
template <int idx, bool doMode, bool isFill>
|
||||
__ri void _nVifUnpackLoop(const u8* data)
|
||||
{
|
||||
|
||||
vifStruct& vif = MTVU_VifX;
|
||||
VIFregisters& vifRegs = MTVU_VifXRegs;
|
||||
|
||||
// skipSize used for skipping writes only
|
||||
const int skipSize = (vifRegs.cycle.cl - vifRegs.cycle.wl) * 16;
|
||||
|
||||
//DevCon.WriteLn("[%d][%d][%d][num=%d][upk=%d][cl=%d][bl=%d][skip=%d]", isFill, doMask, doMode, vifRegs.num, upkNum, vif.cl, blockSize, skipSize);
|
||||
|
||||
if (!doMode && (vif.cmd & 0x10))
|
||||
setMasks(vif, vifRegs);
|
||||
|
||||
const int usn = !!vif.usn;
|
||||
const int upkNum = vif.cmd & 0x1f;
|
||||
const u8& vSize = nVifT[upkNum & 0x0f];
|
||||
//uint vl = vif.cmd & 0x03;
|
||||
//uint vn = (vif.cmd >> 2) & 0x3;
|
||||
//uint vSize = ((32 >> vl) * (vn+1)) / 8; // size of data (in bytes) used for each write cycle
|
||||
|
||||
const nVifCall* fnbase = &nVifUpk[((usn * 2 * 16) + upkNum) * (4 * 1)];
|
||||
const UNPACKFUNCTYPE ft = VIFfuncTable[idx][doMode ? vifRegs.mode : 0][((usn * 2 * 16) + upkNum)];
|
||||
|
||||
pxAssume(vif.cl == 0);
|
||||
//pxAssume (vifRegs.cycle.wl > 0);
|
||||
|
||||
do
|
||||
{
|
||||
u8* dest = getVUptr(idx, vif.tag.addr);
|
||||
|
||||
if (doMode)
|
||||
{
|
||||
//if (1) {
|
||||
ft(dest, data);
|
||||
}
|
||||
else
|
||||
{
|
||||
//DevCon.WriteLn("SSE Unpack!");
|
||||
uint cl3 = std::min(vif.cl, 3);
|
||||
fnbase[cl3](dest, data);
|
||||
}
|
||||
|
||||
vif.tag.addr += 16;
|
||||
--vifRegs.num;
|
||||
++vif.cl;
|
||||
|
||||
if (isFill)
|
||||
{
|
||||
//DevCon.WriteLn("isFill!");
|
||||
if (vif.cl <= vifRegs.cycle.cl)
|
||||
data += vSize;
|
||||
else if (vif.cl == vifRegs.cycle.wl)
|
||||
vif.cl = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
data += vSize;
|
||||
|
||||
if (vif.cl >= vifRegs.cycle.wl)
|
||||
{
|
||||
vif.tag.addr += skipSize;
|
||||
vif.cl = 0;
|
||||
}
|
||||
}
|
||||
} while (vifRegs.num);
|
||||
}
|
||||
|
||||
__fi void _nVifUnpack(int idx, const u8* data, uint mode, bool isFill)
|
||||
{
|
||||
UnpackLoopTable[idx][!!mode][isFill](data);
|
||||
}
|
||||
|
|
|
@ -425,9 +425,6 @@
|
|||
<ClCompile Include="Vif_Codes.cpp" />
|
||||
<ClCompile Include="Vif_Transfer.cpp" />
|
||||
<ClCompile Include="Vif_Unpack.cpp" />
|
||||
<ClCompile Include="x86\newVif_Unpack.cpp">
|
||||
<ExcludedFromBuild Condition="'$(Platform)'!='x64'">true</ExcludedFromBuild>
|
||||
</ClCompile>
|
||||
<ClCompile Include="x86\newVif_Dynarec.cpp">
|
||||
<ExcludedFromBuild Condition="'$(Platform)'!='x64'">true</ExcludedFromBuild>
|
||||
</ClCompile>
|
||||
|
@ -819,6 +816,8 @@
|
|||
<ClInclude Include="ps2\HwInternal.h" />
|
||||
<ClInclude Include="Cache.h" />
|
||||
<ClInclude Include="Memory.h" />
|
||||
<ClInclude Include="Vif_Dynarec.h" />
|
||||
<ClInclude Include="Vif_HashBucket.h" />
|
||||
<ClInclude Include="VMManager.h" />
|
||||
<ClInclude Include="vtlb.h" />
|
||||
<ClInclude Include="MTVU.h" />
|
||||
|
@ -838,7 +837,6 @@
|
|||
<ClInclude Include="Vif_Dma.h" />
|
||||
<ClInclude Include="Vif_Unpack.h" />
|
||||
<ClInclude Include="x86\newVif.h" />
|
||||
<ClInclude Include="x86\newVif_HashBucket.h" />
|
||||
<ClInclude Include="x86\newVif_UnpackSSE.h" />
|
||||
<ClInclude Include="SPR.h" />
|
||||
<ClInclude Include="Gif.h" />
|
||||
|
|
|
@ -512,9 +512,6 @@
|
|||
<ClCompile Include="Vif_Unpack.cpp">
|
||||
<Filter>System\Ps2\EmotionEngine\DMAC\Vif\Unpack</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="x86\newVif_Unpack.cpp">
|
||||
<Filter>System\Ps2\EmotionEngine\DMAC\Vif\Unpack\newVif</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="x86\newVif_Dynarec.cpp">
|
||||
<Filter>System\Ps2\EmotionEngine\DMAC\Vif\Unpack\newVif\Dynarec</Filter>
|
||||
</ClCompile>
|
||||
|
@ -1484,9 +1481,6 @@
|
|||
<ClInclude Include="x86\newVif.h">
|
||||
<Filter>System\Ps2\EmotionEngine\DMAC\Vif\Unpack\newVif</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="x86\newVif_HashBucket.h">
|
||||
<Filter>System\Ps2\EmotionEngine\DMAC\Vif\Unpack\newVif</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="x86\newVif_UnpackSSE.h">
|
||||
<Filter>System\Ps2\EmotionEngine\DMAC\Vif\Unpack\newVif\Dynarec</Filter>
|
||||
</ClInclude>
|
||||
|
@ -2303,6 +2297,12 @@
|
|||
<ClInclude Include="CDVD\FlatFileReader.h">
|
||||
<Filter>System\ISO</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="Vif_Dynarec.h">
|
||||
<Filter>System\Ps2\EmotionEngine\DMAC\Vif\Unpack\newVif\Dynarec</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="Vif_HashBucket.h">
|
||||
<Filter>System\Ps2\EmotionEngine\DMAC\Vif\Unpack\newVif\Dynarec</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuildStep Include="rdebug\deci2.h">
|
||||
|
|
|
@ -3,28 +3,14 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "Vif.h"
|
||||
#include "VU.h"
|
||||
#include "Vif_Dynarec.h"
|
||||
|
||||
#include "common/emitter/x86emitter.h"
|
||||
|
||||
using namespace x86Emitter;
|
||||
|
||||
// newVif_HashBucket.h uses this typedef, so it has to be declared first.
|
||||
typedef u32 (*nVifCall)(void*, const void*);
|
||||
typedef void (*nVifrecCall)(uptr dest, uptr src);
|
||||
|
||||
#include "newVif_HashBucket.h"
|
||||
|
||||
extern void mVUmergeRegs(const xRegisterSSE& dest, const xRegisterSSE& src, int xyzw, bool modXYZW = 0);
|
||||
extern void mVUsaveReg(const xRegisterSSE& reg, xAddressVoid ptr, int xyzw, bool modXYZW);
|
||||
extern void _nVifUnpack (int idx, const u8* data, uint mode, bool isFill);
|
||||
extern void dVifReset (int idx);
|
||||
extern void dVifClose (int idx);
|
||||
extern void dVifRelease (int idx);
|
||||
extern void VifUnpackSSE_Init();
|
||||
|
||||
_vifT extern void dVifUnpack(const u8* data, bool isFill);
|
||||
|
||||
#define VUFT VIFUnpackFuncTable
|
||||
#define _v0 0
|
||||
|
@ -37,31 +23,3 @@ _vifT extern void dVifUnpack(const u8* data, bool isFill);
|
|||
#define xmmCol3 xmm5
|
||||
#define xmmRow xmm6
|
||||
#define xmmTemp xmm7
|
||||
|
||||
struct nVifStruct
|
||||
{
|
||||
// Buffer for partial transfers (should always be first to ensure alignment)
|
||||
// Maximum buffer size is 256 (vifRegs.Num max range) * 16 (quadword)
|
||||
alignas(16) u8 buffer[256*16];
|
||||
u32 bSize; // Size of 'buffer'
|
||||
|
||||
// VIF0 or VIF1 - provided for debugging helpfulness only, and is generally unused.
|
||||
// (templates are used for most or all VIF indexing)
|
||||
u32 idx;
|
||||
|
||||
u8* recWritePtr; // current write pos into the reserve
|
||||
u8* recEndPtr;
|
||||
|
||||
HashBucket vifBlocks; // Vif Blocks
|
||||
|
||||
|
||||
nVifStruct() = default;
|
||||
};
|
||||
|
||||
extern void resetNewVif(int idx);
|
||||
|
||||
alignas(16) extern nVifStruct nVif[2];
|
||||
alignas(16) extern nVifCall nVifUpk[(2 * 2 * 16) * 4]; // ([USN][Masking][Unpack Type]) [curCycle]
|
||||
alignas(16) extern u32 nVifMask[3][4][4]; // [MaskNumber][CycleNumber][Vector]
|
||||
|
||||
static constexpr bool newVifDynaRec = 1; // Use code in newVif_Dynarec.inl
|
||||
|
|
|
@ -1,282 +0,0 @@
|
|||
// SPDX-FileCopyrightText: 2002-2023 PCSX2 Dev Team
|
||||
// SPDX-License-Identifier: LGPL-3.0+
|
||||
|
||||
#include "Common.h"
|
||||
#include "Vif_Dma.h"
|
||||
#include "newVif.h"
|
||||
#include "MTVU.h"
|
||||
|
||||
alignas(16) nVifStruct nVif[2];
|
||||
|
||||
// Interpreter-style SSE unpacks. Array layout matches the interpreter C unpacks.
|
||||
// ([USN][Masking][Unpack Type]) [curCycle]
|
||||
alignas(16) nVifCall nVifUpk[(2 * 2 * 16) * 4];
|
||||
|
||||
// This is used by the interpreted SSE unpacks only. Recompiled SSE unpacks
|
||||
// and the interpreted C unpacks use the vif.MaskRow/MaskCol members directly.
|
||||
// [MaskNumber][CycleNumber][Vector]
|
||||
alignas(16) u32 nVifMask[3][4][4] = {};
|
||||
|
||||
// Number of bytes of data in the source stream needed for each vector.
|
||||
// [equivalent to ((32 >> VL) * (VN+1)) / 8]
|
||||
alignas(16) const u8 nVifT[16] = {
|
||||
4, // S-32
|
||||
2, // S-16
|
||||
1, // S-8
|
||||
0, // ----
|
||||
8, // V2-32
|
||||
4, // V2-16
|
||||
2, // V2-8
|
||||
0, // ----
|
||||
12,// V3-32
|
||||
6, // V3-16
|
||||
3, // V3-8
|
||||
0, // ----
|
||||
16,// V4-32
|
||||
8, // V4-16
|
||||
4, // V4-8
|
||||
2, // V4-5
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
template <int idx, bool doMode, bool isFill>
|
||||
__ri void _nVifUnpackLoop(const u8* data);
|
||||
|
||||
typedef void FnType_VifUnpackLoop(const u8* data);
|
||||
typedef FnType_VifUnpackLoop* Fnptr_VifUnpackLoop;
|
||||
|
||||
// Unpacks Until 'Num' is 0
|
||||
alignas(16) static const Fnptr_VifUnpackLoop UnpackLoopTable[2][2][2] = {
|
||||
{
|
||||
{_nVifUnpackLoop<0, 0, 0>, _nVifUnpackLoop<0, 0, 1>},
|
||||
{_nVifUnpackLoop<0, 1, 0>, _nVifUnpackLoop<0, 1, 1>},
|
||||
},
|
||||
{
|
||||
{_nVifUnpackLoop<1, 0, 0>, _nVifUnpackLoop<1, 0, 1>},
|
||||
{_nVifUnpackLoop<1, 1, 0>, _nVifUnpackLoop<1, 1, 1>},
|
||||
},
|
||||
};
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
void resetNewVif(int idx)
|
||||
{
|
||||
// Safety Reset : Reassign all VIF structure info, just in case the VU1 pointers have
|
||||
// changed for some reason.
|
||||
|
||||
nVif[idx].idx = idx;
|
||||
nVif[idx].bSize = 0;
|
||||
std::memset(nVif[idx].buffer, 0, sizeof(nVif[idx].buffer));
|
||||
|
||||
if (newVifDynaRec)
|
||||
dVifReset(idx);
|
||||
}
|
||||
|
||||
void releaseNewVif(int idx)
|
||||
{
|
||||
}
|
||||
|
||||
static __fi u8* getVUptr(uint idx, int offset)
|
||||
{
|
||||
return (u8*)(vuRegs[idx].Mem + (offset & (idx ? 0x3ff0 : 0xff0)));
|
||||
}
|
||||
|
||||
|
||||
_vifT int nVifUnpack(const u8* data)
|
||||
{
|
||||
nVifStruct& v = nVif[idx];
|
||||
vifStruct& vif = GetVifX;
|
||||
VIFregisters& vifRegs = vifXRegs;
|
||||
|
||||
const uint wl = vifRegs.cycle.wl ? vifRegs.cycle.wl : 256;
|
||||
const uint ret = std::min(vif.vifpacketsize, vif.tag.size);
|
||||
const bool isFill = (vifRegs.cycle.cl < wl);
|
||||
s32 size = ret << 2;
|
||||
|
||||
if (ret == vif.tag.size) // Full Transfer
|
||||
{
|
||||
if (v.bSize) // Last transfer was partial
|
||||
{
|
||||
memcpy(&v.buffer[v.bSize], data, size);
|
||||
v.bSize += size;
|
||||
size = v.bSize;
|
||||
data = v.buffer;
|
||||
|
||||
vif.cl = 0;
|
||||
vifRegs.num = (vifXRegs.code >> 16) & 0xff; // grab NUM form the original VIFcode input.
|
||||
if (!vifRegs.num)
|
||||
vifRegs.num = 256;
|
||||
}
|
||||
|
||||
if (!idx || !THREAD_VU1)
|
||||
{
|
||||
if (newVifDynaRec)
|
||||
dVifUnpack<idx>(data, isFill);
|
||||
else
|
||||
_nVifUnpack(idx, data, vifRegs.mode, isFill);
|
||||
}
|
||||
else
|
||||
vu1Thread.VifUnpack(vif, vifRegs, (u8*)data, (size + 4) & ~0x3);
|
||||
|
||||
vif.pass = 0;
|
||||
vif.tag.size = 0;
|
||||
vif.cmd = 0;
|
||||
vifRegs.num = 0;
|
||||
v.bSize = 0;
|
||||
}
|
||||
else // Partial Transfer
|
||||
{
|
||||
memcpy(&v.buffer[v.bSize], data, size);
|
||||
v.bSize += size;
|
||||
vif.tag.size -= ret;
|
||||
|
||||
const u8& vSize = nVifT[vif.cmd & 0x0f];
|
||||
|
||||
// We need to provide accurate accounting of the NUM register, in case games decided
|
||||
// to read back from it mid-transfer. Since so few games actually use partial transfers
|
||||
// of VIF unpacks, this code should not be any bottleneck.
|
||||
|
||||
if (!isFill)
|
||||
{
|
||||
vifRegs.num -= (size / vSize);
|
||||
}
|
||||
else
|
||||
{
|
||||
int dataSize = (size / vSize);
|
||||
vifRegs.num = vifRegs.num - (((dataSize / vifRegs.cycle.cl) * (vifRegs.cycle.wl - vifRegs.cycle.cl)) + dataSize);
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
template int nVifUnpack<0>(const u8* data);
|
||||
template int nVifUnpack<1>(const u8* data);
|
||||
|
||||
// This is used by the interpreted SSE unpacks only. Recompiled SSE unpacks
|
||||
// and the interpreted C unpacks use the vif.MaskRow/MaskCol members directly.
|
||||
static void setMasks(const vifStruct& vif, const VIFregisters& v)
|
||||
{
|
||||
for (int i = 0; i < 16; i++)
|
||||
{
|
||||
int m = (v.mask >> (i * 2)) & 3;
|
||||
switch (m)
|
||||
{
|
||||
case 0: // Data
|
||||
nVifMask[0][i / 4][i % 4] = 0xffffffff;
|
||||
nVifMask[1][i / 4][i % 4] = 0;
|
||||
nVifMask[2][i / 4][i % 4] = 0;
|
||||
break;
|
||||
case 1: // MaskRow
|
||||
nVifMask[0][i / 4][i % 4] = 0;
|
||||
nVifMask[1][i / 4][i % 4] = 0;
|
||||
nVifMask[2][i / 4][i % 4] = vif.MaskRow._u32[i % 4];
|
||||
break;
|
||||
case 2: // MaskCol
|
||||
nVifMask[0][i / 4][i % 4] = 0;
|
||||
nVifMask[1][i / 4][i % 4] = 0;
|
||||
nVifMask[2][i / 4][i % 4] = vif.MaskCol._u32[i / 4];
|
||||
break;
|
||||
case 3: // Write Protect
|
||||
nVifMask[0][i / 4][i % 4] = 0;
|
||||
nVifMask[1][i / 4][i % 4] = 0xffffffff;
|
||||
nVifMask[2][i / 4][i % 4] = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Unpacking Optimization notes:
|
||||
// ----------------------------------------------------------------------------
|
||||
// Some games send a LOT of single-cycle packets (God of War, SotC, TriAce games, etc),
|
||||
// so we always need to be wary of keeping loop setup code optimized. It's not always
|
||||
// a "win" to move code outside the loop, like normally in most other loop scenarios.
|
||||
//
|
||||
// The biggest bottleneck of the current code is the call/ret needed to invoke the SSE
|
||||
// unpackers. A better option is to generate the entire vifRegs.num loop code as part
|
||||
// of the SSE template, and inline the SSE code into the heart of it. This both avoids
|
||||
// the call/ret and opens the door for resolving some register dependency chains in the
|
||||
// current emitted functions. (this is what zero's SSE does to get its final bit of
|
||||
// speed advantage over the new vif). --air
|
||||
//
|
||||
// The BEST optimization strategy here is to use data available to us from the UNPACK dispatch
|
||||
// -- namely the unpack type and mask flag -- in combination mode and usn values -- to
|
||||
// generate ~600 special versions of this function. But since it's an interpreter, who gives
|
||||
// a crap? Really? :p
|
||||
//
|
||||
|
||||
// Interpreted unpack loop. 'data' points at the (fully buffered) packet
// payload incoming from DMAC; the loop runs until vifRegs.num reaches zero.
// Template parameters select the VIF unit (idx), whether MODE processing
// applies (doMode), and fill vs. skip cycling (isFill).
template <int idx, bool doMode, bool isFill>
__ri void _nVifUnpackLoop(const u8* data)
{
	vifStruct& vif = MTVU_VifX;
	VIFregisters& vifRegs = MTVU_VifXRegs;

	// skipSize used for skipping writes only (bytes of VU memory jumped over
	// after each WL-long write cycle; unused on the fill path).
	const int skipSize = (vifRegs.cycle.cl - vifRegs.cycle.wl) * 16;

	//DevCon.WriteLn("[%d][%d][%d][num=%d][upk=%d][cl=%d][bl=%d][skip=%d]", isFill, doMask, doMode, vifRegs.num, upkNum, vif.cl, blockSize, skipSize);

	// Masked unpacks (cmd bit 0x10) need the global nVifMask tables refreshed
	// before the table-driven unpackers run; the doMode path does not use them.
	if (!doMode && (vif.cmd & 0x10))
		setMasks(vif, vifRegs);

	const int usn = !!vif.usn;
	const int upkNum = vif.cmd & 0x1f;
	// Per-write source size in bytes for this unpack format.
	const u8& vSize = nVifT[upkNum & 0x0f];
	//uint vl = vif.cmd & 0x03;
	//uint vn = (vif.cmd >> 2) & 0x3;
	//uint vSize = ((32 >> vl) * (vn+1)) / 8; // size of data (in bytes) used for each write cycle

	// fnbase: 4 cycle-position variants per (usn, upkNum) combination.
	const nVifCall* fnbase = &nVifUpk[((usn * 2 * 16) + upkNum) * (4 * 1)];
	const UNPACKFUNCTYPE ft = VIFfuncTable[idx][doMode ? vifRegs.mode : 0][((usn * 2 * 16) + upkNum)];

	pxAssume(vif.cl == 0);
	//pxAssume (vifRegs.cycle.wl > 0);

	do
	{
		u8* dest = getVUptr(idx, vif.tag.addr);

		if (doMode)
		{
			//if (1) {
			ft(dest, data);
		}
		else
		{
			//DevCon.WriteLn("SSE Unpack!");
			// Unpackers are specialized per cycle position 0..2; 3+ share one.
			uint cl3 = std::min(vif.cl, 3);
			fnbase[cl3](dest, data);
		}

		vif.tag.addr += 16;
		--vifRegs.num;
		++vif.cl;

		if (isFill)
		{
			//DevCon.WriteLn("isFill!");
			// Fill mode: only the first CL writes of each WL-long cycle consume
			// source data; later writes leave 'data' unadvanced (reusing it),
			// and the counter wraps once WL writes have been made.
			if (vif.cl <= vifRegs.cycle.cl)
				data += vSize;
			else if (vif.cl == vifRegs.cycle.wl)
				vif.cl = 0;
		}
		else
		{
			data += vSize;

			// Skip mode: after WL writes, jump the destination ahead past the
			// (CL - WL) quadwords that are left untouched.
			if (vif.cl >= vifRegs.cycle.wl)
			{
				vif.tag.addr += skipSize;
				vif.cl = 0;
			}
		}
	} while (vifRegs.num);
}
|
||||
|
||||
// Dispatches an interpreted unpack to the loop specialization matching the
// given VIF index, MODE flag, and fill/skip setting.
__fi void _nVifUnpack(int idx, const u8* data, uint mode, bool isFill)
{
	const bool doMode = (mode != 0);
	UnpackLoopTable[idx][doMode][isFill](data);
}
|
Loading…
Reference in New Issue