Frameskipping more aggressive (minor speedup, plz report any serious problems). Initial display list cache implementation, disabled for now. Various cleanup.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@3952 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
hrydgard 2009-08-09 11:03:58 +00:00
parent 95b39da7ca
commit 6003c9ecd2
13 changed files with 808 additions and 131 deletions

View File

@ -20,17 +20,22 @@
extern u8* g_pVideoData;
inline u8 DataPeek8(u32 _uOffset)
inline void DataSkip(u32 skip)
{
g_pVideoData += skip;
}
inline u8 DataPeek8(int _uOffset)
{
return g_pVideoData[_uOffset];
}
inline u16 DataPeek16(u32 _uOffset)
inline u16 DataPeek16(int _uOffset)
{
return Common::swap16(*(u16*)&g_pVideoData[_uOffset]);
}
inline u32 DataPeek32(u32 _uOffset)
inline u32 DataPeek32(int _uOffset)
{
return Common::swap32(*(u32*)&g_pVideoData[_uOffset]);
}
@ -118,9 +123,4 @@ inline u8* DataGetPosition()
return g_pVideoData;
}
inline void DataSkip(u32 skip)
{
g_pVideoData += skip;
}
#endif

View File

@ -97,7 +97,8 @@ void Fifo_ExitLoop()
// May be executed from any thread, even the graphics thread.
// Created to allow for self shutdown.
void Fifo_ExitLoopNonBlocking() {
void Fifo_ExitLoopNonBlocking()
{
fifoStateRun = false;
}
@ -118,7 +119,7 @@ void Fifo_SendFifoData(u8* _uData, u32 len)
// Copy new video instructions to videoBuffer for future use in rendering the new picture
memcpy(videoBuffer + size, _uData, len);
size += len;
OpcodeDecoder_Run();
OpcodeDecoder_Run(g_bSkipCurrentFrame);
}
// Description: Main FIFO update loop
@ -146,7 +147,7 @@ void Fifo_EnterLoop(const SVideoInitialize &video_initialize)
while (_fifo.bFF_GPReadEnable && _fifo.CPReadWriteDistance)
{
if(!fifoStateRun)
if (!fifoStateRun)
break;
// Create pointer to video data and send it to the VideoPlugin

View File

@ -19,10 +19,11 @@
// Ikaruga uses (nearly) NO display lists!
// Zelda WW uses TONS of display lists
// Zelda TP uses almost 100% display lists except menus (we like this!)
// Super Mario Galaxy has nearly all geometry and more than half of the state in DLs (great!)
// Note that it IS NOT GENERALLY POSSIBLE to precompile display lists! You can compile them as they are
// and hope that the vertex format doesn't change, though, if you do it just when they are
// called. The reason is that the vertex format affects the sizes of the vertices.
// while interpreting them, and hope that the vertex format doesn't change, though, if you do it right
// when they are called. The reason is that the vertex format affects the sizes of the vertices.
#include "Common.h"
#include "VideoCommon.h"
@ -47,13 +48,12 @@ extern u8* FAKE_GetFifoEndPtr();
static void Decode();
static void ExecuteDisplayList(u32 address, u32 size)
void InterpretDisplayList(u32 address, u32 size)
{
u8* old_pVideoData = g_pVideoData;
u8* startAddress = Memory_GetPtr(address);
//Avoid the crash if Memory_GetPtr failed ..
// Avoid the crash if Memory_GetPtr failed ..
if (startAddress != 0)
{
g_pVideoData = startAddress;
@ -61,7 +61,8 @@ static void ExecuteDisplayList(u32 address, u32 size)
// temporarily swap dl and non-dl (small "hack" for the stats)
Statistics::SwapDL();
while ((u32)(g_pVideoData - startAddress) < size)
u8 *end = g_pVideoData + size;
while (g_pVideoData < end)
{
Decode();
}
@ -76,48 +77,60 @@ static void ExecuteDisplayList(u32 address, u32 size)
g_pVideoData = old_pVideoData;
}
// Defer to plugin-specific DL cache.
extern bool HandleDisplayList(u32 address, u32 size);
void ExecuteDisplayList(u32 address, u32 size)
{
if (!HandleDisplayList(address, size))
InterpretDisplayList(address, size);
}
bool FifoCommandRunnable()
{
u32 iBufferSize = (u32)(FAKE_GetFifoEndPtr() - g_pVideoData);
if (iBufferSize == 0)
u32 buffer_size = (u32)(FAKE_GetFifoEndPtr() - g_pVideoData);
if (buffer_size == 0)
return false; // can't peek
u8 Cmd = DataPeek8(0);
u32 iCommandSize = 0;
u8 cmd_byte = DataPeek8(0);
u32 command_size = 0;
switch (Cmd)
switch (cmd_byte)
{
case GX_NOP: // Hm, this means that we scan over nop streams pretty slowly...
case GX_CMD_INVL_VC: // Invalidate Vertex Cache - no parameters
case 0x44: // zelda 4 swords calls it and checks the metrics registers after that
iCommandSize = 1;
case GX_CMD_UNKNOWN_METRICS: // zelda 4 swords calls it and checks the metrics registers after that
command_size = 1;
break;
case GX_LOAD_BP_REG:
command_size = 5;
break;
case GX_LOAD_CP_REG:
iCommandSize = 6;
command_size = 6;
break;
case GX_LOAD_INDX_A:
case GX_LOAD_INDX_B:
case GX_LOAD_INDX_C:
case GX_LOAD_INDX_D:
case GX_LOAD_BP_REG:
iCommandSize = 5;
command_size = 5;
break;
case GX_CMD_CALL_DL:
iCommandSize = 9;
command_size = 9;
break;
case GX_LOAD_XF_REG:
{
// check if we can read the header
if (iBufferSize >= 5)
if (buffer_size >= 5)
{
iCommandSize = 1 + 4;
command_size = 1 + 4;
u32 Cmd2 = DataPeek32(1);
int dwTransferSize = ((Cmd2 >> 16) & 15) + 1;
iCommandSize += dwTransferSize * 4;
int transfer_size = ((Cmd2 >> 16) & 15) + 1;
command_size += transfer_size * 4;
}
else
{
@ -127,14 +140,14 @@ bool FifoCommandRunnable()
break;
default:
if (Cmd & 0x80)
if (cmd_byte & 0x80)
{
// check if we can read the header
if (iBufferSize >= 3)
if (buffer_size >= 3)
{
iCommandSize = 1 + 2;
command_size = 1 + 2;
u16 numVertices = DataPeek16(1);
iCommandSize += numVertices * VertexLoaderManager::GetVertexSize(Cmd & GX_VAT_MASK);
command_size += numVertices * VertexLoaderManager::GetVertexSize(cmd_byte & GX_VAT_MASK);
}
else
{
@ -151,14 +164,14 @@ bool FifoCommandRunnable()
"* Command stream corrupted by some spurious memory bug\n"
"* This really is an unknown opcode (unlikely)\n"
"* Some other sort of bug\n\n"
"Dolphin will now likely crash or hang. Enjoy." , Cmd);
"Dolphin will now likely crash or hang. Enjoy." , cmd_byte);
g_VideoInitialize.pSysMessage(szTemp);
g_VideoInitialize.pLog(szTemp, TRUE);
{
SCPFifoStruct &fifo = *g_VideoInitialize.pCPFifo;
char szTmp[256];
// sprintf(szTmp, "Illegal command %02x (at %08x)",Cmd,g_pDataReader->GetPtr());
// sprintf(szTmp, "Illegal command %02x (at %08x)",cmd_byte,g_pDataReader->GetPtr());
sprintf(szTmp, "Illegal command %02x\n"
"CPBase: 0x%08x\n"
"CPEnd: 0x%08x\n"
@ -172,42 +185,39 @@ bool FifoCommandRunnable()
"bFF_BPEnable: %s\n"
"bFF_GPLinkEnable: %s\n"
"bFF_Breakpoint: %s\n"
,Cmd, fifo.CPBase, fifo.CPEnd, fifo.CPHiWatermark, fifo.CPLoWatermark, fifo.CPReadWriteDistance
,cmd_byte, fifo.CPBase, fifo.CPEnd, fifo.CPHiWatermark, fifo.CPLoWatermark, fifo.CPReadWriteDistance
,fifo.CPWritePointer, fifo.CPReadPointer, fifo.CPBreakpoint, fifo.bFF_GPReadEnable ? "true" : "false"
,fifo.bFF_BPEnable ? "true" : "false" ,fifo.bFF_GPLinkEnable ? "true" : "false"
,fifo.bFF_Breakpoint ? "true" : "false");
g_VideoInitialize.pSysMessage(szTmp);
g_VideoInitialize.pLog(szTmp, TRUE);
// _assert_msg_(0,szTmp,"");
}
}
break;
}
if (iCommandSize > iBufferSize)
if (command_size > buffer_size)
return false;
// INFO_LOG("OP detected: Cmd 0x%x size %i buffer %i",Cmd, iCommandSize, iBufferSize);
// INFO_LOG("OP detected: cmd_byte 0x%x size %i buffer %i",cmd_byte, command_size, buffer_size);
return true;
}
static void Decode()
{
int Cmd = DataReadU8();
switch(Cmd)
int cmd_byte = DataReadU8();
switch (cmd_byte)
{
case GX_NOP:
break;
case GX_LOAD_CP_REG: //0x08
{
u32 SubCmd = DataReadU8();
u32 Value = DataReadU32();
LoadCPReg(SubCmd, Value);
u8 sub_cmd = DataReadU8();
u32 value = DataReadU32();
LoadCPReg(sub_cmd, value);
INCSTAT(stats.thisFrame.numCPLoads);
}
break;
@ -215,13 +225,13 @@ static void Decode()
case GX_LOAD_XF_REG:
{
u32 Cmd2 = DataReadU32();
int dwTransferSize = ((Cmd2 >> 16) & 15) + 1;
u32 dwAddress = Cmd2 & 0xFFFF;
int transfer_size = ((Cmd2 >> 16) & 15) + 1;
u32 address = Cmd2 & 0xFFFF;
// TODO - speed this up. pshufb?
static u32 pData[16];
for (int i = 0; i < dwTransferSize; i++)
pData[i] = DataReadU32();
LoadXFReg(dwTransferSize, dwAddress, pData);
u32 data_buffer[16];
for (int i = 0; i < transfer_size; i++)
data_buffer[i] = DataReadU32();
LoadXFReg(transfer_size, address, data_buffer);
INCSTAT(stats.thisFrame.numXFLoads);
}
break;
@ -241,13 +251,13 @@ static void Decode()
case GX_CMD_CALL_DL:
{
u32 dwAddr = DataReadU32();
u32 dwCount = DataReadU32();
ExecuteDisplayList(dwAddr, dwCount);
u32 address = DataReadU32();
u32 count = DataReadU32();
ExecuteDisplayList(address, count);
}
break;
case 0x44: // zelda 4 swords calls it and checks the metrics registers after that
case GX_CMD_UNKNOWN_METRICS: // zelda 4 swords calls it and checks the metrics registers after that
// Log the opcode byte itself; the local holding it is named cmd_byte in this function.
DEBUG_LOG(VIDEO, "GX 0x44: %08x", cmd_byte);
break;
@ -257,31 +267,107 @@ static void Decode()
case GX_LOAD_BP_REG: //0x61
{
u32 cmd = DataReadU32();
LoadBPReg(cmd);
u32 bp_cmd = DataReadU32();
LoadBPReg(bp_cmd);
INCSTAT(stats.thisFrame.numBPLoads);
}
break;
// draw primitives
default:
if (Cmd & 0x80)
if (cmd_byte & 0x80)
{
// load vertices (use computed vertex size from FifoCommandRunnable above)
u16 numVertices = DataReadU16();
VertexLoaderManager::RunVertices(
Cmd & GX_VAT_MASK, // Vertex loader index (0 - 7)
(Cmd & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT,
cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7)
(cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT,
numVertices);
}
else
{
// char szTmp[256];
//sprintf(szTmp, "Illegal command %02x (at %08x)",Cmd,g_pDataReader->GetPtr());
//g_VideoInitialize.pLog(szTmp);
//MessageBox(0,szTmp,"GFX ERROR",0);
// _assert_msg_(0,szTmp,"");
ERROR_LOG(VIDEO, "OpcodeDecoding::Decode: Illegal command %02x", cmd_byte);
break;
}
break;
}
}
static void DecodeSemiNop()
{
int cmd_byte = DataReadU8();
switch (cmd_byte)
{
case GX_CMD_UNKNOWN_METRICS: // zelda 4 swords calls it and checks the metrics registers after that
case GX_CMD_INVL_VC: // Invalidate Vertex Cache
case GX_NOP:
break;
case GX_LOAD_CP_REG: //0x08
// We have to let CP writes through because they determine the size of vertices.
{
u8 sub_cmd = DataReadU8();
u32 value = DataReadU32();
LoadCPReg(sub_cmd, value);
INCSTAT(stats.thisFrame.numCPLoads);
}
break;
case GX_LOAD_XF_REG:
{
u32 Cmd2 = DataReadU32();
int transfer_size = ((Cmd2 >> 16) & 15) + 1;
u32 address = Cmd2 & 0xFFFF;
// TODO - speed this up. pshufb?
u32 data_buffer[16];
for (int i = 0; i < transfer_size; i++)
data_buffer[i] = DataReadU32();
LoadXFReg(transfer_size, address, data_buffer);
INCSTAT(stats.thisFrame.numXFLoads);
}
break;
case GX_LOAD_INDX_A: //used for position matrices
LoadIndexedXF(DataReadU32(), 0xC);
break;
case GX_LOAD_INDX_B: //used for normal matrices
LoadIndexedXF(DataReadU32(), 0xD);
break;
case GX_LOAD_INDX_C: //used for postmatrices
LoadIndexedXF(DataReadU32(), 0xE);
break;
case GX_LOAD_INDX_D: //used for lights
LoadIndexedXF(DataReadU32(), 0xF);
break;
case GX_CMD_CALL_DL:
// Hm, wonder if any games put tokens in display lists - in that case,
// we'll have to parse them too.
DataSkip(8);
break;
case GX_LOAD_BP_REG: //0x61
// We have to let BP writes through because they set tokens and stuff.
// TODO: Call a much simplified LoadBPReg instead.
{
u32 bp_cmd = DataReadU32();
LoadBPReg(bp_cmd);
INCSTAT(stats.thisFrame.numBPLoads);
}
break;
// draw primitives
default:
if (cmd_byte & 0x80)
{
// load vertices (use computed vertex size from FifoCommandRunnable above)
u16 numVertices = DataReadU16();
DataSkip(numVertices * VertexLoaderManager::GetVertexSize(cmd_byte & GX_VAT_MASK));
}
else
{
ERROR_LOG(VIDEO, "OpcodeDecoding::Decode: Illegal command %02x", cmd_byte);
break;
}
break;
@ -298,13 +384,17 @@ void OpcodeDecoder_Shutdown()
{
}
void OpcodeDecoder_Run()
void OpcodeDecoder_Run(bool skipped_frame)
{
DVSTARTPROFILE();
while (FifoCommandRunnable())
{
//TODO?: if really needed, do something like this: "InterlockedExchange((LONG*)&_fifo.CPCmdIdle, 0);"
Decode();
}
//TODO?: if really needed, do something like this: "InterlockedExchange((LONG*)&_fifo.CPCmdIdle, 1);"
}
DVSTARTPROFILE();
if (!skipped_frame)
{
while (FifoCommandRunnable())
Decode();
}
else
{
while (FifoCommandRunnable())
DecodeSemiNop();
}
}

View File

@ -29,6 +29,7 @@
#define GX_LOAD_INDX_D 0x38
#define GX_CMD_CALL_DL 0x40
#define GX_CMD_UNKNOWN_METRICS 0x44
#define GX_CMD_INVL_VC 0x48
#define GX_PRIMITIVE_MASK 0x78
@ -46,6 +47,6 @@
void OpcodeDecoder_Init();
void OpcodeDecoder_Shutdown();
void OpcodeDecoder_Run();
void OpcodeDecoder_Run(bool skipped_frame);
#endif // _OPCODE_DECODING_H

View File

@ -234,6 +234,7 @@ void PixelShaderManager::SetPSTextureDims(int texid)
SetPSConstant4fv(C_TEXDIMS + texid, fdims);
}
// This one is high in profiles (0.5%)
void PixelShaderManager::SetColorChanged(int type, int num)
{
int r = bpmem.tevregs[num].low.a;
@ -241,10 +242,10 @@ void PixelShaderManager::SetColorChanged(int type, int num)
int b = bpmem.tevregs[num].high.a;
int g = bpmem.tevregs[num].high.b;
float *pf = &lastRGBAfull[type][num][0];
pf[0] = (float)r / 255.0f;
pf[1] = (float)g / 255.0f;
pf[2] = (float)b / 255.0f;
pf[3] = (float)a / 255.0f;
pf[0] = (float)r * (1.0f / 255.0f);
pf[1] = (float)g * (1.0f / 255.0f);
pf[2] = (float)b * (1.0f / 255.0f);
pf[3] = (float)a * (1.0f / 255.0f);
s_nColorsChanged[type] |= 1 << num;
PRIM_LOG("pixel %scolor%d: %f %f %f %f\n", type?"k":"", num, pf[0], pf[1], pf[2], pf[3]);
}

View File

@ -289,6 +289,7 @@ void LOADERDECL TexCoord_ReadIndex16_Short1()
}
void LOADERDECL TexCoord_ReadIndex16_Short2()
{
// Heavy in ZWW
u16 Index = DataReadU16();
const u16 *pData = (const u16 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)Common::swap16(pData[0]) * tcScale[tcIndex];

View File

@ -18,33 +18,26 @@
#include "Common.h"
#include "UCode_Zelda.h"
void CUCode_Zelda::AFCdecodebuffer(const s16 *coef, const char *input, signed short *out, short *histp, short *hist2p, int type)
void CUCode_Zelda::AFCdecodebuffer(const s16 *coef, const char *src, signed short *out, short *histp, short *hist2p, int type)
{
short nibbles[16];
short hist = *histp;
short hist2 = *hist2p;
const char *src = input;
char *dst = (char*)out;
// First 2 nibbles are ADPCM scale etc.
short delta = 1 << (((*src) >> 4) & 0xf);
short idx = (*src) & 0xf;
src++;
short nibbles[16];
if (type == 9)
{
for (int i = 0; i < 16; i = i + 2) {
int j = (*src & 255) >> 4;
nibbles[i] = j;
j = *src & 255 & 15;
nibbles[i+1] = j;
for (int i = 0; i < 16; i += 2)
{
nibbles[i + 0] = *src >> 4;
nibbles[i + 1] = *src & 15;
src++;
}
for (int i = 0; i < 16; i = i + 1) {
for (int i = 0; i < 16; i++) {
if (nibbles[i] >= 8)
nibbles[i] = nibbles[i] - 16;
nibbles[i] <<= 11;
}
}
else
@ -52,45 +45,33 @@ void CUCode_Zelda::AFCdecodebuffer(const s16 *coef, const char *input, signed sh
// In Pikmin, Dolphin's engine sound is using AFC 5bits, even though such a sound is hard
// to compare, it seems like to sound exactly like a real GC
DEBUG_LOG(DSPHLE, "5 bits AFC sample");
for (int i = 0; i < 16; i += 4)
{
int j = (*src >> 0) & 0x02;
nibbles[i] = j;
j = (*src >> 2) & 0x02;
nibbles[i+1] = j;
j = (*src >> 4) & 0x02;
nibbles[i+2] = j;
j = (*src >> 6) & 0x02;
nibbles[i+3] = j;
// Each source byte carries four 2-bit samples, most significant pair first.
// Mask with 0x03 so both bits of every sample survive; the sign-extension
// step that follows (values >= 2 become value - 4) expects the full 0..3 range.
nibbles[i + 0] = (*src >> 6) & 0x03;
nibbles[i + 1] = (*src >> 4) & 0x03;
nibbles[i + 2] = (*src >> 2) & 0x03;
nibbles[i + 3] = (*src >> 0) & 0x03;
src++;
}
for (int i = 0; i < 16; i++)
{
if (nibbles[i] >= 2)
nibbles[i] = nibbles[i] - 4;
nibbles[i] <<= 13;
}
}
short hist = *histp;
short hist2 = *hist2p;
for (int i = 0; i < 16; i++)
{
int sample = (delta * nibbles[i]) << 11;
sample += ((long)hist * coef[idx * 2]) + ((long)hist2 * coef[idx * 2 + 1]);
sample = sample >> 11;
if (sample > 32767) {
int sample = delta * nibbles[i] + ((long)hist * coef[idx * 2]) + ((long)hist2 * coef[idx * 2 + 1]);
sample >>= 11;
if (sample > 32767)
sample = 32767;
}
if (sample < -32768) {
if (sample < -32768)
sample = -32768;
}
*(short*)dst = (short)sample;
dst = dst + 2;
out[i] = sample;
hist2 = hist;
hist = (short)sample;
}

View File

@ -720,6 +720,14 @@
RelativePath=".\Src\BPFunctions.cpp"
>
</File>
<File
RelativePath=".\Src\DLCache.cpp"
>
</File>
<File
RelativePath=".\Src\DLCache.h"
>
</File>
<File
RelativePath=".\Src\NativeVertexFormat.cpp"
>

View File

@ -0,0 +1,564 @@
// Copyright (C) 2003-2009 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
// TODO: Handle cache-is-full condition :p
#include <map>
#include "Common.h"
#include "VideoCommon.h"
#include "Hash.h"
#include "MemoryUtil.h"
#include "DataReader.h"
#include "Statistics.h"
#include "OpcodeDecoding.h" // For the GX_ constants.
#include "XFMemory.h"
#include "CPMemory.h"
#include "BPMemory.h"
#include "VertexManager.h"
#include "VertexLoaderManager.h"
#include "x64Emitter.h"
#include "ABI.h"
#include "DLCache.h"
#define DL_CODE_CACHE_SIZE (1024*1024*16)
#define DL_STATIC_DATA_SIZE (1024*1024*4)
extern int frameCount;
using namespace Gen;
namespace DLCache
{
// Currently just recompiles the DLs themselves, doesn't bother with the vertex data.
// The speed boost is pretty small. The real big boost will come when we also store
// vertex arrays in the cached DLs.
// Processing stage of a cached display list; stored in CachedDisplayList::pass.
enum DisplayListPass {
// Stage a freshly constructed CachedDisplayList starts in.
DLPASS_ANALYZE,
// The list has been analyzed once; the next call compiles it while executing it.
DLPASS_COMPILE,
// Native code exists for the list; calls execute the compiled code.
DLPASS_RUN,
};
// Element type of CachedDisplayList::hash_regions.
// NOTE(review): nothing in this file fills these in yet; presumably each entry
// describes one hashed span of vertex data - confirm once it is used.
struct VDataHashRegion
{
u32 hash;
u32 start_address;
int size;
};
struct CachedDisplayList
{
CachedDisplayList()
: uncachable(false),
pass(DLPASS_ANALYZE),
next_check(1)
{
frame_count = frameCount;
}
int pass;
u32 dl_hash;
int check;
int next_check;
u32 vdata_hash;
std::vector<VDataHashRegion> hash_regions;
int frame_count;
bool uncachable; // if set, this DL will always be interpreted. This gets set if hash ever changes.
// ... Something containing cached vertex buffers here ...
// Compile the commands themselves down to native code.
const u8 *compiled_code;
};
// Builds the cache key for a display list. The start address occupies the upper
// 32 bits and the byte size the lower 32 bits, so lists that begin at the same
// address but have different lengths get separate entries.
inline u64 CreateMapId(u32 address, u32 size)
{
	const u64 address_bits = (u64)address << 32;
	return address_bits | size;
}
// All display lists known to the cache, keyed by CreateMapId(address, size).
typedef std::map<u64, CachedDisplayList> DLMap;
static DLMap dl_map;
// Start of the executable buffer that compiled display lists are emitted into (allocated in Init).
static u8 *dlcode_cache;
// Start of the data buffer that AllocStaticData carves blocks from (allocated in Init).
static u8 *static_data_buffer;
// Next unused byte inside static_data_buffer.
static u8 *static_data_ptr;
// Code emitter; Init and Clear point it at the start of dlcode_cache.
static Gen::XEmitter emitter;
// Hands out `size` bytes from the static data buffer, advancing the allocation
// cursor by the size rounded up to a multiple of four. Blocks are never released
// individually; the cursor is only rewound when the whole cache is cleared.
// No check is made against the end of the buffer.
u8 *AllocStaticData(int size)
{
	const int rounded_size = (size + 3) & ~3;
	u8 *const block = static_data_ptr;
	static_data_ptr = block + rounded_size;
	return block;
}
// First pass - analyze
// Interprets the display list at `address` (`size` bytes) by temporarily pointing
// g_pVideoData at it, executing every command immediately and counting the command
// types seen. The counters and `dl` are not consumed yet.
// Returns false, without executing anything, if Memory_GetPtr cannot resolve
// `address`; returns true otherwise. g_pVideoData is restored before returning.
bool AnalyzeAndRunDisplayList(u32 address, int size, CachedDisplayList *dl)
{
	u8 *dl_start = Memory_GetPtr(address);
	// Memory_GetPtr can fail; refuse the list instead of reading through a null pointer.
	if (dl_start == 0)
		return false;

	int num_xf_reg = 0;
	int num_cp_reg = 0;
	int num_bp_reg = 0;
	int num_index_xf = 0;
	int num_draw_call = 0;

	u8 *old_datareader = g_pVideoData;
	g_pVideoData = dl_start;

	u8 *end = g_pVideoData + size;
	while (g_pVideoData < end)
	{
		// Yet another reimplementation of the DL reading...
		int cmd_byte = DataReadU8();
		switch (cmd_byte)
		{
		case GX_NOP:
			break;

		case GX_LOAD_CP_REG: //0x08
			{
				// Execute
				u8 sub_cmd = DataReadU8();
				u32 value = DataReadU32();
				LoadCPReg(sub_cmd, value);
				INCSTAT(stats.thisFrame.numCPLoads);

				// Analyze
				num_cp_reg++;
			}
			break;

		case GX_LOAD_XF_REG:
			{
				// Execute
				u32 Cmd2 = DataReadU32();
				int transfer_size = ((Cmd2 >> 16) & 15) + 1;
				// Named xf_address so it does not hide the `address` parameter.
				u32 xf_address = Cmd2 & 0xFFFF;
				// TODO - speed this up. pshufb?
				u32 data_buffer[16];
				for (int i = 0; i < transfer_size; i++)
					data_buffer[i] = DataReadU32();
				LoadXFReg(transfer_size, xf_address, data_buffer);
				INCSTAT(stats.thisFrame.numXFLoads);

				// Analyze
				num_xf_reg++;
			}
			break;

		case GX_LOAD_INDX_A: //used for position matrices
			{
				u32 value = DataReadU32();
				// Execute
				LoadIndexedXF(value, 0xC);
				// Analyze
				num_index_xf++;
			}
			break;
		case GX_LOAD_INDX_B: //used for normal matrices
			{
				u32 value = DataReadU32();
				// Execute
				LoadIndexedXF(value, 0xD);
				// Analyze
				num_index_xf++;
			}
			break;
		case GX_LOAD_INDX_C: //used for postmatrices
			{
				u32 value = DataReadU32();
				// Execute
				LoadIndexedXF(value, 0xE);
				// Analyze
				num_index_xf++;
			}
			break;
		case GX_LOAD_INDX_D: //used for lights
			{
				u32 value = DataReadU32();
				// Execute
				LoadIndexedXF(value, 0xF);
				// Analyze
				num_index_xf++;
			}
			break;

		case GX_CMD_CALL_DL:
			PanicAlert("Seeing DL call inside DL.");
			// The opcode is followed by a 32-bit address and a 32-bit size. Step over
			// them so they are not misread as commands on the next iteration.
			DataSkip(8);
			break;

		case GX_CMD_UNKNOWN_METRICS:
			// zelda 4 swords calls it and checks the metrics registers after that
			break;

		case GX_CMD_INVL_VC:// Invalidate (vertex cache?)
			DEBUG_LOG(VIDEO, "Invalidate (vertex cache?)");
			break;

		case GX_LOAD_BP_REG: //0x61
			{
				u32 bp_cmd = DataReadU32();
				// Execute
				LoadBPReg(bp_cmd);
				INCSTAT(stats.thisFrame.numBPLoads);

				// Analyze
			}
			break;

		// draw primitives
		default:
			if (cmd_byte & 0x80)
			{
				// load vertices (use computed vertex size from FifoCommandRunnable above)

				// Execute
				u16 numVertices = DataReadU16();
				VertexLoaderManager::RunVertices(
					cmd_byte & GX_VAT_MASK,   // Vertex loader index (0 - 7)
					(cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT,
					numVertices);

				// Analyze
			}
			else
			{
				ERROR_LOG(VIDEO, "DLCache::CompileAndRun: Illegal command %02x", cmd_byte);
				break;
			}
			break;
		}
	}

	g_pVideoData = old_datareader;
	return true;
}
// The only sensible way to detect changes to vertex data is to convert several times
// and hash the output.

// Second pass - compile
// Since some commands can affect the size of other commands, we really have no choice
// but to compile as we go, interpreting the list. We can't compile and then execute, we must
// compile AND execute at the same time. The second time the display list gets called, we already
// have the compiled code so we don't have to interpret anymore, we just run it.
//
// Walks the display list at `address` (`size` bytes), executing each command and
// emitting an equivalent native call for it. The entry point of the emitted code is
// stored in dl->compiled_code. g_pVideoData is restored before returning; always
// returns true.
bool CompileAndRunDisplayList(u32 address, int size, CachedDisplayList *dl)
{
	VertexManager::Flush();

	u8 *old_datareader = g_pVideoData;
	g_pVideoData = Memory_GetPtr(address);

	u8 *end = g_pVideoData + size;

	emitter.AlignCode4();
	dl->compiled_code = emitter.GetCodePtr();
	emitter.ABI_EmitPrologue(4);

	while (g_pVideoData < end)
	{
		// Yet another reimplementation of the DL reading...
		int cmd_byte = DataReadU8();
		switch (cmd_byte)
		{
		case GX_NOP:
			// Execute
			// Compile
			break;

		case GX_LOAD_CP_REG: //0x08
			{
				// Execute
				u8 sub_cmd = DataReadU8();
				u32 value = DataReadU32();
				LoadCPReg(sub_cmd, value);
				INCSTAT(stats.thisFrame.numCPLoads);

				// Compile
				emitter.ABI_CallFunctionCC(&LoadCPReg, sub_cmd, value);
			}
			break;

		case GX_LOAD_XF_REG:
			{
				// Execute
				u32 Cmd2 = DataReadU32();
				int transfer_size = ((Cmd2 >> 16) & 15) + 1;
				// Named xf_address so it does not hide the `address` parameter.
				u32 xf_address = Cmd2 & 0xFFFF;
				// TODO - speed this up. pshufb?
				// The payload is copied into the static data buffer because the emitted
				// call keeps a pointer to it.
				u8 *real_data_buffer = AllocStaticData(4 * transfer_size);
				u32 *data_buffer = (u32 *)real_data_buffer;
				for (int i = 0; i < transfer_size; i++)
					data_buffer[i] = DataReadU32();
				LoadXFReg(transfer_size, xf_address, data_buffer);
				INCSTAT(stats.thisFrame.numXFLoads);

				// Compile
				emitter.ABI_CallFunctionCCP(&LoadXFReg, transfer_size, xf_address, data_buffer);
			}
			break;

		case GX_LOAD_INDX_A: //used for position matrices
			{
				u32 value = DataReadU32();
				// Execute
				LoadIndexedXF(value, 0xC);
				// Compile
				emitter.ABI_CallFunctionCC(&LoadIndexedXF, value, 0xC);
			}
			break;
		case GX_LOAD_INDX_B: //used for normal matrices
			{
				u32 value = DataReadU32();
				// Execute
				LoadIndexedXF(value, 0xD);
				// Compile
				emitter.ABI_CallFunctionCC(&LoadIndexedXF, value, 0xD);
			}
			break;
		case GX_LOAD_INDX_C: //used for postmatrices
			{
				u32 value = DataReadU32();
				// Execute
				LoadIndexedXF(value, 0xE);
				// Compile
				emitter.ABI_CallFunctionCC(&LoadIndexedXF, value, 0xE);
			}
			break;
		case GX_LOAD_INDX_D: //used for lights
			{
				u32 value = DataReadU32();
				// Execute
				LoadIndexedXF(value, 0xF);
				// Compile
				emitter.ABI_CallFunctionCC(&LoadIndexedXF, value, 0xF);
			}
			break;

		case GX_CMD_CALL_DL:
			PanicAlert("Seeing DL call inside DL.");
			// The opcode is followed by a 32-bit address and a 32-bit size. Step over
			// them so they are not misread (and compiled) as commands.
			DataSkip(8);
			break;

		case GX_CMD_UNKNOWN_METRICS:
			// zelda 4 swords calls it and checks the metrics registers after that
			break;

		case GX_CMD_INVL_VC:// Invalidate (vertex cache?)
			DEBUG_LOG(VIDEO, "Invalidate (vertex cache?)");
			break;

		case GX_LOAD_BP_REG: //0x61
			{
				u32 bp_cmd = DataReadU32();
				// Execute
				LoadBPReg(bp_cmd);
				INCSTAT(stats.thisFrame.numBPLoads);

				// Compile
				emitter.ABI_CallFunctionC(&LoadBPReg, bp_cmd);
			}
			break;

		// draw primitives
		default:
			if (cmd_byte & 0x80)
			{
				// load vertices (use computed vertex size from FifoCommandRunnable above)

				// Execute
				u16 numVertices = DataReadU16();

				// Remember where the vertex data starts: the compiled code must point
				// g_pVideoData back here before it calls RunVertices itself.
				u64 pre_draw_video_data = (u64)g_pVideoData;

				VertexLoaderManager::RunVertices(
					cmd_byte & GX_VAT_MASK,   // Vertex loader index (0 - 7)
					(cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT,
					numVertices);

				// Compile
#ifdef _M_X64
				emitter.MOV(64, R(RAX), Imm64(pre_draw_video_data));
				emitter.MOV(64, M(&g_pVideoData), R(RAX));
#else
				emitter.MOV(32, R(EAX), Imm32(pre_draw_video_data));
				emitter.MOV(32, M(&g_pVideoData), R(EAX));
#endif
				emitter.ABI_CallFunctionCCC(
					&VertexLoaderManager::RunVertices,
					cmd_byte & GX_VAT_MASK,   // Vertex loader index (0 - 7)
					(cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT,
					numVertices);
			}
			else
			{
				ERROR_LOG(VIDEO, "DLCache::CompileAndRun: Illegal command %02x", cmd_byte);
				break;
			}
			break;
		}
	}

	emitter.ABI_EmitEpilogue(4);

	g_pVideoData = old_datareader;
	return true;
}
// Fletcher hash over the display list at `address`. The length has bit 0 cleared,
// so an odd `size` is rounded down to an even byte count before hashing.
// This one's pretty expensive. We should check if we can get away with only
// hashing the entire DL the first 3 frames or something.
u32 ComputeDLHash(u32 address, u32 size)
{
	const u32 even_size = size & ~1;
	return HashFletcher(Memory_GetPtr(address), even_size);
}
void Init()
{
dlcode_cache = (u8 *)AllocateExecutableMemory(DL_CODE_CACHE_SIZE, false); // Don't need low memory.
static_data_buffer = (u8 *)AllocateMemoryPages(DL_STATIC_DATA_SIZE);
static_data_ptr = static_data_buffer;
emitter.SetCodePtr(dlcode_cache);
}
void Shutdown()
{
Clear();
FreeMemoryPages(dlcode_cache, DL_CODE_CACHE_SIZE);
FreeMemoryPages(static_data_buffer, DL_STATIC_DATA_SIZE);
dlcode_cache = NULL;
}
// Forgets every cached display list and rewinds both buffers to their start,
// so the code and data space is reused from the beginning.
void Clear()
{
	// Rewind the data cursor and the code emitter.
	static_data_ptr = static_data_buffer;
	emitter.SetCodePtr(dlcode_cache);

	dl_map.clear();
}
void ProgressiveCleanup()
{
DLMap::iterator iter = dl_map.begin();
while (iter != dl_map.end()) {
CachedDisplayList &entry = iter->second;
int limit = iter->second.uncachable ? 1200 : 400;
if (entry.frame_count < frameCount - limit) {
// entry.Destroy();
#ifdef _WIN32
iter = dl_map.erase(iter);
#else
dl_map.erase(iter++); // (this is gcc standard!)
#endif
}
else
iter++;
}
}
} // namespace
// NOTE - outside the namespace on purpose.
// Entry point for the display list cache. Returns true when the list was handled
// here (the caller must not interpret it again) and false when the caller should
// interpret it.
// The cache is currently switched off by the unconditional `return false` below;
// everything after it describes the intended behaviour once it is enabled.
bool HandleDisplayList(u32 address, u32 size)
{
	// Disable display list caching since the benefit isn't much to write home about
	// right now...
	return false;

	u64 dl_id = DLCache::CreateMapId(address, size);
	DLCache::DLMap::iterator iter = DLCache::dl_map.find(dl_id);

	stats.numDListsAlive = DLCache::dl_map.size();
	if (iter != DLCache::dl_map.end())
	{
		DLCache::CachedDisplayList &dl = iter->second;

		// Mark the entry as used this frame. Without this the entry keeps the frame
		// number it was created on, and the periodic cleanup would evict it by age
		// even while it is being called every frame.
		dl.frame_count = frameCount;

		if (dl.uncachable)
		{
			// We haven't compiled it - let's return false so it gets
			// interpreted.
			return false;
		}

		// Got one! And it's been compiled too, so let's run the compiled code!
		switch (dl.pass)
		{
		case DLCache::DLPASS_ANALYZE:
			PanicAlert("DLPASS_ANALYZE - should have been done the first pass");
			break;
		case DLCache::DLPASS_COMPILE:
			// First, check that the hash is the same as the last time.
			if (dl.dl_hash != HashAdler32(Memory_GetPtr(address), size))
			{
				// PanicAlert("uncachable %08x", address);
				dl.uncachable = true;
				return false;
			}
			DLCache::CompileAndRunDisplayList(address, size, &dl);
			dl.pass = DLCache::DLPASS_RUN;
			break;
		case DLCache::DLPASS_RUN:
			{
				// Every N draws, check hash
				dl.check--;
				if (dl.check <= 0)
				{
					if (dl.dl_hash != HashAdler32(Memory_GetPtr(address), size))
					{
						dl.uncachable = true;
						return false;
					}
					// Double the interval between hash checks, capped at 1024 calls.
					dl.check = dl.next_check;
					dl.next_check *= 2;
					if (dl.next_check > 1024)
						dl.next_check = 1024;
				}
				// The compiled code moves g_pVideoData around; put it back afterwards.
				u8 *old_datareader = g_pVideoData;
				((void (*)())(void*)(dl.compiled_code))();
				g_pVideoData = old_datareader;
				break;
			}
		}
		return true;
	}

	// First time this (address, size) pair is seen: run it through the analyze pass
	// and remember the outcome.
	DLCache::CachedDisplayList dl;

	if (DLCache::AnalyzeAndRunDisplayList(address, size, &dl)) {
		dl.dl_hash = HashAdler32(Memory_GetPtr(address), size);
		dl.pass = DLCache::DLPASS_COMPILE;
		dl.check = 1;
		dl.next_check = 1;
		DLCache::dl_map[dl_id] = dl;
		return true;
	} else {
		dl.uncachable = true;
		DLCache::dl_map[dl_id] = dl;
		return true;  // don't also interpret the list.
	}
}

View File

@ -0,0 +1,32 @@
// Copyright (C) 2003-2009 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#ifndef _DLCACHE_H
#define _DLCACHE_H
// Gives the display list cache a chance to execute the list at `address`
// (`size` bytes). Returns true if the cache handled it, false if the caller
// must interpret the list itself.
bool HandleDisplayList(u32 address, u32 size);
namespace DLCache {
// Allocates the cache's code and data buffers.
void Init();
// Clears the cache and frees the buffers allocated by Init.
void Shutdown();
// Evicts entries whose recorded frame number is too far behind the current frame.
void ProgressiveCleanup();
// Drops all cached display lists and rewinds the code and data buffers.
void Clear();
} // namespace
#endif // _DLCACHE_H

View File

@ -43,6 +43,7 @@
#include "TextureMngr.h"
#include "rasterfont.h"
#include "VertexShaderGen.h"
#include "DLCache.h"
#include "PixelShaderCache.h"
#include "PixelShaderManager.h"
#include "VertexShaderCache.h"
@ -1102,6 +1103,7 @@ void Renderer::SwapBuffers()
GL_REPORT_ERRORD();
// Clean out old stuff from caches
DLCache::ProgressiveCleanup();
VertexShaderCache::ProgressiveCleanup();
PixelShaderCache::ProgressiveCleanup();
TextureMngr::ProgressiveCleanup();
@ -1186,6 +1188,7 @@ void Renderer::DrawDebugText()
p+=sprintf(p,"vshaders alive: %i\n",stats.numVertexShadersAlive);
p+=sprintf(p,"dlists called: %i\n",stats.numDListsCalled);
p+=sprintf(p,"dlists called(f): %i\n",stats.thisFrame.numDListsCalled);
p+=sprintf(p,"dlists alive: %i\n",stats.numDListsAlive);
// not used.
//p+=sprintf(p,"dlists created: %i\n",stats.numDListsCreated);
//p+=sprintf(p,"dlists alive: %i\n",stats.numDListsAlive);

View File

@ -187,10 +187,6 @@ void Flush()
GL_REPORT_ERRORD();
if(g_bSkipCurrentFrame) {
ResetBuffer();
return;
}
glBindBuffer(GL_ARRAY_BUFFER, s_vboBuffers[s_nCurVBOIndex]);
glBufferData(GL_ARRAY_BUFFER, s_pCurBufferPointer - s_pBaseBufferPointer, s_pBaseBufferPointer, GL_STREAM_DRAW);
@ -226,7 +222,7 @@ void Flush()
tex.texImage0[i&3].width + 1, tex.texImage0[i&3].height + 1,
tex.texImage0[i&3].format, tex.texTlut[i&3].tmem_offset<<9, tex.texTlut[i&3].tlut_format);
if (tentry != NULL)
if (tentry)
{
// texture loaded fine, set dims for pixel shader
if (tentry->isRectangle)

View File

@ -91,6 +91,7 @@ GFXDebuggerOGL *m_DebuggerFrame = NULL;
#include "PostProcessing.h"
#include "OnScreenDisplay.h"
#include "Setup.h"
#include "DLCache.h"
#include "VideoState.h"
@ -385,7 +386,7 @@ void Video_Prepare(void)
GL_REPORT_ERRORD();
VertexLoaderManager::Init();
TextureConverter::Init();
DLCache::Init();
s_swapRequested = FALSE;
s_efbAccessRequested = FALSE;
@ -400,6 +401,7 @@ void Shutdown(void)
s_efbAccessRequested = FALSE;
s_swapRequested = FALSE;
DLCache::Shutdown();
Fifo_Shutdown();
PostProcessing::Shutdown();
@ -418,7 +420,6 @@ void Shutdown(void)
OpenGL_Shutdown();
}
void Video_SendFifoData(u8* _uData, u32 len)
{
Fifo_SendFifoData(_uData, len);
@ -435,8 +436,6 @@ void Video_ExitLoop()
Fifo_ExitLoop();
}
// Screenshot and screen message
void Video_Screenshot(const char *_szFilename)