Fix shader cache bugs, improving Linux speed. Remove some unnecessary printfs.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@153 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
hrydgard 2008-08-08 19:46:04 +00:00
parent c8c1d2d905
commit 9626b3bc42
14 changed files with 49 additions and 87 deletions

View File

@ -100,15 +100,10 @@ typedef union _LARGE_INTEGER
} LARGE_INTEGER; } LARGE_INTEGER;
#endif #endif
#if defined (__MINGW32__) || defined (_WIN32) #define GC_ALIGNED16(x) __attribute((aligned(16))) x
#define GC_ALIGNED16(x) __declspec(align(16)) x #define GC_ALIGNED64(x) __attribute((aligned(64))) x
#define GC_ALIGNED16_DECL(x) x #define GC_ALIGNED16_DECL(x) __attribute((aligned(16))) x
#else #define GC_ALIGNED64_DECL(x) __attribute((aligned(64))) x
#define GC_ALIGNED16(x) x
#define GC_ALIGNED64(x) x
#define GC_ALIGNED16_DECL(x) x __attribute((aligned(16)))
#define GC_ALIGNED64_DECL(x) x
#endif
#ifndef __forceinline #ifndef __forceinline
#define __forceinline inline #define __forceinline inline

View File

@ -86,7 +86,7 @@ bool DynamicLibrary::Load(const char* filename)
if (library) { if (library) {
library_file = filename; library_file = filename;
} }
return(library != 0); return library != 0;
} }
@ -121,14 +121,14 @@ void* DynamicLibrary::Get(const char* funcname) const
//PanicAlert("Did not find function %s in library %s.", funcname, library_file.c_str()); //PanicAlert("Did not find function %s in library %s.", funcname, library_file.c_str());
//} //}
return(retval); return retval;
#else #else
retval = dlsym(library, funcname); retval = dlsym(library, funcname);
if (!retval) if (!retval)
{ {
printf("%s\n", dlerror()); printf("Symbol %s missing in %s (error: %s)\n", funcname, library_file.c_str(), dlerror());
} }
#endif #endif
} }

View File

@ -58,7 +58,7 @@ void* AllocateExecutableMemory(int size, bool low)
| (low ? MAP_32BIT : 0) | (low ? MAP_32BIT : 0)
#endif #endif
, -1, 0); // | MAP_FIXED , -1, 0); // | MAP_FIXED
printf("mappah exe %p %i\n", retval, size); // printf("Mapped executable memory at %p (size %i)\n", retval, size);
if (!retval) if (!retval)
{ {
@ -86,7 +86,7 @@ void* AllocateMemoryPages(int size)
#else #else
void* retval = mmap(0, size, PROT_READ | PROT_WRITE, void* retval = mmap(0, size, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); // | MAP_FIXED MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); // | MAP_FIXED
printf("mappah %p %i\n", retval, size); // printf("Mapped memory at %p (size %i)\n", retval, size);
if (!retval) if (!retval)
{ {

View File

@ -304,7 +304,7 @@ THREAD_RETURN EmuThread(void *pArg)
if (Callback_PeekMessages) { if (Callback_PeekMessages) {
Callback_PeekMessages(); Callback_PeekMessages();
} }
Common::SleepCurrentThread(20); Common::SleepCurrentThread(200);
} }
} }
else else

View File

@ -153,6 +153,10 @@ int timeHistory[HISTORYLENGTH] = {0,0,0,0,0};
void Throttle(u64 userdata, int cyclesLate) void Throttle(u64 userdata, int cyclesLate)
{ {
#ifndef _WIN32
// had some weird problem in linux. will investigate.
return;
#endif
static Common::Timer timer; static Common::Timer timer;
for (int i=0; i<HISTORYLENGTH-1; i++) for (int i=0; i<HISTORYLENGTH-1; i++)

View File

@ -14,6 +14,7 @@
// Official SVN repository and contact information can be found at // Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/ // http://code.google.com/p/dolphin-emu/
#include "Common.h" #include "Common.h"
#include "../PowerPC.h" #include "../PowerPC.h"
@ -24,8 +25,11 @@
#include "JitCache.h" #include "JitCache.h"
#include "JitRegCache.h" #include "JitRegCache.h"
// #define INSTRUCTION_START Default(inst); return; #ifdef _WIN32
#define INSTRUCTION_START #define INSTRUCTION_START
#else
#define INSTRUCTION_START Default(inst); return;
#endif
namespace Jit64 namespace Jit64
{ {

View File

@ -129,27 +129,6 @@ namespace Jit64
SetJumpTarget(continue1); SetJumpTarget(continue1);
SetJumpTarget(continue2); SetJumpTarget(continue2);
OR(32, M(&CR), R(EAX)); OR(32, M(&CR), R(EAX));
/*
alternative
MOV(32, R(EAX), M(&CR));
AND(32, R(EAX), Imm32(~(0xF0000000 >> (crf*4))));
CMP(32, gpr.R(a), Imm32(uimm));
FixupBranch pLesser = J_CC(CC_B);
FixupBranch pGreater = J_CC(CC_A);
OR(32, R(EAX), Imm32(0x20000000 >> shift)); // _x86Reg == 0
FixupBranch continue1 = J();
SetJumpTarget(pGreater);
OR(32, R(EAX), Imm32(0x40000000 >> shift)); // _x86Reg > 0
FixupBranch continue2 = J();
SetJumpTarget(pLesser);
OR(32, R(EAX), Imm32(0x80000000 >> shift)); // _x86Reg < 0
SetJumpTarget(continue1);
SetJumpTarget(continue2);
MOV(32, M(&CR), R(EAX));
*/
} }
// signed // signed

View File

@ -35,11 +35,8 @@
#include "JitAsm.h" #include "JitAsm.h"
#include "JitRegCache.h" #include "JitRegCache.h"
#ifdef _WIN32 // #define INSTRUCTION_START
#define INSTRUCTION_START
#else
#define INSTRUCTION_START Default(inst); return; #define INSTRUCTION_START Default(inst); return;
#endif
#ifdef _M_IX86 #ifdef _M_IX86
#define DISABLE_32BIT Default(inst); return; #define DISABLE_32BIT Default(inst); return;
@ -59,7 +56,7 @@ void WriteDual32(u64 value, u32 address)
Memory::Write_U32((u32)value, address + 4); Memory::Write_U32((u32)value, address + 4);
} }
static const double m_quantizeTableD[] = static const double GC_ALIGNED16(m_quantizeTableD[]) =
{ {
(1 << 0), (1 << 1), (1 << 2), (1 << 3), (1 << 0), (1 << 1), (1 << 2), (1 << 3),
(1 << 4), (1 << 5), (1 << 6), (1 << 7), (1 << 4), (1 << 5), (1 << 6), (1 << 7),
@ -79,7 +76,7 @@ static const double m_quantizeTableD[] =
1.0 / (1 << 4), 1.0 / (1 << 3), 1.0 / (1 << 2), 1.0 / (1 << 1), 1.0 / (1 << 4), 1.0 / (1 << 3), 1.0 / (1 << 2), 1.0 / (1 << 1),
}; };
static const double m_dequantizeTableD[] = static const double GC_ALIGNED16(m_dequantizeTableD[]) =
{ {
1.0 / (1 << 0), 1.0 / (1 << 1), 1.0 / (1 << 2), 1.0 / (1 << 3), 1.0 / (1 << 0), 1.0 / (1 << 1), 1.0 / (1 << 2), 1.0 / (1 << 3),
1.0 / (1 << 4), 1.0 / (1 << 5), 1.0 / (1 << 6), 1.0 / (1 << 7), 1.0 / (1 << 4), 1.0 / (1 << 5), 1.0 / (1 << 6), 1.0 / (1 << 7),
@ -149,8 +146,6 @@ void psq_st(UGeckoInstruction inst)
SetJumpTarget(argh); SetJumpTarget(argh);
CALL((void *)&WriteDual32); CALL((void *)&WriteDual32);
SetJumpTarget(arg2); SetJumpTarget(arg2);
if (update)
MOV(32, gpr.R(a), R(ABI_PARAM2));
gpr.UnlockAll(); gpr.UnlockAll();
fpr.UnlockAll(); fpr.UnlockAll();
} }
@ -164,6 +159,8 @@ void psq_st(UGeckoInstruction inst)
MOV(32, R(ABI_PARAM2), gpr.R(a)); MOV(32, R(ABI_PARAM2), gpr.R(a));
if (offset) if (offset)
ADD(32, R(ABI_PARAM2), Imm32((u32)offset)); ADD(32, R(ABI_PARAM2), Imm32((u32)offset));
if (update && offset)
MOV(32, gpr.R(a), R(ABI_PARAM2));
MOVAPS(XMM0, fpr.R(s)); MOVAPS(XMM0, fpr.R(s));
MOVDDUP(XMM1, M((void*)&m_quantizeTableD[stScale])); MOVDDUP(XMM1, M((void*)&m_quantizeTableD[stScale]));
MULPD(XMM0, R(XMM1)); MULPD(XMM0, R(XMM1));
@ -193,6 +190,8 @@ void psq_st(UGeckoInstruction inst)
MOV(32, R(ABI_PARAM2), gpr.R(a)); MOV(32, R(ABI_PARAM2), gpr.R(a));
if (offset) if (offset)
ADD(32, R(ABI_PARAM2), Imm32((u32)offset)); ADD(32, R(ABI_PARAM2), Imm32((u32)offset));
if (update)
MOV(32, gpr.R(a), R(ABI_PARAM2));
MOVAPS(XMM0, fpr.R(s)); MOVAPS(XMM0, fpr.R(s));
MOVDDUP(XMM1, M((void*)&m_quantizeTableD[stScale])); MOVDDUP(XMM1, M((void*)&m_quantizeTableD[stScale]));
MULPD(XMM0, R(XMM1)); MULPD(XMM0, R(XMM1));
@ -209,8 +208,6 @@ void psq_st(UGeckoInstruction inst)
PUSH(32, R(ABI_PARAM1)); PUSH(32, R(ABI_PARAM1));
CALL(&Memory::Write_U32); CALL(&Memory::Write_U32);
#endif #endif
if (update)
MOV(32, gpr.R(a), R(ABI_PARAM2));
gpr.UnlockAll(); gpr.UnlockAll();
fpr.UnlockAll(); fpr.UnlockAll();
} }

View File

@ -74,7 +74,6 @@ CPluginManager::ScanForPlugins(wxWindow* _wxWindow)
for (size_t i = 0; i < rFilenames.size(); i++) for (size_t i = 0; i < rFilenames.size(); i++)
{ {
std::string orig_name = rFilenames[i]; std::string orig_name = rFilenames[i];
printf("Scanning %s\n", rFilenames[i].c_str());
std::string FileName; std::string FileName;
if (!SplitPath(rFilenames[i], NULL, &FileName, NULL)) if (!SplitPath(rFilenames[i], NULL, &FileName, NULL))
@ -94,17 +93,11 @@ CPluginManager::ScanForPlugins(wxWindow* _wxWindow)
break; break;
} }
printf("Examining %s\n", FileName.c_str());
CPluginInfo PluginInfo(orig_name); CPluginInfo PluginInfo(orig_name);
printf("%s\n", orig_name.c_str());
if (PluginInfo.IsValid()) if (PluginInfo.IsValid())
{ {
m_PluginInfos.push_back(PluginInfo); m_PluginInfos.push_back(PluginInfo);
} }
printf("Valid plugin\n");
} }
} }
} }
@ -136,12 +129,8 @@ CPluginInfo::CPluginInfo(const std::string& _rFileName)
: m_FileName(_rFileName) : m_FileName(_rFileName)
, m_Valid(false) , m_Valid(false)
{ {
printf("Loading!\n");
if (Common::CPlugin::Load(_rFileName.c_str())) if (Common::CPlugin::Load(_rFileName.c_str()))
{ {
printf("Loaded!\n");
if (Common::CPlugin::GetInfo(m_PluginInfo)) if (Common::CPlugin::GetInfo(m_PluginInfo))
{ {
m_Valid = true; m_Valid = true;
@ -152,7 +141,6 @@ CPluginInfo::CPluginInfo(const std::string& _rFileName)
} }
Common::CPlugin::Release(); Common::CPlugin::Release();
printf("Unloaded!\n");
} }
else else
{ {

View File

@ -81,6 +81,7 @@ BOOL Callback_PeekMessages()
while (XPending(GLWin.dpy) > 0) { while (XPending(GLWin.dpy) > 0) {
XNextEvent(GLWin.dpy, &event); XNextEvent(GLWin.dpy, &event);
} }
return TRUE;
#endif #endif
} }

View File

@ -117,7 +117,7 @@ FRAGMENTSHADER* PixelShaderMngr::GetShader()
PSCache::iterator iter = pshaders.find(uid); PSCache::iterator iter = pshaders.find(uid);
if (iter != pshaders.end()) { if (iter != pshaders.end()) {
iter->second.frameCount=frameCount; iter->second.frameCount = frameCount;
PSCacheEntry &entry = iter->second; PSCacheEntry &entry = iter->second;
if (&entry.shader != pShaderLast) if (&entry.shader != pShaderLast)
{ {
@ -131,13 +131,14 @@ FRAGMENTSHADER* PixelShaderMngr::GetShader()
char *code = GeneratePixelShader(s_texturemask, char *code = GeneratePixelShader(s_texturemask,
Renderer::GetZBufferTarget() != 0, Renderer::GetZBufferTarget() != 0,
Renderer::GetRenderMode() != Renderer::RM_Normal); Renderer::GetRenderMode() != Renderer::RM_Normal);
// printf("Compiling pixel shader. size = %i\n", strlen(code));
if (!code || !CompilePixelShader(newentry.shader, code)) { if (!code || !CompilePixelShader(newentry.shader, code)) {
ERROR_LOG("failed to create pixel shader\n"); ERROR_LOG("failed to create pixel shader\n");
return NULL; return NULL;
} }
//Make an entry in the table //Make an entry in the table
newentry.frameCount=frameCount; newentry.frameCount = frameCount;
pShaderLast = &newentry.shader; pShaderLast = &newentry.shader;
INCSTAT(stats.numPixelShadersCreated); INCSTAT(stats.numPixelShadersCreated);
@ -150,7 +151,7 @@ void PixelShaderMngr::Cleanup()
PSCache::iterator iter = pshaders.begin(); PSCache::iterator iter = pshaders.begin();
while(iter != pshaders.end()) { while(iter != pshaders.end()) {
PSCacheEntry &entry = iter->second; PSCacheEntry &entry = iter->second;
if (entry.frameCount<frameCount-200) { if (entry.frameCount < frameCount - 200) {
entry.Destroy(); entry.Destroy();
#ifdef _WIN32 #ifdef _WIN32
iter = pshaders.erase(iter); iter = pshaders.erase(iter);
@ -170,11 +171,7 @@ bool PixelShaderMngr::CompilePixelShader(FRAGMENTSHADER& ps, const char* pstrpro
char stropt[64]; char stropt[64];
sprintf(stropt, "MaxLocalParams=32,NumInstructionSlots=%d", s_nMaxPixelInstructions); sprintf(stropt, "MaxLocalParams=32,NumInstructionSlots=%d", s_nMaxPixelInstructions);
#ifdef _WIN32
const char* opts[] = {"-profileopts",stropt,"-O2","-q",NULL}; const char* opts[] = {"-profileopts",stropt,"-O2","-q",NULL};
#else
const char* opts[] = {"-profileopts",stropt,"-q",NULL};
#endif
CGprogram tempprog = cgCreateProgram(g_cgcontext, CG_SOURCE, pstrprogram, g_cgfProf, "main", opts); CGprogram tempprog = cgCreateProgram(g_cgcontext, CG_SOURCE, pstrprogram, g_cgfProf, "main", opts);
if (!cgIsProgram(tempprog) || cgGetError() != CG_NO_ERROR) { if (!cgIsProgram(tempprog) || cgGetError() != CG_NO_ERROR) {
ERROR_LOG("Failed to create ps %s:\n", cgGetLastListing(g_cgcontext)); ERROR_LOG("Failed to create ps %s:\n", cgGetLastListing(g_cgcontext));
@ -202,7 +199,6 @@ bool PixelShaderMngr::CompilePixelShader(FRAGMENTSHADER& ps, const char* pstrpro
//ERROR_LOG(pcompiledprog); //ERROR_LOG(pcompiledprog);
//ERROR_LOG(pstrprogram); //ERROR_LOG(pstrprogram);
glGenProgramsARB( 1, &ps.glprogid ); glGenProgramsARB( 1, &ps.glprogid );
glBindProgramARB( GL_FRAGMENT_PROGRAM_ARB, ps.glprogid ); glBindProgramARB( GL_FRAGMENT_PROGRAM_ARB, ps.glprogid );
glProgramStringARB( GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, (GLsizei)strlen(pcompiledprog), pcompiledprog); glProgramStringARB( GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, (GLsizei)strlen(pcompiledprog), pcompiledprog);
@ -215,6 +211,7 @@ bool PixelShaderMngr::CompilePixelShader(FRAGMENTSHADER& ps, const char* pstrpro
} }
cgDestroyProgram(tempprog); cgDestroyProgram(tempprog);
printf("Compiled pixel shader %i\n", ps.glprogid);
#ifdef _DEBUG #ifdef _DEBUG
ps.strprog = pstrprogram; ps.strprog = pstrprogram;

View File

@ -28,17 +28,21 @@ struct FRAGMENTSHADER
{ {
FRAGMENTSHADER() : glprogid(0) { } FRAGMENTSHADER() : glprogid(0) { }
GLuint glprogid; // opengl program id GLuint glprogid; // opengl program id
#ifdef _DEBUG #ifdef _DEBUG
std::string strprog; std::string strprog;
#endif #endif
}; };
class PixelShaderMngr class PixelShaderMngr
{ {
class PIXELSHADERUID class PIXELSHADERUID
{ {
public: public:
PIXELSHADERUID() { values = new u32[3+32+6+11]; tevstages = indstages = 0; } PIXELSHADERUID() {
values = new u32[4+32+6+11];
memset(values, 0, (4+32+6+11) * 4);
tevstages = indstages = 0;
}
~PIXELSHADERUID() { delete[] values; } ~PIXELSHADERUID() { delete[] values; }
PIXELSHADERUID(const PIXELSHADERUID& r) PIXELSHADERUID(const PIXELSHADERUID& r)
{ {
@ -57,7 +61,7 @@ class PixelShaderMngr
else if( values[0] > _Right.values[0] ) else if( values[0] > _Right.values[0] )
return false; return false;
int N = tevstages + 3; // numTevStages*3/2+1 int N = tevstages + indstages + 3; // numTevStages*3/2+1
int i = 1; int i = 1;
for(; i < N; ++i) { for(; i < N; ++i) {
if( values[i] < _Right.values[i] ) if( values[i] < _Right.values[i] )
@ -66,14 +70,6 @@ class PixelShaderMngr
return false; return false;
} }
N += indstages;
for(; i < N; ++i) {
if( values[i] < _Right.values[i] )
return true;
else if( values[i] > _Right.values[i] )
return false;
}
return false; return false;
} }
@ -82,19 +78,13 @@ class PixelShaderMngr
if( values[0] != _Right.values[0] ) if( values[0] != _Right.values[0] )
return false; return false;
int N = tevstages + 3; // numTevStages*3/2+1 int N = tevstages + indstages + 3; // numTevStages*3/2+1
int i = 1; int i = 1;
for(; i < N; ++i) { for(; i < N; ++i) {
if( values[i] != _Right.values[i] ) if( values[i] != _Right.values[i] )
return false; return false;
} }
N += indstages;
for(; i < N; ++i) {
if( values[i] != _Right.values[i] )
return false;
}
return true; return true;
} }
@ -109,7 +99,9 @@ class PixelShaderMngr
PSCacheEntry() : frameCount(0) {} PSCacheEntry() : frameCount(0) {}
~PSCacheEntry() {} ~PSCacheEntry() {}
void Destroy() { void Destroy() {
printf("Destroying ps %i\n", shader.glprogid);
glDeleteProgramsARB(1, &shader.glprogid); glDeleteProgramsARB(1, &shader.glprogid);
shader.glprogid = 0;
} }
}; };

View File

@ -183,6 +183,7 @@ bool VertexShaderMngr::CompileVertexShader(VERTEXSHADER& vs, const char* pstrpro
} }
cgDestroyProgram(tempprog); cgDestroyProgram(tempprog);
printf("Compiled vertex shader %i\n", vs.glprogid);
#ifdef _DEBUG #ifdef _DEBUG
vs.strprog = pstrprogram; vs.strprog = pstrprogram;

View File

@ -41,14 +41,18 @@ class VertexShaderMngr
int frameCount; int frameCount;
VSCacheEntry() : frameCount(0) {} VSCacheEntry() : frameCount(0) {}
void Destroy() { void Destroy() {
SAFE_RELEASE_PROG(shader.glprogid); printf("Destroying vs %i\n", shader.glprogid);
glDeleteProgramsARB(1, &shader.glprogid);
shader.glprogid = 0;
} }
}; };
class VERTEXSHADERUID class VERTEXSHADERUID
{ {
public: public:
VERTEXSHADERUID() {} VERTEXSHADERUID() {
memset(values, 0, sizeof(values));
}
VERTEXSHADERUID(const VERTEXSHADERUID& r) { VERTEXSHADERUID(const VERTEXSHADERUID& r) {
for(size_t i = 0; i < sizeof(values) / sizeof(u32); ++i) for(size_t i = 0; i < sizeof(values) / sizeof(u32); ++i)
values[i] = r.values[i]; values[i] = r.values[i];