rewrite texture cache, change commandline from --single-core to --num-cores=N, add multithreading to rasterizer, add toggles to 3d config to disable fog+edgemarking for little speedups in games that use them.

This commit is contained in:
zeromus 2009-10-28 09:39:52 +00:00
parent dd117dc47f
commit 8934925019
21 changed files with 1749 additions and 1192 deletions

View File

@ -55,9 +55,6 @@ GPU::MosaicLookup GPU::mosaicLookup;
//#define DEBUG_TRI
CACHE_ALIGN u8 GPU_screen[4*256*192];
u8 *GPU_tempScanline;
CACHE_ALIGN u16 GPU_tempScanlineBuffer[256];
CACHE_ALIGN u8 sprWin[256];
@ -2237,7 +2234,7 @@ template<bool SKIP> static void GPU_RenderLine_DispCapture(u16 l)
//INFO("Capture screen (BG + OBJ + 3D)\n");
u8 *src;
src = (u8*)(GPU_tempScanline);
src = (u8*)(gpu->tempScanline);
CAPCOPY(src,cap_dst);
}
break;
@ -2279,7 +2276,7 @@ template<bool SKIP> static void GPU_RenderLine_DispCapture(u16 l)
if (gpu->dispCapCnt.srcA == 0)
{
// Capture screen (BG + OBJ + 3D)
srcA = (u16*)(GPU_tempScanline);
srcA = (u16*)(gpu->tempScanline);
}
else
{
@ -2579,10 +2576,10 @@ void GPU_RenderLine(NDS_Screen * screen, u16 l, bool skip)
//generate the 2d engine output
if(gpu->dispMode == 1) {
//optimization: render straight to the output buffer when thats what we are going to end up displaying anyway
GPU_tempScanline = screen->gpu->currDst = (u8 *)(GPU_screen) + (screen->offset + l) * 512;
gpu->tempScanline = screen->gpu->currDst = (u8 *)(GPU_screen) + (screen->offset + l) * 512;
} else {
//otherwise, we need to go to a temp buffer
GPU_tempScanline = screen->gpu->currDst = (u8 *)GPU_tempScanlineBuffer;
gpu->tempScanline = screen->gpu->currDst = (u8 *)gpu->tempScanlineBuffer;
}
GPU_RenderLine_layer(screen, l);

View File

@ -736,6 +736,9 @@ struct GPU
u16 *currentFadeInColors, *currentFadeOutColors;
bool blend2[8];
CACHE_ALIGN u16 tempScanlineBuffer[256];
u8 *tempScanline;
u8 MasterBrightMode;
u32 MasterBrightFactor;

View File

@ -43,6 +43,7 @@ libdesmume_a_SOURCES = \
utils/md5.cpp utils/md5.h utils/valuearray.h utils/xstring.cpp utils/xstring.h \
utils/decrypt/crc.cpp utils/decrypt/crc.h utils/decrypt/decrypt.cpp \
utils/decrypt/decrypt.h utils/decrypt/header.cpp utils/decrypt/header.h \
utils/task.cpp utils/task.h \
addons.cpp addons.h \
addons/compactFlash.cpp addons/gbagame.cpp addons/none.cpp addons/rumblepak.cpp addons/guitarGrip.cpp addons/expMemory.cpp fs.h \
cheatSystem.cpp cheatSystem.h \

View File

@ -1881,6 +1881,14 @@ void Sequencer::init()
#endif
}
//this isnt helping much right now. work on it later
//#include "utils/task.h"
//Task taskSubGpu(true);
//void* renderSubScreen(void*)
//{
// GPU_RenderLine(&SubScreen, nds.VCount, SkipCur2DFrame);
// return NULL;
//}
static void execHardware_hblank()
{
@ -1907,8 +1915,10 @@ static void execHardware_hblank()
//in practice we need to be more forgiving, in case things have overrun the scanline start.
//this should be safe since games cannot do anything timing dependent until this next
//scanline begins, anyway (as this scanline was in the middle of drawing)
//taskSubGpu.execute(renderSubScreen,NULL);
GPU_RenderLine(&MainScreen, nds.VCount, SkipCur2DFrame);
GPU_RenderLine(&SubScreen, nds.VCount, SkipCur2DFrame);
//taskSubGpu.finish();
//trigger hblank dmas
//but notice, we do that just after we finished drawing the line
@ -1963,12 +1973,12 @@ static void execHardware_hstart_vblankStart()
static void execHardware_hstart_vcount()
{
u16 vmatch = T1ReadWord(MMU.ARM9_REG, 4);
if(nds.VCount==((vmatch>>8)|((vmatch<<1)&(1<<8))))
vmatch = ((vmatch>>8)|((vmatch<<1)&(1<<8)));
if(nds.VCount==vmatch)
{
//arm9 vmatch
T1WriteWord(MMU.ARM9_REG, 4, T1ReadWord(MMU.ARM9_REG, 4) | 4);
if(T1ReadWord(MMU.ARM9_REG, 4) & 32) {
//printf("VMATCH FIRING! vc=%03d\n",nds.VCount);
NDS_makeARM9Int(2);
}
}
@ -1976,7 +1986,8 @@ static void execHardware_hstart_vcount()
T1WriteWord(MMU.ARM9_REG, 4, T1ReadWord(MMU.ARM9_REG, 4) & 0xFFFB);
vmatch = T1ReadWord(MMU.ARM7_REG, 4);
if(nds.VCount==((vmatch>>8)|((vmatch<<1)&(1<<8))))
vmatch = ((vmatch>>8)|((vmatch<<1)&(1<<8)));
if(nds.VCount==vmatch)
{
//arm7 vmatch
T1WriteWord(MMU.ARM7_REG, 4, T1ReadWord(MMU.ARM7_REG, 4) | 4);

View File

@ -421,17 +421,19 @@ int NDS_WriteBMP_32bppBuffer(int width, int height, const void* buf, const char
extern struct TCommonSettings {
TCommonSettings()
: HighResolutionInterpolateColor(true)
, UseExtBIOS(false)
: UseExtBIOS(false)
, SWIFromBIOS(false)
, UseExtFirmware(false)
, BootFromFirmware(false)
, DebugConsole(false)
, single_core(true)
, num_cores(1)
, spuInterpolationMode(SPUInterpolation_Linear)
//, gfx3d_flushMode(0)
, manualBackupType(0)
, micMode(InternalNoise)
, GFX3D_HighResolutionInterpolateColor(true)
, GFX3D_EdgeMark(true)
, GFX3D_Fog(true)
{
strcpy(ARM9BIOS, "biosnds9.bin");
strcpy(ARM7BIOS, "biosnds7.bin");
@ -443,7 +445,9 @@ extern struct TCommonSettings {
for(int i=0;i<16;i++)
spu_muteChannels[i] = false;
}
bool HighResolutionInterpolateColor;
bool GFX3D_HighResolutionInterpolateColor;
bool GFX3D_EdgeMark;
bool GFX3D_Fog;
bool UseExtBIOS;
char ARM9BIOS[256];
@ -456,7 +460,8 @@ extern struct TCommonSettings {
bool DebugConsole;
bool single_core;
int num_cores;
bool single_core() { return num_cores==1; }
struct _Wifi {
int mode;

View File

@ -24,6 +24,8 @@
//so, it doesnt composite to 2d correctly.
//(re: new super mario brothers renders the stormclouds at the beginning)
#include <queue>
#include "OGLRender.h"
#include "debug.h"
@ -208,9 +210,8 @@ static void _xglDisable(GLenum cap) {
CTASSERT((cap-0x0B00)<0x100); \
_xglDisable(cap); }
static std::queue<GLuint> freeTextureIds;
GLenum oglTempTextureID[MAX_TEXTURE];
GLenum oglToonTableTextureID;
#define NOSHADERS(s) { hasShaders = false; INFO("Shaders aren't supported on your system, using fixed pipeline\n(%s)\n", s); return; }
@ -252,17 +253,16 @@ GLenum oglToonTableTextureID;
bool hasShaders = false;
/* Vertex shader */
GLuint vertexShaderID;
/* Fragment shader */
GLuint fragmentShaderID;
/* Shader program */
GLuint shaderProgram;
static GLuint hasTexLoc;
static GLuint texBlendLoc;
static bool hasTexture = false;
static ADPCMCacheItem* currTexture = NULL;
/* Shaders init */
static void createShaders()
@ -337,45 +337,54 @@ static void OGLReset()
}
TexCache_Reset();
for (int i = 0; i < MAX_TEXTURE; i++)
texcache[i].id=oglTempTextureID[i];
currTexture = NULL;
// memset(GPU_screenStencil,0,sizeof(GPU_screenStencil));
memset(GPU_screen3D,0,sizeof(GPU_screen3D));
}
//static class OGLTexCacheUser : public ITexCacheUser
//{
//public:
// virtual void BindTexture(u32 tx)
// {
// glBindTexture(GL_TEXTURE_2D,(GLuint)texcache[tx].id);
// glMatrixMode (GL_TEXTURE);
// glLoadIdentity ();
// glScaled (texcache[tx].invSizeX, texcache[tx].invSizeY, 1.0f);
//
// glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
// glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
//
// glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, (BIT16(texcache[tx].frm) ? (BIT18(texcache[tx].frm)?GL_MIRRORED_REPEAT:GL_REPEAT) : GL_CLAMP));
// glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, (BIT17(texcache[tx].frm) ? (BIT19(texcache[tx].frm)?GL_MIRRORED_REPEAT:GL_REPEAT) : GL_CLAMP));
// }
//
// virtual void BindTextureData(u32 tx, u8* data)
// {
// BindTexture(tx);
//
// #if 0
// for (int i=0; i < texcache[tx].sizeX * texcache[tx].sizeY*4; i++)
// data[i] = 0xFF;
// #endif
// glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA,
// texcache[tx].sizeX, texcache[tx].sizeY, 0,
// GL_RGBA, GL_UNSIGNED_BYTE, data);
// }
//} textures;
//
//static TexCacheUnit texCacheUnit;
static void BindTexture(u32 tx)
static void expandFreeTextures()
{
glBindTexture(GL_TEXTURE_2D,(GLuint)texcache[tx].id);
glMatrixMode (GL_TEXTURE);
glLoadIdentity ();
glScaled (texcache[tx].invSizeX, texcache[tx].invSizeY, 1.0f);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, (BIT16(texcache[tx].frm) ? (BIT18(texcache[tx].frm)?GL_MIRRORED_REPEAT:GL_REPEAT) : GL_CLAMP));
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, (BIT17(texcache[tx].frm) ? (BIT19(texcache[tx].frm)?GL_MIRRORED_REPEAT:GL_REPEAT) : GL_CLAMP));
const int kInitTextures = 128;
GLuint oglTempTextureID[kInitTextures];
glGenTextures(kInitTextures, &oglTempTextureID[0]);
for(int i=0;i<kInitTextures;i++)
freeTextureIds.push(oglTempTextureID[i]);
}
static void BindTextureData(u32 tx, u8* data)
{
BindTexture(tx);
#if 0
for (int i=0; i < texcache[tx].sizeX * texcache[tx].sizeY*4; i++)
data[i] = 0xFF;
#endif
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA,
texcache[tx].sizeX, texcache[tx].sizeY, 0,
GL_RGBA, GL_UNSIGNED_BYTE, data);
}
static char OGLInit(void)
{
GLuint loc = 0;
@ -388,9 +397,7 @@ static char OGLInit(void)
if(!BEGINGL())
return 0;
TexCache_BindTexture = BindTexture;
TexCache_BindTextureData = BindTextureData;
glGenTextures (MAX_TEXTURE, &oglTempTextureID[0]);
expandFreeTextures();
glPixelStorei(GL_PACK_ALIGNMENT,8);
@ -498,12 +505,28 @@ static void OGLClose()
hasShaders = false;
}
glDeleteTextures(MAX_TEXTURE, &oglTempTextureID[0]);
//kill the tex cache to free all the texture ids
TexCache_Reset();
while(!freeTextureIds.empty())
{
GLuint temp = freeTextureIds.front();
freeTextureIds.pop();
glDeleteTextures(1,&temp);
}
//glDeleteTextures(MAX_TEXTURE, &oglTempTextureID[0]);
glDeleteTextures(1, &oglToonTableTextureID);
ENDGL();
}
//Installed by setTexture() as the deleteCallback of a cache item once a GL texture id has been assigned to it.
//Returns the item's GL texture id to the freeTextureIds pool and drops currTexture if it refers to this item.
static void texDeleteCallback(ADPCMCacheItem* item)
{
//texid holds a GLuint that was stored through a (void*) cast; convert back through size_t
//because a direct pointer -> 32bit integer cast loses precision (and is rejected by some compilers) on 64-bit builds
freeTextureIds.push((GLuint)(size_t)item->texid);
//never leave currTexture pointing at an item that is going away
if(currTexture == item)
currTexture = NULL;
}
static void setTexture(unsigned int format, unsigned int texpal)
{
textureFormat = format;
@ -529,7 +552,43 @@ static void setTexture(unsigned int format, unsigned int texpal)
}
TexCache_SetTexture<TexFormat_32bpp>(format, texpal);
// texCacheUnit.TexCache_SetTexture<TexFormat_32bpp>(format, texpal);
ADPCMCacheItem* newTexture = TexCache_SetTexture(TexFormat_32bpp,format,texpal);
if(newTexture != currTexture)
{
currTexture = newTexture;
//has the ogl renderer initialized the texture?
if(!currTexture->deleteCallback)
{
currTexture->deleteCallback = texDeleteCallback;
if(freeTextureIds.empty()) expandFreeTextures();
currTexture->texid = (void*)freeTextureIds.front();
freeTextureIds.pop();
glBindTexture(GL_TEXTURE_2D,(GLuint)currTexture->texid);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, (BIT16(currTexture->texformat) ? (BIT18(currTexture->texformat)?GL_MIRRORED_REPEAT:GL_REPEAT) : GL_CLAMP));
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, (BIT17(currTexture->texformat) ? (BIT19(currTexture->texformat)?GL_MIRRORED_REPEAT:GL_REPEAT) : GL_CLAMP));
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA,
currTexture->sizeX, currTexture->sizeY, 0,
GL_RGBA, GL_UNSIGNED_BYTE, currTexture->decoded);
}
else
{
//otherwise, just bind it
glBindTexture(GL_TEXTURE_2D,(GLuint)currTexture->texid);
}
//in either case, we need to setup the tex mtx
glMatrixMode(GL_TEXTURE);
glLoadIdentity();
glScalef(currTexture->invSizeX, currTexture->invSizeY, 1.0f);
}
}
@ -902,6 +961,9 @@ static void OGLRender()
}
}
//needs to happen before endgl because it could free some textureids for expired cache items
TexCache_EvictFrame();
ENDGL();
GL_ReadFramebuffer();

View File

@ -148,7 +148,7 @@ void Agg_init()
aggDraw.target = targets[0];
//if we're single core, we don't want to waste time compositing
if(CommonSettings.single_core)
if(CommonSettings.single_core())
aggDraw.hud = &agg_targetScreen;
//and the more clever compositing isnt supported in non-windows

View File

@ -40,8 +40,7 @@ CommandLine::CommandLine()
, _record_movie_file(0)
, _cflash_image(0)
, _cflash_path(0)
, _single_core(0)
, _multi_core(0)
, _num_cores(-1)
, _bios_arm9(NULL)
, _bios_arm7(NULL)
, _bios_swi(0)
@ -74,8 +73,7 @@ void CommandLine::loadCommonOptions()
{ "bios-arm7", 0, 0, G_OPTION_ARG_FILENAME, &_bios_arm7, "Uses the arm7 bios provided at the specified path", "BIOS_ARM7_PATH"},
{ "bios-swi", 0, 0, G_OPTION_ARG_INT, &_bios_swi, "Uses SWI from the provided bios files", "BIOS_SWI"},
#ifdef _MSC_VER
{ "single-core", 0, 0, G_OPTION_ARG_NONE, &_single_core, "Limit execution to use approximately only one core", "NUM_CORES"},
{ "multi-core", 0, 0, G_OPTION_ARG_NONE, &_multi_core, "Act as if multiple cores are present, even on a single-core machine", "MULTI_CORE"},
//takes an integer argument (--num-cores=N); _num_cores is an int that stays -1 when the option is absent
{ "num-cores", 0, 0, G_OPTION_ARG_INT, &_num_cores, "Override numcores detection and use this many", "NUM_CORES"},
{ "scanline-filter-a", 0, 0, G_OPTION_ARG_INT, &scanline_filter_a, "Intensity of fadeout for scanlines filter (edge) (default 2)", "SCANLINE_FILTER_A"},
{ "scanline-filter-b", 0, 0, G_OPTION_ARG_INT, &scanline_filter_b, "Intensity of fadeout for scanlines filter (corner) (default 4)", "SCANLINE_FILTER_B"},
#endif
@ -103,8 +101,7 @@ bool CommandLine::parse(int argc,char **argv)
if(_cflash_image) cflash_image = _cflash_image;
if(_cflash_path) cflash_path = _cflash_path;
if(_single_core) CommonSettings.single_core = true;
if(_multi_core) CommonSettings.single_core = false;
if(_num_cores != -1) CommonSettings.num_cores = _num_cores;
//TODO MAX PRIORITY! change ARM9BIOS etc to be a std::string
if(_bios_arm9) { CommonSettings.UseExtBIOS = true; strcpy(CommonSettings.ARM9BIOS,_bios_arm9); }

View File

@ -75,8 +75,7 @@ private:
char* _cflash_path;
char* _bios_arm9, *_bios_arm7;
int _bios_swi;
int _single_core;
int _multi_core;
int _num_cores;
};
#endif

View File

@ -2341,7 +2341,7 @@ static FORCEINLINE VERT clipPoint(VERT* inside, VERT* outside, int coord, int wh
INTERP(coord[0]); INTERP(coord[1]); INTERP(coord[2]); INTERP(coord[3]);
INTERP(texcoord[0]); INTERP(texcoord[1]);
if(CommonSettings.HighResolutionInterpolateColor)
if(CommonSettings.GFX3D_HighResolutionInterpolateColor)
{
INTERP(fcolor[0]); INTERP(fcolor[1]); INTERP(fcolor[2]);
}

View File

@ -1,8 +1,4 @@
/* Copyright (C) 2006 yopyop
yopyop156@ifrance.com
yopyop156.ifrance.com
Copyright 2009 DeSmuME team
/* Copyright 2009 DeSmuME team
This file is part of DeSmuME
@ -46,6 +42,7 @@
#include "gfx3d.h"
#include "texcache.h"
#include "NDSSystem.h"
#include "utils/task.h"
//#undef FORCEINLINE
//#define FORCEINLINE
@ -63,8 +60,6 @@ template<typename T> T _max(T a, T b, T c, T d) { return max(_max(a,b,d),c); }
static const int kUnsetTranslucentPolyID = 255;
static int polynum;
static u8 modulate_table[64][64];
static u8 decal_table[32][64][64];
static u8 index_lookup_table[65];
@ -72,6 +67,9 @@ static u8 index_start_table[8];
static GFX3D_Clipper clipper;
static GFX3D_Clipper::TClippedPoly *clippedPolys = NULL;
static ADPCMCacheItem* polyTexKeys[POLYLIST_SIZE];
static bool polyVisible[POLYLIST_SIZE];
static bool polyBackfacing[POLYLIST_SIZE];
static int clippedPolyCounter;
@ -118,65 +116,242 @@ static FORCEINLINE int fastFloor(float f)
//----texture cache---
//TODO - the texcache could ask for a buffer to generate into
//that would avoid us ever having to buffercopy..
struct TextureBuffers
//One framebuffer color value, accessible either as a packed 32bit word or as four 8bit channels.
union FragmentColor {
//all four channels packed together
u32 color;
//the same storage viewed per channel (anonymous struct; which byte of 'color' each one maps to depends on host byte order)
struct {
u8 r,g,b,a;
};
};
struct Fragment
{
static const int numTextures = MAX_TEXTURE+1;
u8* buffers[numTextures];
u32 depth;
void clear() { memset(buffers,0,sizeof(buffers)); }
struct {
u8 opaque, translucent;
} polyid;
TextureBuffers()
{
clear();
}
u8 stencil;
void free()
{
for(int i=0;i<numTextures;i++)
delete[] buffers[i];
clear();
}
struct {
u8 isTranslucentPoly:1;
u8 fogged:1;
};
};
~TextureBuffers() {
free();
}
//INLINE static void SubmitVertex(int vert_index, VERT& rawvert)
//{
// verts[vert_index] = &rawvert;
//}
void setCurrent(int num)
{
currentData = buffers[num];
}
static Fragment screen[256*192];
static FragmentColor screenColor[256*192];
static FragmentColor toonTable[32];
static u8 fogTable[32768];
void create(int num, u8* data)
{
delete[] buffers[num];
int size = texcache[num].sizeX * texcache[num].sizeY * 4;
buffers[num] = new u8[size];
setCurrent(num);
memcpy(currentData,data,size);
}
u8* currentData;
} textures;
//called from the texture cache to change the active texture
static void BindTexture(u32 tx)
{
textures.setCurrent(tx);
FORCEINLINE int iround(float f) {
return (int)f; //lol
}
//caled from the texture cache to change to a new texture
static void BindTextureData(u32 tx, u8* data)
typedef int fixed28_4;
static bool failure;
// handle floor divides and mods correctly
FORCEINLINE void FloorDivMod(long Numerator, long Denominator, long &Floor, long &Mod)
{
textures.create(tx,data);
//These must be caused by invalid or degenerate shapes.. not sure yet.
//check it out in the mario face intro of SM64
//so, we have to take out the assert.
//I do know that we handle SOME invalid shapes without crashing,
//since I see them acting poppy in a way that doesnt happen in the HW.. so alas it is also incorrect.
//This particular incorrectness is not likely ever to get fixed!
//assert(Denominator > 0);
//but we have to bail out since our handling for these cases currently steps scanlines
//the wrong way and goes totally nuts (freezes)
if(Denominator<=0)
failure = true;
if(Numerator >= 0) {
// positive case, C is okay
Floor = Numerator / Denominator;
Mod = Numerator % Denominator;
} else {
// Numerator is negative, do the right thing
Floor = -((-Numerator) / Denominator);
Mod = (-Numerator) % Denominator;
if(Mod) {
// there is a remainder
Floor--; Mod = Denominator - Mod;
}
}
}
//---------------
//Converts a float to 28.4 fixed point by scaling by 16 (4 fractional bits).
//The cast truncates toward zero rather than rounding.
FORCEINLINE fixed28_4 FloatToFixed28_4( float Value ) {
return (fixed28_4)(Value * 16);
}
//Converts a 28.4 fixed point value back to float by dividing out the 4 fractional bits.
FORCEINLINE float Fixed28_4ToFloat( fixed28_4 Value ) {
return Value / 16.0f;
}
//inline fixed16_16 FloatToFixed16_16( float Value ) {
// return (fixed16_6)(Value * 65536);
//}
//inline float Fixed16_16ToFloat( fixed16_16 Value ) {
// return Value / 65536.0;
//}
//Multiplies two 28.4 fixed point values and returns the 28.4 result.
//The raw product carries 8 fractional bits, so it is divided by 16 to get back to 4.
FORCEINLINE fixed28_4 Fixed28_4Mul( fixed28_4 A, fixed28_4 B ) {
// widen to 64 bits so the intermediate 24.8 product cannot overflow int;
// results are unchanged for every input that did not overflow before
return (fixed28_4)(((long long)A * B) / 16); // 28.4 * 28.4 = 24.8 / 16 = 28.4
}
//Rounds a 28.4 fixed point value up to the next whole integer (ceiling).
//Implemented as a floor division of (Value + 15) by 16 that is also right for negative inputs.
FORCEINLINE int Ceil28_4( fixed28_4 Value ) {
const int biased = Value - 1 + 16;
//non-negative: plain integer division already floors
if(biased >= 0)
return biased/16;
//negative: divide the magnitude, then step one further down if anything was left over
const int magnitude = -biased;
int whole = -(magnitude/16);
if(magnitude % 16)
whole -= 1;
return whole;
}
struct PolyAttr
//State for walking one polygon edge scanline by scanline.
//X is tracked with an integer DDA (X/XStep plus an ErrorTerm accumulated against Denominator),
//while the per-vertex attributes are tracked as float interpolants stepped alongside it.
struct edge_fx_fl {
//leaves every member uninitialized
edge_fx_fl() {}
//builds the edge running from verts[Top] to verts[Bottom]
edge_fx_fl(int Top, int Bottom, VERT** verts);
//advances one scanline; returns the remaining Height
FORCEINLINE int Step();
//vertex array the edge was built from (not owned)
VERT** verts;
long X, XStep, Numerator, Denominator; // DDA info for x
long ErrorTerm;
int Y, Height; // current y and vertical count
//one attribute interpolated down the edge
struct Interpolant {
//curr: value at the current scanline; step: added every scanline; stepExtra: added additionally whenever the DDA carries into X
float curr, step, stepExtra;
FORCEINLINE void doStep() { curr += step; }
FORCEINLINE void doStepExtra() { curr += stepExtra; }
//top/bottom are the attribute values at the two edge endpoints
FORCEINLINE void initialize(float top, float bottom, float dx, float dy, long XStep, float XPrestep, float YPrestep) {
//the incoming dx is discarded: with dx forced to 0 the XPrestep term, the XStep term and stepExtra all vanish,
//so the attribute only varies with y along the edge
dx = 0;
//dy arrives as a reciprocal of the edge's y extent; scale it by the attribute delta to get the per-scanline change
dy *= (bottom-top);
curr = top + YPrestep * dy + XPrestep * dx;
step = XStep * dx + dy;
stepExtra = dx;
}
};
//must equal the number of Interpolant members in the anonymous struct below (invw,z,u,v + 3 color channels)
static const int NUM_INTERPOLANTS = 7;
//the named interpolants and the array share storage so they can be stepped in a loop
union {
struct {
Interpolant invw,z,u,v,color[3];
};
Interpolant interpolants[NUM_INTERPOLANTS];
};
//applies the per-scanline step to every interpolant
void FORCEINLINE doStepInterpolants() { for(int i=0;i<NUM_INTERPOLANTS;i++) interpolants[i].doStep(); }
//applies the extra (DDA carry) step to every interpolant
void FORCEINLINE doStepExtraInterpolants() { for(int i=0;i<NUM_INTERPOLANTS;i++) interpolants[i].doStepExtra(); }
};
//Sets up the edge from verts[Top] down to verts[Bottom].
//Top/Bottom are indices into verts. The vertex x/y values are cast straight to fixed28_4,
//so they are presumably already expressed in 28.4 units - TODO confirm against the caller.
//When the edge covers no scanlines (Height==0) only verts, Y and Height are written; the DDA and interpolants stay uninitialized.
FORCEINLINE edge_fx_fl::edge_fx_fl(int Top, int Bottom, VERT** verts) {
this->verts = verts;
//first scanline covered by the edge, and the one just past its end
Y = Ceil28_4((fixed28_4)verts[Top]->y);
int YEnd = Ceil28_4((fixed28_4)verts[Bottom]->y);
Height = YEnd - Y;
if(Height)
{
//edge deltas: dN along y, dM along x
long dN = long(verts[Bottom]->y - verts[Top]->y);
long dM = long(verts[Bottom]->x - verts[Top]->x);
//X at the first scanline and the per-scanline X step, each split into a floored quotient and a remainder.
//FloorDivMod raises the file-level 'failure' flag when the denominator (dN*16) is not positive.
long InitialNumerator = (long)(dM*16*Y - dM*verts[Top]->y + dN*verts[Top]->x - 1 + dN*16);
FloorDivMod(InitialNumerator,dN*16,X,ErrorTerm);
FloorDivMod(dM*16,dN*16,XStep,Numerator);
Denominator = dN*16;
//distance from the top vertex to the first sample position, converted to float
float YPrestep = Fixed28_4ToFloat((fixed28_4)(Y*16 - verts[Top]->y));
float XPrestep = Fixed28_4ToFloat((fixed28_4)(X*16 - verts[Top]->x));
//reciprocals of the edge extents; note that Interpolant::initialize overwrites the dx it receives with 0
float dy = 1/Fixed28_4ToFloat(dN);
float dx = 1/Fixed28_4ToFloat(dM);
//w is interpolated as its reciprocal
invw.initialize(1/verts[Top]->w,1/verts[Bottom]->w,dx,dy,XStep,XPrestep,YPrestep);
u.initialize(verts[Top]->u,verts[Bottom]->u,dx,dy,XStep,XPrestep,YPrestep);
v.initialize(verts[Top]->v,verts[Bottom]->v,dx,dy,XStep,XPrestep,YPrestep);
z.initialize(verts[Top]->z,verts[Bottom]->z,dx,dy,XStep,XPrestep,YPrestep);
for(int i=0;i<3;i++)
color[i].initialize(verts[Top]->fcolor[i],verts[Bottom]->fcolor[i],dx,dy,XStep,XPrestep,YPrestep);
}
}
//Moves the edge down by one scanline and reports how many scanlines are left.
FORCEINLINE int edge_fx_fl::Step() {
//one scanline consumed
++Y;
--Height;
//whole-pixel part of the x advance, plus the matching attribute step
X += XStep;
doStepInterpolants();
//fractional part: accumulate the remainder and carry into X once it reaches the denominator
ErrorTerm += Numerator;
if(ErrorTerm < Denominator)
return Height;
++X;
ErrorTerm -= Denominator;
doStepExtraInterpolants();
return Height;
}
//Merges the incoming fragment color src into the framebuffer color dst, in place.
//With gfx3d.enableAlphaBlending off, any source with non-zero alpha simply replaces dst.
//With it on, the colors are mixed by the source alpha (weights sum to 32) unless either alpha is 0,
//in which case src is copied; the resulting alpha is the larger of the two.
static FORCEINLINE void alphaBlend(FragmentColor & dst, const FragmentColor & src)
{
if(!gfx3d.enableAlphaBlending)
{
//a totally transparent fragment leaves the destination untouched
if(src.a != 0)
dst = src;
return;
}
const bool nothingToMix = (src.a == 0 || dst.a == 0);
if(nothingToMix)
{
dst = src;
}
else
{
const u8 srcWeight = src.a+1;
const u8 dstWeight = 32 - srcWeight;
dst.r = (srcWeight*src.r + dstWeight*dst.r)>>5;
dst.g = (srcWeight*src.g + dstWeight*dst.g)>>5;
dst.b = (srcWeight*src.b + dstWeight*dst.b)>>5;
}
dst.a = max(src.a,dst.a);
}
class RasterizerUnit
{
public:
int SLI_MASK, SLI_VALUE;
RasterizerUnit()
: sampler(*this)
, shader(sampler)
{
}
ADPCMCacheItem* lastTexKey;
VERT* verts[MAX_CLIPPED_VERTS];
struct PolyAttr
{
u32 val;
bool decalMode;
@ -217,50 +392,17 @@ struct PolyAttr
fogged = BIT15(val);
}
} polyAttr;
union FragmentColor {
u32 color;
struct {
u8 r,g,b,a;
};
};
struct Fragment
{
u32 depth;
struct {
u8 opaque, translucent;
} polyid;
u8 stencil;
struct {
u8 isTranslucentPoly:1;
u8 fogged:1;
};
};
static VERT* verts[MAX_CLIPPED_VERTS];
//INLINE static void SubmitVertex(int vert_index, VERT& rawvert)
//{
// verts[vert_index] = &rawvert;
//}
static Fragment screen[256*192];
static FragmentColor screenColor[256*192];
static FragmentColor toonTable[32];
static u8 fogTable[32768];
FORCEINLINE int iround(float f) {
return (int)f; //lol
}
} polyAttr;
static struct Sampler
{
struct Sampler
{
Sampler(RasterizerUnit& _unit)
: unit(_unit)
{}
RasterizerUnit& unit;
int width, height;
int wmask, hmask;
int wrap;
@ -332,14 +474,18 @@ static struct Sampler
dowrap(iu,iv);
FragmentColor color;
color.color = ((u32*)textures.currentData)[(iv<<wshift)+iu];
color.color = ((u32*)unit.lastTexKey->decoded)[(iv<<wshift)+iu];
return color;
}
} sampler;
} sampler;
struct Shader
{
struct Shader
{
Shader(Sampler& _sampler)
:sampler(_sampler)
{}
Sampler& sampler;
u8 mode;
void setup(u32 polyattr)
{
@ -438,41 +584,10 @@ struct Shader
}
}
} shader;
} shader;
static FORCEINLINE void alphaBlend(FragmentColor & dst, const FragmentColor & src)
{
if(gfx3d.enableAlphaBlending)
FORCEINLINE void pixel(int adr,float r, float g, float b, float invu, float invv, float w, float z)
{
if(src.a == 0 || dst.a == 0)
{
dst = src;
}
else
{
u8 alpha = src.a+1;
u8 invAlpha = 32 - alpha;
dst.r = (alpha*src.r + invAlpha*dst.r)>>5;
dst.g = (alpha*src.g + invAlpha*dst.g)>>5;
dst.b = (alpha*src.b + invAlpha*dst.b)>>5;
}
dst.a = max(src.a,dst.a);
}
else
{
if(src.a == 0)
{
//do nothing; the fragment is totally transparent
}
else
{
dst = src;
}
}
}
static FORCEINLINE void pixel(int adr,float r, float g, float b, float invu, float invv, float w, float z) {
Fragment &destFragment = screen[adr];
FragmentColor &destFragmentColor = screenColor[adr];
@ -601,153 +716,11 @@ static FORCEINLINE void pixel(int adr,float r, float g, float b, float invu, flo
depth_fail:
rejected_fragment:
;
}
typedef int fixed28_4;
static bool failure;
// handle floor divides and mods correctly
FORCEINLINE void FloorDivMod(long Numerator, long Denominator, long &Floor, long &Mod)
{
//These must be caused by invalid or degenerate shapes.. not sure yet.
//check it out in the mario face intro of SM64
//so, we have to take out the assert.
//I do know that we handle SOME invalid shapes without crashing,
//since I see them acting poppy in a way that doesnt happen in the HW.. so alas it is also incorrect.
//This particular incorrectness is not likely ever to get fixed!
//assert(Denominator > 0);
//but we have to bail out since our handling for these cases currently steps scanlines
//the wrong way and goes totally nuts (freezes)
if(Denominator<=0)
failure = true;
if(Numerator >= 0) {
// positive case, C is okay
Floor = Numerator / Denominator;
Mod = Numerator % Denominator;
} else {
// Numerator is negative, do the right thing
Floor = -((-Numerator) / Denominator);
Mod = (-Numerator) % Denominator;
if(Mod) {
// there is a remainder
Floor--; Mod = Denominator - Mod;
}
}
}
FORCEINLINE fixed28_4 FloatToFixed28_4( float Value ) {
return (fixed28_4)(Value * 16);
}
FORCEINLINE float Fixed28_4ToFloat( fixed28_4 Value ) {
return Value / 16.0f;
}
//inline fixed16_16 FloatToFixed16_16( float Value ) {
// return (fixed16_6)(Value * 65536);
//}
//inline float Fixed16_16ToFloat( fixed16_16 Value ) {
// return Value / 65536.0;
//}
FORCEINLINE fixed28_4 Fixed28_4Mul( fixed28_4 A, fixed28_4 B ) {
// could make this asm to prevent overflow
return (A * B) / 16; // 28.4 * 28.4 = 24.8 / 16 = 28.4
}
FORCEINLINE int Ceil28_4( fixed28_4 Value ) {
int ReturnValue;
int Numerator = Value - 1 + 16;
if(Numerator >= 0) {
ReturnValue = Numerator/16;
} else {
// deal with negative numerators correctly
ReturnValue = -((-Numerator)/16);
ReturnValue -= ((-Numerator) % 16) ? 1 : 0;
}
return ReturnValue;
}
struct edge_fx_fl {
edge_fx_fl() {}
edge_fx_fl(int Top, int Bottom);
FORCEINLINE int Step();
long X, XStep, Numerator, Denominator; // DDA info for x
long ErrorTerm;
int Y, Height; // current y and vertical count
struct Interpolant {
float curr, step, stepExtra;
FORCEINLINE void doStep() { curr += step; }
FORCEINLINE void doStepExtra() { curr += stepExtra; }
FORCEINLINE void initialize(float top, float bottom, float dx, float dy, long XStep, float XPrestep, float YPrestep) {
dx = 0;
dy *= (bottom-top);
curr = top + YPrestep * dy + XPrestep * dx;
step = XStep * dx + dy;
stepExtra = dx;
}
};
static const int NUM_INTERPOLANTS = 7;
union {
struct {
Interpolant invw,z,u,v,color[3];
};
Interpolant interpolants[NUM_INTERPOLANTS];
};
void FORCEINLINE doStepInterpolants() { for(int i=0;i<NUM_INTERPOLANTS;i++) interpolants[i].doStep(); }
void FORCEINLINE doStepExtraInterpolants() { for(int i=0;i<NUM_INTERPOLANTS;i++) interpolants[i].doStepExtra(); }
};
FORCEINLINE edge_fx_fl::edge_fx_fl(int Top, int Bottom) {
Y = Ceil28_4((fixed28_4)verts[Top]->y);
int YEnd = Ceil28_4((fixed28_4)verts[Bottom]->y);
Height = YEnd - Y;
if(Height)
//draws a single scanline
FORCEINLINE void drawscanline(edge_fx_fl *pLeft, edge_fx_fl *pRight)
{
long dN = long(verts[Bottom]->y - verts[Top]->y);
long dM = long(verts[Bottom]->x - verts[Top]->x);
long InitialNumerator = (long)(dM*16*Y - dM*verts[Top]->y + dN*verts[Top]->x - 1 + dN*16);
FloorDivMod(InitialNumerator,dN*16,X,ErrorTerm);
FloorDivMod(dM*16,dN*16,XStep,Numerator);
Denominator = dN*16;
float YPrestep = Fixed28_4ToFloat((fixed28_4)(Y*16 - verts[Top]->y));
float XPrestep = Fixed28_4ToFloat((fixed28_4)(X*16 - verts[Top]->x));
float dy = 1/Fixed28_4ToFloat(dN);
float dx = 1/Fixed28_4ToFloat(dM);
invw.initialize(1/verts[Top]->w,1/verts[Bottom]->w,dx,dy,XStep,XPrestep,YPrestep);
u.initialize(verts[Top]->u,verts[Bottom]->u,dx,dy,XStep,XPrestep,YPrestep);
v.initialize(verts[Top]->v,verts[Bottom]->v,dx,dy,XStep,XPrestep,YPrestep);
z.initialize(verts[Top]->z,verts[Bottom]->z,dx,dy,XStep,XPrestep,YPrestep);
for(int i=0;i<3;i++)
color[i].initialize(verts[Top]->fcolor[i],verts[Bottom]->fcolor[i],dx,dy,XStep,XPrestep,YPrestep);
}
}
FORCEINLINE int edge_fx_fl::Step() {
X += XStep; Y++; Height--;
doStepInterpolants();
ErrorTerm += Numerator;
if(ErrorTerm >= Denominator) {
X++;
ErrorTerm -= Denominator;
doStepExtraInterpolants();
}
return Height;
}
//draws a single scanline
FORCEINLINE static void drawscanline(edge_fx_fl *pLeft, edge_fx_fl *pRight)
{
int XStart = pLeft->X;
int width = pRight->X - XStart;
@ -801,32 +774,35 @@ FORCEINLINE static void drawscanline(edge_fx_fl *pLeft, edge_fx_fl *pRight)
color[1] += dc_dx[1];
color[2] += dc_dx[2];
}
}
}
//runs several scanlines, until an edge is finished
static void runscanlines(edge_fx_fl *left, edge_fx_fl *right)
{
//runs several scanlines, until an edge is finished
template<bool SLI>
void runscanlines(edge_fx_fl *left, edge_fx_fl *right)
{
//do not overstep either of the edges
int Height = min(left->Height,right->Height);
while(Height--) {
if(!SLI || (left->Y & SLI_MASK) == SLI_VALUE)
drawscanline(left,right);
left->Step();
right->Step();
}
}
}
//rotates verts counterclockwise
template<int type>
INLINE static void rot_verts() {
//rotates verts counterclockwise
template<int type>
INLINE void rot_verts() {
#define ROTSWAP(X) if(type>X) swap(verts[X-1],verts[X]);
ROTSWAP(1); ROTSWAP(2); ROTSWAP(3); ROTSWAP(4);
ROTSWAP(5); ROTSWAP(6); ROTSWAP(7);
}
ROTSWAP(5); ROTSWAP(6); ROTSWAP(7); ROTSWAP(8); ROTSWAP(9);
}
//rotate verts until vert0.y is minimum, and then vert0.x is minimum in case of ties
//this is a necessary precondition for our shape engine
template<int type>
static void sort_verts(bool backwards) {
//rotate verts until vert0.y is minimum, and then vert0.x is minimum in case of ties
//this is a necessary precondition for our shape engine
template<int type>
void sort_verts(bool backwards) {
//if the verts are backwards, reorder them first
if(backwards)
for(int i=0;i<type/2;i++)
@ -837,7 +813,7 @@ static void sort_verts(bool backwards) {
//this was the only way we could get this to unroll
#define CHECKY(X) if(type>X) if(verts[0]->y > verts[X]->y) goto doswap;
CHECKY(1); CHECKY(2); CHECKY(3); CHECKY(4);
CHECKY(5); CHECKY(6); CHECKY(7);
CHECKY(5); CHECKY(6); CHECKY(7); CHECKY(8); CHECKY(9);
break;
doswap:
@ -847,13 +823,14 @@ static void sort_verts(bool backwards) {
while(verts[0]->y == verts[1]->y && verts[0]->x > verts[1]->x)
rot_verts<type>();
}
}
//This function can handle any convex N-gon up to octagons
//verts must be clockwise.
//I didnt reference anything for this algorithm but it seems like I've seen it somewhere before.
static void shape_engine(int type, bool backwards)
{
//This function can handle any convex N-gon with up to 10 vertices (decagons)
//verts must be clockwise.
//I didnt reference anything for this algorithm but it seems like I've seen it somewhere before.
template<bool SLI>
void shape_engine(int type, bool backwards)
{
failure = false;
switch(type) {
@ -863,6 +840,8 @@ static void shape_engine(int type, bool backwards)
case 6: sort_verts<6>(backwards); break;
case 7: sort_verts<7>(backwards); break;
case 8: sort_verts<8>(backwards); break;
case 9: sort_verts<9>(backwards); break;
case 10: sort_verts<10>(backwards); break;
default: printf("skipping type %d\n",type); return;
}
@ -879,17 +858,23 @@ static void shape_engine(int type, bool backwards)
//so that they can be continued on down the shape
assert(rv != type);
int _lv = lv==type?0:lv; //make sure that we ask for vert 0 when the variable contains the starting value
if(step_left) left = edge_fx_fl(_lv,lv-1);
if(step_right) right = edge_fx_fl(rv,rv+1);
if(step_left) left = edge_fx_fl(_lv,lv-1,(VERT**)&verts);
if(step_right) right = edge_fx_fl(rv,rv+1,(VERT**)&verts);
step_left = step_right = false;
//handle a failure in the edge setup due to nutty polys
if(failure)
return;
if(left.Height<0 || right.Height<0)
{
//i have NO IDEA WHY THIS HAPPENS
//but i think it was corrupting things in a bad way
//which was only revealed by the multicored rasterizer
return;
}
runscanlines(&left,&right);
runscanlines<SLI>(&left,&right);
//if we ran out of an edge, step to the next one
if(right.Height == 0) {
@ -905,6 +890,72 @@ static void shape_engine(int type, bool backwards)
if(lv<=rv+1) break;
}
}
//Rasterizes every visible clipped poly with this unit.
//SLI is forwarded to shape_engine<SLI>; when true, runscanlines only draws the scanlines
//whose Y matches this unit's SLI_MASK/SLI_VALUE.
template<bool SLI>
void mainLoop()
{
lastTexKey = NULL;
//a counter for how many polys got culled
//(declared but never updated in this function)
int culled = 0;
//the last* values are recorded below but only consulted by the commented-out caching conditions
u32 lastPolyAttr = 0;
u32 lastTextureFormat = 0, lastTexturePalette = 0;
//iterate over polys
for(int i=0;i<clippedPolyCounter;i++)
{
//visibility is read from the shared polyVisible[] table, which is filled in outside this function
if(!polyVisible[i]) continue;
GFX3D_Clipper::TClippedPoly &clippedPoly = clippedPolys[i];
POLY *poly = clippedPoly.poly;
//type is the vertex count of the clipped poly
int type = clippedPoly.type;
//caching check disabled: the poly attributes are set up for every poly
//if(i == 0 || lastPolyAttr != poly->polyAttr)
{
polyAttr.setup(poly->polyAttr);
polyAttr.translucent = poly->isTranslucent();
lastPolyAttr = poly->polyAttr;
}
//caching check disabled: the sampler is set up for every poly
//if(i == 0 || lastTextureFormat != poly->texParam || lastTexturePalette != poly->texPalette)
{
sampler.setup(poly->texParam);
lastTextureFormat = poly->texParam;
lastTexturePalette = poly->texPalette;
}
//texture cache item for this poly, taken from the shared polyTexKeys[] table; the sampler reads its decoded data
lastTexKey = polyTexKeys[i];
//hmm... shader gets setup every time because it depends on sampler which may have just changed
shader.setup(poly->polyAttr);
//point this unit's vertex slots at the clipped verts and null out the unused ones
for(int j=0;j<type;j++)
this->verts[j] = &clippedPoly.clipVerts[j];
for(int j=type;j<MAX_CLIPPED_VERTS;j++)
this->verts[j] = NULL;
polyAttr.backfacing = polyBackfacing[i];
//shape_engine's second argument is 'backwards': front-facing polys get their vertex order reversed
shape_engine<SLI>(type,!polyAttr.backfacing);
}
}
}; //rasterizerUnit
static Task rasterizerUnitTask[4];
static RasterizerUnit rasterizerUnit[4];
static int rasterizerCores;
void* execRasterizerUnit(void* arg)
{
s32 which = (s32)arg;
rasterizerUnit[which].mainLoop<true>();
return 0;
}
static char SoftRastInit(void)
@ -912,6 +963,37 @@ static char SoftRastInit(void)
static bool tables_generated = false;
if(!tables_generated)
{
if(CommonSettings.num_cores>=4)
{
rasterizerCores = 4;
rasterizerUnit[0].SLI_MASK = 3;
rasterizerUnit[1].SLI_MASK = 3;
rasterizerUnit[2].SLI_MASK = 3;
rasterizerUnit[3].SLI_MASK = 3;
rasterizerUnit[0].SLI_VALUE = 0;
rasterizerUnit[1].SLI_VALUE = 1;
rasterizerUnit[2].SLI_VALUE = 2;
rasterizerUnit[3].SLI_VALUE = 3;
rasterizerUnitTask[0].start(false);
rasterizerUnitTask[1].start(false);
rasterizerUnitTask[2].start(false);
rasterizerUnitTask[3].start(false);
} else if(CommonSettings.num_cores>1)
{
rasterizerCores = 2;
rasterizerUnit[0].SLI_MASK = 1;
rasterizerUnit[1].SLI_MASK = 1;
rasterizerUnit[0].SLI_VALUE = 0;
rasterizerUnit[1].SLI_VALUE = 1;
rasterizerUnitTask[0].start(false);
rasterizerUnitTask[1].start(false);
} else {
rasterizerCores = 1;
rasterizerUnit[0].SLI_MASK = 0;
rasterizerUnit[0].SLI_VALUE = 0;
}
tables_generated = true;
clipper.clippedPolys = clippedPolys = new GFX3D_Clipper::TClippedPoly[POLYLIST_SIZE*2];
@ -942,8 +1024,6 @@ static char SoftRastInit(void)
}
TexCache_Reset();
TexCache_BindTexture = BindTexture;
TexCache_BindTextureData = BindTextureData;
printf("SoftRast Initialized\n");
return 1;
@ -969,7 +1049,7 @@ static void SoftRastFramebufferProcess()
// - the edges are completely sharp/opaque on the very brief title screen intro,
// - the level-start intro gets a pseudo-antialiasing effect around the silhouette,
// - the character edges in-level are clearly transparent, and also show well through shield powerups.
if(gfx3d.enableEdgeMarking)
if(gfx3d.enableEdgeMarking && CommonSettings.GFX3D_EdgeMark)
{
//TODO - need to test and find out whether these get grabbed at flush time, or at render time
//we can do this by rendering a 3d frame and then freezing the system, but only changing the edge mark colors
@ -1039,7 +1119,7 @@ static void SoftRastFramebufferProcess()
}
}
if(gfx3d.enableFog)
if(gfx3d.enableFog && CommonSettings.GFX3D_Fog)
{
u32 r = GFX3D_5TO6((gfx3d.fogColor)&0x1F);
u32 g = GFX3D_5TO6((gfx3d.fogColor>>5)&0x1F);
@ -1070,8 +1150,6 @@ static void SoftRastConvertFramebuffer()
memcpy(gfx3d_convertedScreen,screenColor,256*192*4);
}
static void SoftRastRender()
{
Fragment clearFragment;
@ -1144,7 +1222,7 @@ static void SoftRastRender()
}
//setup fog variables (but only if fog is enabled)
if(gfx3d.enableFog)
if(gfx3d.enableFog && CommonSettings.GFX3D_Fog)
{
u8* fogDensity = MMU.MMU_MEM[ARMCPU_ARM9][0x40] + 0x360;
#if 0
@ -1260,29 +1338,20 @@ static void SoftRastRender()
}
}
//a counter for how many polys got culled
int culled = 0;
u32 lastTextureFormat = 0, lastTexturePalette = 0, lastPolyAttr = 0;
//iterate over polys
ADPCMCacheItem* lastTexKey = NULL;
u32 lastTextureFormat = 0, lastTexturePalette = 0;
bool needInitTexture = true;
for(int i=0;i<clippedPolyCounter;i++)
{
polynum = i;
GFX3D_Clipper::TClippedPoly &clippedPoly = clippedPolys[i];
POLY *poly = clippedPoly.poly;
int type = clippedPoly.type;
VERT* verts = &clippedPoly.clipVerts[0];
if(i == 0 || lastPolyAttr != poly->polyAttr)
{
RasterizerUnit::PolyAttr polyAttr;
polyAttr.setup(poly->polyAttr);
polyAttr.translucent = poly->isTranslucent();
lastPolyAttr = poly->polyAttr;
}
//HACK: backface culling
//this should be moved to gfx3d, but first we need to redo the way the lists are built
@ -1305,24 +1374,17 @@ static void SoftRastRender()
float facing = (verts[0].y + verts[n].y) * (verts[0].x - verts[n].x)
+ (verts[1].y + verts[0].y) * (verts[1].x - verts[0].x)
+ (verts[2].y + verts[1].y) * (verts[2].x - verts[1].x);
for(int i = 2; i < n; i++)
facing += (verts[i+1].y + verts[i].y) * (verts[i+1].x - verts[i].x);
polyAttr.backfacing = (facing < 0);
for(int j = 2; j < n; j++)
facing += (verts[j+1].y + verts[j].y) * (verts[j+1].x - verts[j].x);
polyBackfacing[i] = polyAttr.backfacing = (facing < 0);
#endif
if(!polyAttr.isVisible(polyAttr.backfacing)) {
culled++;
polyVisible[i] = false;
continue;
}
if(needInitTexture || lastTextureFormat != poly->texParam || lastTexturePalette != poly->texPalette)
{
TexCache_SetTexture<TexFormat_15bpp>(poly->texParam,poly->texPalette);
sampler.setup(poly->texParam);
lastTextureFormat = poly->texParam;
lastTexturePalette = poly->texPalette;
needInitTexture = false;
}
polyVisible[i] = true;
//here is a hack which needs to be removed.
//at some point our shape engine needs these to be converted to "fixed point"
@ -1331,15 +1393,31 @@ static void SoftRastRender()
for(int k=0;k<2;k++)
verts[j].coord[k] = (float)iround(16.0f * verts[j].coord[k]);
//hmm... shader gets setup every time because it depends on sampler which may have just changed
shader.setup(poly->polyAttr);
for(int j=0;j<MAX_CLIPPED_VERTS;j++)
::verts[j] = &verts[j];
shape_engine(type,!polyAttr.backfacing);
//make sure all the textures we'll need are cached
if(needInitTexture || lastTextureFormat != poly->texParam || lastTexturePalette != poly->texPalette)
{
lastTexKey = TexCache_SetTexture(TexFormat_15bpp,poly->texParam,poly->texPalette);
lastTextureFormat = poly->texParam;
lastTexturePalette = poly->texPalette;
needInitTexture = false;
}
//printf("%08X %d\n",poly->texParam,rasterizerUnit[0].textures.currentNum);
polyTexKeys[i] = lastTexKey;
}
if(rasterizerCores==1)
{
rasterizerUnit[0].mainLoop<false>();
}
else
{
for(int i=0;i<rasterizerCores;i++) rasterizerUnitTask[i].execute(execRasterizerUnit,(void*)i);
for(int i=0;i<rasterizerCores;i++) rasterizerUnitTask[i].finish();
}
TexCache_EvictFrame();
SoftRastFramebufferProcess();
// printf("rendered %d of %d polys after backface culling\n",gfx3d.polylist->count-culled,gfx3d.polylist->count);
@ -1354,3 +1432,4 @@ GPU3DInterface gpu3DRasterize = {
SoftRastRender,
SoftRastVramReconfigureSignal,
};

View File

@ -1,8 +1,4 @@
/* Copyright (C) 2006 yopyop
yopyop156@ifrance.com
yopyop156.ifrance.com
Copyright 2009 DeSmuME team
/* Copyright 2009 DeSmuME team
This file is part of DeSmuME

View File

@ -1,7 +1,8 @@
#include "texcache.h"
#include <string.h>
#include <algorithm>
#include <assert.h>
#include "texcache.h"
#include "bits.h"
#include "common.h"
@ -15,6 +16,8 @@ using std::max;
//only dump this from ogl renderer. for now, softrasterizer creates things in an incompatible pixel format
//#define DEBUG_DUMP_TEXTURE
#define CONVERT(color,alpha) ((TEXFORMAT == TexFormat_32bpp)?(RGB15TO32(color,alpha)):RGB15TO6665(color,alpha))
//This class represents a number of regions of memory which should be viewed as contiguous
class MemSpan
{
@ -54,6 +57,8 @@ public:
return 0;
}
//TODO - get rid of duplication between these two methods.
//dumps the memspan to the specified buffer
//you may set size to limit the size to be copied
int dump(void* buf, int size=-1)
@ -160,12 +165,6 @@ static MemSpan MemSpan_TexPalette(u32 ofs, u32 len)
return ret;
}
TextureCache *texcache;
u32 texcache_start;
u32 texcache_stop;
u8 *TexCache_texMAP = NULL;
#if defined (DEBUG_DUMP_TEXTURE) && defined (WIN32)
#define DO_DEBUG_DUMP_TEXTURE
static void DebugDumpTexture(int which)
@ -178,28 +177,59 @@ static void DebugDumpTexture(int which)
#endif
static int lastTexture = -1;
#define CONVERT(color,alpha) ((TEXFORMAT == TexFormat_32bpp)?(RGB15TO32(color,alpha)):RGB15TO6665(color,alpha))
template<TexCache_TexFormat TEXFORMAT>
void TexCache_SetTexture(u32 format, u32 texpal)
//notes on the cache:
//I am really unhappy with the ref counting. this needs to be automatic.
//We could do something better than a linear search through cache items, but it may not be worth it.
//Also we may need to rescan more often (every time a sample loops)
class ADPCMCache
{
public:
ADPCMCache()
: list_front(NULL)
, list_back(NULL)
, cache_size(0)
{}
ADPCMCacheItem *list_front, *list_back;
//this ought to be enough for anyone
static const u32 kMaxCacheSize = 64*1024*1024;
//this is not really precise, it is off by a constant factor
u32 cache_size;
//unlinks item from the doubly linked list, patching list_front/list_back when item sat at an end.
//item's own next/prev fields are left as they were.
void list_remove(ADPCMCacheItem* item)
{
	ADPCMCacheItem* const after = item->next;
	ADPCMCacheItem* const before = item->prev;

	if(after) after->prev = before;
	if(before) before->next = after;

	if(list_front == item) list_front = after;
	if(list_back == item) list_back = before;
}
//links item in as the new head of the list; an empty list also gets item as its tail
void list_push_front(ADPCMCacheItem* item)
{
	ADPCMCacheItem* const oldFront = list_front;

	item->next = oldFront;
	if(oldFront == NULL)
		list_back = item;
	else
		oldFront->prev = item;

	item->prev = NULL;
	list_front = item;
}
template<TexCache_TexFormat TEXFORMAT>
ADPCMCacheItem* scan(u32 format, u32 texpal)
{
//for each texformat, number of palette entries
const int palSizes[] = {0, 32, 4, 16, 256, 0, 8, 0};
static const int palSizes[] = {0, 32, 4, 16, 256, 0, 8, 0};
//for each texformat, multiplier from numtexels to numbytes (fixed point 30.2)
const int texSizes[] = {0, 4, 1, 2, 4, 1, 4, 8};
static const int texSizes[] = {0, 4, 1, 2, 4, 1, 4, 8};
//used to hold a copy of the palette specified for this texture
u16 pal[256];
u32 *dwdst = (u32*)TexCache_texMAP;
u32 textureMode = (unsigned short)((format>>26)&0x07);
unsigned int sizeX=(8 << ((format>>20)&0x07));
unsigned int sizeY=(8 << ((format>>23)&0x07));
unsigned int imageSize = sizeX*sizeY;
u32 sizeX=(8 << ((format>>20)&0x07));
u32 sizeY=(8 << ((format>>23)&0x07));
u32 imageSize = sizeX*sizeY;
u8 *adr;
@ -246,110 +276,101 @@ void TexCache_SetTexture(u32 format, u32 texpal)
//dump the palette to a temp buffer, so that we don't have to worry about memory mapping.
//this isnt such a problem with texture memory, because we read sequentially from it.
//however, we read randomly from palette memory, so the mapping is more costly.
#ifdef WORDS_BIGENDIAN
#ifdef WORDS_BIGENDIAN
mspal.dump16(pal);
#else
#else
mspal.dump(pal);
#endif
#endif
u32 tx=texcache_start;
//if(false)
while (TRUE)
for(ADPCMCacheItem* curr = list_front;curr;curr=curr->next)
{
//conditions where we give up and regenerate the texture:
if (texcache_stop == tx) break;
if (texcache[tx].frm == 0) break;
//conditions where we reject matches:
//when the teximage or texpal params dont match
//(this is our key for identifying palettes in the cache)
if (texcache[tx].frm != format) goto REJECT;
if (texcache[tx].pal != texpal) goto REJECT;
//(this is our key for identifying textures in the cache)
if(curr->texformat != format) continue;
if(curr->texpal != texpal) continue;
//we're being asked for a different format than what we had cached.
if(curr->cacheFormat != TEXFORMAT) goto REJECT;
//not used anymore -- add another method to purge suspicious items from the cache
//the texture matches params, but isnt suspected invalid. accept it.
if (!texcache[tx].suspectedInvalid) goto ACCEPT;
//if we couldnt cache this entire texture due to it being too large, then reject it
if (texSize+indexSize > (int)sizeof(texcache[tx].dump.texture)) goto REJECT;
if (!curr->suspectedInvalid) return curr;
//when the palettes dont match:
//note that we are considering 4x4 textures to have a palette size of 0.
//they really have a potentially HUGE palette, too big for us to handle like a normal palette,
//so they go through a different system
if (mspal.size != 0 && memcmp(texcache[tx].dump.palette,pal,mspal.size)) goto REJECT;
if(mspal.size != 0 && memcmp(curr->dump.palette,pal,mspal.size)) goto REJECT;
//when the texture data doesn't match
if(ms.memcmp(texcache[tx].dump.texture,sizeof(texcache[tx].dump.texture))) goto REJECT;
if(ms.memcmp(curr->dump.texture,sizeof(curr->dump.texture))) goto REJECT;
//if the texture is 4x4 then the index data must match
if(textureMode == TEXMODE_4X4)
{
if(msIndex.memcmp(texcache[tx].dump.texture + texcache[tx].dump.textureSize,texcache[tx].dump.indexSize)) goto REJECT;
if(msIndex.memcmp(curr->dump.texture + curr->dump.textureSize,curr->dump.indexSize)) goto REJECT;
}
//we found a match. just return it
//curr->lock();
list_remove(curr);
list_push_front(curr);
return curr;
ACCEPT:
texcache[tx].suspectedInvalid = false;
if(lastTexture == -1 || (int)tx != lastTexture)
{
lastTexture = tx;
if(TexCache_BindTexture)
TexCache_BindTexture(tx);
}
return;
REJECT:
tx++;
if ( tx > MAX_TEXTURE )
{
texcache_stop=texcache_start;
texcache[texcache_stop].frm=0;
texcache_start++;
if (texcache_start>MAX_TEXTURE)
{
texcache_start=0;
texcache_stop=MAX_TEXTURE<<1;
}
tx=0;
}
REJECT:
//we found a cached item for the current address, but the data is stale.
//for a variety of complicated reasons, we need to throw it out right this instant.
list_remove(curr);
delete curr;
break;
}
lastTexture = tx;
//glBindTexture(GL_TEXTURE_2D, texcache[tx].id);
//item was not found. recruit an existing one (the oldest), or create a new one
//evict(); //reduce the size of the cache if necessary
//TODO - as a peculiarity of the texcache, eviction must happen after the entire 3d frame runs
//to support separate cache and read passes
ADPCMCacheItem* newitem = new ADPCMCacheItem();
list_push_front(newitem);
//newitem->lock();
newitem->suspectedInvalid = false;
newitem->texformat = format;
newitem->cacheFormat = TEXFORMAT;
newitem->texpal = texpal;
newitem->sizeX=sizeX;
newitem->sizeY=sizeY;
newitem->invSizeX=1.0f/((float)(sizeX));
newitem->invSizeY=1.0f/((float)(sizeY));
newitem->dump.textureSize = ms.dump(newitem->dump.texture,sizeof(newitem->dump.texture));
newitem->decode_len = sizeX*sizeY*4;
newitem->mode = textureMode;
cache_size += newitem->decode_len;
newitem->decoded = new u8[newitem->decode_len];
u32 *dwdst = (u32*)newitem->decoded;
texcache[tx].suspectedInvalid = false;
texcache[tx].frm=format;
texcache[tx].mode=textureMode;
texcache[tx].pal=texpal;
texcache[tx].sizeX=sizeX;
texcache[tx].sizeY=sizeY;
texcache[tx].invSizeX=1.0f/((float)(sizeX));
texcache[tx].invSizeY=1.0f/((float)(sizeY));
texcache[tx].dump.textureSize = ms.dump(texcache[tx].dump.texture,sizeof(texcache[tx].dump.texture));
//dump palette data for cache keying
if ( palSize )
if(palSize)
{
memcpy(texcache[tx].dump.palette, pal, palSize*2);
memcpy(newitem->dump.palette, pal, palSize*2);
}
//dump 4x4 index data for cache keying
texcache[tx].dump.indexSize = 0;
newitem->dump.indexSize = 0;
if(textureMode == TEXMODE_4X4)
{
texcache[tx].dump.indexSize = min(msIndex.size,(int)sizeof(texcache[tx].dump.texture) - texcache[tx].dump.textureSize);
msIndex.dump(texcache[tx].dump.texture+texcache[tx].dump.textureSize,texcache[tx].dump.indexSize);
newitem->dump.indexSize = min(msIndex.size,(int)sizeof(newitem->dump.texture) - newitem->dump.textureSize);
msIndex.dump(newitem->dump.texture+newitem->dump.textureSize,newitem->dump.indexSize);
}
//============================================================================
//Texture conversion
//============================================================================
//INFO("Texture %03i - format=%08X; pal=%04X (mode %X, width %04i, height %04i)\n",i, texcache[i].frm, texcache[i].pal, texcache[i].mode, sizeX, sizeY);
//============================================================================ Texture conversion
const u32 opaqueColor = TEXFORMAT==TexFormat_32bpp?255:31;
u32 palZeroTransparent = (1-((format>>29)&1))*opaqueColor;
switch (texcache[tx].mode)
switch (newitem->mode)
{
case TEXMODE_A3I5:
{
@ -366,9 +387,9 @@ REJECT:
adr++;
}
}
break;
}
case TEXMODE_I2:
{
for(int j=0;j<ms.numItems;j++) {
@ -443,20 +464,20 @@ REJECT:
//this check isnt necessary since the addressing is tied to the texture data which will also run out:
//if(msIndex.numItems != 1) PROGINFO("Your 4x4 texture index has overrun its slot.\n");
#define PAL4X4(offset) ( *(u16*)( MMU.texInfo.texPalSlot[((paletteAddress + (offset)*2)>>14)] + ((paletteAddress + (offset)*2)&0x3FFF) ) )
#define PAL4X4(offset) ( *(u16*)( MMU.texInfo.texPalSlot[((paletteAddress + (offset)*2)>>14)] + ((paletteAddress + (offset)*2)&0x3FFF) ) )
u16* slot1;
u32* map = (u32*)ms.items[0].ptr;
u32 limit = ms.items[0].len<<2;
u32 d = 0;
if ( (texcache[tx].frm & 0xc000) == 0x8000)
if ( (format & 0xc000) == 0x8000)
// texel are in slot 2
slot1=(u16*)&MMU.texInfo.textureSlotAddr[1][((texcache[tx].frm & 0x3FFF)<<2)+0x010000];
slot1=(u16*)&MMU.texInfo.textureSlotAddr[1][((format & 0x3FFF)<<2)+0x010000];
else
slot1=(u16*)&MMU.texInfo.textureSlotAddr[1][(texcache[tx].frm & 0x3FFF)<<2];
slot1=(u16*)&MMU.texInfo.textureSlotAddr[1][(format & 0x3FFF)<<2];
u16 yTmpSize = (texcache[tx].sizeY>>2);
u16 xTmpSize = (texcache[tx].sizeX>>2);
u16 yTmpSize = (sizeY>>2);
u16 xTmpSize = (sizeX>>2);
//this is flagged whenever a 4x4 overruns its slot.
//i am guessing we just generate black in that case
@ -464,8 +485,8 @@ REJECT:
for (int y = 0; y < yTmpSize; y ++)
{
u32 tmpPos[4]={(y<<2)*texcache[tx].sizeX,((y<<2)+1)*texcache[tx].sizeX,
((y<<2)+2)*texcache[tx].sizeX,((y<<2)+3)*texcache[tx].sizeX};
u32 tmpPos[4]={(y<<2)*sizeX,((y<<2)+1)*sizeX,
((y<<2)+2)*sizeX,((y<<2)+3)*sizeX};
for (int x = 0; x < xTmpSize; x ++, d++)
{
if(d >= limit)
@ -598,56 +619,85 @@ REJECT:
}
break;
}
}
} //switch(texture format)
if(TexCache_BindTextureData != 0)
TexCache_BindTextureData(tx,TexCache_texMAP);
/*if(user)
user->BindTextureData(tx,TexCache_texMAP);
#ifdef DO_DEBUG_DUMP_TEXTURE
DebugDumpTexture(tx);
#endif
#endif*/
}
return newitem;
} //scan()
void evict(const u32 target = kMaxCacheSize) {
//evicts old cache items until it is less than the max cache size
//this means we actually can exceed the cache by the size of the next item.
//if we really wanted to hold ourselves to it, we could evict to kMaxCacheSize-nextItemSize
while(cache_size > target)
{
ADPCMCacheItem *oldest = list_back;
while(oldest && oldest->lockCount>0) oldest = oldest->prev; //find an unlocked one
if(!oldest)
{
//nothing we can do, everything in the cache is locked. maybe we're leaking.
//just quit trying to evict
return;
}
list_remove(oldest);
cache_size -= oldest->decode_len;
//printf("evicting! totalsize:%d\n",cache_size);
delete oldest;
}
}
} adpcmCache;
void TexCache_Reset()
{
if(TexCache_texMAP == NULL) TexCache_texMAP = new u8[1024*2048*4];
if(texcache == NULL) texcache = new TextureCache[MAX_TEXTURE+1];
//if(TexCache_texMAP == NULL) TexCache_texMAP = new u8[1024*2048*4];
//if(texcache == NULL) texcache = new TextureCache[MAX_TEXTURE+1];
memset(texcache,0,sizeof(TextureCache[MAX_TEXTURE+1]));
//memset(texcache,0,sizeof(TextureCache[MAX_TEXTURE+1]));
texcache_start=0;
texcache_stop=MAX_TEXTURE<<1;
}
TextureCache* TexCache_Curr()
{
if(lastTexture == -1)
return NULL;
else return &texcache[lastTexture];
//texcache_start=0;
//texcache_stop=MAX_TEXTURE<<1;
adpcmCache.evict(0);
}
void TexCache_Invalidate()
{
//well, this is a very blunt instrument.
//lets just flag all the textures as invalid.
for(int i=0;i<MAX_TEXTURE+1;i++) {
texcache[i].suspectedInvalid = true;
////well, this is a very blunt instrument.
////lets just flag all the textures as invalid.
//for(int i=0;i<MAX_TEXTURE+1;i++) {
// texcache[i].suspectedInvalid = true;
//invalidate all 4x4 textures when texture palettes change mappings
//this is necessary because we arent tracking 4x4 texture palettes to look for changes.
//Although I concede this is a bit paranoid.. I think the odds of anyone changing 4x4 palette data
//without also changing the texture data is pretty much zero.
//
//TODO - move this to a separate signal: split into TexReconfigureSignal and TexPaletteReconfigureSignal
if(texcache[i].mode == TEXMODE_4X4)
texcache[i].frm = 0;
// //invalidate all 4x4 textures when texture palettes change mappings
// //this is necessary because we arent tracking 4x4 texture palettes to look for changes.
// //Although I concede this is a bit paranoid.. I think the odds of anyone changing 4x4 palette data
// //without also changing the texture data is pretty much zero.
// //
// //TODO - move this to a separate signal: split into TexReconfigureSignal and TexPaletteReconfigureSignal
// if(texcache[i].mode == TEXMODE_4X4)
// texcache[i].frm = 0;
//}
adpcmCache.evict(0);
}
//Runtime-to-template dispatch: forwards to adpcmCache.scan<>() for the requested cache pixel format
//and returns whatever scan() returns. Any other TEXFORMAT value asserts and yields NULL.
ADPCMCacheItem* TexCache_SetTexture(TexCache_TexFormat TEXFORMAT, u32 format, u32 texpal)
{
	if(TEXFORMAT == TexFormat_32bpp)
		return adpcmCache.scan<TexFormat_32bpp>(format,texpal);

	if(TEXFORMAT == TexFormat_15bpp)
		return adpcmCache.scan<TexFormat_15bpp>(format,texpal);

	assert(false);
	return NULL;
}
void (*TexCache_BindTexture)(u32 texnum) = NULL;
void (*TexCache_BindTextureData)(u32 texnum, u8* data);
//these templates needed to be instantiated manually
template void TexCache_SetTexture<TexFormat_32bpp>(u32 format, u32 texpal);
template void TexCache_SetTexture<TexFormat_15bpp>(u32 format, u32 texpal);
//call this periodically to keep the tex cache clean
void TexCache_EvictFrame()
{
adpcmCache.evict();
}

View File

@ -5,47 +5,62 @@
enum TexCache_TexFormat
{
TexFormat_32bpp,
TexFormat_15bpp
TexFormat_None, //used when nothing yet is cached
TexFormat_32bpp, //used by ogl renderer
TexFormat_15bpp //used by rasterizer
};
#define MAX_TEXTURE 500
struct CACHE_ALIGN TextureCache
class ADPCMCacheItem
{
u32 id;
u32 frm;
public:
ADPCMCacheItem()
: decoded(NULL)
, decode_len(0)
, next(NULL)
, prev(NULL)
, lockCount(0)
, cacheFormat(TexFormat_None)
, deleteCallback(NULL)
, suspectedInvalid(false)
{}
~ADPCMCacheItem() {
delete[] decoded;
if(deleteCallback) deleteCallback(this);
}
void unlock() {
lockCount--;
}
void lock() {
lockCount++;
}
u32 decode_len;
u32 mode;
u32 pal;
u32 sizeX;
u32 sizeY;
float invSizeX;
float invSizeY;
u8* decoded; //decoded texture data
ADPCMCacheItem *next, *prev; //double linked list
int lockCount;
bool suspectedInvalid;
u32 texformat, texpal;
u32 sizeX, sizeY;
float invSizeX, invSizeY;
void* texid; //used by ogl renderer for the texid
void (*deleteCallback)(ADPCMCacheItem*);
TexCache_TexFormat cacheFormat;
//TODO - this is a little wasteful
struct {
int textureSize, indexSize;
u8 texture[128*1024]; // 128Kb texture slot
u8 palette[256*2];
} dump;
//set if this texture is suspected be invalid due to a vram reconfigure
bool suspectedInvalid;
};
extern TextureCache *texcache;
extern void (*TexCache_BindTexture)(u32 texnum);
extern void (*TexCache_BindTextureData)(u32 texnum, u8* data);
void TexCache_Reset();
template<TexCache_TexFormat>
void TexCache_SetTexture(u32 format, u32 texpal);
void TexCache_Invalidate();
void TexCache_Reset();
void TexCache_EvictFrame();
extern u8 *TexCache_texMAP;
TextureCache* TexCache_Curr();
ADPCMCacheItem* TexCache_SetTexture(TexCache_TexFormat TEXFORMAT, u32 format, u32 texpal);
#endif

279
desmume/src/utils/task.cpp Normal file
View File

@ -0,0 +1,279 @@
/* Copyright 2009 DeSmuME team
This file is part of DeSmuME
DeSmuME is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
DeSmuME is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with DeSmuME; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "task.h"
#ifdef _WIN32
#include <windows.h>
#include <stdio.h>
//win32 implementation of Task: one worker thread that is handed a single work function at a time.
//execute() publishes the work, taskProc() runs it on the worker, finish() waits for it to complete.
class Task::Impl {
public:
Impl();
~Impl();
//selects the handshake used between the caller and the worker:
//true = poll the volatile flags with Sleep(0), false = block on the event handles
bool spinlock;
//creates the two events and the worker thread
void start(bool spinlock);
//execute some work
void execute(const TWork &work, void* param);
//wait for the work to complete
void* finish();
//thread entry point given to CreateThread; forwards to taskProc() on the Impl passed as ptr
static DWORD __stdcall s_taskProc(void *ptr);
//worker loop; runs until bKill is observed
void taskProc();
//NOTE(review): declared here, but no definition appears in the win32 section of this file
void init();
//the work function that shall be executed
TWork work;
//argument for the work function on the way in; overwritten with its return value when it completes
void* param;
//incomingWork/workDone are the events used when !spinlock; hThread is the worker thread
HANDLE incomingWork, workDone, hThread;
//flag versions of the same handshake (polled when spinlock is set), plus the shutdown request
volatile bool bIncomingWork, bWorkDone, bKill;
//set once start() has run; the destructor does nothing otherwise
bool bStarted;
};
static void* killTask(void* task)
{
((Task::Impl*)task)->bKill = true;
return 0;
}
//Shuts the worker thread down and releases the win32 handles.
//Nothing to do if start() was never called.
Task::Impl::~Impl()
{
	if(!bStarted) return;

	//ask the worker to raise its own kill flag, and wait until that request has run
	execute(killTask,this);
	finish();

	//finish() returns as soon as completion is signalled, but the worker still writes bWorkDone
	//and reads bKill (both members of this object) on its way out of taskProc().
	//wait for the thread to really exit before the handles and this object go away
	WaitForSingleObject(hThread,INFINITE);

	CloseHandle(incomingWork);
	CloseHandle(workDone);
	CloseHandle(hThread);
}
//Puts every member into a known idle state; no thread or event exists until start() is called.
//Initializers are listed in declaration order.
Task::Impl::Impl()
	: spinlock(false)
	, work(NULL)
	, param(NULL)
	, incomingWork(NULL)
	, workDone(NULL)
	, hThread(NULL)
	, bIncomingWork(false)
	, bWorkDone(true) //no work outstanding yet
	, bKill(false)
	, bStarted(false)
{
}
//static trampoline with the signature CreateThread wants.
//ptr is the Impl that start() passed along; all real work happens in its taskProc()
DWORD __stdcall Task::Impl::s_taskProc(void *ptr)
{
	//just pass the buck to the instance method
	Task::Impl* const self = (Task::Impl*)ptr;
	self->taskProc();
	return 0;
}
//Worker thread body: repeatedly waits for execute() to publish work, runs it, and reports completion.
//Leaves the loop once bKill is observed at the top of an iteration.
void Task::Impl::taskProc()
{
for(;;) {
//bKill is raised by killTask, which itself runs as a piece of work, so it is seen on the following pass
if(bKill) break;
//wait for a chunk of work
//(spinlock mode polls the flag and yields with Sleep(0); otherwise block on the event)
if(spinlock) while(!bIncomingWork) Sleep(0);
else WaitForSingleObject(incomingWork,INFINITE);
bIncomingWork = false;
//execute the work
//the return value replaces param, which is what finish() hands back to the caller
param = work(param);
//signal completion
//(the event wakes a blocked finish(); the flag is what a spinning finish() polls)
if(!spinlock) SetEvent(workDone);
bWorkDone = true;
}
}
//Creates the handshake events and launches the worker thread.
//spinlock selects how execute()/finish()/taskProc() wait on each other:
//true = poll the flags with Sleep(0), false = block on the events.
//Calling start() again on a task that is already running does nothing.
void Task::Impl::start(bool spinlock)
{
	//a second start would leak both events and the thread handle,
	//and leave two workers racing over the same work/param slots
	if(bStarted) return;

	bStarted = true;
	this->spinlock = spinlock;

	//bManualReset=FALSE, bInitialState=FALSE: auto-reset events that start out unsignalled
	incomingWork = CreateEvent(NULL,FALSE,FALSE,NULL);
	workDone = CreateEvent(NULL,FALSE,FALSE,NULL);

	hThread = CreateThread(NULL,0,Task::Impl::s_taskProc,(void*)this, 0, NULL);
}
//Publishes one piece of work for the worker thread and returns without waiting for it.
//work is the function to run; param is passed to it as its only argument.
//Pair every call with finish() to collect the result.
void Task::Impl::execute(const TWork &work, void* param)
{
//setup the work
//(these stores come before the signal below, so they are in place when the worker is released)
this->work = work;
this->param = param;
bWorkDone = false;
//signal it to start
//(event for a blocked worker; the flag is what a spinning worker polls)
if(!spinlock) SetEvent(incomingWork);
bIncomingWork = true;
}
//Blocks until the work handed to execute() has completed, then returns param,
//which taskProc() has overwritten with the work function's return value
void* Task::Impl::finish()
{
	if(!spinlock)
	{
		//event mode: sleep until the worker signals completion
		WaitForSingleObject(workDone,INFINITE);
		return param;
	}

	//spinlock mode: poll the completion flag, yielding the timeslice between checks
	while(!bWorkDone)
		Sleep(0);
	return param;
}
#else
//just a stub impl that doesnt actually do any threading.
//somebody needs to update the pthread implementation below
//non-threaded fallback: execute() runs the work immediately on the calling thread
//and finish() just hands back the stored result
class Task::Impl {
public:
	//ret starts out null so that finish() without a prior execute() returns a defined value
	Impl() : ret(0) {}
	~Impl() {}

	//nothing to start; the spinlock setting is ignored
	void start(bool spinlock) {}

	//return value of the most recent work function
	void* ret;

	void execute(const TWork &work, void* param) { ret = work(param); }
	void* finish() { return ret; }
};
/*
#include <pthread.h>
class Task::Impl {
public:
Impl();
//execute some work
void execute(const TWork &work, void* param);
//wait for the work to complete
void* finish();
pthread_t thread;
static void* s_taskProc(void *ptr);
void taskProc();
void init();
//the work function that shall be executed
TWork work;
void* param;
bool initialized;
struct WaitEvent
{
WaitEvent()
: condition(PTHREAD_COND_INITIALIZER)
, mutex(PTHREAD_MUTEX_INITIALIZER)
, value(false)
{}
pthread_mutex_t mutex;
pthread_cond_t condition;
bool value;
//waits for the WaitEvent to be set
void waitAndClear()
{
lock();
if(!value)
pthread_cond_wait( &condition, &mutex );
value = false;
unlock();
}
//sets the WaitEvent
void signal()
{
lock();
if(!value) {
value = true;
pthread_cond_signal( &condition );
}
unlock();
}
//locks the condition's mutex
void lock() { pthread_mutex_lock(&mutex); }
//unlocks the condition's mutex
void unlock() { pthread_mutex_unlock( &mutex ); }
} incomingWork, workDone;
};
Task::Impl::Impl()
: work(NULL)
, initialized(false)
{
}
void* Task::Impl::s_taskProc(void *ptr)
{
//just past the buck to the instance method
((Task::Impl*)ptr)->taskProc();
return 0;
}
void Task::Impl::taskProc()
{
for(;;) {
//wait for a chunk of work
incomingWork.waitAndClear();
//execute the work
param = work(param);
//signal completion
workDone.signal();
}
}
void Task::Impl::init()
{
pthread_create( &thread, NULL, Task::Impl::s_taskProc, (void*)this );
initialized = true;
}
void Task::Impl::execute(const TWork &work, void* param)
{
//initialization is deferred to the first execute to give win32 time to startup
if(!initialized) init();
//setup the work
this->work = work;
this->param = param;
//signal it to start
incomingWork.signal();
}
void* Task::Impl::finish()
{
//just wait for the work to be done
workDone.waitAndClear();
return param;
}
*/
#endif
//Task itself is a thin shell: every call is forwarded to the platform-specific Impl it owns

Task::Task()
	: impl(new Task::Impl())
{
}

Task::~Task()
{
	delete impl;
}

void Task::start(bool spinlock)
{
	impl->start(spinlock);
}

void Task::execute(const TWork &work, void* param)
{
	impl->execute(work,param);
}

void* Task::finish()
{
	return impl->finish();
}

46
desmume/src/utils/task.h Normal file
View File

@ -0,0 +1,46 @@
/* Copyright 2009 DeSmuME team
This file is part of DeSmuME
DeSmuME is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
DeSmuME is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with DeSmuME; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef _TASK_H_
#define _TASK_H_ //without this the #ifndef above is always true and the header is not guarded at all

//Sort of like a single-thread thread pool.
//You hand it a worker function and then call finish() to synch with its completion
class Task
{
public:
	Task();
	~Task();

	//signature of a work function: takes one opaque argument and returns one opaque result
	typedef void * (*TWork)(void *);

	//gets the task ready to accept work.
	//spinlock is handed to the implementation to choose how it waits
	void start(bool spinlock);

	//execute some work
	void execute(const TWork &work, void* param);

	//wait for the work to complete
	void* finish();

	//platform-specific implementation, defined in task.cpp.
	//NOTE(review): impl is deleted by ~Task, so copying a Task would double-delete; treat Task as non-copyable
	class Impl;
	Impl *impl;
};

#endif

View File

@ -643,10 +643,6 @@
RelativePath=".\aviout.h"
>
</File>
<File
RelativePath=".\buildconfig.h"
>
</File>
<File
RelativePath=".\cheatsWin.cpp"
>
@ -971,6 +967,14 @@
RelativePath="..\utils\md5.h"
>
</File>
<File
RelativePath="..\utils\task.cpp"
>
</File>
<File
RelativePath="..\utils\task.h"
>
</File>
<File
RelativePath="..\utils\valuearray.h"
>

View File

@ -1019,6 +1019,14 @@
RelativePath="..\utils\md5.h"
>
</File>
<File
RelativePath="..\utils\task.cpp"
>
</File>
<File
RelativePath="..\utils\task.h"
>
</File>
<File
RelativePath="..\utils\valuearray.h"
>

View File

@ -1002,7 +1002,7 @@ static void DoDisplay(bool firstTime)
//on single core systems, draw straight to the screen
//we only do this once per emulated frame because we don't want to waste time redrawing
//on such lousy computers
if(CommonSettings.single_core)
if(CommonSettings.single_core())
{
aggDraw.hud->attach((u8*)video.buffer, 256, 384, 1024);
DoDisplay_DrawHud();
@ -1025,7 +1025,7 @@ static void DoDisplay(bool firstTime)
//apply user's filter
video.filter();
if(!CommonSettings.single_core)
if(!CommonSettings.single_core())
{
//draw and composite the OSD (but not if we are drawing osd straight to screen)
DoDisplay_DrawHud();
@ -1081,7 +1081,7 @@ void KillDisplay()
void Display()
{
if(CommonSettings.single_core)
if(CommonSettings.single_core())
{
video.srcBuffer = (u8*)GPU_screen;
DoDisplay(true);
@ -1229,7 +1229,7 @@ static void StepRunLoop_Paused()
Sleep(100);
// periodically update single-core OSD when paused and in the foreground
if(CommonSettings.single_core && GetActiveWindow() == mainLoopData.hwnd)
if(CommonSettings.single_core() && GetActiveWindow() == mainLoopData.hwnd)
{
video.srcBuffer = (u8*)GPU_screen;
DoDisplay(true);
@ -1718,7 +1718,7 @@ class WinDriver : public BaseDriver
// in multi-core mode now the display thread will probably
// wait for an invocation in this thread to happen,
// so handle that ASAP
if(!CommonSettings.single_core)
if(!CommonSettings.single_core())
{
ResetEvent(display_invoke_ready_event);
SetEvent(display_wakeup_event);
@ -1844,11 +1844,7 @@ int _main()
//this helps give a substantial speedup for singlecore users
SYSTEM_INFO systemInfo;
GetSystemInfo(&systemInfo);
if(systemInfo.dwNumberOfProcessors==1)
CommonSettings.single_core = true;
else
CommonSettings.single_core = false;
CommonSettings.num_cores = systemInfo.dwNumberOfProcessors;
char text[80];
@ -1948,7 +1944,7 @@ int _main()
//in case this isnt actually a singlecore system, but the user requested it
//then restrict ourselves to one core
if(CommonSettings.single_core)
if(CommonSettings.single_core())
SetProcessAffinityMask(GetCurrentProcess(),1);
MainWindow = new WINCLASS(CLASSNAME, hAppInst);
@ -2130,7 +2126,9 @@ int _main()
hKeyInputTimer = timeSetEvent (KeyInRepeatMSec, 0, KeyInputTimer, 0, TIME_PERIODIC);
cur3DCore = GetPrivateProfileInt("3D", "Renderer", GPU3D_OPENGL, IniName);
CommonSettings.HighResolutionInterpolateColor = GetPrivateProfileBool("3D", "HighResolutionInterpolateColor", 1, IniName);
CommonSettings.GFX3D_HighResolutionInterpolateColor = GetPrivateProfileBool("3D", "HighResolutionInterpolateColor", 1, IniName);
CommonSettings.GFX3D_EdgeMark = GetPrivateProfileBool("3D", "EnableEdgeMark", 1, IniName);
CommonSettings.GFX3D_Fog = GetPrivateProfileBool("3D", "EnableFog", 1, IniName);
//CommonSettings.gfx3d_flushMode = GetPrivateProfileInt("3D", "AlternateFlush", 0, IniName);
NDS_3D_ChangeCore(cur3DCore);
@ -3379,7 +3377,7 @@ LRESULT CALLBACK WindowProcedure (HWND hwnd, UINT message, WPARAM wParam, LPARAM
}
else
{
if(CommonSettings.single_core)
if(CommonSettings.single_core())
{
video.srcBuffer = (u8*)GPU_screen;
DoDisplay(true);
@ -4406,7 +4404,9 @@ LRESULT CALLBACK GFX3DSettingsDlgProc(HWND hw, UINT msg, WPARAM wp, LPARAM lp)
{
int i;
CheckDlgButton(hw,IDC_INTERPOLATECOLOR,CommonSettings.HighResolutionInterpolateColor?1:0);
CheckDlgButton(hw,IDC_INTERPOLATECOLOR,CommonSettings.GFX3D_HighResolutionInterpolateColor?1:0);
CheckDlgButton(hw,IDC_3DSETTINGS_EDGEMARK,CommonSettings.GFX3D_EdgeMark?1:0);
CheckDlgButton(hw,IDC_3DSETTINGS_FOG,CommonSettings.GFX3D_Fog?1:0);
//CheckDlgButton(hw,IDC_ALTERNATEFLUSH,CommonSettings.gfx3d_flushMode);
for(i = 0; core3DList[i] != NULL; i++)
@ -4423,10 +4423,14 @@ LRESULT CALLBACK GFX3DSettingsDlgProc(HWND hw, UINT msg, WPARAM wp, LPARAM lp)
{
case IDOK:
{
CommonSettings.HighResolutionInterpolateColor = IsDlgCheckboxChecked(hw,IDC_INTERPOLATECOLOR);
CommonSettings.GFX3D_HighResolutionInterpolateColor = IsDlgCheckboxChecked(hw,IDC_INTERPOLATECOLOR);
CommonSettings.GFX3D_EdgeMark = IsDlgCheckboxChecked(hw,IDC_3DSETTINGS_EDGEMARK);
CommonSettings.GFX3D_Fog = IsDlgCheckboxChecked(hw,IDC_3DSETTINGS_FOG);
NDS_3D_ChangeCore(ComboBox_GetCurSel(GetDlgItem(hw, IDC_3DCORE)));
WritePrivateProfileInt("3D", "Renderer", cur3DCore, IniName);
WritePrivateProfileInt("3D", "HighResolutionInterpolateColor", CommonSettings.HighResolutionInterpolateColor?1:0, IniName);
WritePrivateProfileInt("3D", "HighResolutionInterpolateColor", CommonSettings.GFX3D_HighResolutionInterpolateColor?1:0, IniName);
WritePrivateProfileInt("3D", "EnableEdgeMark", CommonSettings.GFX3D_EdgeMark?1:0, IniName);
WritePrivateProfileInt("3D", "EnableFog", CommonSettings.GFX3D_Fog?1:0, IniName);
//CommonSettings.gfx3d_flushMode = (IsDlgButtonChecked(hw,IDC_ALTERNATEFLUSH) == BST_CHECKED)?1:0;
//WritePrivateProfileInt("3D", "AlternateFlush", CommonSettings.gfx3d_flushMode, IniName);
}

View File

@ -277,7 +277,6 @@
#define IDC_SOUNDCORECB 1000
#define IDC_USEEXTBIOS 1000
#define ID_BROWSE 1000
#define IDC_ALTERNATEFLUSH 1001
#define IDC_BGMAP_BGXCNT 1001
#define IDC_CHECKBOX_DEBUGGERMODE 1001
#define IDC_EDIT01 1001
@ -630,7 +629,9 @@
#define IDC_GI_FATOFS 4464
#define IDC_INTERPOLATECOLOR 4464
#define IDC_GI_FATSIZE 4465
#define IDC_3DSETTINGS_EDGEMARK 4465
#define IDC_GI_ICONTITLEOFS 4466
#define IDC_3DSETTINGS_FOG 4466
#define IDC_GI_USEDROMSIZE 4467
#define IDC_GI_ICON 4469
#define IDC_GI_TITLE 4470

Binary file not shown.