GSdx: small optimizations and an attempted fix for the dx9 fullscreen alt+tab crash

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1463 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2009-07-04 15:14:04 +00:00
parent 2a5a32d359
commit 4c76909afe
38 changed files with 597 additions and 497 deletions

View File

@ -113,7 +113,17 @@ void GPURenderer::VSync()
Flush(); Flush();
if(!Merge()) return; if(!m_dev->IsLost(true))
{
if(!Merge())
{
return;
}
}
else
{
ResetDevice();
}
// osd // osd
@ -148,11 +158,6 @@ void GPURenderer::VSync()
SetWindowText(m_hWnd, s.c_str()); SetWindowText(m_hWnd, s.c_str());
} }
if(m_dev->IsLost())
{
ResetDevice();
}
GSVector4i r; GSVector4i r;
GetClientRect(m_hWnd, r); GetClientRect(m_hWnd, r);

View File

@ -50,36 +50,16 @@ GSTexture* GPURendererSW::GetOutput()
r.right <<= m_scale.x; r.right <<= m_scale.x;
r.bottom <<= m_scale.y; r.bottom <<= m_scale.y;
// TODO if(m_dev->ResizeTexture(&m_texture, r.width(), r.height()))
static uint32* buff = (uint32*)_aligned_malloc(m_mem.GetWidth() * m_mem.GetHeight() * sizeof(uint32), 16);
m_mem.ReadFrame32(r, buff, !!m_env.STATUS.ISRGB24);
int w = r.width();
int h = r.height();
if(m_texture)
{ {
if(m_texture->GetWidth() != w || m_texture->GetHeight() != h) // TODO
{ static uint32* buff = (uint32*)_aligned_malloc(m_mem.GetWidth() * m_mem.GetHeight() * sizeof(uint32), 16);
delete m_texture;
m_texture = NULL; m_mem.ReadFrame32(r, buff, !!m_env.STATUS.ISRGB24);
}
m_texture->Update(r.rsize(), buff, m_mem.GetWidth() * sizeof(uint32));
} }
if(!m_texture)
{
m_texture = m_dev->CreateTexture(w, h);
if(!m_texture)
{
return NULL;
}
}
m_texture->Update(r.rsize(), buff, m_mem.GetWidth() * sizeof(uint32));
return m_texture; return m_texture;
} }
@ -164,8 +144,8 @@ void GPURendererSW::Draw()
{ {
GSVector4 p = m_vertices[i].p; GSVector4 p = m_vertices[i].p;
tl = tl.minv(p); tl = tl.min(p);
br = br.maxv(p); br = br.max(p);
} }
GSVector4i r = GSVector4i(tl.xyxy(br)).rintersect(data.scissor); GSVector4i r = GSVector4i(tl.xyxy(br)).rintersect(data.scissor);

View File

@ -801,7 +801,9 @@ union
uint64 _PAD4:30; uint64 _PAD4:30;
}; };
}; };
REG_END REG_END2
// Returns true when (1 << TW) is greater than (TBW << 6).
// NOTE(review): presumably this compares the texture width in pixels against the
// buffer width in pixels (TBW in units of 64) — confirm against the TEX0 register layout.
__forceinline bool IsRepeating() {return ((uint32)1 << TW) > (TBW << 6);}
REG_END2
REG64_(GIFReg, TEX1) REG64_(GIFReg, TEX1)
uint32 LCM:1; uint32 LCM:1;

View File

@ -89,6 +89,7 @@ CRC::Game CRC::m_games[] =
{0x7D8F539A, SoTC, EU, 0}, {0x7D8F539A, SoTC, EU, 0},
{0x3122B508, OnePieceGrandAdventure, US, 0}, {0x3122B508, OnePieceGrandAdventure, US, 0},
{0x8DF14A24, OnePieceGrandAdventure, Unknown, 0}, {0x8DF14A24, OnePieceGrandAdventure, Unknown, 0},
{0x5D02CC5B, OnePieceGrandBattle, Unknown, 0},
{0x6F8545DB, ICO, US, 0}, {0x6F8545DB, ICO, US, 0},
{0xB01A4C95, ICO, JP, 0}, {0xB01A4C95, ICO, JP, 0},
{0x5C991F4E, ICO, Unknown, 0}, {0x5C991F4E, ICO, Unknown, 0},
@ -143,7 +144,9 @@ CRC::Game CRC::m_games[] =
{0x23A97857, StarOcean3, JPUNDUB, 0}, {0x23A97857, StarOcean3, JPUNDUB, 0},
{0xCC96CE93, ValkyrieProfile2, US, 0}, {0xCC96CE93, ValkyrieProfile2, US, 0},
{0x774DE8E2, ValkyrieProfile2, JP, 0}, {0x774DE8E2, ValkyrieProfile2, JP, 0},
{0x47B9B2FD, RadiataStories, US, 0}, {0x04CCB600, ValkyrieProfile2, EU, 0},
{0x47B9B2FD, RadiataStories, US, 0},
{0xE8FCF8EC, SMTNocturne, US, ZWriteMustNotClear}, // saves/reloads z buffer around shadow drawing
}; };
hash_map<uint32, CRC::Game*> CRC::m_map; hash_map<uint32, CRC::Game*> CRC::m_map;

View File

@ -48,6 +48,7 @@ public:
BullyCC, BullyCC,
SoTC, SoTC,
OnePieceGrandAdventure, OnePieceGrandAdventure,
OnePieceGrandBattle,
ICO, ICO,
GT4, GT4,
WildArms5, WildArms5,
@ -74,6 +75,7 @@ public:
StarOcean3, StarOcean3,
ValkyrieProfile2, ValkyrieProfile2,
RadiataStories, RadiataStories,
SMTNocturne,
TitleCount, TitleCount,
}; };
@ -95,6 +97,7 @@ public:
enum Flags enum Flags
{ {
PointListPalette = 1, PointListPalette = 1,
ZWriteMustNotClear = 2,
}; };
struct Game struct Game

View File

@ -44,7 +44,7 @@ bool GSDevice::Create(GSWnd* wnd, bool vsync)
return true; return true;
} }
bool GSDevice::Reset(int w, int h, bool fs) bool GSDevice::Reset(int w, int h, int mode)
{ {
for(list<GSTexture*>::iterator i = m_pool.begin(); i != m_pool.end(); i++) for(list<GSTexture*>::iterator i = m_pool.begin(); i != m_pool.end(); i++)
{ {
@ -74,13 +74,15 @@ void GSDevice::Present(const GSVector4i& r, int shader)
{ {
GSVector4i cr = m_wnd->GetClientRect(); GSVector4i cr = m_wnd->GetClientRect();
// Skip Presentation if the surface is invisible (minimized or hidden); prevents DX null swapchain crashes. int w = std::max(cr.width(), 1);
if(cr.width() == 0 || cr.height() == 0) int h = std::max(cr.height(), 1);
return;
if(m_backbuffer->GetWidth() != cr.width() || m_backbuffer->GetHeight() != cr.height()) if(!m_backbuffer || m_backbuffer->GetWidth() != w || m_backbuffer->GetHeight() != h)
{ {
Reset(cr.width(), cr.height(), false); if(!Reset(w, h, DontCare))
{
return;
}
} }
ClearRenderTarget(m_backbuffer, 0); ClearRenderTarget(m_backbuffer, 0);
@ -225,4 +227,22 @@ void GSDevice::Interlace(const GSVector2i& ds, int field, int mode, float yoffse
{ {
m_current = m_merge; m_current = m_merge;
} }
} }
// Makes sure *t refers to a texture of exactly w x h.
//
// If *t is NULL or its dimensions differ from the requested ones, the current
// texture is deleted and replaced by the result of CreateTexture(w, h), which
// is stored back into *t (and may be NULL if creation failed). A texture that
// already has the requested size is left untouched.
//
// t: address of the texture pointer to inspect/replace; must not be NULL
//    (asserts and returns false otherwise).
// Returns true when *t is non-NULL on exit, false otherwise.
bool GSDevice::ResizeTexture(GSTexture** t, int w, int h)
{
	if(t == NULL)
	{
		ASSERT(0);

		return false;
	}

	GSTexture* current = *t;

	// Nothing to do when the existing texture already has the requested size.
	if(current != NULL && current->GetWidth() == w && current->GetHeight() == h)
	{
		return true;
	}

	// Drop the old texture (deleting NULL is a no-op) and publish the replacement,
	// even when creation failed, so the caller never keeps a dangling pointer.
	delete current;

	GSTexture* replacement = CreateTexture(w, h);

	*t = replacement;

	return replacement != NULL;
}

View File

@ -73,10 +73,11 @@ public:
void Recycle(GSTexture* t); void Recycle(GSTexture* t);
virtual bool Create(GSWnd* wnd, bool vsync); enum {Windowed, Fullscreen, DontCare};
virtual bool Reset(int w, int h, bool fs);
virtual bool IsLost() {return false;} virtual bool Create(GSWnd* wnd, bool vsync);
virtual bool Reset(int w, int h, int mode);
virtual bool IsLost(bool update = false) {return false;}
virtual void Present(const GSVector4i& r, int shader); virtual void Present(const GSVector4i& r, int shader);
virtual void Flip() {} virtual void Flip() {}
@ -105,6 +106,8 @@ public:
void Merge(GSTexture* st[2], GSVector4* sr, GSVector4* dr, const GSVector2i& fs, bool slbg, bool mmod, const GSVector4& c); void Merge(GSTexture* st[2], GSVector4* sr, GSVector4* dr, const GSVector2i& fs, bool slbg, bool mmod, const GSVector4& c);
void Interlace(const GSVector2i& ds, int field, int mode, float yoffset); void Interlace(const GSVector2i& ds, int field, int mode, float yoffset);
bool ResizeTexture(GSTexture** t, int w, int h);
template<class T> void PrepareShaderMacro(vector<T>& dst, const T* src, const char* model) template<class T> void PrepareShaderMacro(vector<T>& dst, const T* src, const char* model)
{ {
dst.clear(); dst.clear();

View File

@ -207,16 +207,16 @@ bool GSDevice10::Create(GSWnd* wnd, bool vsync)
// //
Reset(1, 1, false); Reset(1, 1, Windowed);
// //
return true; return true;
} }
bool GSDevice10::Reset(int w, int h, bool fs) bool GSDevice10::Reset(int w, int h, int mode)
{ {
if(!__super::Reset(w, h, fs)) if(!__super::Reset(w, h, mode))
return false; return false;
DXGI_SWAP_CHAIN_DESC scd; DXGI_SWAP_CHAIN_DESC scd;
@ -504,7 +504,7 @@ void GSDevice10::IASetVertexBuffer(const void* vertices, size_t stride, size_t c
m_vertices.vb = NULL; m_vertices.vb = NULL;
m_vertices.start = 0; m_vertices.start = 0;
m_vertices.count = 0; m_vertices.count = 0;
m_vertices.limit = max(count * 3 / 2, 10000); m_vertices.limit = std::max<int>(count * 3 / 2, 10000);
} }
if(m_vertices.vb == NULL) if(m_vertices.vb == NULL)

View File

@ -96,8 +96,7 @@ public:
virtual ~GSDevice10(); virtual ~GSDevice10();
bool Create(GSWnd* wnd, bool vsync); bool Create(GSWnd* wnd, bool vsync);
bool Reset(int w, int h, bool fs); bool Reset(int w, int h, int mode);
void Flip(); void Flip();
void BeginScene(); void BeginScene();

View File

@ -244,16 +244,16 @@ bool GSDevice11::Create(GSWnd* wnd, bool vsync)
// //
Reset(1, 1, false); Reset(1, 1, Windowed);
// //
return true; return true;
} }
bool GSDevice11::Reset(int w, int h, bool fs) bool GSDevice11::Reset(int w, int h, int mode)
{ {
if(!__super::Reset(w, h, fs)) if(!__super::Reset(w, h, mode))
return false; return false;
DXGI_SWAP_CHAIN_DESC scd; DXGI_SWAP_CHAIN_DESC scd;
@ -541,7 +541,7 @@ void GSDevice11::IASetVertexBuffer(const void* vertices, size_t stride, size_t c
m_vertices.vb = NULL; m_vertices.vb = NULL;
m_vertices.start = 0; m_vertices.start = 0;
m_vertices.count = 0; m_vertices.count = 0;
m_vertices.limit = max(count * 3 / 2, 10000); m_vertices.limit = std::max<int>(count * 3 / 2, 10000);
} }
if(m_vertices.vb == NULL) if(m_vertices.vb == NULL)

View File

@ -99,8 +99,7 @@ public:
virtual ~GSDevice11(); virtual ~GSDevice11();
bool Create(GSWnd* wnd, bool vsync); bool Create(GSWnd* wnd, bool vsync);
bool Reset(int w, int h, bool fs); bool Reset(int w, int h, int mode);
void Flip(); void Flip();
void BeginScene(); void BeginScene();

View File

@ -25,6 +25,7 @@
#include "GSDevice7.h" #include "GSDevice7.h"
GSDevice7::GSDevice7() GSDevice7::GSDevice7()
: m_lost(false)
{ {
} }
@ -75,14 +76,14 @@ bool GSDevice7::Create(GSWnd* wnd, bool vsync)
return false; return false;
} }
Reset(1, 1, false); Reset(1, 1, Windowed);
return true; return true;
} }
bool GSDevice7::Reset(int w, int h, bool fs) bool GSDevice7::Reset(int w, int h, int mode)
{ {
if(!__super::Reset(w, h, fs)) if(!__super::Reset(w, h, mode))
return false; return false;
DDSURFACEDESC2 desc; DDSURFACEDESC2 desc;
@ -130,6 +131,8 @@ bool GSDevice7::Reset(int w, int h, bool fs)
} }
} }
m_lost = false;
return true; return true;
} }
@ -139,9 +142,15 @@ void GSDevice7::Present(const GSVector4i& r, int shader)
GSVector4i cr = m_wnd->GetClientRect(); GSVector4i cr = m_wnd->GetClientRect();
if(m_backbuffer->GetWidth() != cr.width() || m_backbuffer->GetHeight() != cr.height()) int w = std::max(cr.width(), 1);
int h = std::max(cr.height(), 1);
if(!m_backbuffer || m_backbuffer->GetWidth() != w || m_backbuffer->GetHeight() != h)
{ {
Reset(cr.width(), cr.height(), false); if(!Reset(w, h, DontCare))
{
return;
}
} }
CComPtr<IDirectDrawSurface7> backbuffer = *(GSTexture7*)m_backbuffer; CComPtr<IDirectDrawSurface7> backbuffer = *(GSTexture7*)m_backbuffer;
@ -157,7 +166,10 @@ void GSDevice7::Present(const GSVector4i& r, int shader)
GSVector4i r2 = r; GSVector4i r2 = r;
hr = backbuffer->Blt(r2, *(GSTexture7*)m_merge, NULL, DDBLT_WAIT, NULL); if(m_current)
{
hr = backbuffer->Blt(r2, *(GSTexture7*)m_current, NULL, DDBLT_WAIT, NULL);
}
// if ClearRenderTarget was implemented the parent class could handle these tasks until this point // if ClearRenderTarget was implemented the parent class could handle these tasks until this point
@ -174,6 +186,8 @@ void GSDevice7::Present(const GSVector4i& r, int shader)
if(hr == DDERR_SURFACELOST) if(hr == DDERR_SURFACELOST)
{ {
m_lost = true;
// TODO // TODO
HRESULT hr = m_dd->TestCooperativeLevel(); HRESULT hr = m_dd->TestCooperativeLevel();

View File

@ -29,6 +29,7 @@ class GSDevice7 : public GSDevice
private: private:
CComPtr<IDirectDraw7> m_dd; CComPtr<IDirectDraw7> m_dd;
CComPtr<IDirectDrawSurface7> m_primary; CComPtr<IDirectDrawSurface7> m_primary;
bool m_lost;
GSTexture* Create(int type, int w, int h, int format); GSTexture* Create(int type, int w, int h, int format);
@ -40,7 +41,7 @@ public:
virtual ~GSDevice7(); virtual ~GSDevice7();
bool Create(GSWnd* wnd, bool vsync); bool Create(GSWnd* wnd, bool vsync);
bool Reset(int w, int h, bool fs); bool Reset(int w, int h, int mode);
// Reports the cached m_lost flag; the update argument is not used by this implementation.
bool IsLost(bool update) {return m_lost;}
void Present(const GSVector4i& r, int shader); void Present(const GSVector4i& r, int shader);
}; };

View File

@ -42,6 +42,7 @@ GSDevice9::GSDevice9()
, m_bf(0xffffffff) , m_bf(0xffffffff)
, m_rtv(NULL) , m_rtv(NULL)
, m_dsv(NULL) , m_dsv(NULL)
, m_lost(false)
{ {
memset(&m_pp, 0, sizeof(m_pp)); memset(&m_pp, 0, sizeof(m_pp));
memset(&m_ddcaps, 0, sizeof(m_ddcaps)); memset(&m_ddcaps, 0, sizeof(m_ddcaps));
@ -105,9 +106,7 @@ bool GSDevice9::Create(GSWnd* wnd, bool vsync)
m_d3d->GetDeviceCaps(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, &m_d3dcaps); m_d3d->GetDeviceCaps(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, &m_d3dcaps);
bool fs = theApp.GetConfig("ModeWidth", 0) > 0; if(!Reset(1, 1, theApp.GetConfig("ModeWidth", 0) > 0 ? Fullscreen : Windowed)) return false;
if(!Reset(1, 1, fs)) return false;
m_dev->Clear(0, NULL, D3DCLEAR_TARGET, 0, 1.0f, 0); m_dev->Clear(0, NULL, D3DCLEAR_TARGET, 0, 1.0f, 0);
@ -188,35 +187,48 @@ bool GSDevice9::Create(GSWnd* wnd, bool vsync)
return true; return true;
} }
bool GSDevice9::Reset(int w, int h, bool fs) bool GSDevice9::Reset(int w, int h, int mode)
{ {
if(!__super::Reset(w, h, fs)) if(!__super::Reset(w, h, mode))
return false; return false;
HRESULT hr; HRESULT hr;
if(!m_d3d) return false; if(!m_d3d) return false;
if(m_swapchain && !fs && m_pp.Windowed) if(mode == DontCare)
{ {
m_swapchain = NULL; mode = m_pp.Windowed ? Windowed : Fullscreen;
}
m_pp.BackBufferWidth = w; if(!m_lost)
m_pp.BackBufferHeight = h; {
if(m_swapchain && mode != Fullscreen && m_pp.Windowed)
{
m_swapchain = NULL;
hr = m_dev->CreateAdditionalSwapChain(&m_pp, &m_swapchain); m_pp.BackBufferWidth = w;
m_pp.BackBufferHeight = h;
if(FAILED(hr)) return false; hr = m_dev->CreateAdditionalSwapChain(&m_pp, &m_swapchain);
CComPtr<IDirect3DSurface9> backbuffer; if(FAILED(hr)) return false;
hr = m_swapchain->GetBackBuffer(0, D3DBACKBUFFER_TYPE_MONO, &backbuffer);
m_backbuffer = new GSTexture9(backbuffer);
return true; CComPtr<IDirect3DSurface9> backbuffer;
hr = m_swapchain->GetBackBuffer(0, D3DBACKBUFFER_TYPE_MONO, &backbuffer);
m_backbuffer = new GSTexture9(backbuffer);
return true;
}
} }
m_swapchain = NULL; m_swapchain = NULL;
m_vertices.vb = NULL;
m_vertices.vb_old = NULL;
m_vertices.start = 0;
m_vertices.count = 0;
if(m_vs_cb) _aligned_free(m_vs_cb); if(m_vs_cb) _aligned_free(m_vs_cb);
if(m_ps_cb) _aligned_free(m_ps_cb); if(m_ps_cb) _aligned_free(m_ps_cb);
@ -258,7 +270,7 @@ bool GSDevice9::Reset(int w, int h, bool fs)
int mh = theApp.GetConfig("ModeHeight", 0); int mh = theApp.GetConfig("ModeHeight", 0);
int mrr = theApp.GetConfig("ModeRefreshRate", 0); int mrr = theApp.GetConfig("ModeRefreshRate", 0);
if(fs && mw > 0 && mh > 0 && mrr >= 0) if(mode == Fullscreen && mw > 0 && mh > 0 && mrr >= 0)
{ {
m_pp.Windowed = FALSE; m_pp.Windowed = FALSE;
m_pp.BackBufferWidth = mw; m_pp.BackBufferWidth = mw;
@ -324,27 +336,39 @@ bool GSDevice9::Reset(int w, int h, bool fs)
return true; return true;
} }
bool GSDevice9::IsLost() bool GSDevice9::IsLost(bool update)
{ {
HRESULT hr = m_dev->TestCooperativeLevel(); if(!m_lost || update)
{
return hr == D3DERR_DEVICELOST || hr == D3DERR_DEVICENOTRESET; HRESULT hr = m_dev->TestCooperativeLevel();
m_lost = hr == D3DERR_DEVICELOST || hr == D3DERR_DEVICENOTRESET;
}
return m_lost;
} }
void GSDevice9::Flip() void GSDevice9::Flip()
{ {
m_dev->EndScene(); m_dev->EndScene();
HRESULT hr;
if(m_swapchain) if(m_swapchain)
{ {
m_swapchain->Present(NULL, NULL, NULL, NULL, 0); hr = m_swapchain->Present(NULL, NULL, NULL, NULL, 0);
} }
else else
{ {
m_dev->Present(NULL, NULL, NULL, NULL); hr = m_dev->Present(NULL, NULL, NULL, NULL);
} }
m_dev->BeginScene(); m_dev->BeginScene();
if(FAILED(hr))
{
m_lost = true;
}
} }
void GSDevice9::BeginScene() void GSDevice9::BeginScene()
@ -663,7 +687,7 @@ void GSDevice9::IASetVertexBuffer(const void* vertices, size_t stride, size_t co
m_vertices.vb = NULL; m_vertices.vb = NULL;
m_vertices.start = 0; m_vertices.start = 0;
m_vertices.count = 0; m_vertices.count = 0;
m_vertices.limit = max(count * 3 / 2, 10000); m_vertices.limit = std::max<int>(count * 3 / 2, 10000);
} }
if(m_vertices.vb == NULL) if(m_vertices.vb == NULL)

View File

@ -95,6 +95,7 @@ private:
CComPtr<IDirect3D9> m_d3d; CComPtr<IDirect3D9> m_d3d;
CComPtr<IDirect3DDevice9> m_dev; CComPtr<IDirect3DDevice9> m_dev;
CComPtr<IDirect3DSwapChain9> m_swapchain; CComPtr<IDirect3DSwapChain9> m_swapchain;
bool m_lost;
struct struct
{ {
@ -132,9 +133,8 @@ public:
virtual ~GSDevice9(); virtual ~GSDevice9();
bool Create(GSWnd* wnd, bool vsync); bool Create(GSWnd* wnd, bool vsync);
bool Reset(int w, int h, bool fs); bool Reset(int w, int h, int mode);
bool IsLost(bool update);
bool IsLost();
void Flip(); void Flip();
void BeginScene(); void BeginScene();

View File

@ -29,14 +29,14 @@ bool GSDeviceNull::Create(GSWnd* wnd, bool vsync)
return false; return false;
} }
Reset(1, 1, false); Reset(1, 1, Windowed);
return true; return true;
} }
bool GSDeviceNull::Reset(int w, int h, bool fs) bool GSDeviceNull::Reset(int w, int h, int mode)
{ {
if(!__super::Reset(w, h, fs)) if(!__super::Reset(w, h, mode))
return false; return false;
return true; return true;

View File

@ -36,5 +36,5 @@ public:
GSDeviceNull() {} GSDeviceNull() {}
bool Create(GSWnd* wnd, bool vsync); bool Create(GSWnd* wnd, bool vsync);
bool Reset(int w, int h, bool fs); bool Reset(int w, int h, int mode);
}; };

View File

@ -150,14 +150,14 @@ bool GSDeviceOGL::Create(GSWnd* wnd, bool vsync)
*/ */
GSVector4i r = wnd->GetClientRect(); GSVector4i r = wnd->GetClientRect();
Reset(r.width(), r.height(), false); Reset(r.width(), r.height(), Windowed);
return true; return true;
} }
bool GSDeviceOGL::Reset(int w, int h, bool fs) bool GSDeviceOGL::Reset(int w, int h, int mode)
{ {
if(!__super::Reset(w, h, fs)) if(!__super::Reset(w, h, mode))
return false; return false;
glCullFace(GL_FRONT_AND_BACK); CheckError(); glCullFace(GL_FRONT_AND_BACK); CheckError();
@ -361,7 +361,7 @@ void GSDeviceOGL::IASetVertexBuffer(const void* vertices, size_t stride, size_t
{ {
m_vertices.start = 0; m_vertices.start = 0;
m_vertices.count = 0; m_vertices.count = 0;
m_vertices.limit = max(count * 3 / 2, 10000); m_vertices.limit = std::max<int>(count * 3 / 2, 10000);
growbuffer = true; growbuffer = true;
} }

View File

@ -110,8 +110,7 @@ public:
virtual ~GSDeviceOGL(); virtual ~GSDeviceOGL();
bool Create(GSWnd* wnd, bool vsync); bool Create(GSWnd* wnd, bool vsync);
bool Reset(int w, int h, bool fs); bool Reset(int w, int h, int mode);
void Present(const GSVector4i& r, int shader); void Present(const GSVector4i& r, int shader);
void Flip(); void Flip();

View File

@ -48,8 +48,8 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
m_env.vm = p->vm; m_env.vm = p->vm;
m_env.fbr = p->fbo->row; m_env.fbr = p->fbo->row;
m_env.zbr = p->zbo->row; m_env.zbr = p->zbo->row;
m_env.fbc = p->fbo->col; m_env.fbc = p->fbo->col[0];
m_env.zbc = p->zbo->col; m_env.zbc = p->zbo->col[0];
m_env.fzbr = p->fzbo->row; m_env.fzbr = p->fzbo->row;
m_env.fzbc = p->fzbo->col; m_env.fzbc = p->fzbo->col;
m_env.fm = GSVector4i(p->fm); m_env.fm = GSVector4i(p->fm);
@ -116,8 +116,8 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
m_env.t.mask.u32[0] = 0; m_env.t.mask.u32[0] = 0;
break; break;
case CLAMP_REGION_CLAMP: case CLAMP_REGION_CLAMP:
m_env.t.min.u16[0] = min(context->CLAMP.MINU, tw - 1); m_env.t.min.u16[0] = std::min<int>(context->CLAMP.MINU, tw - 1);
m_env.t.max.u16[0] = min(context->CLAMP.MAXU, tw - 1); m_env.t.max.u16[0] = std::min<int>(context->CLAMP.MAXU, tw - 1);
m_env.t.mask.u32[0] = 0; m_env.t.mask.u32[0] = 0;
break; break;
case CLAMP_REGION_REPEAT: case CLAMP_REGION_REPEAT:
@ -142,8 +142,8 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
m_env.t.mask.u32[2] = 0; m_env.t.mask.u32[2] = 0;
break; break;
case CLAMP_REGION_CLAMP: case CLAMP_REGION_CLAMP:
m_env.t.min.u16[4] = min(context->CLAMP.MINV, th - 1); m_env.t.min.u16[4] = std::min<int>(context->CLAMP.MINV, th - 1);
m_env.t.max.u16[4] = min(context->CLAMP.MAXV, th - 1); // ffx anima summon scene, when the anchor appears (th = 256, maxv > 256) m_env.t.max.u16[4] = std::min<int>(context->CLAMP.MAXV, th - 1); // ffx anima summon scene, when the anchor appears (th = 256, maxv > 256)
m_env.t.mask.u32[2] = 0; m_env.t.mask.u32[2] = 0;
break; break;
case CLAMP_REGION_REPEAT: case CLAMP_REGION_REPEAT:
@ -224,22 +224,22 @@ void GSDrawScanline::DrawSolidRect(const GSVector4i& r, const GSVertexSW& v)
{ {
if(m == 0) if(m == 0)
{ {
DrawSolidRectT<uint32, false>(m_env.zbr, m_env.zbc[0], r, z, m); DrawSolidRectT<uint32, false>(m_env.zbr, m_env.zbc, r, z, m);
} }
else else
{ {
DrawSolidRectT<uint32, true>(m_env.zbr, m_env.zbc[0], r, z, m); DrawSolidRectT<uint32, true>(m_env.zbr, m_env.zbc, r, z, m);
} }
} }
else else
{ {
if(m == 0) if(m == 0)
{ {
DrawSolidRectT<uint16, false>(m_env.zbr, m_env.zbc[0], r, z, m); DrawSolidRectT<uint16, false>(m_env.zbr, m_env.zbc, r, z, m);
} }
else else
{ {
DrawSolidRectT<uint16, true>(m_env.zbr, m_env.zbc[0], r, z, m); DrawSolidRectT<uint16, true>(m_env.zbr, m_env.zbc, r, z, m);
} }
} }
} }
@ -259,11 +259,11 @@ void GSDrawScanline::DrawSolidRect(const GSVector4i& r, const GSVertexSW& v)
{ {
if(m == 0) if(m == 0)
{ {
DrawSolidRectT<uint32, false>(m_env.fbr, m_env.fbc[0], r, c, m); DrawSolidRectT<uint32, false>(m_env.fbr, m_env.fbc, r, c, m);
} }
else else
{ {
DrawSolidRectT<uint32, true>(m_env.fbr, m_env.fbc[0], r, c, m); DrawSolidRectT<uint32, true>(m_env.fbr, m_env.fbc, r, c, m);
} }
} }
else else
@ -272,18 +272,18 @@ void GSDrawScanline::DrawSolidRect(const GSVector4i& r, const GSVertexSW& v)
if(m == 0) if(m == 0)
{ {
DrawSolidRectT<uint16, false>(m_env.fbr, m_env.fbc[0], r, c, m); DrawSolidRectT<uint16, false>(m_env.fbr, m_env.fbc, r, c, m);
} }
else else
{ {
DrawSolidRectT<uint16, true>(m_env.fbr, m_env.fbc[0], r, c, m); DrawSolidRectT<uint16, true>(m_env.fbr, m_env.fbc, r, c, m);
} }
} }
} }
} }
template<class T, bool masked> template<class T, bool masked>
void GSDrawScanline::DrawSolidRectT(const GSVector4i* row, int* col, const GSVector4i& r, uint32 c, uint32 m) void GSDrawScanline::DrawSolidRectT(const int* row, int* col, const GSVector4i& r, uint32 c, uint32 m)
{ {
if(m == 0xffffffff) return; if(m == 0xffffffff) return;
@ -320,13 +320,13 @@ void GSDrawScanline::DrawSolidRectT(const GSVector4i* row, int* col, const GSVec
} }
template<class T, bool masked> template<class T, bool masked>
void GSDrawScanline::FillRect(const GSVector4i* row, int* col, const GSVector4i& r, uint32 c, uint32 m) void GSDrawScanline::FillRect(const int* row, int* col, const GSVector4i& r, uint32 c, uint32 m)
{ {
if(r.x >= r.z) return; if(r.x >= r.z) return;
for(int y = r.y; y < r.w; y++) for(int y = r.y; y < r.w; y++)
{ {
uint32 base = row[y].x; uint32 base = row[y];
for(int x = r.x; x < r.z; x++) for(int x = r.x; x < r.z; x++)
{ {
@ -338,13 +338,13 @@ void GSDrawScanline::FillRect(const GSVector4i* row, int* col, const GSVector4i&
} }
template<class T, bool masked> template<class T, bool masked>
void GSDrawScanline::FillBlock(const GSVector4i* row, int* col, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m) void GSDrawScanline::FillBlock(const int* row, int* col, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m)
{ {
if(r.x >= r.z) return; if(r.x >= r.z) return;
for(int y = r.y; y < r.w; y += 8) for(int y = r.y; y < r.w; y += 8)
{ {
uint32 base = row[y].x; uint32 base = row[y];
for(int x = r.x; x < r.z; x += 8 * 4 / sizeof(T)) for(int x = r.x; x < r.z; x += 8 * 4 / sizeof(T))
{ {

View File

@ -60,13 +60,13 @@ class GSDrawScanline : public GSAlignedClass<16>, public IDrawScanline
void DrawSolidRect(const GSVector4i& r, const GSVertexSW& v); void DrawSolidRect(const GSVector4i& r, const GSVertexSW& v);
template<class T, bool masked> template<class T, bool masked>
void DrawSolidRectT(const GSVector4i* row, int* col, const GSVector4i& r, uint32 c, uint32 m); void DrawSolidRectT(const int* row, int* col, const GSVector4i& r, uint32 c, uint32 m);
template<class T, bool masked> template<class T, bool masked>
__forceinline void FillRect(const GSVector4i* row, int* col, const GSVector4i& r, uint32 c, uint32 m); __forceinline void FillRect(const int* row, int* col, const GSVector4i& r, uint32 c, uint32 m);
template<class T, bool masked> template<class T, bool masked>
__forceinline void FillBlock(const GSVector4i* row, int* col, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m); __forceinline void FillBlock(const int* row, int* col, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m);
protected: protected:
GSState* m_state; GSState* m_state;

View File

@ -28,19 +28,21 @@
#include "GSLocalMemory.h" #include "GSLocalMemory.h"
#define ASSERT_BLOCK(r, w, h) \ #define ASSERT_BLOCK(r, w, h) \
ASSERT((r).width() >= w && (r).height() >= h && !((r).left&(w-1)) && !((r).top&(h-1)) && !((r).right&(w-1)) && !((r).bottom&(h-1))); \ ASSERT((r).width() >= w && (r).height() >= h && !((r).left & (w - 1)) && !((r).top & (h - 1)) && !((r).right & (w - 1)) && !((r).bottom & (h - 1))); \
#define FOREACH_BLOCK_START(w, h, bpp, format) \ #define FOREACH_BLOCK_START(r, w, h, bpp, psm) \
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[format]; \ ASSERT_BLOCK(r, w, h); \
uint32 bp = TEX0.TBP0; \ const GSLocalMemory::BlockOffset* RESTRICT _bo = GetBlockOffset(TEX0.TBP0, TEX0.TBW, psm); \
uint32 bw = TEX0.TBW; \ GSVector4i _r = r >> 3; \
int offset = dstpitch * h - r.width() * bpp / 8; \ uint8* _dst = dst - _r.left * bpp; \
for(int y = r.top, ye = r.bottom; y < ye; y += h, dst += offset) \ int _offset = dstpitch * h; \
{ ASSERT_BLOCK(r, w, h); \ for(int y = _r.top; y < _r.bottom; y += h >> 3, _dst += _offset) \
uint32 base = psm.bn(0, y, bp, bw); \ { \
for(int x = r.left, xe = r.right; x < xe; x += w, dst += w * bpp / 8) \ uint32 _base = _bo->row[y]; \
for(int x = _r.left; x < _r.right; x += w >> 3) \
{ \ { \
const uint8* src = BlockPtr(base + psm.blockOffset[x >> 3]); \ const uint8* src = BlockPtr(_base + _bo->col[x]); \
uint8* dst = &_dst[x * bpp]; \
#define FOREACH_BLOCK_END }} #define FOREACH_BLOCK_END }}
@ -463,37 +465,65 @@ GSLocalMemory::~GSLocalMemory()
{ {
VirtualFree(m_vm8, 0, MEM_RELEASE); VirtualFree(m_vm8, 0, MEM_RELEASE);
for(hash_map<uint32, Offset*>::iterator i = m_omap.begin(); i != m_omap.end(); i++) for(hash_map<uint32, BlockOffset*>::iterator i = m_bomap.begin(); i != m_bomap.end(); i++)
{ {
Offset* o = i->second; _aligned_free(i->second);
_aligned_free(o->col[0]);
_aligned_free(o);
} }
for(hash_map<uint32, Offset4*>::iterator i = m_o4map.begin(); i != m_o4map.end(); i++) for(hash_map<uint32, PixelOffset*>::iterator i = m_pomap.begin(); i != m_pomap.end(); i++)
{
_aligned_free(i->second);
}
for(hash_map<uint32, PixelOffset4*>::iterator i = m_po4map.begin(); i != m_po4map.end(); i++)
{ {
_aligned_free(i->second); _aligned_free(i->second);
} }
} }
GSLocalMemory::Offset* GSLocalMemory::GetOffset(uint32 bp, uint32 bw, uint32 psm) GSLocalMemory::BlockOffset* GSLocalMemory::GetBlockOffset(uint32 bp, uint32 bw, uint32 psm)
{ {
if(bw == 0) {ASSERT(0); return NULL;}
ASSERT(m_psm[psm].bpp > 8); // only for 16/24/32/8h/4hh/4hl formats where all columns are the same
uint32 hash = bp | (bw << 14) | (psm << 20); uint32 hash = bp | (bw << 14) | (psm << 20);
hash_map<uint32, Offset*>::iterator i = m_omap.find(hash); hash_map<uint32, BlockOffset*>::iterator i = m_bomap.find(hash);
if(i != m_omap.end()) if(i != m_bomap.end())
{ {
return i->second; return i->second;
} }
Offset* o = (Offset*)_aligned_malloc(sizeof(Offset), 16); BlockOffset* o = (BlockOffset*)_aligned_malloc(sizeof(BlockOffset), 16);
o->hash = hash;
pixelAddress bn = m_psm[psm].bn;
for(int i = 0; i < 256; i++)
{
o->row[i] = (int)bn(0, i << 3, bp, bw);
}
o->col = m_psm[psm].blockOffset;
m_bomap[hash] = o;
return o;
}
GSLocalMemory::PixelOffset* GSLocalMemory::GetPixelOffset(uint32 bp, uint32 bw, uint32 psm)
{
if(bw == 0) {ASSERT(0); return NULL;}
uint32 hash = bp | (bw << 14) | (psm << 20);
hash_map<uint32, PixelOffset*>::iterator i = m_pomap.find(hash);
if(i != m_pomap.end())
{
return i->second;
}
PixelOffset* o = (PixelOffset*)_aligned_malloc(sizeof(PixelOffset), 16);
o->hash = hash; o->hash = hash;
@ -501,24 +531,20 @@ GSLocalMemory::Offset* GSLocalMemory::GetOffset(uint32 bp, uint32 bw, uint32 psm
for(int i = 0; i < 2048; i++) for(int i = 0; i < 2048; i++)
{ {
o->row[i] = GSVector4i((int)pa(0, i, bp, bw)); o->row[i] = (int)pa(0, i, bp, bw);
} }
int* p = (int*)_aligned_malloc(sizeof(int) * (2048 + 3) * 4, 16); for(int i = 0; i < 8; i++)
for(int i = 0; i < 4; i++)
{ {
o->col[i] = &p[2048 * i + ((4 - (i & 3)) & 3)]; o->col[i] = m_psm[psm].rowOffset[i];
memcpy(o->col[i], m_psm[psm].rowOffset[0], sizeof(int) * 2048);
} }
m_omap[hash] = o; m_pomap[hash] = o;
return o; return o;
} }
GSLocalMemory::Offset4* GSLocalMemory::GetOffset4(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF) GSLocalMemory::PixelOffset4* GSLocalMemory::GetPixelOffset4(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF)
{ {
uint32 fbp = FRAME.Block(); uint32 fbp = FRAME.Block();
uint32 zbp = ZBUF.Block(); uint32 zbp = ZBUF.Block();
@ -535,14 +561,14 @@ GSLocalMemory::Offset4* GSLocalMemory::GetOffset4(const GIFRegFRAME& FRAME, cons
uint32 hash = (FRAME.FBP << 0) | (ZBUF.ZBP << 9) | (bw << 18) | (fpsm_hash << 24) | (zpsm_hash << 28); uint32 hash = (FRAME.FBP << 0) | (ZBUF.ZBP << 9) | (bw << 18) | (fpsm_hash << 24) | (zpsm_hash << 28);
hash_map<uint32, Offset4*>::iterator i = m_o4map.find(hash); hash_map<uint32, PixelOffset4*>::iterator i = m_po4map.find(hash);
if(i != m_o4map.end()) if(i != m_po4map.end())
{ {
return i->second; return i->second;
} }
Offset4* o = (Offset4*)_aligned_malloc(sizeof(Offset4), 16); PixelOffset4* o = (PixelOffset4*)_aligned_malloc(sizeof(PixelOffset4), 16);
o->hash = hash; o->hash = hash;
@ -564,7 +590,7 @@ GSLocalMemory::Offset4* GSLocalMemory::GetOffset4(const GIFRegFRAME& FRAME, cons
o->col[i].y = m_psm[zpsm].rowOffset[0][i * 4] << zs; o->col[i].y = m_psm[zpsm].rowOffset[0][i * 4] << zs;
} }
m_o4map[hash] = o; m_po4map[hash] = o;
return o; return o;
} }
@ -1436,20 +1462,20 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
/////////////////// ///////////////////
void GSLocalMemory::ReadTexture32(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const void GSLocalMemory::ReadTexture32(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{ {
FOREACH_BLOCK_START(8, 8, 32, PSM_PSMCT32) FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMCT32)
{ {
ReadBlock32<true>(src, dst, dstpitch); ReadBlock32<true>(src, dst, dstpitch);
} }
FOREACH_BLOCK_END FOREACH_BLOCK_END
} }
void GSLocalMemory::ReadTexture24(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const void GSLocalMemory::ReadTexture24(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{ {
if(TEXA.AEM) if(TEXA.AEM)
{ {
FOREACH_BLOCK_START(8, 8, 32, PSM_PSMCT24) FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMCT24)
{ {
ReadAndExpandBlock24<true>(src, dst, dstpitch, TEXA); ReadAndExpandBlock24<true>(src, dst, dstpitch, TEXA);
} }
@ -1457,7 +1483,7 @@ void GSLocalMemory::ReadTexture24(const GSVector4i& r, uint8* dst, int dstpitch,
} }
else else
{ {
FOREACH_BLOCK_START(8, 8, 32, PSM_PSMCT24) FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMCT24)
{ {
ReadAndExpandBlock24<false>(src, dst, dstpitch, TEXA); ReadAndExpandBlock24<false>(src, dst, dstpitch, TEXA);
} }
@ -1465,11 +1491,11 @@ void GSLocalMemory::ReadTexture24(const GSVector4i& r, uint8* dst, int dstpitch,
} }
} }
void GSLocalMemory::ReadTexture16(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const void GSLocalMemory::ReadTexture16(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{ {
__declspec(align(16)) uint16 block[16 * 8]; __declspec(align(16)) uint16 block[16 * 8];
FOREACH_BLOCK_START(16, 8, 32, PSM_PSMCT16) FOREACH_BLOCK_START(r, 16, 8, 32, PSM_PSMCT16)
{ {
ReadBlock16<true>(src, (uint8*)block, sizeof(block) / 8); ReadBlock16<true>(src, (uint8*)block, sizeof(block) / 8);
@ -1478,11 +1504,11 @@ void GSLocalMemory::ReadTexture16(const GSVector4i& r, uint8* dst, int dstpitch,
FOREACH_BLOCK_END FOREACH_BLOCK_END
} }
void GSLocalMemory::ReadTexture16S(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const void GSLocalMemory::ReadTexture16S(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{ {
__declspec(align(16)) uint16 block[16 * 8]; __declspec(align(16)) uint16 block[16 * 8];
FOREACH_BLOCK_START(16, 8, 32, PSM_PSMCT16S) FOREACH_BLOCK_START(r, 16, 8, 32, PSM_PSMCT16S)
{ {
ReadBlock16<true>(src, (uint8*)block, sizeof(block) / 8); ReadBlock16<true>(src, (uint8*)block, sizeof(block) / 8);
@ -1491,75 +1517,75 @@ void GSLocalMemory::ReadTexture16S(const GSVector4i& r, uint8* dst, int dstpitch
FOREACH_BLOCK_END FOREACH_BLOCK_END
} }
void GSLocalMemory::ReadTexture8(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const void GSLocalMemory::ReadTexture8(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{ {
const uint32* pal = m_clut; const uint32* pal = m_clut;
FOREACH_BLOCK_START(16, 16, 32, PSM_PSMT8) FOREACH_BLOCK_START(r, 16, 16, 32, PSM_PSMT8)
{ {
ReadAndExpandBlock8_32(src, dst, dstpitch, pal); ReadAndExpandBlock8_32(src, dst, dstpitch, pal);
} }
FOREACH_BLOCK_END FOREACH_BLOCK_END
} }
void GSLocalMemory::ReadTexture4(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const void GSLocalMemory::ReadTexture4(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{ {
const uint64* pal = m_clut; const uint64* pal = m_clut;
FOREACH_BLOCK_START(32, 16, 32, PSM_PSMT4) FOREACH_BLOCK_START(r, 32, 16, 32, PSM_PSMT4)
{ {
ReadAndExpandBlock4_32(src, dst, dstpitch, pal); ReadAndExpandBlock4_32(src, dst, dstpitch, pal);
} }
FOREACH_BLOCK_END FOREACH_BLOCK_END
} }
void GSLocalMemory::ReadTexture8H(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const void GSLocalMemory::ReadTexture8H(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{ {
const uint32* pal = m_clut; const uint32* pal = m_clut;
FOREACH_BLOCK_START(8, 8, 32, PSM_PSMT8H) FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMT8H)
{ {
ReadAndExpandBlock8H_32(src, dst, dstpitch, pal); ReadAndExpandBlock8H_32(src, dst, dstpitch, pal);
} }
FOREACH_BLOCK_END FOREACH_BLOCK_END
} }
void GSLocalMemory::ReadTexture4HL(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const void GSLocalMemory::ReadTexture4HL(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{ {
const uint32* pal = m_clut; const uint32* pal = m_clut;
FOREACH_BLOCK_START(8, 8, 32, PSM_PSMT4HL) FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMT4HL)
{ {
ReadAndExpandBlock4HL_32(src, dst, dstpitch, pal); ReadAndExpandBlock4HL_32(src, dst, dstpitch, pal);
} }
FOREACH_BLOCK_END FOREACH_BLOCK_END
} }
void GSLocalMemory::ReadTexture4HH(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const void GSLocalMemory::ReadTexture4HH(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{ {
const uint32* pal = m_clut; const uint32* pal = m_clut;
FOREACH_BLOCK_START(8, 8, 32, PSM_PSMT4HH) FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMT4HH)
{ {
ReadAndExpandBlock4HH_32(src, dst, dstpitch, pal); ReadAndExpandBlock4HH_32(src, dst, dstpitch, pal);
} }
FOREACH_BLOCK_END FOREACH_BLOCK_END
} }
void GSLocalMemory::ReadTexture32Z(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const void GSLocalMemory::ReadTexture32Z(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{ {
FOREACH_BLOCK_START(8, 8, 32, PSM_PSMZ32) FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMZ32)
{ {
ReadBlock32<true>(src, dst, dstpitch); ReadBlock32<true>(src, dst, dstpitch);
} }
FOREACH_BLOCK_END FOREACH_BLOCK_END
} }
void GSLocalMemory::ReadTexture24Z(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const void GSLocalMemory::ReadTexture24Z(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{ {
if(TEXA.AEM) if(TEXA.AEM)
{ {
FOREACH_BLOCK_START(8, 8, 32, PSM_PSMZ24) FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMZ24)
{ {
ReadAndExpandBlock24<true>(src, dst, dstpitch, TEXA); ReadAndExpandBlock24<true>(src, dst, dstpitch, TEXA);
} }
@ -1567,7 +1593,7 @@ void GSLocalMemory::ReadTexture24Z(const GSVector4i& r, uint8* dst, int dstpitch
} }
else else
{ {
FOREACH_BLOCK_START(8, 8, 32, PSM_PSMZ24) FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMZ24)
{ {
ReadAndExpandBlock24<false>(src, dst, dstpitch, TEXA); ReadAndExpandBlock24<false>(src, dst, dstpitch, TEXA);
} }
@ -1575,11 +1601,11 @@ void GSLocalMemory::ReadTexture24Z(const GSVector4i& r, uint8* dst, int dstpitch
} }
} }
void GSLocalMemory::ReadTexture16Z(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const void GSLocalMemory::ReadTexture16Z(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{ {
__declspec(align(16)) uint16 block[16 * 8]; __declspec(align(16)) uint16 block[16 * 8];
FOREACH_BLOCK_START(16, 8, 32, PSM_PSMZ16) FOREACH_BLOCK_START(r, 16, 8, 32, PSM_PSMZ16)
{ {
ReadBlock16<true>(src, (uint8*)block, sizeof(block) / 8); ReadBlock16<true>(src, (uint8*)block, sizeof(block) / 8);
@ -1588,11 +1614,11 @@ void GSLocalMemory::ReadTexture16Z(const GSVector4i& r, uint8* dst, int dstpitch
FOREACH_BLOCK_END FOREACH_BLOCK_END
} }
void GSLocalMemory::ReadTexture16SZ(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const void GSLocalMemory::ReadTexture16SZ(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{ {
__declspec(align(16)) uint16 block[16 * 8]; __declspec(align(16)) uint16 block[16 * 8];
FOREACH_BLOCK_START(16, 8, 32, PSM_PSMZ16S) FOREACH_BLOCK_START(r, 16, 8, 32, PSM_PSMZ16S)
{ {
ReadBlock16<true>(src, (uint8*)block, sizeof(block) / 8); ReadBlock16<true>(src, (uint8*)block, sizeof(block) / 8);
@ -1737,31 +1763,31 @@ void GSLocalMemory::ReadTexture(const GSVector4i& r, uint8* dst, int dstpitch, c
} }
/////////////////// ///////////////////
void GSLocalMemory::ReadTexture16NP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const void GSLocalMemory::ReadTexture16NP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{ {
FOREACH_BLOCK_START(16, 8, 16, PSM_PSMCT16) FOREACH_BLOCK_START(r, 16, 8, 16, PSM_PSMCT16)
{ {
ReadBlock16<true>(src, dst, dstpitch); ReadBlock16<true>(src, dst, dstpitch);
} }
FOREACH_BLOCK_END FOREACH_BLOCK_END
} }
void GSLocalMemory::ReadTexture16SNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const void GSLocalMemory::ReadTexture16SNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{ {
FOREACH_BLOCK_START(16, 8, 16, PSM_PSMCT16S) FOREACH_BLOCK_START(r, 16, 8, 16, PSM_PSMCT16S)
{ {
ReadBlock16<true>(src, dst, dstpitch); ReadBlock16<true>(src, dst, dstpitch);
} }
FOREACH_BLOCK_END FOREACH_BLOCK_END
} }
void GSLocalMemory::ReadTexture8NP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const void GSLocalMemory::ReadTexture8NP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{ {
const uint32* pal = m_clut; const uint32* pal = m_clut;
if(TEX0.CPSM == PSM_PSMCT32 || TEX0.CPSM == PSM_PSMCT24) if(TEX0.CPSM == PSM_PSMCT32 || TEX0.CPSM == PSM_PSMCT24)
{ {
FOREACH_BLOCK_START(16, 16, 32, PSM_PSMT8) FOREACH_BLOCK_START(r, 16, 16, 32, PSM_PSMT8)
{ {
ReadAndExpandBlock8_32(src, dst, dstpitch, pal); ReadAndExpandBlock8_32(src, dst, dstpitch, pal);
} }
@ -1773,7 +1799,7 @@ void GSLocalMemory::ReadTexture8NP(const GSVector4i& r, uint8* dst, int dstpitch
__declspec(align(16)) uint8 block[16 * 16]; __declspec(align(16)) uint8 block[16 * 16];
FOREACH_BLOCK_START(16, 16, 16, PSM_PSMT8) FOREACH_BLOCK_START(r, 16, 16, 16, PSM_PSMT8)
{ {
ReadBlock8<true>(src, (uint8*)block, sizeof(block) / 16); ReadBlock8<true>(src, (uint8*)block, sizeof(block) / 16);
@ -1783,13 +1809,13 @@ void GSLocalMemory::ReadTexture8NP(const GSVector4i& r, uint8* dst, int dstpitch
} }
} }
void GSLocalMemory::ReadTexture4NP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const void GSLocalMemory::ReadTexture4NP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{ {
const uint64* pal = m_clut; const uint64* pal = m_clut;
if(TEX0.CPSM == PSM_PSMCT32 || TEX0.CPSM == PSM_PSMCT24) if(TEX0.CPSM == PSM_PSMCT32 || TEX0.CPSM == PSM_PSMCT24)
{ {
FOREACH_BLOCK_START(32, 16, 32, PSM_PSMT4) FOREACH_BLOCK_START(r, 32, 16, 32, PSM_PSMT4)
{ {
ReadAndExpandBlock4_32(src, dst, dstpitch, pal); ReadAndExpandBlock4_32(src, dst, dstpitch, pal);
} }
@ -1801,7 +1827,7 @@ void GSLocalMemory::ReadTexture4NP(const GSVector4i& r, uint8* dst, int dstpitch
__declspec(align(16)) uint8 block[(32 / 2) * 16]; __declspec(align(16)) uint8 block[(32 / 2) * 16];
FOREACH_BLOCK_START(32, 16, 16, PSM_PSMT4) FOREACH_BLOCK_START(r, 32, 16, 16, PSM_PSMT4)
{ {
ReadBlock4<true>(src, (uint8*)block, sizeof(block) / 16); ReadBlock4<true>(src, (uint8*)block, sizeof(block) / 16);
@ -1811,13 +1837,13 @@ void GSLocalMemory::ReadTexture4NP(const GSVector4i& r, uint8* dst, int dstpitch
} }
} }
void GSLocalMemory::ReadTexture8HNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const void GSLocalMemory::ReadTexture8HNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{ {
const uint32* pal = m_clut; const uint32* pal = m_clut;
if(TEX0.CPSM == PSM_PSMCT32 || TEX0.CPSM == PSM_PSMCT24) if(TEX0.CPSM == PSM_PSMCT32 || TEX0.CPSM == PSM_PSMCT24)
{ {
FOREACH_BLOCK_START(8, 8, 32, PSM_PSMT8H) FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMT8H)
{ {
ReadAndExpandBlock8H_32(src, dst, dstpitch, pal); ReadAndExpandBlock8H_32(src, dst, dstpitch, pal);
} }
@ -1829,7 +1855,7 @@ void GSLocalMemory::ReadTexture8HNP(const GSVector4i& r, uint8* dst, int dstpitc
__declspec(align(16)) uint32 block[8 * 8]; __declspec(align(16)) uint32 block[8 * 8];
FOREACH_BLOCK_START(8, 8, 16, PSM_PSMT8H) FOREACH_BLOCK_START(r, 8, 8, 16, PSM_PSMT8H)
{ {
ReadBlock32<true>(src, (uint8*)block, sizeof(block) / 8); ReadBlock32<true>(src, (uint8*)block, sizeof(block) / 8);
@ -1839,13 +1865,13 @@ void GSLocalMemory::ReadTexture8HNP(const GSVector4i& r, uint8* dst, int dstpitc
} }
} }
void GSLocalMemory::ReadTexture4HLNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const void GSLocalMemory::ReadTexture4HLNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{ {
const uint32* pal = m_clut; const uint32* pal = m_clut;
if(TEX0.CPSM == PSM_PSMCT32 || TEX0.CPSM == PSM_PSMCT24) if(TEX0.CPSM == PSM_PSMCT32 || TEX0.CPSM == PSM_PSMCT24)
{ {
FOREACH_BLOCK_START(8, 8, 32, PSM_PSMT4HL) FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMT4HL)
{ {
ReadAndExpandBlock4HL_32(src, dst, dstpitch, pal); ReadAndExpandBlock4HL_32(src, dst, dstpitch, pal);
} }
@ -1857,7 +1883,7 @@ void GSLocalMemory::ReadTexture4HLNP(const GSVector4i& r, uint8* dst, int dstpit
__declspec(align(16)) uint32 block[8 * 8]; __declspec(align(16)) uint32 block[8 * 8];
FOREACH_BLOCK_START(8, 8, 16, PSM_PSMT4HL) FOREACH_BLOCK_START(r, 8, 8, 16, PSM_PSMT4HL)
{ {
ReadBlock32<true>(src, (uint8*)block, sizeof(block) / 8); ReadBlock32<true>(src, (uint8*)block, sizeof(block) / 8);
@ -1867,13 +1893,13 @@ void GSLocalMemory::ReadTexture4HLNP(const GSVector4i& r, uint8* dst, int dstpit
} }
} }
void GSLocalMemory::ReadTexture4HHNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const void GSLocalMemory::ReadTexture4HHNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{ {
const uint32* pal = m_clut; const uint32* pal = m_clut;
if(TEX0.CPSM == PSM_PSMCT32 || TEX0.CPSM == PSM_PSMCT24) if(TEX0.CPSM == PSM_PSMCT32 || TEX0.CPSM == PSM_PSMCT24)
{ {
FOREACH_BLOCK_START(8, 8, 32, PSM_PSMT4HH) FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMT4HH)
{ {
ReadAndExpandBlock4HH_32(src, dst, dstpitch, pal); ReadAndExpandBlock4HH_32(src, dst, dstpitch, pal);
} }
@ -1885,7 +1911,7 @@ void GSLocalMemory::ReadTexture4HHNP(const GSVector4i& r, uint8* dst, int dstpit
__declspec(align(16)) uint32 block[8 * 8]; __declspec(align(16)) uint32 block[8 * 8];
FOREACH_BLOCK_START(8, 8, 16, PSM_PSMT4HH) FOREACH_BLOCK_START(r, 8, 8, 16, PSM_PSMT4HH)
{ {
ReadBlock32<true>(src, (uint8*)block, sizeof(block) / 8); ReadBlock32<true>(src, (uint8*)block, sizeof(block) / 8);
@ -1895,18 +1921,18 @@ void GSLocalMemory::ReadTexture4HHNP(const GSVector4i& r, uint8* dst, int dstpit
} }
} }
void GSLocalMemory::ReadTexture16ZNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const void GSLocalMemory::ReadTexture16ZNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{ {
FOREACH_BLOCK_START(16, 8, 16, PSM_PSMZ16) FOREACH_BLOCK_START(r, 16, 8, 16, PSM_PSMZ16)
{ {
ReadBlock16<true>(src, dst, dstpitch); ReadBlock16<true>(src, dst, dstpitch);
} }
FOREACH_BLOCK_END FOREACH_BLOCK_END
} }
void GSLocalMemory::ReadTexture16SZNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const void GSLocalMemory::ReadTexture16SZNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{ {
FOREACH_BLOCK_START(16, 8, 16, PSM_PSMZ16S) FOREACH_BLOCK_START(r, 16, 8, 16, PSM_PSMZ16S)
{ {
ReadBlock16<true>(src, dst, dstpitch); ReadBlock16<true>(src, dst, dstpitch);
} }
@ -1959,45 +1985,45 @@ void GSLocalMemory::ReadTextureNP(const GSVector4i& r, uint8* dst, int dstpitch,
// 32/8 // 32/8
void GSLocalMemory::ReadTexture8P(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const void GSLocalMemory::ReadTexture8P(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{ {
FOREACH_BLOCK_START(16, 16, 8, PSM_PSMT8) FOREACH_BLOCK_START(r, 16, 16, 8, PSM_PSMT8)
{ {
ReadBlock8<true>(src, dst, dstpitch); ReadBlock8<true>(src, dst, dstpitch);
} }
FOREACH_BLOCK_END FOREACH_BLOCK_END
} }
void GSLocalMemory::ReadTexture4P(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const void GSLocalMemory::ReadTexture4P(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{ {
FOREACH_BLOCK_START(32, 16, 8, PSM_PSMT4) FOREACH_BLOCK_START(r, 32, 16, 8, PSM_PSMT4)
{ {
ReadBlock4P(src, dst, dstpitch); ReadBlock4P(src, dst, dstpitch);
} }
FOREACH_BLOCK_END FOREACH_BLOCK_END
} }
void GSLocalMemory::ReadTexture8HP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const void GSLocalMemory::ReadTexture8HP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{ {
FOREACH_BLOCK_START(8, 8, 8, PSM_PSMT8H) FOREACH_BLOCK_START(r, 8, 8, 8, PSM_PSMT8H)
{ {
ReadBlock8HP(src, dst, dstpitch); ReadBlock8HP(src, dst, dstpitch);
} }
FOREACH_BLOCK_END FOREACH_BLOCK_END
} }
void GSLocalMemory::ReadTexture4HLP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const void GSLocalMemory::ReadTexture4HLP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{ {
FOREACH_BLOCK_START(8, 8, 8, PSM_PSMT4HL) FOREACH_BLOCK_START(r, 8, 8, 8, PSM_PSMT4HL)
{ {
ReadBlock4HLP(src, dst, dstpitch); ReadBlock4HLP(src, dst, dstpitch);
} }
FOREACH_BLOCK_END FOREACH_BLOCK_END
} }
void GSLocalMemory::ReadTexture4HHP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const void GSLocalMemory::ReadTexture4HHP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{ {
FOREACH_BLOCK_START(8, 8, 8, PSM_PSMT4HH) FOREACH_BLOCK_START(r, 8, 8, 8, PSM_PSMT4HH)
{ {
ReadBlock4HHP(src, dst, dstpitch); ReadBlock4HHP(src, dst, dstpitch);
} }

View File

@ -43,7 +43,7 @@ public:
typedef uint32 (GSLocalMemory::*readTexelAddr)(uint32 addr, const GIFRegTEXA& TEXA) const; typedef uint32 (GSLocalMemory::*readTexelAddr)(uint32 addr, const GIFRegTEXA& TEXA) const;
typedef void (GSLocalMemory::*writeImage)(int& tx, int& ty, uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG); typedef void (GSLocalMemory::*writeImage)(int& tx, int& ty, uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG);
typedef void (GSLocalMemory::*readImage)(int& tx, int& ty, uint8* dst, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) const; typedef void (GSLocalMemory::*readImage)(int& tx, int& ty, uint8* dst, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) const;
typedef void (GSLocalMemory::*readTexture)(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; typedef void (GSLocalMemory::*readTexture)(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
typedef void (GSLocalMemory::*readTextureBlock)(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const; typedef void (GSLocalMemory::*readTextureBlock)(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
typedef union typedef union
@ -81,14 +81,21 @@ public:
GSClut m_clut; GSClut m_clut;
struct Offset struct BlockOffset
{ {
GSVector4i row[2048]; // 0 | 0 | 0 | 0 int row[256]; // yn (n = 0 8 16 ...)
int* col[4]; // x | x+1 | x+2 | x+3 int* col; // blockOffset*
uint32 hash; uint32 hash;
}; };
struct Offset4 struct PixelOffset
{
int row[2048]; // yn (n = 0 1 2 ...)
int* col[8]; // rowOffset*
uint32 hash;
};
struct PixelOffset4
{ {
// 16 bit offsets (m_vm16[...]) // 16 bit offsets (m_vm16[...])
@ -141,15 +148,17 @@ protected:
// //
hash_map<uint32, Offset*> m_omap; hash_map<uint32, BlockOffset*> m_bomap;
hash_map<uint32, Offset4*> m_o4map; hash_map<uint32, PixelOffset*> m_pomap;
hash_map<uint32, PixelOffset4*> m_po4map;
public: public:
GSLocalMemory(); GSLocalMemory();
virtual ~GSLocalMemory(); virtual ~GSLocalMemory();
Offset* GetOffset(uint32 bp, uint32 bw, uint32 psm); BlockOffset* GetBlockOffset(uint32 bp, uint32 bw, uint32 psm);
Offset4* GetOffset4(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF); PixelOffset* GetPixelOffset(uint32 bp, uint32 bw, uint32 psm);
PixelOffset4* GetPixelOffset4(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF);
// address // address
@ -781,19 +790,19 @@ public:
// * => 32 // * => 32
void ReadTexture32(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; void ReadTexture32(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture24(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; void ReadTexture24(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture16(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; void ReadTexture16(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture16S(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; void ReadTexture16S(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture8(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; void ReadTexture8(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture4(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; void ReadTexture4(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture8H(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; void ReadTexture8H(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture4HL(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; void ReadTexture4HL(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture4HH(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; void ReadTexture4HH(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture32Z(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; void ReadTexture32Z(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture24Z(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; void ReadTexture24Z(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture16Z(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; void ReadTexture16Z(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture16SZ(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; void ReadTexture16SZ(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA); void ReadTexture(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
@ -813,25 +822,25 @@ public:
// * => 32/16 // * => 32/16
void ReadTexture16NP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; void ReadTexture16NP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture16SNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; void ReadTexture16SNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture8NP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; void ReadTexture8NP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture4NP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; void ReadTexture4NP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture8HNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; void ReadTexture8HNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture4HLNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; void ReadTexture4HLNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture4HHNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; void ReadTexture4HHNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture16ZNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; void ReadTexture16ZNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture16SZNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; void ReadTexture16SZNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTextureNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA); void ReadTextureNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
// pal ? 8 : 32 // pal ? 8 : 32
void ReadTexture8P(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; void ReadTexture8P(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture4P(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; void ReadTexture4P(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture8HP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; void ReadTexture8HP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture4HLP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; void ReadTexture4HLP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture4HHP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; void ReadTexture4HHP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTextureBlock8P(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const; void ReadTextureBlock8P(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
void ReadTextureBlock4P(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const; void ReadTextureBlock4P(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;

View File

@ -290,8 +290,8 @@ void GSRasterizer::DrawTriangleTop(GSVertexSW* v, const GSVector4i& scissor)
GSVector4 tb = l.p.upl(v[2].p).ceil(); GSVector4 tb = l.p.upl(v[2].p).ceil();
GSVector4 tbmax = tb.maxv(fscissor.yyyy()); GSVector4 tbmax = tb.max(fscissor.yyyy());
GSVector4 tbmin = tb.minv(fscissor.wwww()); GSVector4 tbmin = tb.min(fscissor.wwww());
GSVector4i tbi = GSVector4i(tbmax.zzww(tbmin)); GSVector4i tbi = GSVector4i(tbmax.zzww(tbmin));
@ -342,8 +342,8 @@ void GSRasterizer::DrawTriangleBottom(GSVertexSW* v, const GSVector4i& scissor)
GSVector4 tb = l.p.upl(v[2].p).ceil(); GSVector4 tb = l.p.upl(v[2].p).ceil();
GSVector4 tbmax = tb.maxv(fscissor.yyyy()); GSVector4 tbmax = tb.max(fscissor.yyyy());
GSVector4 tbmin = tb.minv(fscissor.wwww()); GSVector4 tbmin = tb.min(fscissor.wwww());
GSVector4i tbi = GSVector4i(tbmax.zzww(tbmin)); GSVector4i tbi = GSVector4i(tbmax.zzww(tbmin));
@ -398,8 +398,8 @@ void GSRasterizer::DrawTriangleTopBottom(GSVertexSW* v, const GSVector4i& scisso
GSVector4 tb = v[0].p.upl(v[1].p).zwzw(v[1].p.upl(v[2].p)).ceil(); GSVector4 tb = v[0].p.upl(v[1].p).zwzw(v[1].p.upl(v[2].p)).ceil();
GSVector4 tbmax = tb.maxv(fscissor.yyyy()); GSVector4 tbmax = tb.max(fscissor.yyyy());
GSVector4 tbmin = tb.minv(fscissor.wwww()); GSVector4 tbmin = tb.min(fscissor.wwww());
GSVector4i tbi = GSVector4i(tbmax.xzyw(tbmin)); GSVector4i tbi = GSVector4i(tbmax.xzyw(tbmin));
@ -462,8 +462,8 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const
{ {
GSVector4 lr = l.p.xyxy(r).ceil(); GSVector4 lr = l.p.xyxy(r).ceil();
GSVector4 lrmax = lr.maxv(fscissor.xxxx()); GSVector4 lrmax = lr.max(fscissor.xxxx());
GSVector4 lrmin = lr.minv(fscissor.zzzz()); GSVector4 lrmin = lr.min(fscissor.zzzz());
GSVector4i lri = GSVector4i(lrmax.xxzz(lrmin)); GSVector4i lri = GSVector4i(lrmax.xxzz(lrmin));
@ -503,8 +503,8 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const
{ {
GSVector4 lr = l.p.ceil(); GSVector4 lr = l.p.ceil();
GSVector4 lrmax = lr.maxv(fscissor.xxxx()); GSVector4 lrmax = lr.max(fscissor.xxxx());
GSVector4 lrmin = lr.minv(fscissor.zzzz()); GSVector4 lrmin = lr.min(fscissor.zzzz());
GSVector4i lri = GSVector4i(lrmax.xxyy(lrmin)); GSVector4i lri = GSVector4i(lrmax.xxyy(lrmin));
@ -613,8 +613,8 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
if(orientation) if(orientation)
{ {
GSVector4 tbmax = lrtb.maxv(fscissor.yyyy()); GSVector4 tbmax = lrtb.max(fscissor.yyyy());
GSVector4 tbmin = lrtb.minv(fscissor.wwww()); GSVector4 tbmin = lrtb.min(fscissor.wwww());
GSVector4i tbi = GSVector4i(tbmax.zwzw(tbmin)); GSVector4i tbi = GSVector4i(tbmax.zwzw(tbmin));
@ -711,8 +711,8 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
} }
else else
{ {
GSVector4 lrmax = lrtb.maxv(fscissor.xxxx()); GSVector4 lrmax = lrtb.max(fscissor.xxxx());
GSVector4 lrmin = lrtb.minv(fscissor.zzzz()); GSVector4 lrmin = lrtb.min(fscissor.zzzz());
GSVector4i lri = GSVector4i(lrmax.xyxy(lrmin)); GSVector4i lri = GSVector4i(lrmax.xyxy(lrmin));

View File

@ -259,9 +259,17 @@ void GSRenderer::VSync(int field)
Flush(); Flush();
field = field ? 1 : 0; if(!m_dev->IsLost(true))
{
if(!Merge(field)) return; if(!Merge(field ? 1 : 0))
{
return;
}
}
else
{
ResetDevice();
}
// osd // osd
@ -311,11 +319,6 @@ void GSRenderer::VSync(int field)
// present // present
if(m_dev->IsLost())
{
ResetDevice();
}
m_dev->Present(m_wnd.GetClientRect().fit(m_aspectratio), m_shader); m_dev->Present(m_wnd.GetClientRect().fit(m_aspectratio), m_shader);
// snapshot // snapshot

View File

@ -127,7 +127,10 @@ protected:
{ {
// FIXME: berserk fpsm = 27 (8H) // FIXME: berserk fpsm = 27 (8H)
Draw(); if(!m_dev->IsLost())
{
Draw();
}
m_perfmon.Put(GSPerfMon::Draw, 1); m_perfmon.Put(GSPerfMon::Draw, 1);
} }

View File

@ -122,14 +122,14 @@ void GSRendererDX9::VertexKick(bool skip)
case GS_LINELIST: case GS_LINELIST:
case GS_LINESTRIP: case GS_LINESTRIP:
case GS_SPRITE: case GS_SPRITE:
pmin = v[0].p.minv(v[1].p); pmin = v[0].p.min(v[1].p);
pmax = v[0].p.maxv(v[1].p); pmax = v[0].p.max(v[1].p);
break; break;
case GS_TRIANGLELIST: case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP: case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN: case GS_TRIANGLEFAN:
pmin = v[0].p.minv(v[1].p).minv(v[2].p); pmin = v[0].p.min(v[1].p).min(v[2].p);
pmax = v[0].p.maxv(v[1].p).maxv(v[2].p); pmax = v[0].p.max(v[1].p).max(v[2].p);
break; break;
} }

View File

@ -87,51 +87,34 @@ GSTexture* GSRendererSW::GetOutput(int i)
TEX0.TBW = DISPFB.FBW; TEX0.TBW = DISPFB.FBW;
TEX0.PSM = DISPFB.PSM; TEX0.PSM = DISPFB.PSM;
GSVector4i r(0, 0, TEX0.TBW * 64, GetFrameRect(i).bottom); int w = TEX0.TBW * 64;
int h = GetFrameRect(i).bottom;
// TODO: round up bottom // TODO: round up bottom
int w = r.width(); if(m_dev->ResizeTexture(&m_texture[i], w, h))
int h = r.height();
if(m_texture[i])
{ {
if(m_texture[i]->GetWidth() != w || m_texture[i]->GetHeight() != h) // TODO
static uint8* buff = (uint8*)_aligned_malloc(1024 * 1024 * 4, 16);
static int pitch = 1024 * 4;
GSVector4i r(0, 0, w, h);
m_mem.ReadTexture(r, buff, pitch, TEX0, m_env.TEXA);
m_texture[i]->Update(r, buff, pitch);
if(s_dump)
{ {
delete m_texture[i]; if(s_save)
{
m_texture[i]->Save(format("c:\\temp1\\_%05d_f%I64d_fr%d_%05x_%d.bmp", s_n, m_perfmon.GetFrame(), i, (int)TEX0.TBP0, (int)TEX0.PSM));
}
m_texture[i] = NULL; s_n++;
} }
} }
if(!m_texture[i])
{
m_texture[i] = m_dev->CreateTexture(w, h);
if(!m_texture[i])
{
return NULL;
}
}
// TODO
static uint8* buff = (uint8*)_aligned_malloc(1024 * 1024 * 4, 16);
static int pitch = 1024 * 4;
m_mem.ReadTexture(r, buff, pitch, TEX0, m_env.TEXA);
m_texture[i]->Update(r, buff, pitch);
if(s_dump)
{
if(s_save)
{
m_texture[i]->Save(format("c:\\temp1\\_%05d_f%I64d_fr%d_%05x_%d.bmp", s_n, m_perfmon.GetFrame(), i, (int)TEX0.TBP0, (int)TEX0.PSM));
}
s_n++;
}
return m_texture[i]; return m_texture[i];
} }
@ -273,9 +256,9 @@ void GSRendererSW::GetScanlineParam(GSScanlineParam& p, GS_PRIM_CLASS primclass)
p.vm = m_mem.m_vm8; p.vm = m_mem.m_vm8;
p.fbo = m_mem.GetOffset(context->FRAME.Block(), context->FRAME.FBW, context->FRAME.PSM); p.fbo = m_mem.GetPixelOffset(context->FRAME.Block(), context->FRAME.FBW, context->FRAME.PSM);
p.zbo = m_mem.GetOffset(context->ZBUF.Block(), context->FRAME.FBW, context->ZBUF.PSM); p.zbo = m_mem.GetPixelOffset(context->ZBUF.Block(), context->FRAME.FBW, context->ZBUF.PSM);
p.fzbo = m_mem.GetOffset4(context->FRAME, context->ZBUF); p.fzbo = m_mem.GetPixelOffset4(context->FRAME, context->ZBUF);
p.sel.key = 0; p.sel.key = 0;
@ -517,14 +500,14 @@ if(!m_dump)
case GS_LINELIST: case GS_LINELIST:
case GS_LINESTRIP: case GS_LINESTRIP:
case GS_SPRITE: case GS_SPRITE:
pmin = v[0].p.minv(v[1].p); pmin = v[0].p.min(v[1].p);
pmax = v[0].p.maxv(v[1].p); pmax = v[0].p.max(v[1].p);
break; break;
case GS_TRIANGLELIST: case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP: case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN: case GS_TRIANGLEFAN:
pmin = v[0].p.minv(v[1].p).minv(v[2].p); pmin = v[0].p.min(v[1].p).min(v[2].p);
pmax = v[0].p.maxv(v[1].p).maxv(v[2].p); pmax = v[0].p.max(v[1].p).max(v[2].p);
break; break;
} }

View File

@ -108,9 +108,9 @@ __declspec(align(16)) struct GSScanlineParam
const uint32* clut; const uint32* clut;
uint32 tw; uint32 tw;
GSLocalMemory::Offset* fbo; GSLocalMemory::PixelOffset* fbo;
GSLocalMemory::Offset* zbo; GSLocalMemory::PixelOffset* zbo;
GSLocalMemory::Offset4* fzbo; GSLocalMemory::PixelOffset4* fzbo;
uint32 fm, zm; uint32 fm, zm;
}; };
@ -122,10 +122,10 @@ __declspec(align(16)) struct GSScanlineEnvironment
const uint32* clut; const uint32* clut;
uint32 tw; uint32 tw;
GSVector4i* fbr; int* fbr;
GSVector4i* zbr; int* zbr;
int** fbc; int* fbc;
int** zbc; int* zbc;
GSVector2i* fzbr; GSVector2i* fzbr;
GSVector2i* fzbc; GSVector2i* fzbc;

View File

@ -948,7 +948,7 @@ void GSState::FlushWrite()
r.left = m_env.TRXPOS.DSAX; r.left = m_env.TRXPOS.DSAX;
r.top = y; r.top = y;
r.right = r.left + m_env.TRXREG.RRW; r.right = r.left + m_env.TRXREG.RRW;
r.bottom = min(r.top + m_env.TRXREG.RRH, m_tr.x == r.left ? m_tr.y : m_tr.y + 1); r.bottom = std::min<int>(r.top + m_env.TRXREG.RRH, m_tr.x == r.left ? m_tr.y : m_tr.y + 1);
InvalidateVideoMem(m_env.BITBLTBUF, r); InvalidateVideoMem(m_env.BITBLTBUF, r);
/* /*
@ -1085,17 +1085,20 @@ void GSState::Move()
// TODO: unroll inner loops (width has special size requirement, must be multiples of 1 << n, depending on the format) // TODO: unroll inner loops (width has special size requirement, must be multiples of 1 << n, depending on the format)
GSLocalMemory::PixelOffset* RESTRICT spo = m_mem.GetPixelOffset(m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW, m_env.BITBLTBUF.SPSM);
GSLocalMemory::PixelOffset* RESTRICT dpo = m_mem.GetPixelOffset(m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, m_env.BITBLTBUF.DPSM);
if(spsm.trbpp == dpsm.trbpp && spsm.trbpp >= 16) if(spsm.trbpp == dpsm.trbpp && spsm.trbpp >= 16)
{ {
int* soffset = spsm.rowOffset[0]; int* soffset = spo->col[0];
int* doffset = dpsm.rowOffset[0]; int* doffset = dpo->col[0];
if(spsm.trbpp == 32) if(spsm.trbpp == 32)
{ {
for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc * w, dx -= xinc * w) for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc * w, dx -= xinc * w)
{ {
uint32 sbase = spsm.pa(0, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW); uint32 sbase = spo->row[sy];
uint32 dbase = dpsm.pa(0, dy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW); uint32 dbase = dpo->row[dy];
for(int x = 0; x < w; x++, sx += xinc, dx += xinc) for(int x = 0; x < w; x++, sx += xinc, dx += xinc)
{ {
@ -1107,8 +1110,8 @@ void GSState::Move()
{ {
for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc * w, dx -= xinc * w) for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc * w, dx -= xinc * w)
{ {
uint32 sbase = spsm.pa(0, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW); uint32 sbase = spo->row[sy];
uint32 dbase = dpsm.pa(0, dy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW); uint32 dbase = dpo->row[dy];
for(int x = 0; x < w; x++, sx += xinc, dx += xinc) for(int x = 0; x < w; x++, sx += xinc, dx += xinc)
{ {
@ -1120,8 +1123,8 @@ void GSState::Move()
{ {
for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc * w, dx -= xinc * w) for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc * w, dx -= xinc * w)
{ {
uint32 sbase = spsm.pa(0, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW); uint32 sbase = spo->row[sy];
uint32 dbase = dpsm.pa(0, dy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW); uint32 dbase = dpo->row[dy];
for(int x = 0; x < w; x++, sx += xinc, dx += xinc) for(int x = 0; x < w; x++, sx += xinc, dx += xinc)
{ {
@ -1134,11 +1137,11 @@ void GSState::Move()
{ {
for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc * w, dx -= xinc * w) for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc * w, dx -= xinc * w)
{ {
uint32 sbase = GSLocalMemory::PixelAddress8(0, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW); uint32 sbase = spo->row[sy];
int* soffset = spsm.rowOffset[sy & 7]; uint32 dbase = dpo->row[dy];
uint32 dbase = GSLocalMemory::PixelAddress8(0, dy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW); int* soffset = spo->col[sy & 7];
int* doffset = dpsm.rowOffset[dy & 7]; int* doffset = dpo->col[dy & 7];
for(int x = 0; x < w; x++, sx += xinc, dx += xinc) for(int x = 0; x < w; x++, sx += xinc, dx += xinc)
{ {
@ -1150,11 +1153,11 @@ void GSState::Move()
{ {
for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc * w, dx -= xinc * w) for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc * w, dx -= xinc * w)
{ {
uint32 sbase = GSLocalMemory::PixelAddress4(0, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW); uint32 sbase = spo->row[sy];
int* soffset = spsm.rowOffset[sy & 7]; uint32 dbase = dpo->row[dy];
uint32 dbase = GSLocalMemory::PixelAddress4(0, dy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW); int* soffset = spo->col[sy & 7];
int* doffset = dpsm.rowOffset[dy & 7]; int* doffset = dpo->col[dy & 7];
for(int x = 0; x < w; x++, sx += xinc, dx += xinc) for(int x = 0; x < w; x++, sx += xinc, dx += xinc)
{ {
@ -1166,11 +1169,11 @@ void GSState::Move()
{ {
for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc * w, dx -= xinc * w) for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc * w, dx -= xinc * w)
{ {
uint32 sbase = spsm.pa(0, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW); uint32 sbase = spo->row[sy];
int* soffset = spsm.rowOffset[sy & 7]; uint32 dbase = dpo->row[dy];
uint32 dbase = dpsm.pa(0, dy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW); int* soffset = spo->col[sy & 7];
int* doffset = dpsm.rowOffset[dy & 7]; int* doffset = dpo->col[dy & 7];
for(int x = 0; x < w; x++, sx += xinc, dx += xinc) for(int x = 0; x < w; x++, sx += xinc, dx += xinc)
{ {

View File

@ -82,7 +82,7 @@ bool GSTexture7::Update(const GSVector4i& r, const void* data, int pitch)
uint8* src = (uint8*)data; uint8* src = (uint8*)data;
uint8* dst = (uint8*)desc.lpSurface; uint8* dst = (uint8*)desc.lpSurface;
int bytes = min(pitch, desc.lPitch); int bytes = std::min<int>(pitch, desc.lPitch);
for(int i = 0, j = r.height(); i < j; i++, src += pitch, dst += desc.lPitch) for(int i = 0, j = r.height(); i < j; i++, src += pitch, dst += desc.lPitch)
{ {

View File

@ -60,7 +60,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
{ {
Source* s = i->first; Source* s = i->first;
if(((s->m_TEX0.u32[0] ^ TEX0.u32[0]) | ((s->m_TEX0.u32[1] ^ TEX0.u32[1]) & 3)) != 0) // TBP0 TBW PSM TW TH if(((TEX0.u32[0] ^ s->m_TEX0.u32[0]) | ((TEX0.u32[1] ^ s->m_TEX0.u32[1]) & 3)) != 0) // TBP0 TBW PSM TW TH
{ {
continue; continue;
} }
@ -70,7 +70,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
continue; continue;
} }
if(psm.pal > 0 && !GSVector4i::compare(s->m_clut, clut, psm.pal * sizeof(clut[0]))) if(psm.pal > 0 && !GSVector4i::compare(clut, s->m_clut, psm.pal * sizeof(clut[0])))
{ {
continue; continue;
} }
@ -84,13 +84,16 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
if(src == NULL) if(src == NULL)
{ {
uint32 bp = TEX0.TBP0;
uint32 psm = TEX0.PSM;
for(int type = 0; type < 2 && dst == NULL; type++) for(int type = 0; type < 2 && dst == NULL; type++)
{ {
for(list<Target*>::iterator i = m_dst[type].begin(); i != m_dst[type].end(); i++) for(list<Target*>::iterator i = m_dst[type].begin(); i != m_dst[type].end(); i++)
{ {
Target* t = *i; Target* t = *i;
if(t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(t->m_TEX0.TBP0, t->m_TEX0.PSM, TEX0.TBP0, TEX0.PSM)) if(t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM))
{ {
dst = t; dst = t;
@ -116,7 +119,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
memcpy(src->m_clut, clut, psm.pal * sizeof(clut[0])); memcpy(src->m_clut, clut, psm.pal * sizeof(clut[0]));
} }
m_src.Add(src, TEX0); m_src.Add(src, TEX0, m_renderer->m_mem);
} }
if(psm.pal > 0) if(psm.pal > 0)
@ -142,13 +145,15 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int w, int h, int type, bool used, bool fb) GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int w, int h, int type, bool used, bool fb)
{ {
uint32 bp = TEX0.TBP0;
Target* dst = NULL; Target* dst = NULL;
for(list<Target*>::iterator i = m_dst[type].begin(); i != m_dst[type].end(); i++) for(list<Target*>::iterator i = m_dst[type].begin(); i != m_dst[type].end(); i++)
{ {
Target* t = *i; Target* t = *i;
if(t->m_TEX0.TBP0 == TEX0.TBP0) if(bp == t->m_TEX0.TBP0)
{ {
m_dst[type].splice(m_dst[type].begin(), m_dst[type], i); m_dst[type].splice(m_dst[type].begin(), m_dst[type], i);
@ -168,7 +173,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int
{ {
Target* t = *i; Target* t = *i;
if(t->m_TEX0.TBP0 <= TEX0.TBP0 && TEX0.TBP0 < t->m_TEX0.TBP0 + 0x700 && (!dst || t->m_TEX0.TBP0 >= dst->m_TEX0.TBP0)) if(t->m_TEX0.TBP0 <= bp && bp < t->m_TEX0.TBP0 + 0x700 && (!dst || t->m_TEX0.TBP0 >= dst->m_TEX0.TBP0))
{ {
dst = t; dst = t;
} }
@ -229,14 +234,13 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int
void GSTextureCache::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& rect, bool target) void GSTextureCache::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& rect, bool target)
{ {
bool found = false;
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[BITBLTBUF.DPSM];
uint32 bp = BITBLTBUF.DBP; uint32 bp = BITBLTBUF.DBP;
uint32 bw = BITBLTBUF.DBW; uint32 bw = BITBLTBUF.DBW;
uint32 psm = BITBLTBUF.DPSM;
GSVector2i bs = (bp & 31) == 0 ? psm.pgs : psm.bs; const GSLocalMemory::BlockOffset* bo = m_renderer->m_mem.GetBlockOffset(bp, bw, psm);
GSVector2i bs = (bp & 31) == 0 ? GSLocalMemory::m_psm[psm].pgs : GSLocalMemory::m_psm[psm].bs;
GSVector4i r = rect.ralign<GSVector4i::Outside>(bs); GSVector4i r = rect.ralign<GSVector4i::Outside>(bs);
@ -250,20 +254,22 @@ void GSTextureCache::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const
Source* s = j->first; Source* s = j->first;
if(GSUtil::HasSharedBits(bp, BITBLTBUF.DPSM, s->m_TEX0.TBP0, s->m_TEX0.PSM)) if(GSUtil::HasSharedBits(bp, psm, s->m_TEX0.TBP0, s->m_TEX0.PSM))
{ {
m_src.RemoveAt(s); m_src.RemoveAt(s);
} }
} }
} }
bool found = false;
for(int y = r.top; y < r.bottom; y += bs.y) for(int y = r.top; y < r.bottom; y += bs.y)
{ {
uint32 base = psm.bn(0, y, bp, bw); uint32 base = bo->row[y >> 3];
for(int x = r.left; x < r.right; x += bs.x) for(int x = r.left; x < r.right; x += bs.x)
{ {
uint32 page = (base + psm.blockOffset[x >> 3]) >> 5; uint32 page = (base + bo->col[x >> 3]) >> 5;
if(page < MAX_PAGES) if(page < MAX_PAGES)
{ {
@ -275,20 +281,22 @@ void GSTextureCache::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const
Source* s = j->first; Source* s = j->first;
if(GSUtil::HasSharedBits(BITBLTBUF.DPSM, s->m_TEX0.PSM)) if(GSUtil::HasSharedBits(psm, s->m_TEX0.PSM))
{ {
bool b = bp == s->m_TEX0.TBP0;
if(!s->m_target) if(!s->m_target)
{ {
s->m_valid[page] = 0; s->m_valid[page] = 0;
s->m_complete = false; s->m_complete = false;
found = true; found = b;
} }
else else
{ {
// TODO // TODO
if(s->m_TEX0.TBP0 == bp) if(b)
{ {
m_src.RemoveAt(s); m_src.RemoveAt(s);
} }
@ -309,12 +317,12 @@ void GSTextureCache::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const
Target* t = *j; Target* t = *j;
if(GSUtil::HasSharedBits(BITBLTBUF.DBP, BITBLTBUF.DPSM, t->m_TEX0.TBP0, t->m_TEX0.PSM)) if(GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM))
{ {
if(!found && GSUtil::HasCompatibleBits(BITBLTBUF.DPSM, t->m_TEX0.PSM)) if(!found && GSUtil::HasCompatibleBits(psm, t->m_TEX0.PSM))
{ {
t->m_dirty.push_back(GSDirtyRect(r, BITBLTBUF.DPSM)); t->m_dirty.push_back(GSDirtyRect(r, psm));
t->m_TEX0.TBW = BITBLTBUF.DBW; t->m_TEX0.TBW = bw;
} }
else else
{ {
@ -324,20 +332,20 @@ void GSTextureCache::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const
} }
} }
if(GSUtil::HasSharedBits(BITBLTBUF.DPSM, t->m_TEX0.PSM) && BITBLTBUF.DBP < t->m_TEX0.TBP0) if(GSUtil::HasSharedBits(psm, t->m_TEX0.PSM) && bp < t->m_TEX0.TBP0)
{ {
uint32 rowsize = BITBLTBUF.DBW * 8192; uint32 rowsize = bw * 8192;
uint32 offset = (uint32)((t->m_TEX0.TBP0 - BITBLTBUF.DBP) * 256); uint32 offset = (uint32)((t->m_TEX0.TBP0 - bp) * 256);
if(rowsize > 0 && offset % rowsize == 0) if(rowsize > 0 && offset % rowsize == 0)
{ {
int y = GSLocalMemory::m_psm[BITBLTBUF.DPSM].pgs.y * offset / rowsize; int y = GSLocalMemory::m_psm[psm].pgs.y * offset / rowsize;
if(r.bottom > y) if(r.bottom > y)
{ {
// TODO: do not add this rect above too // TODO: do not add this rect above too
t->m_dirty.push_back(GSDirtyRect(GSVector4i(r.left, r.top - y, r.right, r.bottom - y), BITBLTBUF.DPSM)); t->m_dirty.push_back(GSDirtyRect(GSVector4i(r.left, r.top - y, r.right, r.bottom - y), psm));
t->m_TEX0.TBW = BITBLTBUF.DBW; t->m_TEX0.TBW = bw;
continue; continue;
} }
} }
@ -348,21 +356,24 @@ void GSTextureCache::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const
void GSTextureCache::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r) void GSTextureCache::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r)
{ {
uint32 bp = BITBLTBUF.SBP;
uint32 psm = BITBLTBUF.SPSM;
for(list<Target*>::iterator i = m_dst[RenderTarget].begin(); i != m_dst[RenderTarget].end(); ) for(list<Target*>::iterator i = m_dst[RenderTarget].begin(); i != m_dst[RenderTarget].end(); )
{ {
list<Target*>::iterator j = i++; list<Target*>::iterator j = i++;
Target* t = *j; Target* t = *j;
if(GSUtil::HasSharedBits(BITBLTBUF.SBP, BITBLTBUF.SPSM, t->m_TEX0.TBP0, t->m_TEX0.PSM)) if(GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM))
{ {
if(GSUtil::HasCompatibleBits(BITBLTBUF.SPSM, t->m_TEX0.PSM)) if(GSUtil::HasCompatibleBits(psm, t->m_TEX0.PSM))
{ {
t->Read(r); t->Read(r);
return; return;
} }
else if(BITBLTBUF.SPSM == PSM_PSMCT32 && (t->m_TEX0.PSM == PSM_PSMCT16 || t->m_TEX0.PSM == PSM_PSMCT16S)) else if(psm == PSM_PSMCT32 && (t->m_TEX0.PSM == PSM_PSMCT16 || t->m_TEX0.PSM == PSM_PSMCT16S))
{ {
// ffx-2 riku changing to her default (shoots some reflecting glass at the end), 16-bit rt read as 32-bit // ffx-2 riku changing to her default (shoots some reflecting glass at the end), 16-bit rt read as 32-bit
@ -518,9 +529,7 @@ void GSTextureCache::Source::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TE
m_TEX0 = TEX0; m_TEX0 = TEX0;
m_TEXA = TEXA; m_TEXA = TEXA;
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[m_TEX0.PSM]; GSVector2i bs = GSLocalMemory::m_psm[m_TEX0.PSM].bs;
GSVector2i bs = psm.bs;
GSVector4i r = rect.ralign<GSVector4i::Outside>(bs); GSVector4i r = rect.ralign<GSVector4i::Outside>(bs);
@ -529,20 +538,19 @@ void GSTextureCache::Source::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TE
m_complete = true; // lame, but better than nothing m_complete = true; // lame, but better than nothing
} }
uint32 bp = m_TEX0.TBP0; const GSLocalMemory::BlockOffset* bo = m_renderer->m_mem.GetBlockOffset(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM);
uint32 bw = m_TEX0.TBW;
bool repeating = (1 << m_TEX0.TW) > (bw << 6); // TODO: bw == 0 bool repeating = m_TEX0.IsRepeating();
uint32 blocks = 0; uint32 blocks = 0;
for(int y = r.top; y < r.bottom; y += bs.y) for(int y = r.top; y < r.bottom; y += bs.y)
{ {
uint32 base = psm.bn(0, y, bp, bw); uint32 base = bo->row[y >> 3];
for(int x = r.left; x < r.right; x += bs.x) for(int x = r.left; x < r.right; x += bs.x)
{ {
uint32 block = base + psm.blockOffset[x >> 3]; uint32 block = base + bo->col[x >> 3];
if(block < MAX_BLOCKS) if(block < MAX_BLOCKS)
{ {
@ -570,11 +578,11 @@ void GSTextureCache::Source::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TE
{ {
for(int y = r.top; y < r.bottom; y += bs.y) for(int y = r.top; y < r.bottom; y += bs.y)
{ {
uint32 base = psm.bn(0, y, bp, bw); uint32 base = bo->row[y >> 3];
for(int x = r.left; x < r.right; x += bs.x) for(int x = r.left; x < r.right; x += bs.x)
{ {
uint32 block = base + psm.blockOffset[x >> 3]; uint32 block = base + bo->col[x >> 3];
if(block < MAX_BLOCKS) if(block < MAX_BLOCKS)
{ {
@ -640,7 +648,7 @@ void GSTextureCache::Source::Flush(uint32 count)
int pitch = max(tw, psm.bs.x) * sizeof(uint32); int pitch = max(tw, psm.bs.x) * sizeof(uint32);
const GSLocalMemory& mem = m_renderer->m_mem; GSLocalMemory& mem = m_renderer->m_mem;
GSLocalMemory::readTexture rtx = psm.rtx; GSLocalMemory::readTexture rtx = psm.rtx;
@ -767,42 +775,44 @@ void GSTextureCache::Target::Update()
{ {
// do the most likely thing a direct write would do, clear it // do the most likely thing a direct write would do, clear it
m_renderer->m_dev->ClearDepth(m_texture, 0); if((m_renderer->m_game.flags & CRC::ZWriteMustNotClear) == 0)
{
m_renderer->m_dev->ClearDepth(m_texture, 0);
}
} }
} }
// GSTextureCache::SourceMap // GSTextureCache::SourceMap
void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0) void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0, GSLocalMemory& mem)
{ {
m_surfaces[s] = true; m_surfaces[s] = true;
int tw = 1 << TEX0.TW;
int th = 1 << TEX0.TH;
uint32 bp = TEX0.TBP0;
uint32 bw = TEX0.TBW;
if(s->m_target) if(s->m_target)
{ {
// TODO // TODO
m_map[bp >> 5][s] = true; m_map[TEX0.TBP0 >> 5][s] = true;
return; return;
} }
const GSLocalMemory::BlockOffset* bo = mem.GetBlockOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM]; const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
GSVector2i bs = (bp & 31) ? psm.pgs : psm.bs; GSVector2i bs = (TEX0.TBP0 & 31) ? psm.pgs : psm.bs;
int tw = 1 << TEX0.TW;
int th = 1 << TEX0.TH;
for(int y = 0; y < th; y += bs.y) for(int y = 0; y < th; y += bs.y)
{ {
uint32 base = psm.bn(0, y, bp, bw); uint32 base = bo->row[y >> 3];
for(int x = 0; x < tw; x += bs.x) for(int x = 0; x < tw; x += bs.x)
{ {
uint32 page = (base + psm.blockOffset[x >> 3]) >> 5; uint32 page = (base + bo->col[x >> 3]) >> 5;
if(page < MAX_PAGES) if(page < MAX_PAGES)
{ {

View File

@ -100,7 +100,7 @@ protected:
SourceMap() : m_used(false) {memset(m_pages, 0, sizeof(m_pages));} SourceMap() : m_used(false) {memset(m_pages, 0, sizeof(m_pages));}
void Add(Source* s, const GIFRegTEX0& TEX0); void Add(Source* s, const GIFRegTEX0& TEX0, GSLocalMemory& mem);
void RemoveAll(); void RemoveAll();
void RemoveAt(Source* s); void RemoveAt(Source* s);

View File

@ -45,7 +45,7 @@ const GSTextureCacheSW::GSTexture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TE
{ {
GSTexture* t2 = i->first; GSTexture* t2 = i->first;
if(((t2->m_TEX0.u32[0] ^ TEX0.u32[0]) | ((t2->m_TEX0.u32[1] ^ TEX0.u32[1]) & 3)) != 0) // TBP0 TBW PSM TW TH if(((TEX0.u32[0] ^ t2->m_TEX0.u32[0]) | ((TEX0.u32[1] ^ t2->m_TEX0.u32[1]) & 3)) != 0) // TBP0 TBW PSM TW TH
{ {
continue; continue;
} }
@ -68,21 +68,20 @@ const GSTextureCacheSW::GSTexture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TE
m_textures[t] = true; m_textures[t] = true;
const GSLocalMemory::BlockOffset* bo = m_state->m_mem.GetBlockOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
GSVector2i bs = (TEX0.TBP0 & 31) == 0 ? psm.pgs : psm.bs;
int tw = 1 << TEX0.TW; int tw = 1 << TEX0.TW;
int th = 1 << TEX0.TH; int th = 1 << TEX0.TH;
uint32 bp = TEX0.TBP0; for(int y = 0; y < th; y += bs.y)
uint32 bw = TEX0.TBW;
GSVector2i s = (bp & 31) == 0 ? psm.pgs : psm.bs;
for(int y = 0; y < th; y += s.y)
{ {
uint32 base = psm.bn(0, y, bp, bw); uint32 base = bo->row[y >> 3];
for(int x = 0; x < tw; x += s.x) for(int x = 0; x < tw; x += bs.x)
{ {
uint32 page = (base + psm.blockOffset[x >> 3]) >> 5; uint32 page = (base + bo->col[x >> 3]) >> 5;
if(page < MAX_PAGES) if(page < MAX_PAGES)
{ {
@ -168,22 +167,23 @@ void GSTextureCacheSW::IncAge()
void GSTextureCacheSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& rect) void GSTextureCacheSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& rect)
{ {
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[BITBLTBUF.DPSM];
uint32 bp = BITBLTBUF.DBP; uint32 bp = BITBLTBUF.DBP;
uint32 bw = BITBLTBUF.DBW; uint32 bw = BITBLTBUF.DBW;
uint32 psm = BITBLTBUF.DPSM;
GSVector2i s = (bp & 31) == 0 ? psm.pgs : psm.bs; const GSLocalMemory::BlockOffset* bo = m_state->m_mem.GetBlockOffset(bp, bw, psm);
GSVector4i r = rect.ralign<GSVector4i::Outside>(s); GSVector2i bs = (bp & 31) == 0 ? GSLocalMemory::m_psm[psm].pgs : GSLocalMemory::m_psm[psm].bs;
for(int y = r.top; y < r.bottom; y += s.y) GSVector4i r = rect.ralign<GSVector4i::Outside>(bs);
for(int y = r.top; y < r.bottom; y += bs.y)
{ {
uint32 base = psm.bn(0, y, bp, bw); uint32 base = bo->row[y >> 3];
for(int x = r.left; x < r.right; x += s.x) for(int x = r.left; x < r.right; x += bs.x)
{ {
uint32 page = (base + psm.blockOffset[x >> 3]) >> 5; uint32 page = (base + bo->col[x >> 3]) >> 5;
if(page < MAX_PAGES) if(page < MAX_PAGES)
{ {
@ -193,7 +193,7 @@ void GSTextureCacheSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, cons
{ {
GSTexture* t = i->first; GSTexture* t = i->first;
if(GSUtil::HasSharedBits(BITBLTBUF.DPSM, t->m_TEX0.PSM)) if(GSUtil::HasSharedBits(psm, t->m_TEX0.PSM))
{ {
t->m_valid[page] = 0; t->m_valid[page] = 0;
t->m_complete = false; t->m_complete = false;
@ -236,10 +236,17 @@ bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEX
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM]; const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
GSVector2i s = psm.bs; GSVector2i bs = psm.bs;
int tw = max(1 << TEX0.TW, s.x); int tw = max(1 << TEX0.TW, bs.x);
int th = max(1 << TEX0.TH, s.y); int th = max(1 << TEX0.TH, bs.y);
GSVector4i r = rect.ralign<GSVector4i::Outside>(bs);
if(r.eq(GSVector4i(0, 0, tw, th)))
{
m_complete = true; // lame, but better than nothing
}
if(m_buff == NULL) if(m_buff == NULL)
{ {
@ -250,25 +257,17 @@ bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEX
return false; return false;
} }
m_tw = max(psm.pal > 0 ? 5 : 3, TEX0.TW); // makes one row 32 bytes at least, matches the smallest block size that is allocated above for m_buff m_tw = std::max<int>(psm.pal > 0 ? 5 : 3, TEX0.TW); // makes one row 32 bytes at least, matches the smallest block size that is allocated above for m_buff
} }
GSVector4i r = rect.ralign<GSVector4i::Outside>(s);
if(r.eq(GSVector4i(0, 0, tw, th)))
{
m_complete = true; // lame, but better than nothing
}
uint32 bp = TEX0.TBP0;
uint32 bw = TEX0.TBW;
bool repeating = tw > (bw << 6); // TODO: bw == 0
uint32 blocks = 0;
GSLocalMemory& mem = m_state->m_mem; GSLocalMemory& mem = m_state->m_mem;
const GSLocalMemory::BlockOffset* bo = mem.GetBlockOffset(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM);
bool repeating = m_TEX0.IsRepeating();
uint32 blocks = 0;
GSLocalMemory::readTextureBlock rtxb = psm.rtxbP; GSLocalMemory::readTextureBlock rtxb = psm.rtxbP;
int shift = psm.pal == 0 ? 2 : 0; int shift = psm.pal == 0 ? 2 : 0;
@ -277,13 +276,13 @@ bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEX
uint8* dst = (uint8*)m_buff + pitch * r.top; uint8* dst = (uint8*)m_buff + pitch * r.top;
for(int y = r.top, o = pitch * s.y; y < r.bottom; y += s.y, dst += o) for(int y = r.top, o = pitch * bs.y; y < r.bottom; y += bs.y, dst += o)
{ {
uint32 base = psm.bn(0, y, bp, bw); uint32 base = bo->row[y >> 3];
for(int x = r.left; x < r.right; x += s.x) for(int x = r.left; x < r.right; x += bs.x)
{ {
uint32 block = base + psm.blockOffset[x >> 3]; uint32 block = base + bo->col[x >> 3];
if(block < MAX_BLOCKS) if(block < MAX_BLOCKS)
{ {
@ -309,13 +308,13 @@ bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEX
{ {
if(repeating) if(repeating)
{ {
for(int y = r.top; y < r.bottom; y += s.y) for(int y = r.top; y < r.bottom; y += bs.y)
{ {
uint32 base = psm.bn(0, y, bp, bw); uint32 base = bo->row[y >> 3];
for(int x = r.left; x < r.right; x += s.x) for(int x = r.left; x < r.right; x += bs.x)
{ {
uint32 block = base + psm.blockOffset[x >> 3]; uint32 block = base + bo->col[x >> 3];
if(block < MAX_BLOCKS) if(block < MAX_BLOCKS)
{ {
@ -328,7 +327,7 @@ bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEX
} }
} }
m_state->m_perfmon.Put(GSPerfMon::Unswizzle, s.x * s.y * blocks << shift); m_state->m_perfmon.Put(GSPerfMon::Unswizzle, bs.x * bs.y * blocks << shift);
} }
return true; return true;

View File

@ -28,8 +28,8 @@
static struct GSUtilMaps static struct GSUtilMaps
{ {
uint8 PrimClassField[8]; uint8 PrimClassField[8];
bool CompatibleBitsField[64][64]; uint32 CompatibleBitsField[64][2];
bool SharedBitsField[64][64]; uint32 SharedBitsField[64][2];
struct GSUtilMaps() struct GSUtilMaps()
{ {
@ -44,31 +44,36 @@ static struct GSUtilMaps
memset(CompatibleBitsField, 0, sizeof(CompatibleBitsField)); memset(CompatibleBitsField, 0, sizeof(CompatibleBitsField));
CompatibleBitsField[PSM_PSMCT32][PSM_PSMCT24] = true; for(int i = 0; i < 64; i++)
CompatibleBitsField[PSM_PSMCT24][PSM_PSMCT32] = true; {
CompatibleBitsField[PSM_PSMCT16][PSM_PSMCT16S] = true; CompatibleBitsField[i][i >> 5] |= 1 << (i & 0x1f);
CompatibleBitsField[PSM_PSMCT16S][PSM_PSMCT16] = true; }
CompatibleBitsField[PSM_PSMZ32][PSM_PSMZ24] = true;
CompatibleBitsField[PSM_PSMZ24][PSM_PSMZ32] = true;
CompatibleBitsField[PSM_PSMZ16][PSM_PSMZ16S] = true;
CompatibleBitsField[PSM_PSMZ16S][PSM_PSMZ16] = true;
memset(SharedBitsField, 1, sizeof(SharedBitsField)); CompatibleBitsField[PSM_PSMCT32][PSM_PSMCT24 >> 5] |= 1 << (PSM_PSMCT24 & 0x1f);
CompatibleBitsField[PSM_PSMCT24][PSM_PSMCT32 >> 5] |= 1 << (PSM_PSMCT32 & 0x1f);
CompatibleBitsField[PSM_PSMCT16][PSM_PSMCT16S >> 5] |= 1 << (PSM_PSMCT16S & 0x1f);
CompatibleBitsField[PSM_PSMCT16S][PSM_PSMCT16 >> 5] |= 1 << (PSM_PSMCT16 & 0x1f);
CompatibleBitsField[PSM_PSMZ32][PSM_PSMZ24 >> 5] |= 1 << (PSM_PSMZ24 & 0x1f);
CompatibleBitsField[PSM_PSMZ24][PSM_PSMZ32 >> 5] |= 1 << (PSM_PSMZ32 & 0x1f);
CompatibleBitsField[PSM_PSMZ16][PSM_PSMZ16S >> 5] |= 1 << (PSM_PSMZ16S & 0x1f);
CompatibleBitsField[PSM_PSMZ16S][PSM_PSMZ16 >> 5] |= 1 << (PSM_PSMZ16 & 0x1f);
SharedBitsField[PSM_PSMCT24][PSM_PSMT8H] = false; memset(SharedBitsField, 0, sizeof(SharedBitsField));
SharedBitsField[PSM_PSMCT24][PSM_PSMT4HL] = false;
SharedBitsField[PSM_PSMCT24][PSM_PSMT4HH] = false; SharedBitsField[PSM_PSMCT24][PSM_PSMT8H >> 5] |= 1 << (PSM_PSMT8H & 0x1f);
SharedBitsField[PSM_PSMZ24][PSM_PSMT8H] = false; SharedBitsField[PSM_PSMCT24][PSM_PSMT4HL >> 5] |= 1 << (PSM_PSMT4HL & 0x1f);
SharedBitsField[PSM_PSMZ24][PSM_PSMT4HL] = false; SharedBitsField[PSM_PSMCT24][PSM_PSMT4HH >> 5] |= 1 << (PSM_PSMT4HH & 0x1f);
SharedBitsField[PSM_PSMZ24][PSM_PSMT4HH] = false; SharedBitsField[PSM_PSMZ24][PSM_PSMT8H >> 5] |= 1 << (PSM_PSMT8H & 0x1f);
SharedBitsField[PSM_PSMT8H][PSM_PSMCT24] = false; SharedBitsField[PSM_PSMZ24][PSM_PSMT4HL >> 5] |= 1 << (PSM_PSMT4HL & 0x1f);
SharedBitsField[PSM_PSMT8H][PSM_PSMZ24] = false; SharedBitsField[PSM_PSMZ24][PSM_PSMT4HH >> 5] |= 1 << (PSM_PSMT4HH & 0x1f);
SharedBitsField[PSM_PSMT4HL][PSM_PSMCT24] = false; SharedBitsField[PSM_PSMT8H][PSM_PSMCT24 >> 5] |= 1 << (PSM_PSMCT24 & 0x1f);
SharedBitsField[PSM_PSMT4HL][PSM_PSMZ24] = false; SharedBitsField[PSM_PSMT8H][PSM_PSMZ24 >> 5] |= 1 << (PSM_PSMZ24 & 0x1f);
SharedBitsField[PSM_PSMT4HL][PSM_PSMT4HH] = false; SharedBitsField[PSM_PSMT4HL][PSM_PSMCT24 >> 5] |= 1 << (PSM_PSMCT24 & 0x1f);
SharedBitsField[PSM_PSMT4HH][PSM_PSMCT24] = false; SharedBitsField[PSM_PSMT4HL][PSM_PSMZ24 >> 5] |= 1 << (PSM_PSMZ24 & 0x1f);
SharedBitsField[PSM_PSMT4HH][PSM_PSMZ24] = false; SharedBitsField[PSM_PSMT4HL][PSM_PSMT4HH >> 5] |= 1 << (PSM_PSMT4HH & 0x1f);
SharedBitsField[PSM_PSMT4HH][PSM_PSMT4HL] = false; SharedBitsField[PSM_PSMT4HH][PSM_PSMCT24 >> 5] |= 1 << (PSM_PSMCT24 & 0x1f);
SharedBitsField[PSM_PSMT4HH][PSM_PSMZ24 >> 5] |= 1 << (PSM_PSMZ24 & 0x1f);
SharedBitsField[PSM_PSMT4HH][PSM_PSMT4HL >> 5] |= 1 << (PSM_PSMT4HL & 0x1f);
} }
} s_maps; } s_maps;
@ -80,21 +85,17 @@ GS_PRIM_CLASS GSUtil::GetPrimClass(uint32 prim)
bool GSUtil::HasSharedBits(uint32 spsm, uint32 dpsm) bool GSUtil::HasSharedBits(uint32 spsm, uint32 dpsm)
{ {
return s_maps.SharedBitsField[spsm][dpsm]; return (s_maps.SharedBitsField[dpsm][spsm >> 5] & (1 << (spsm & 0x1f))) == 0;
} }
bool GSUtil::HasSharedBits(uint32 sbp, uint32 spsm, uint32 dbp, uint32 dpsm) bool GSUtil::HasSharedBits(uint32 sbp, uint32 spsm, uint32 dbp, uint32 dpsm)
{ {
if(sbp != dbp) return false; return ((sbp ^ dbp) | (s_maps.SharedBitsField[dpsm][spsm >> 5] & (1 << (spsm & 0x1f)))) == 0;
return HasSharedBits(spsm, dpsm);
} }
bool GSUtil::HasCompatibleBits(uint32 spsm, uint32 dpsm) bool GSUtil::HasCompatibleBits(uint32 spsm, uint32 dpsm)
{ {
if(spsm == dpsm) return true; return (s_maps.CompatibleBitsField[spsm][dpsm >> 5] & (1 << (dpsm & 0x1f))) != 0;
return s_maps.CompatibleBitsField[spsm][dpsm];
} }
bool GSUtil::CheckDirectX() bool GSUtil::CheckDirectX()

View File

@ -1032,7 +1032,7 @@ public:
bool allfalse() const bool allfalse() const
{ {
#if _M_SSE >= 0x401 #if _M_SSE >= 0x401
return _mm_testz_si128(m, m); return _mm_testz_si128(m, m) != 0;
#else #else
return _mm_movemask_epi8(m) == 0; return _mm_movemask_epi8(m) == 0;
#endif #endif
@ -2485,15 +2485,15 @@ public:
GSVector4 clamp(const float scale = 255) const GSVector4 clamp(const float scale = 255) const
{ {
return minv(GSVector4(scale)); return min(GSVector4(scale));
} }
GSVector4 minv(const GSVector4& a) const GSVector4 min(const GSVector4& a) const
{ {
return GSVector4(_mm_min_ps(m, a)); return GSVector4(_mm_min_ps(m, a));
} }
GSVector4 maxv(const GSVector4& a) const GSVector4 max(const GSVector4& a) const
{ {
return GSVector4(_mm_max_ps(m, a)); return GSVector4(_mm_max_ps(m, a));
} }
@ -2550,7 +2550,7 @@ public:
{ {
#if _M_SSE >= 0x401 #if _M_SSE >= 0x401
__m128i a = _mm_castps_si128(m); __m128i a = _mm_castps_si128(m);
return _mm_testz_si128(a, a); return _mm_testz_si128(a, a) != 0;
#else #else
return _mm_movemask_ps(m) == 0; return _mm_movemask_ps(m) == 0;
#endif #endif

View File

@ -1371,6 +1371,14 @@
AssemblerOutput="4" AssemblerOutput="4"
/> />
</FileConfiguration> </FileConfiguration>
<FileConfiguration
Name="Debug SSE4|Win32"
>
<Tool
Name="VCCLCompilerTool"
AssemblerOutput="4"
/>
</FileConfiguration>
<FileConfiguration <FileConfiguration
Name="Release SSE4|Win32" Name="Release SSE4|Win32"
> >

View File

@ -190,3 +190,6 @@ typedef signed long long int64;
#include <smmintrin.h> #include <smmintrin.h>
#endif #endif
#undef min
#undef max