GSdx: small optimizations and an attempted fix for the DX9 fullscreen alt+tab crash

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1463 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2009-07-04 15:14:04 +00:00
parent 2a5a32d359
commit 4c76909afe
38 changed files with 597 additions and 497 deletions

View File

@ -113,7 +113,17 @@ void GPURenderer::VSync()
Flush();
if(!Merge()) return;
if(!m_dev->IsLost(true))
{
if(!Merge())
{
return;
}
}
else
{
ResetDevice();
}
// osd
@ -148,11 +158,6 @@ void GPURenderer::VSync()
SetWindowText(m_hWnd, s.c_str());
}
if(m_dev->IsLost())
{
ResetDevice();
}
GSVector4i r;
GetClientRect(m_hWnd, r);

View File

@ -50,36 +50,16 @@ GSTexture* GPURendererSW::GetOutput()
r.right <<= m_scale.x;
r.bottom <<= m_scale.y;
// TODO
static uint32* buff = (uint32*)_aligned_malloc(m_mem.GetWidth() * m_mem.GetHeight() * sizeof(uint32), 16);
m_mem.ReadFrame32(r, buff, !!m_env.STATUS.ISRGB24);
int w = r.width();
int h = r.height();
if(m_texture)
if(m_dev->ResizeTexture(&m_texture, r.width(), r.height()))
{
if(m_texture->GetWidth() != w || m_texture->GetHeight() != h)
{
delete m_texture;
// TODO
static uint32* buff = (uint32*)_aligned_malloc(m_mem.GetWidth() * m_mem.GetHeight() * sizeof(uint32), 16);
m_texture = NULL;
}
m_mem.ReadFrame32(r, buff, !!m_env.STATUS.ISRGB24);
m_texture->Update(r.rsize(), buff, m_mem.GetWidth() * sizeof(uint32));
}
if(!m_texture)
{
m_texture = m_dev->CreateTexture(w, h);
if(!m_texture)
{
return NULL;
}
}
m_texture->Update(r.rsize(), buff, m_mem.GetWidth() * sizeof(uint32));
return m_texture;
}
@ -164,8 +144,8 @@ void GPURendererSW::Draw()
{
GSVector4 p = m_vertices[i].p;
tl = tl.minv(p);
br = br.maxv(p);
tl = tl.min(p);
br = br.max(p);
}
GSVector4i r = GSVector4i(tl.xyxy(br)).rintersect(data.scissor);

View File

@ -801,7 +801,9 @@ union
uint64 _PAD4:30;
};
};
REG_END
REG_END2
// Returns true when (1 << TW) is greater than (TBW << 6), i.e. TBW * 64.
// NOTE(review): presumably TW is the log2 texture width and TBW the buffer width in
// units of 64 pixels, so this reports a texture wider than its buffer - confirm
// against the GS register documentation.
__forceinline bool IsRepeating() {return ((uint32)1 << TW) > (TBW << 6);}
REG_END2
REG64_(GIFReg, TEX1)
uint32 LCM:1;

View File

@ -89,6 +89,7 @@ CRC::Game CRC::m_games[] =
{0x7D8F539A, SoTC, EU, 0},
{0x3122B508, OnePieceGrandAdventure, US, 0},
{0x8DF14A24, OnePieceGrandAdventure, Unknown, 0},
{0x5D02CC5B, OnePieceGrandBattle, Unknown, 0},
{0x6F8545DB, ICO, US, 0},
{0xB01A4C95, ICO, JP, 0},
{0x5C991F4E, ICO, Unknown, 0},
@ -143,7 +144,9 @@ CRC::Game CRC::m_games[] =
{0x23A97857, StarOcean3, JPUNDUB, 0},
{0xCC96CE93, ValkyrieProfile2, US, 0},
{0x774DE8E2, ValkyrieProfile2, JP, 0},
{0x47B9B2FD, RadiataStories, US, 0},
{0x04CCB600, ValkyrieProfile2, EU, 0},
{0x47B9B2FD, RadiataStories, US, 0},
{0xE8FCF8EC, SMTNocturne, US, ZWriteMustNotClear}, // saves/reloads z buffer around shadow drawing
};
hash_map<uint32, CRC::Game*> CRC::m_map;

View File

@ -48,6 +48,7 @@ public:
BullyCC,
SoTC,
OnePieceGrandAdventure,
OnePieceGrandBattle,
ICO,
GT4,
WildArms5,
@ -74,6 +75,7 @@ public:
StarOcean3,
ValkyrieProfile2,
RadiataStories,
SMTNocturne,
TitleCount,
};
@ -95,6 +97,7 @@ public:
enum Flags
{
PointListPalette = 1,
ZWriteMustNotClear = 2,
};
struct Game

View File

@ -44,7 +44,7 @@ bool GSDevice::Create(GSWnd* wnd, bool vsync)
return true;
}
bool GSDevice::Reset(int w, int h, bool fs)
bool GSDevice::Reset(int w, int h, int mode)
{
for(list<GSTexture*>::iterator i = m_pool.begin(); i != m_pool.end(); i++)
{
@ -74,13 +74,15 @@ void GSDevice::Present(const GSVector4i& r, int shader)
{
GSVector4i cr = m_wnd->GetClientRect();
// Skip Presentation if the surface is invisible (minimized or hidden); prevents DX null swapchain crashes.
if(cr.width() == 0 || cr.height() == 0)
return;
int w = std::max(cr.width(), 1);
int h = std::max(cr.height(), 1);
if(m_backbuffer->GetWidth() != cr.width() || m_backbuffer->GetHeight() != cr.height())
if(!m_backbuffer || m_backbuffer->GetWidth() != w || m_backbuffer->GetHeight() != h)
{
Reset(cr.width(), cr.height(), false);
if(!Reset(w, h, DontCare))
{
return;
}
}
ClearRenderTarget(m_backbuffer, 0);
@ -225,4 +227,22 @@ void GSDevice::Interlace(const GSVector2i& ds, int field, int mode, float yoffse
{
m_current = m_merge;
}
}
}
// Makes sure *t refers to a texture that is exactly w x h.
//
// t    - address of the texture pointer to check/replace; must not be NULL.
// w, h - requested texture dimensions.
//
// An existing texture of the requested size is left untouched. Otherwise the
// current texture (if any) is deleted and replaced with the result of
// CreateTexture(w, h). Returns true when *t is non-NULL on exit.
bool GSDevice::ResizeTexture(GSTexture** t, int w, int h)
{
	// A NULL slot is flagged through ASSERT and reported as failure.
	if(t == NULL)
	{
		ASSERT(0);

		return false;
	}

	GSTexture* cur = *t;

	// Nothing to do when the current texture already matches the requested size.
	if(cur != NULL && cur->GetWidth() == w && cur->GetHeight() == h)
	{
		return true;
	}

	// Missing or wrong-sized: release the old texture and store the newly
	// created one; the result is written back even if it is NULL.
	delete cur;

	cur = CreateTexture(w, h);

	*t = cur;

	return cur != NULL;
}

View File

@ -73,10 +73,11 @@ public:
void Recycle(GSTexture* t);
virtual bool Create(GSWnd* wnd, bool vsync);
virtual bool Reset(int w, int h, bool fs);
enum {Windowed, Fullscreen, DontCare};
virtual bool IsLost() {return false;}
virtual bool Create(GSWnd* wnd, bool vsync);
virtual bool Reset(int w, int h, int mode);
virtual bool IsLost(bool update = false) {return false;}
virtual void Present(const GSVector4i& r, int shader);
virtual void Flip() {}
@ -105,6 +106,8 @@ public:
void Merge(GSTexture* st[2], GSVector4* sr, GSVector4* dr, const GSVector2i& fs, bool slbg, bool mmod, const GSVector4& c);
void Interlace(const GSVector2i& ds, int field, int mode, float yoffset);
bool ResizeTexture(GSTexture** t, int w, int h);
template<class T> void PrepareShaderMacro(vector<T>& dst, const T* src, const char* model)
{
dst.clear();

View File

@ -207,16 +207,16 @@ bool GSDevice10::Create(GSWnd* wnd, bool vsync)
//
Reset(1, 1, false);
Reset(1, 1, Windowed);
//
return true;
}
bool GSDevice10::Reset(int w, int h, bool fs)
bool GSDevice10::Reset(int w, int h, int mode)
{
if(!__super::Reset(w, h, fs))
if(!__super::Reset(w, h, mode))
return false;
DXGI_SWAP_CHAIN_DESC scd;
@ -504,7 +504,7 @@ void GSDevice10::IASetVertexBuffer(const void* vertices, size_t stride, size_t c
m_vertices.vb = NULL;
m_vertices.start = 0;
m_vertices.count = 0;
m_vertices.limit = max(count * 3 / 2, 10000);
m_vertices.limit = std::max<int>(count * 3 / 2, 10000);
}
if(m_vertices.vb == NULL)

View File

@ -96,8 +96,7 @@ public:
virtual ~GSDevice10();
bool Create(GSWnd* wnd, bool vsync);
bool Reset(int w, int h, bool fs);
bool Reset(int w, int h, int mode);
void Flip();
void BeginScene();

View File

@ -244,16 +244,16 @@ bool GSDevice11::Create(GSWnd* wnd, bool vsync)
//
Reset(1, 1, false);
Reset(1, 1, Windowed);
//
return true;
}
bool GSDevice11::Reset(int w, int h, bool fs)
bool GSDevice11::Reset(int w, int h, int mode)
{
if(!__super::Reset(w, h, fs))
if(!__super::Reset(w, h, mode))
return false;
DXGI_SWAP_CHAIN_DESC scd;
@ -541,7 +541,7 @@ void GSDevice11::IASetVertexBuffer(const void* vertices, size_t stride, size_t c
m_vertices.vb = NULL;
m_vertices.start = 0;
m_vertices.count = 0;
m_vertices.limit = max(count * 3 / 2, 10000);
m_vertices.limit = std::max<int>(count * 3 / 2, 10000);
}
if(m_vertices.vb == NULL)

View File

@ -99,8 +99,7 @@ public:
virtual ~GSDevice11();
bool Create(GSWnd* wnd, bool vsync);
bool Reset(int w, int h, bool fs);
bool Reset(int w, int h, int mode);
void Flip();
void BeginScene();

View File

@ -25,6 +25,7 @@
#include "GSDevice7.h"
GSDevice7::GSDevice7()
: m_lost(false)
{
}
@ -75,14 +76,14 @@ bool GSDevice7::Create(GSWnd* wnd, bool vsync)
return false;
}
Reset(1, 1, false);
Reset(1, 1, Windowed);
return true;
}
bool GSDevice7::Reset(int w, int h, bool fs)
bool GSDevice7::Reset(int w, int h, int mode)
{
if(!__super::Reset(w, h, fs))
if(!__super::Reset(w, h, mode))
return false;
DDSURFACEDESC2 desc;
@ -130,6 +131,8 @@ bool GSDevice7::Reset(int w, int h, bool fs)
}
}
m_lost = false;
return true;
}
@ -139,9 +142,15 @@ void GSDevice7::Present(const GSVector4i& r, int shader)
GSVector4i cr = m_wnd->GetClientRect();
if(m_backbuffer->GetWidth() != cr.width() || m_backbuffer->GetHeight() != cr.height())
int w = std::max(cr.width(), 1);
int h = std::max(cr.height(), 1);
if(!m_backbuffer || m_backbuffer->GetWidth() != w || m_backbuffer->GetHeight() != h)
{
Reset(cr.width(), cr.height(), false);
if(!Reset(w, h, DontCare))
{
return;
}
}
CComPtr<IDirectDrawSurface7> backbuffer = *(GSTexture7*)m_backbuffer;
@ -157,7 +166,10 @@ void GSDevice7::Present(const GSVector4i& r, int shader)
GSVector4i r2 = r;
hr = backbuffer->Blt(r2, *(GSTexture7*)m_merge, NULL, DDBLT_WAIT, NULL);
if(m_current)
{
hr = backbuffer->Blt(r2, *(GSTexture7*)m_current, NULL, DDBLT_WAIT, NULL);
}
// if ClearRenderTarget was implemented the parent class could handle these tasks until this point
@ -174,6 +186,8 @@ void GSDevice7::Present(const GSVector4i& r, int shader)
if(hr == DDERR_SURFACELOST)
{
m_lost = true;
// TODO
HRESULT hr = m_dd->TestCooperativeLevel();

View File

@ -29,6 +29,7 @@ class GSDevice7 : public GSDevice
private:
CComPtr<IDirectDraw7> m_dd;
CComPtr<IDirectDrawSurface7> m_primary;
bool m_lost;
GSTexture* Create(int type, int w, int h, int format);
@ -40,7 +41,7 @@ public:
virtual ~GSDevice7();
bool Create(GSWnd* wnd, bool vsync);
bool Reset(int w, int h, bool fs);
bool Reset(int w, int h, int mode);
bool IsLost(bool update) {return m_lost;}
void Present(const GSVector4i& r, int shader);
};

View File

@ -42,6 +42,7 @@ GSDevice9::GSDevice9()
, m_bf(0xffffffff)
, m_rtv(NULL)
, m_dsv(NULL)
, m_lost(false)
{
memset(&m_pp, 0, sizeof(m_pp));
memset(&m_ddcaps, 0, sizeof(m_ddcaps));
@ -105,9 +106,7 @@ bool GSDevice9::Create(GSWnd* wnd, bool vsync)
m_d3d->GetDeviceCaps(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, &m_d3dcaps);
bool fs = theApp.GetConfig("ModeWidth", 0) > 0;
if(!Reset(1, 1, fs)) return false;
if(!Reset(1, 1, theApp.GetConfig("ModeWidth", 0) > 0 ? Fullscreen : Windowed)) return false;
m_dev->Clear(0, NULL, D3DCLEAR_TARGET, 0, 1.0f, 0);
@ -188,35 +187,48 @@ bool GSDevice9::Create(GSWnd* wnd, bool vsync)
return true;
}
bool GSDevice9::Reset(int w, int h, bool fs)
bool GSDevice9::Reset(int w, int h, int mode)
{
if(!__super::Reset(w, h, fs))
if(!__super::Reset(w, h, mode))
return false;
HRESULT hr;
if(!m_d3d) return false;
if(m_swapchain && !fs && m_pp.Windowed)
if(mode == DontCare)
{
m_swapchain = NULL;
mode = m_pp.Windowed ? Windowed : Fullscreen;
}
m_pp.BackBufferWidth = w;
m_pp.BackBufferHeight = h;
if(!m_lost)
{
if(m_swapchain && mode != Fullscreen && m_pp.Windowed)
{
m_swapchain = NULL;
hr = m_dev->CreateAdditionalSwapChain(&m_pp, &m_swapchain);
m_pp.BackBufferWidth = w;
m_pp.BackBufferHeight = h;
if(FAILED(hr)) return false;
hr = m_dev->CreateAdditionalSwapChain(&m_pp, &m_swapchain);
CComPtr<IDirect3DSurface9> backbuffer;
hr = m_swapchain->GetBackBuffer(0, D3DBACKBUFFER_TYPE_MONO, &backbuffer);
m_backbuffer = new GSTexture9(backbuffer);
if(FAILED(hr)) return false;
return true;
CComPtr<IDirect3DSurface9> backbuffer;
hr = m_swapchain->GetBackBuffer(0, D3DBACKBUFFER_TYPE_MONO, &backbuffer);
m_backbuffer = new GSTexture9(backbuffer);
return true;
}
}
m_swapchain = NULL;
m_vertices.vb = NULL;
m_vertices.vb_old = NULL;
m_vertices.start = 0;
m_vertices.count = 0;
if(m_vs_cb) _aligned_free(m_vs_cb);
if(m_ps_cb) _aligned_free(m_ps_cb);
@ -258,7 +270,7 @@ bool GSDevice9::Reset(int w, int h, bool fs)
int mh = theApp.GetConfig("ModeHeight", 0);
int mrr = theApp.GetConfig("ModeRefreshRate", 0);
if(fs && mw > 0 && mh > 0 && mrr >= 0)
if(mode == Fullscreen && mw > 0 && mh > 0 && mrr >= 0)
{
m_pp.Windowed = FALSE;
m_pp.BackBufferWidth = mw;
@ -324,27 +336,39 @@ bool GSDevice9::Reset(int w, int h, bool fs)
return true;
}
bool GSDevice9::IsLost()
bool GSDevice9::IsLost(bool update)
{
HRESULT hr = m_dev->TestCooperativeLevel();
return hr == D3DERR_DEVICELOST || hr == D3DERR_DEVICENOTRESET;
if(!m_lost || update)
{
HRESULT hr = m_dev->TestCooperativeLevel();
m_lost = hr == D3DERR_DEVICELOST || hr == D3DERR_DEVICENOTRESET;
}
return m_lost;
}
void GSDevice9::Flip()
{
m_dev->EndScene();
HRESULT hr;
if(m_swapchain)
{
m_swapchain->Present(NULL, NULL, NULL, NULL, 0);
hr = m_swapchain->Present(NULL, NULL, NULL, NULL, 0);
}
else
{
m_dev->Present(NULL, NULL, NULL, NULL);
hr = m_dev->Present(NULL, NULL, NULL, NULL);
}
m_dev->BeginScene();
if(FAILED(hr))
{
m_lost = true;
}
}
void GSDevice9::BeginScene()
@ -663,7 +687,7 @@ void GSDevice9::IASetVertexBuffer(const void* vertices, size_t stride, size_t co
m_vertices.vb = NULL;
m_vertices.start = 0;
m_vertices.count = 0;
m_vertices.limit = max(count * 3 / 2, 10000);
m_vertices.limit = std::max<int>(count * 3 / 2, 10000);
}
if(m_vertices.vb == NULL)

View File

@ -95,6 +95,7 @@ private:
CComPtr<IDirect3D9> m_d3d;
CComPtr<IDirect3DDevice9> m_dev;
CComPtr<IDirect3DSwapChain9> m_swapchain;
bool m_lost;
struct
{
@ -132,9 +133,8 @@ public:
virtual ~GSDevice9();
bool Create(GSWnd* wnd, bool vsync);
bool Reset(int w, int h, bool fs);
bool IsLost();
bool Reset(int w, int h, int mode);
bool IsLost(bool update);
void Flip();
void BeginScene();

View File

@ -29,14 +29,14 @@ bool GSDeviceNull::Create(GSWnd* wnd, bool vsync)
return false;
}
Reset(1, 1, false);
Reset(1, 1, Windowed);
return true;
}
bool GSDeviceNull::Reset(int w, int h, bool fs)
bool GSDeviceNull::Reset(int w, int h, int mode)
{
if(!__super::Reset(w, h, fs))
if(!__super::Reset(w, h, mode))
return false;
return true;

View File

@ -36,5 +36,5 @@ public:
GSDeviceNull() {}
bool Create(GSWnd* wnd, bool vsync);
bool Reset(int w, int h, bool fs);
bool Reset(int w, int h, int mode);
};

View File

@ -150,14 +150,14 @@ bool GSDeviceOGL::Create(GSWnd* wnd, bool vsync)
*/
GSVector4i r = wnd->GetClientRect();
Reset(r.width(), r.height(), false);
Reset(r.width(), r.height(), Windowed);
return true;
}
bool GSDeviceOGL::Reset(int w, int h, bool fs)
bool GSDeviceOGL::Reset(int w, int h, int mode)
{
if(!__super::Reset(w, h, fs))
if(!__super::Reset(w, h, mode))
return false;
glCullFace(GL_FRONT_AND_BACK); CheckError();
@ -361,7 +361,7 @@ void GSDeviceOGL::IASetVertexBuffer(const void* vertices, size_t stride, size_t
{
m_vertices.start = 0;
m_vertices.count = 0;
m_vertices.limit = max(count * 3 / 2, 10000);
m_vertices.limit = std::max<int>(count * 3 / 2, 10000);
growbuffer = true;
}

View File

@ -110,8 +110,7 @@ public:
virtual ~GSDeviceOGL();
bool Create(GSWnd* wnd, bool vsync);
bool Reset(int w, int h, bool fs);
bool Reset(int w, int h, int mode);
void Present(const GSVector4i& r, int shader);
void Flip();

View File

@ -48,8 +48,8 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
m_env.vm = p->vm;
m_env.fbr = p->fbo->row;
m_env.zbr = p->zbo->row;
m_env.fbc = p->fbo->col;
m_env.zbc = p->zbo->col;
m_env.fbc = p->fbo->col[0];
m_env.zbc = p->zbo->col[0];
m_env.fzbr = p->fzbo->row;
m_env.fzbc = p->fzbo->col;
m_env.fm = GSVector4i(p->fm);
@ -116,8 +116,8 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
m_env.t.mask.u32[0] = 0;
break;
case CLAMP_REGION_CLAMP:
m_env.t.min.u16[0] = min(context->CLAMP.MINU, tw - 1);
m_env.t.max.u16[0] = min(context->CLAMP.MAXU, tw - 1);
m_env.t.min.u16[0] = std::min<int>(context->CLAMP.MINU, tw - 1);
m_env.t.max.u16[0] = std::min<int>(context->CLAMP.MAXU, tw - 1);
m_env.t.mask.u32[0] = 0;
break;
case CLAMP_REGION_REPEAT:
@ -142,8 +142,8 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
m_env.t.mask.u32[2] = 0;
break;
case CLAMP_REGION_CLAMP:
m_env.t.min.u16[4] = min(context->CLAMP.MINV, th - 1);
m_env.t.max.u16[4] = min(context->CLAMP.MAXV, th - 1); // ffx anima summon scene, when the anchor appears (th = 256, maxv > 256)
m_env.t.min.u16[4] = std::min<int>(context->CLAMP.MINV, th - 1);
m_env.t.max.u16[4] = std::min<int>(context->CLAMP.MAXV, th - 1); // ffx anima summon scene, when the anchor appears (th = 256, maxv > 256)
m_env.t.mask.u32[2] = 0;
break;
case CLAMP_REGION_REPEAT:
@ -224,22 +224,22 @@ void GSDrawScanline::DrawSolidRect(const GSVector4i& r, const GSVertexSW& v)
{
if(m == 0)
{
DrawSolidRectT<uint32, false>(m_env.zbr, m_env.zbc[0], r, z, m);
DrawSolidRectT<uint32, false>(m_env.zbr, m_env.zbc, r, z, m);
}
else
{
DrawSolidRectT<uint32, true>(m_env.zbr, m_env.zbc[0], r, z, m);
DrawSolidRectT<uint32, true>(m_env.zbr, m_env.zbc, r, z, m);
}
}
else
{
if(m == 0)
{
DrawSolidRectT<uint16, false>(m_env.zbr, m_env.zbc[0], r, z, m);
DrawSolidRectT<uint16, false>(m_env.zbr, m_env.zbc, r, z, m);
}
else
{
DrawSolidRectT<uint16, true>(m_env.zbr, m_env.zbc[0], r, z, m);
DrawSolidRectT<uint16, true>(m_env.zbr, m_env.zbc, r, z, m);
}
}
}
@ -259,11 +259,11 @@ void GSDrawScanline::DrawSolidRect(const GSVector4i& r, const GSVertexSW& v)
{
if(m == 0)
{
DrawSolidRectT<uint32, false>(m_env.fbr, m_env.fbc[0], r, c, m);
DrawSolidRectT<uint32, false>(m_env.fbr, m_env.fbc, r, c, m);
}
else
{
DrawSolidRectT<uint32, true>(m_env.fbr, m_env.fbc[0], r, c, m);
DrawSolidRectT<uint32, true>(m_env.fbr, m_env.fbc, r, c, m);
}
}
else
@ -272,18 +272,18 @@ void GSDrawScanline::DrawSolidRect(const GSVector4i& r, const GSVertexSW& v)
if(m == 0)
{
DrawSolidRectT<uint16, false>(m_env.fbr, m_env.fbc[0], r, c, m);
DrawSolidRectT<uint16, false>(m_env.fbr, m_env.fbc, r, c, m);
}
else
{
DrawSolidRectT<uint16, true>(m_env.fbr, m_env.fbc[0], r, c, m);
DrawSolidRectT<uint16, true>(m_env.fbr, m_env.fbc, r, c, m);
}
}
}
}
template<class T, bool masked>
void GSDrawScanline::DrawSolidRectT(const GSVector4i* row, int* col, const GSVector4i& r, uint32 c, uint32 m)
void GSDrawScanline::DrawSolidRectT(const int* row, int* col, const GSVector4i& r, uint32 c, uint32 m)
{
if(m == 0xffffffff) return;
@ -320,13 +320,13 @@ void GSDrawScanline::DrawSolidRectT(const GSVector4i* row, int* col, const GSVec
}
template<class T, bool masked>
void GSDrawScanline::FillRect(const GSVector4i* row, int* col, const GSVector4i& r, uint32 c, uint32 m)
void GSDrawScanline::FillRect(const int* row, int* col, const GSVector4i& r, uint32 c, uint32 m)
{
if(r.x >= r.z) return;
for(int y = r.y; y < r.w; y++)
{
uint32 base = row[y].x;
uint32 base = row[y];
for(int x = r.x; x < r.z; x++)
{
@ -338,13 +338,13 @@ void GSDrawScanline::FillRect(const GSVector4i* row, int* col, const GSVector4i&
}
template<class T, bool masked>
void GSDrawScanline::FillBlock(const GSVector4i* row, int* col, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m)
void GSDrawScanline::FillBlock(const int* row, int* col, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m)
{
if(r.x >= r.z) return;
for(int y = r.y; y < r.w; y += 8)
{
uint32 base = row[y].x;
uint32 base = row[y];
for(int x = r.x; x < r.z; x += 8 * 4 / sizeof(T))
{

View File

@ -60,13 +60,13 @@ class GSDrawScanline : public GSAlignedClass<16>, public IDrawScanline
void DrawSolidRect(const GSVector4i& r, const GSVertexSW& v);
template<class T, bool masked>
void DrawSolidRectT(const GSVector4i* row, int* col, const GSVector4i& r, uint32 c, uint32 m);
void DrawSolidRectT(const int* row, int* col, const GSVector4i& r, uint32 c, uint32 m);
template<class T, bool masked>
__forceinline void FillRect(const GSVector4i* row, int* col, const GSVector4i& r, uint32 c, uint32 m);
__forceinline void FillRect(const int* row, int* col, const GSVector4i& r, uint32 c, uint32 m);
template<class T, bool masked>
__forceinline void FillBlock(const GSVector4i* row, int* col, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m);
__forceinline void FillBlock(const int* row, int* col, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m);
protected:
GSState* m_state;

View File

@ -28,19 +28,21 @@
#include "GSLocalMemory.h"
#define ASSERT_BLOCK(r, w, h) \
ASSERT((r).width() >= w && (r).height() >= h && !((r).left&(w-1)) && !((r).top&(h-1)) && !((r).right&(w-1)) && !((r).bottom&(h-1))); \
ASSERT((r).width() >= w && (r).height() >= h && !((r).left & (w - 1)) && !((r).top & (h - 1)) && !((r).right & (w - 1)) && !((r).bottom & (h - 1))); \
#define FOREACH_BLOCK_START(w, h, bpp, format) \
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[format]; \
uint32 bp = TEX0.TBP0; \
uint32 bw = TEX0.TBW; \
int offset = dstpitch * h - r.width() * bpp / 8; \
for(int y = r.top, ye = r.bottom; y < ye; y += h, dst += offset) \
{ ASSERT_BLOCK(r, w, h); \
uint32 base = psm.bn(0, y, bp, bw); \
for(int x = r.left, xe = r.right; x < xe; x += w, dst += w * bpp / 8) \
#define FOREACH_BLOCK_START(r, w, h, bpp, psm) \
ASSERT_BLOCK(r, w, h); \
const GSLocalMemory::BlockOffset* RESTRICT _bo = GetBlockOffset(TEX0.TBP0, TEX0.TBW, psm); \
GSVector4i _r = r >> 3; \
uint8* _dst = dst - _r.left * bpp; \
int _offset = dstpitch * h; \
for(int y = _r.top; y < _r.bottom; y += h >> 3, _dst += _offset) \
{ \
uint32 _base = _bo->row[y]; \
for(int x = _r.left; x < _r.right; x += w >> 3) \
{ \
const uint8* src = BlockPtr(base + psm.blockOffset[x >> 3]); \
const uint8* src = BlockPtr(_base + _bo->col[x]); \
uint8* dst = &_dst[x * bpp]; \
#define FOREACH_BLOCK_END }}
@ -463,37 +465,65 @@ GSLocalMemory::~GSLocalMemory()
{
VirtualFree(m_vm8, 0, MEM_RELEASE);
for(hash_map<uint32, Offset*>::iterator i = m_omap.begin(); i != m_omap.end(); i++)
for(hash_map<uint32, BlockOffset*>::iterator i = m_bomap.begin(); i != m_bomap.end(); i++)
{
Offset* o = i->second;
_aligned_free(o->col[0]);
_aligned_free(o);
_aligned_free(i->second);
}
for(hash_map<uint32, Offset4*>::iterator i = m_o4map.begin(); i != m_o4map.end(); i++)
for(hash_map<uint32, PixelOffset*>::iterator i = m_pomap.begin(); i != m_pomap.end(); i++)
{
_aligned_free(i->second);
}
for(hash_map<uint32, PixelOffset4*>::iterator i = m_po4map.begin(); i != m_po4map.end(); i++)
{
_aligned_free(i->second);
}
}
GSLocalMemory::Offset* GSLocalMemory::GetOffset(uint32 bp, uint32 bw, uint32 psm)
GSLocalMemory::BlockOffset* GSLocalMemory::GetBlockOffset(uint32 bp, uint32 bw, uint32 psm)
{
if(bw == 0) {ASSERT(0); return NULL;}
ASSERT(m_psm[psm].bpp > 8); // only for 16/24/32/8h/4hh/4hl formats where all columns are the same
uint32 hash = bp | (bw << 14) | (psm << 20);
hash_map<uint32, Offset*>::iterator i = m_omap.find(hash);
hash_map<uint32, BlockOffset*>::iterator i = m_bomap.find(hash);
if(i != m_omap.end())
if(i != m_bomap.end())
{
return i->second;
}
Offset* o = (Offset*)_aligned_malloc(sizeof(Offset), 16);
BlockOffset* o = (BlockOffset*)_aligned_malloc(sizeof(BlockOffset), 16);
o->hash = hash;
pixelAddress bn = m_psm[psm].bn;
for(int i = 0; i < 256; i++)
{
o->row[i] = (int)bn(0, i << 3, bp, bw);
}
o->col = m_psm[psm].blockOffset;
m_bomap[hash] = o;
return o;
}
GSLocalMemory::PixelOffset* GSLocalMemory::GetPixelOffset(uint32 bp, uint32 bw, uint32 psm)
{
if(bw == 0) {ASSERT(0); return NULL;}
uint32 hash = bp | (bw << 14) | (psm << 20);
hash_map<uint32, PixelOffset*>::iterator i = m_pomap.find(hash);
if(i != m_pomap.end())
{
return i->second;
}
PixelOffset* o = (PixelOffset*)_aligned_malloc(sizeof(PixelOffset), 16);
o->hash = hash;
@ -501,24 +531,20 @@ GSLocalMemory::Offset* GSLocalMemory::GetOffset(uint32 bp, uint32 bw, uint32 psm
for(int i = 0; i < 2048; i++)
{
o->row[i] = GSVector4i((int)pa(0, i, bp, bw));
o->row[i] = (int)pa(0, i, bp, bw);
}
int* p = (int*)_aligned_malloc(sizeof(int) * (2048 + 3) * 4, 16);
for(int i = 0; i < 4; i++)
for(int i = 0; i < 8; i++)
{
o->col[i] = &p[2048 * i + ((4 - (i & 3)) & 3)];
memcpy(o->col[i], m_psm[psm].rowOffset[0], sizeof(int) * 2048);
o->col[i] = m_psm[psm].rowOffset[i];
}
m_omap[hash] = o;
m_pomap[hash] = o;
return o;
}
GSLocalMemory::Offset4* GSLocalMemory::GetOffset4(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF)
GSLocalMemory::PixelOffset4* GSLocalMemory::GetPixelOffset4(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF)
{
uint32 fbp = FRAME.Block();
uint32 zbp = ZBUF.Block();
@ -535,14 +561,14 @@ GSLocalMemory::Offset4* GSLocalMemory::GetOffset4(const GIFRegFRAME& FRAME, cons
uint32 hash = (FRAME.FBP << 0) | (ZBUF.ZBP << 9) | (bw << 18) | (fpsm_hash << 24) | (zpsm_hash << 28);
hash_map<uint32, Offset4*>::iterator i = m_o4map.find(hash);
hash_map<uint32, PixelOffset4*>::iterator i = m_po4map.find(hash);
if(i != m_o4map.end())
if(i != m_po4map.end())
{
return i->second;
}
Offset4* o = (Offset4*)_aligned_malloc(sizeof(Offset4), 16);
PixelOffset4* o = (PixelOffset4*)_aligned_malloc(sizeof(PixelOffset4), 16);
o->hash = hash;
@ -564,7 +590,7 @@ GSLocalMemory::Offset4* GSLocalMemory::GetOffset4(const GIFRegFRAME& FRAME, cons
o->col[i].y = m_psm[zpsm].rowOffset[0][i * 4] << zs;
}
m_o4map[hash] = o;
m_po4map[hash] = o;
return o;
}
@ -1436,20 +1462,20 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
///////////////////
void GSLocalMemory::ReadTexture32(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture32(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{
FOREACH_BLOCK_START(8, 8, 32, PSM_PSMCT32)
FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMCT32)
{
ReadBlock32<true>(src, dst, dstpitch);
}
FOREACH_BLOCK_END
}
void GSLocalMemory::ReadTexture24(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture24(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{
if(TEXA.AEM)
{
FOREACH_BLOCK_START(8, 8, 32, PSM_PSMCT24)
FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMCT24)
{
ReadAndExpandBlock24<true>(src, dst, dstpitch, TEXA);
}
@ -1457,7 +1483,7 @@ void GSLocalMemory::ReadTexture24(const GSVector4i& r, uint8* dst, int dstpitch,
}
else
{
FOREACH_BLOCK_START(8, 8, 32, PSM_PSMCT24)
FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMCT24)
{
ReadAndExpandBlock24<false>(src, dst, dstpitch, TEXA);
}
@ -1465,11 +1491,11 @@ void GSLocalMemory::ReadTexture24(const GSVector4i& r, uint8* dst, int dstpitch,
}
}
void GSLocalMemory::ReadTexture16(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture16(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{
__declspec(align(16)) uint16 block[16 * 8];
FOREACH_BLOCK_START(16, 8, 32, PSM_PSMCT16)
FOREACH_BLOCK_START(r, 16, 8, 32, PSM_PSMCT16)
{
ReadBlock16<true>(src, (uint8*)block, sizeof(block) / 8);
@ -1478,11 +1504,11 @@ void GSLocalMemory::ReadTexture16(const GSVector4i& r, uint8* dst, int dstpitch,
FOREACH_BLOCK_END
}
void GSLocalMemory::ReadTexture16S(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture16S(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{
__declspec(align(16)) uint16 block[16 * 8];
FOREACH_BLOCK_START(16, 8, 32, PSM_PSMCT16S)
FOREACH_BLOCK_START(r, 16, 8, 32, PSM_PSMCT16S)
{
ReadBlock16<true>(src, (uint8*)block, sizeof(block) / 8);
@ -1491,75 +1517,75 @@ void GSLocalMemory::ReadTexture16S(const GSVector4i& r, uint8* dst, int dstpitch
FOREACH_BLOCK_END
}
void GSLocalMemory::ReadTexture8(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture8(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{
const uint32* pal = m_clut;
FOREACH_BLOCK_START(16, 16, 32, PSM_PSMT8)
FOREACH_BLOCK_START(r, 16, 16, 32, PSM_PSMT8)
{
ReadAndExpandBlock8_32(src, dst, dstpitch, pal);
}
FOREACH_BLOCK_END
}
void GSLocalMemory::ReadTexture4(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture4(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{
const uint64* pal = m_clut;
FOREACH_BLOCK_START(32, 16, 32, PSM_PSMT4)
FOREACH_BLOCK_START(r, 32, 16, 32, PSM_PSMT4)
{
ReadAndExpandBlock4_32(src, dst, dstpitch, pal);
}
FOREACH_BLOCK_END
}
void GSLocalMemory::ReadTexture8H(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture8H(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{
const uint32* pal = m_clut;
FOREACH_BLOCK_START(8, 8, 32, PSM_PSMT8H)
FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMT8H)
{
ReadAndExpandBlock8H_32(src, dst, dstpitch, pal);
}
FOREACH_BLOCK_END
}
void GSLocalMemory::ReadTexture4HL(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture4HL(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{
const uint32* pal = m_clut;
FOREACH_BLOCK_START(8, 8, 32, PSM_PSMT4HL)
FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMT4HL)
{
ReadAndExpandBlock4HL_32(src, dst, dstpitch, pal);
}
FOREACH_BLOCK_END
}
void GSLocalMemory::ReadTexture4HH(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture4HH(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{
const uint32* pal = m_clut;
FOREACH_BLOCK_START(8, 8, 32, PSM_PSMT4HH)
FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMT4HH)
{
ReadAndExpandBlock4HH_32(src, dst, dstpitch, pal);
}
FOREACH_BLOCK_END
}
void GSLocalMemory::ReadTexture32Z(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture32Z(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{
FOREACH_BLOCK_START(8, 8, 32, PSM_PSMZ32)
FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMZ32)
{
ReadBlock32<true>(src, dst, dstpitch);
}
FOREACH_BLOCK_END
}
void GSLocalMemory::ReadTexture24Z(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture24Z(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{
if(TEXA.AEM)
{
FOREACH_BLOCK_START(8, 8, 32, PSM_PSMZ24)
FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMZ24)
{
ReadAndExpandBlock24<true>(src, dst, dstpitch, TEXA);
}
@ -1567,7 +1593,7 @@ void GSLocalMemory::ReadTexture24Z(const GSVector4i& r, uint8* dst, int dstpitch
}
else
{
FOREACH_BLOCK_START(8, 8, 32, PSM_PSMZ24)
FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMZ24)
{
ReadAndExpandBlock24<false>(src, dst, dstpitch, TEXA);
}
@ -1575,11 +1601,11 @@ void GSLocalMemory::ReadTexture24Z(const GSVector4i& r, uint8* dst, int dstpitch
}
}
void GSLocalMemory::ReadTexture16Z(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture16Z(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{
__declspec(align(16)) uint16 block[16 * 8];
FOREACH_BLOCK_START(16, 8, 32, PSM_PSMZ16)
FOREACH_BLOCK_START(r, 16, 8, 32, PSM_PSMZ16)
{
ReadBlock16<true>(src, (uint8*)block, sizeof(block) / 8);
@ -1588,11 +1614,11 @@ void GSLocalMemory::ReadTexture16Z(const GSVector4i& r, uint8* dst, int dstpitch
FOREACH_BLOCK_END
}
void GSLocalMemory::ReadTexture16SZ(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture16SZ(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{
__declspec(align(16)) uint16 block[16 * 8];
FOREACH_BLOCK_START(16, 8, 32, PSM_PSMZ16S)
FOREACH_BLOCK_START(r, 16, 8, 32, PSM_PSMZ16S)
{
ReadBlock16<true>(src, (uint8*)block, sizeof(block) / 8);
@ -1737,31 +1763,31 @@ void GSLocalMemory::ReadTexture(const GSVector4i& r, uint8* dst, int dstpitch, c
}
///////////////////
void GSLocalMemory::ReadTexture16NP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture16NP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{
FOREACH_BLOCK_START(16, 8, 16, PSM_PSMCT16)
FOREACH_BLOCK_START(r, 16, 8, 16, PSM_PSMCT16)
{
ReadBlock16<true>(src, dst, dstpitch);
}
FOREACH_BLOCK_END
}
void GSLocalMemory::ReadTexture16SNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture16SNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{
FOREACH_BLOCK_START(16, 8, 16, PSM_PSMCT16S)
FOREACH_BLOCK_START(r, 16, 8, 16, PSM_PSMCT16S)
{
ReadBlock16<true>(src, dst, dstpitch);
}
FOREACH_BLOCK_END
}
void GSLocalMemory::ReadTexture8NP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture8NP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{
const uint32* pal = m_clut;
if(TEX0.CPSM == PSM_PSMCT32 || TEX0.CPSM == PSM_PSMCT24)
{
FOREACH_BLOCK_START(16, 16, 32, PSM_PSMT8)
FOREACH_BLOCK_START(r, 16, 16, 32, PSM_PSMT8)
{
ReadAndExpandBlock8_32(src, dst, dstpitch, pal);
}
@ -1773,7 +1799,7 @@ void GSLocalMemory::ReadTexture8NP(const GSVector4i& r, uint8* dst, int dstpitch
__declspec(align(16)) uint8 block[16 * 16];
FOREACH_BLOCK_START(16, 16, 16, PSM_PSMT8)
FOREACH_BLOCK_START(r, 16, 16, 16, PSM_PSMT8)
{
ReadBlock8<true>(src, (uint8*)block, sizeof(block) / 16);
@ -1783,13 +1809,13 @@ void GSLocalMemory::ReadTexture8NP(const GSVector4i& r, uint8* dst, int dstpitch
}
}
void GSLocalMemory::ReadTexture4NP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture4NP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{
const uint64* pal = m_clut;
if(TEX0.CPSM == PSM_PSMCT32 || TEX0.CPSM == PSM_PSMCT24)
{
FOREACH_BLOCK_START(32, 16, 32, PSM_PSMT4)
FOREACH_BLOCK_START(r, 32, 16, 32, PSM_PSMT4)
{
ReadAndExpandBlock4_32(src, dst, dstpitch, pal);
}
@ -1801,7 +1827,7 @@ void GSLocalMemory::ReadTexture4NP(const GSVector4i& r, uint8* dst, int dstpitch
__declspec(align(16)) uint8 block[(32 / 2) * 16];
FOREACH_BLOCK_START(32, 16, 16, PSM_PSMT4)
FOREACH_BLOCK_START(r, 32, 16, 16, PSM_PSMT4)
{
ReadBlock4<true>(src, (uint8*)block, sizeof(block) / 16);
@ -1811,13 +1837,13 @@ void GSLocalMemory::ReadTexture4NP(const GSVector4i& r, uint8* dst, int dstpitch
}
}
void GSLocalMemory::ReadTexture8HNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture8HNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{
const uint32* pal = m_clut;
if(TEX0.CPSM == PSM_PSMCT32 || TEX0.CPSM == PSM_PSMCT24)
{
FOREACH_BLOCK_START(8, 8, 32, PSM_PSMT8H)
FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMT8H)
{
ReadAndExpandBlock8H_32(src, dst, dstpitch, pal);
}
@ -1829,7 +1855,7 @@ void GSLocalMemory::ReadTexture8HNP(const GSVector4i& r, uint8* dst, int dstpitc
__declspec(align(16)) uint32 block[8 * 8];
FOREACH_BLOCK_START(8, 8, 16, PSM_PSMT8H)
FOREACH_BLOCK_START(r, 8, 8, 16, PSM_PSMT8H)
{
ReadBlock32<true>(src, (uint8*)block, sizeof(block) / 8);
@ -1839,13 +1865,13 @@ void GSLocalMemory::ReadTexture8HNP(const GSVector4i& r, uint8* dst, int dstpitc
}
}
void GSLocalMemory::ReadTexture4HLNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture4HLNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{
const uint32* pal = m_clut;
if(TEX0.CPSM == PSM_PSMCT32 || TEX0.CPSM == PSM_PSMCT24)
{
FOREACH_BLOCK_START(8, 8, 32, PSM_PSMT4HL)
FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMT4HL)
{
ReadAndExpandBlock4HL_32(src, dst, dstpitch, pal);
}
@ -1857,7 +1883,7 @@ void GSLocalMemory::ReadTexture4HLNP(const GSVector4i& r, uint8* dst, int dstpit
__declspec(align(16)) uint32 block[8 * 8];
FOREACH_BLOCK_START(8, 8, 16, PSM_PSMT4HL)
FOREACH_BLOCK_START(r, 8, 8, 16, PSM_PSMT4HL)
{
ReadBlock32<true>(src, (uint8*)block, sizeof(block) / 8);
@ -1867,13 +1893,13 @@ void GSLocalMemory::ReadTexture4HLNP(const GSVector4i& r, uint8* dst, int dstpit
}
}
void GSLocalMemory::ReadTexture4HHNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture4HHNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{
const uint32* pal = m_clut;
if(TEX0.CPSM == PSM_PSMCT32 || TEX0.CPSM == PSM_PSMCT24)
{
FOREACH_BLOCK_START(8, 8, 32, PSM_PSMT4HH)
FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMT4HH)
{
ReadAndExpandBlock4HH_32(src, dst, dstpitch, pal);
}
@ -1885,7 +1911,7 @@ void GSLocalMemory::ReadTexture4HHNP(const GSVector4i& r, uint8* dst, int dstpit
__declspec(align(16)) uint32 block[8 * 8];
FOREACH_BLOCK_START(8, 8, 16, PSM_PSMT4HH)
FOREACH_BLOCK_START(r, 8, 8, 16, PSM_PSMT4HH)
{
ReadBlock32<true>(src, (uint8*)block, sizeof(block) / 8);
@ -1895,18 +1921,18 @@ void GSLocalMemory::ReadTexture4HHNP(const GSVector4i& r, uint8* dst, int dstpit
}
}
// Reads the PSM_PSMZ16 area described by r/TEX0 into dst block by block, using
// ReadBlock16<true>(src, dst, dstpitch) for each block.
// `src` presumably comes from FOREACH_BLOCK_START -- confirm against the macro definition.
// NOTE(review): diff listing without +/- markers; the `const` signature and the macro call
// without `r` appear to be the removed lines, the lines following them the replacements.
void GSLocalMemory::ReadTexture16ZNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture16ZNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{
FOREACH_BLOCK_START(16, 8, 16, PSM_PSMZ16)
FOREACH_BLOCK_START(r, 16, 8, 16, PSM_PSMZ16)
{
ReadBlock16<true>(src, dst, dstpitch);
}
FOREACH_BLOCK_END
}
void GSLocalMemory::ReadTexture16SZNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture16SZNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{
FOREACH_BLOCK_START(16, 8, 16, PSM_PSMZ16S)
FOREACH_BLOCK_START(r, 16, 8, 16, PSM_PSMZ16S)
{
ReadBlock16<true>(src, dst, dstpitch);
}
@ -1959,45 +1985,45 @@ void GSLocalMemory::ReadTextureNP(const GSVector4i& r, uint8* dst, int dstpitch,
// 32/8
// Reads the PSM_PSMT8 area described by r/TEX0 into dst block by block via
// ReadBlock8<true>(src, dst, dstpitch). No palette (m_clut) lookup happens in this function.
// The macro arguments (16, 16, 8) look like block width, block height and destination
// bits per pixel, i.e. 8-bit values are written -- TODO confirm against the macro definition.
// NOTE(review): diff listing without +/- markers; the `const` signature and the macro call
// without `r` appear to be the removed lines, the lines following them the replacements.
void GSLocalMemory::ReadTexture8P(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture8P(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{
FOREACH_BLOCK_START(16, 16, 8, PSM_PSMT8)
FOREACH_BLOCK_START(r, 16, 16, 8, PSM_PSMT8)
{
ReadBlock8<true>(src, dst, dstpitch);
}
FOREACH_BLOCK_END
}
// Reads the PSM_PSMT4 area described by r/TEX0 into dst block by block via
// ReadBlock4P(src, dst, dstpitch). No palette (m_clut) lookup happens in this function.
// The macro arguments (32, 16, 8) look like block width, block height and destination
// bits per pixel -- TODO confirm against the macro definition.
// NOTE(review): diff listing without +/- markers; the `const` signature and the macro call
// without `r` appear to be the removed lines, the lines following them the replacements.
void GSLocalMemory::ReadTexture4P(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture4P(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{
FOREACH_BLOCK_START(32, 16, 8, PSM_PSMT4)
FOREACH_BLOCK_START(r, 32, 16, 8, PSM_PSMT4)
{
ReadBlock4P(src, dst, dstpitch);
}
FOREACH_BLOCK_END
}
// Reads the PSM_PSMT8H area described by r/TEX0 into dst block by block via
// ReadBlock8HP(src, dst, dstpitch). No palette (m_clut) lookup happens in this function.
// The macro arguments (8, 8, 8) look like block width, block height and destination
// bits per pixel -- TODO confirm against the macro definition.
// NOTE(review): diff listing without +/- markers; the `const` signature and the macro call
// without `r` appear to be the removed lines, the lines following them the replacements.
void GSLocalMemory::ReadTexture8HP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture8HP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{
FOREACH_BLOCK_START(8, 8, 8, PSM_PSMT8H)
FOREACH_BLOCK_START(r, 8, 8, 8, PSM_PSMT8H)
{
ReadBlock8HP(src, dst, dstpitch);
}
FOREACH_BLOCK_END
}
// Reads the PSM_PSMT4HL area described by r/TEX0 into dst block by block via
// ReadBlock4HLP(src, dst, dstpitch). No palette (m_clut) lookup happens in this function.
// The macro arguments (8, 8, 8) look like block width, block height and destination
// bits per pixel -- TODO confirm against the macro definition.
// NOTE(review): diff listing without +/- markers; the `const` signature and the macro call
// without `r` appear to be the removed lines, the lines following them the replacements.
void GSLocalMemory::ReadTexture4HLP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture4HLP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{
FOREACH_BLOCK_START(8, 8, 8, PSM_PSMT4HL)
FOREACH_BLOCK_START(r, 8, 8, 8, PSM_PSMT4HL)
{
ReadBlock4HLP(src, dst, dstpitch);
}
FOREACH_BLOCK_END
}
void GSLocalMemory::ReadTexture4HHP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture4HHP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{
FOREACH_BLOCK_START(8, 8, 8, PSM_PSMT4HH)
FOREACH_BLOCK_START(r, 8, 8, 8, PSM_PSMT4HH)
{
ReadBlock4HHP(src, dst, dstpitch);
}

View File

@ -43,7 +43,7 @@ public:
typedef uint32 (GSLocalMemory::*readTexelAddr)(uint32 addr, const GIFRegTEXA& TEXA) const;
typedef void (GSLocalMemory::*writeImage)(int& tx, int& ty, uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG);
typedef void (GSLocalMemory::*readImage)(int& tx, int& ty, uint8* dst, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) const;
typedef void (GSLocalMemory::*readTexture)(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
typedef void (GSLocalMemory::*readTexture)(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
typedef void (GSLocalMemory::*readTextureBlock)(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
typedef union
@ -81,14 +81,21 @@ public:
GSClut m_clut;
// Block-address lookup table handed out by GetBlockOffset(bp, bw, psm).
// Callers in this commit add row[y >> 3] and col[x >> 3] to get a block number and shift
// the sum right by 5 to get a page index.
// `hash` is presumably the key under which the table is cached (the owning map is keyed
// by uint32) -- TODO confirm in GetBlockOffset.
// NOTE(review): diff listing without +/- markers. `struct Offset`, the GSVector4i row[2048]
// and int* col[4] members appear to be the removed lines; `struct BlockOffset`,
// int row[256] and int* col are their replacements.
struct Offset
struct BlockOffset
{
GSVector4i row[2048]; // 0 | 0 | 0 | 0
int* col[4]; // x | x+1 | x+2 | x+3
int row[256]; // yn (n = 0 8 16 ...)
int* col; // blockOffset*
uint32 hash;
};
struct Offset4
// Per-pixel address lookup table handed out by GetPixelOffset(bp, bw, psm).
// Callers in this commit read row[y] as the base address of scanline y and pick a column
// table with col[y & 7] (col[0] when the format is 16 bits per pixel or wider), which is
// then indexed by x.
// `hash` is presumably the key under which the table is cached (the owning map is keyed
// by uint32) -- TODO confirm in GetPixelOffset.
struct PixelOffset
{
int row[2048]; // yn (n = 0 1 2 ...)
int* col[8]; // rowOffset*
uint32 hash;
};
struct PixelOffset4
{
// 16 bit offsets (m_vm16[...])
@ -141,15 +148,17 @@ protected:
//
hash_map<uint32, Offset*> m_omap;
hash_map<uint32, Offset4*> m_o4map;
hash_map<uint32, BlockOffset*> m_bomap;
hash_map<uint32, PixelOffset*> m_pomap;
hash_map<uint32, PixelOffset4*> m_po4map;
public:
GSLocalMemory();
virtual ~GSLocalMemory();
Offset* GetOffset(uint32 bp, uint32 bw, uint32 psm);
Offset4* GetOffset4(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF);
BlockOffset* GetBlockOffset(uint32 bp, uint32 bw, uint32 psm);
PixelOffset* GetPixelOffset(uint32 bp, uint32 bw, uint32 psm);
PixelOffset4* GetPixelOffset4(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF);
// address
@ -781,19 +790,19 @@ public:
// * => 32
void ReadTexture32(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture24(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture16(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture16S(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture8(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture4(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture8H(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture4HL(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture4HH(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture32Z(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture24Z(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture16Z(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture16SZ(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture32(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture24(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture16(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture16S(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture8(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture4(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture8H(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture4HL(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture4HH(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture32Z(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture24Z(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture16Z(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture16SZ(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
@ -813,25 +822,25 @@ public:
// * => 32/16
void ReadTexture16NP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture16SNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture8NP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture4NP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture8HNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture4HLNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture4HHNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture16ZNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture16SZNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture16NP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture16SNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture8NP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture4NP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture8HNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture4HLNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture4HHNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture16ZNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture16SZNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTextureNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
// pal ? 8 : 32
void ReadTexture8P(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture4P(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture8HP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture4HLP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture4HHP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture8P(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture4P(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture8HP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture4HLP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture4HHP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTextureBlock8P(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
void ReadTextureBlock4P(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;

View File

@ -290,8 +290,8 @@ void GSRasterizer::DrawTriangleTop(GSVertexSW* v, const GSVector4i& scissor)
GSVector4 tb = l.p.upl(v[2].p).ceil();
GSVector4 tbmax = tb.maxv(fscissor.yyyy());
GSVector4 tbmin = tb.minv(fscissor.wwww());
GSVector4 tbmax = tb.max(fscissor.yyyy());
GSVector4 tbmin = tb.min(fscissor.wwww());
GSVector4i tbi = GSVector4i(tbmax.zzww(tbmin));
@ -342,8 +342,8 @@ void GSRasterizer::DrawTriangleBottom(GSVertexSW* v, const GSVector4i& scissor)
GSVector4 tb = l.p.upl(v[2].p).ceil();
GSVector4 tbmax = tb.maxv(fscissor.yyyy());
GSVector4 tbmin = tb.minv(fscissor.wwww());
GSVector4 tbmax = tb.max(fscissor.yyyy());
GSVector4 tbmin = tb.min(fscissor.wwww());
GSVector4i tbi = GSVector4i(tbmax.zzww(tbmin));
@ -398,8 +398,8 @@ void GSRasterizer::DrawTriangleTopBottom(GSVertexSW* v, const GSVector4i& scisso
GSVector4 tb = v[0].p.upl(v[1].p).zwzw(v[1].p.upl(v[2].p)).ceil();
GSVector4 tbmax = tb.maxv(fscissor.yyyy());
GSVector4 tbmin = tb.minv(fscissor.wwww());
GSVector4 tbmax = tb.max(fscissor.yyyy());
GSVector4 tbmin = tb.min(fscissor.wwww());
GSVector4i tbi = GSVector4i(tbmax.xzyw(tbmin));
@ -462,8 +462,8 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const
{
GSVector4 lr = l.p.xyxy(r).ceil();
GSVector4 lrmax = lr.maxv(fscissor.xxxx());
GSVector4 lrmin = lr.minv(fscissor.zzzz());
GSVector4 lrmax = lr.max(fscissor.xxxx());
GSVector4 lrmin = lr.min(fscissor.zzzz());
GSVector4i lri = GSVector4i(lrmax.xxzz(lrmin));
@ -503,8 +503,8 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const
{
GSVector4 lr = l.p.ceil();
GSVector4 lrmax = lr.maxv(fscissor.xxxx());
GSVector4 lrmin = lr.minv(fscissor.zzzz());
GSVector4 lrmax = lr.max(fscissor.xxxx());
GSVector4 lrmin = lr.min(fscissor.zzzz());
GSVector4i lri = GSVector4i(lrmax.xxyy(lrmin));
@ -613,8 +613,8 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
if(orientation)
{
GSVector4 tbmax = lrtb.maxv(fscissor.yyyy());
GSVector4 tbmin = lrtb.minv(fscissor.wwww());
GSVector4 tbmax = lrtb.max(fscissor.yyyy());
GSVector4 tbmin = lrtb.min(fscissor.wwww());
GSVector4i tbi = GSVector4i(tbmax.zwzw(tbmin));
@ -711,8 +711,8 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
}
else
{
GSVector4 lrmax = lrtb.maxv(fscissor.xxxx());
GSVector4 lrmin = lrtb.minv(fscissor.zzzz());
GSVector4 lrmax = lrtb.max(fscissor.xxxx());
GSVector4 lrmin = lrtb.min(fscissor.zzzz());
GSVector4i lri = GSVector4i(lrmax.xyxy(lrmin));

View File

@ -259,9 +259,17 @@ void GSRenderer::VSync(int field)
Flush();
field = field ? 1 : 0;
if(!Merge(field)) return;
if(!m_dev->IsLost(true))
{
if(!Merge(field ? 1 : 0))
{
return;
}
}
else
{
ResetDevice();
}
// osd
@ -311,11 +319,6 @@ void GSRenderer::VSync(int field)
// present
if(m_dev->IsLost())
{
ResetDevice();
}
m_dev->Present(m_wnd.GetClientRect().fit(m_aspectratio), m_shader);
// snapshot

View File

@ -127,7 +127,10 @@ protected:
{
// FIXME: berserk fpsm = 27 (8H)
Draw();
if(!m_dev->IsLost())
{
Draw();
}
m_perfmon.Put(GSPerfMon::Draw, 1);
}

View File

@ -122,14 +122,14 @@ void GSRendererDX9::VertexKick(bool skip)
case GS_LINELIST:
case GS_LINESTRIP:
case GS_SPRITE:
pmin = v[0].p.minv(v[1].p);
pmax = v[0].p.maxv(v[1].p);
pmin = v[0].p.min(v[1].p);
pmax = v[0].p.max(v[1].p);
break;
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
pmin = v[0].p.minv(v[1].p).minv(v[2].p);
pmax = v[0].p.maxv(v[1].p).maxv(v[2].p);
pmin = v[0].p.min(v[1].p).min(v[2].p);
pmax = v[0].p.max(v[1].p).max(v[2].p);
break;
}

View File

@ -87,51 +87,34 @@ GSTexture* GSRendererSW::GetOutput(int i)
TEX0.TBW = DISPFB.FBW;
TEX0.PSM = DISPFB.PSM;
GSVector4i r(0, 0, TEX0.TBW * 64, GetFrameRect(i).bottom);
int w = TEX0.TBW * 64;
int h = GetFrameRect(i).bottom;
// TODO: round up bottom
int w = r.width();
int h = r.height();
if(m_texture[i])
if(m_dev->ResizeTexture(&m_texture[i], w, h))
{
if(m_texture[i]->GetWidth() != w || m_texture[i]->GetHeight() != h)
// TODO
static uint8* buff = (uint8*)_aligned_malloc(1024 * 1024 * 4, 16);
static int pitch = 1024 * 4;
GSVector4i r(0, 0, w, h);
m_mem.ReadTexture(r, buff, pitch, TEX0, m_env.TEXA);
m_texture[i]->Update(r, buff, pitch);
if(s_dump)
{
delete m_texture[i];
if(s_save)
{
m_texture[i]->Save(format("c:\\temp1\\_%05d_f%I64d_fr%d_%05x_%d.bmp", s_n, m_perfmon.GetFrame(), i, (int)TEX0.TBP0, (int)TEX0.PSM));
}
m_texture[i] = NULL;
s_n++;
}
}
if(!m_texture[i])
{
m_texture[i] = m_dev->CreateTexture(w, h);
if(!m_texture[i])
{
return NULL;
}
}
// TODO
static uint8* buff = (uint8*)_aligned_malloc(1024 * 1024 * 4, 16);
static int pitch = 1024 * 4;
m_mem.ReadTexture(r, buff, pitch, TEX0, m_env.TEXA);
m_texture[i]->Update(r, buff, pitch);
if(s_dump)
{
if(s_save)
{
m_texture[i]->Save(format("c:\\temp1\\_%05d_f%I64d_fr%d_%05x_%d.bmp", s_n, m_perfmon.GetFrame(), i, (int)TEX0.TBP0, (int)TEX0.PSM));
}
s_n++;
}
return m_texture[i];
}
@ -273,9 +256,9 @@ void GSRendererSW::GetScanlineParam(GSScanlineParam& p, GS_PRIM_CLASS primclass)
p.vm = m_mem.m_vm8;
p.fbo = m_mem.GetOffset(context->FRAME.Block(), context->FRAME.FBW, context->FRAME.PSM);
p.zbo = m_mem.GetOffset(context->ZBUF.Block(), context->FRAME.FBW, context->ZBUF.PSM);
p.fzbo = m_mem.GetOffset4(context->FRAME, context->ZBUF);
p.fbo = m_mem.GetPixelOffset(context->FRAME.Block(), context->FRAME.FBW, context->FRAME.PSM);
p.zbo = m_mem.GetPixelOffset(context->ZBUF.Block(), context->FRAME.FBW, context->ZBUF.PSM);
p.fzbo = m_mem.GetPixelOffset4(context->FRAME, context->ZBUF);
p.sel.key = 0;
@ -517,14 +500,14 @@ if(!m_dump)
case GS_LINELIST:
case GS_LINESTRIP:
case GS_SPRITE:
pmin = v[0].p.minv(v[1].p);
pmax = v[0].p.maxv(v[1].p);
pmin = v[0].p.min(v[1].p);
pmax = v[0].p.max(v[1].p);
break;
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
pmin = v[0].p.minv(v[1].p).minv(v[2].p);
pmax = v[0].p.maxv(v[1].p).maxv(v[2].p);
pmin = v[0].p.min(v[1].p).min(v[2].p);
pmax = v[0].p.max(v[1].p).max(v[2].p);
break;
}

View File

@ -108,9 +108,9 @@ __declspec(align(16)) struct GSScanlineParam
const uint32* clut;
uint32 tw;
GSLocalMemory::Offset* fbo;
GSLocalMemory::Offset* zbo;
GSLocalMemory::Offset4* fzbo;
GSLocalMemory::PixelOffset* fbo;
GSLocalMemory::PixelOffset* zbo;
GSLocalMemory::PixelOffset4* fzbo;
uint32 fm, zm;
};
@ -122,10 +122,10 @@ __declspec(align(16)) struct GSScanlineEnvironment
const uint32* clut;
uint32 tw;
GSVector4i* fbr;
GSVector4i* zbr;
int** fbc;
int** zbc;
int* fbr;
int* zbr;
int* fbc;
int* zbc;
GSVector2i* fzbr;
GSVector2i* fzbc;

View File

@ -948,7 +948,7 @@ void GSState::FlushWrite()
r.left = m_env.TRXPOS.DSAX;
r.top = y;
r.right = r.left + m_env.TRXREG.RRW;
r.bottom = min(r.top + m_env.TRXREG.RRH, m_tr.x == r.left ? m_tr.y : m_tr.y + 1);
r.bottom = std::min<int>(r.top + m_env.TRXREG.RRH, m_tr.x == r.left ? m_tr.y : m_tr.y + 1);
InvalidateVideoMem(m_env.BITBLTBUF, r);
/*
@ -1085,17 +1085,20 @@ void GSState::Move()
// TODO: unroll inner loops (width has special size requirement, must be multiples of 1 << n, depending on the format)
GSLocalMemory::PixelOffset* RESTRICT spo = m_mem.GetPixelOffset(m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW, m_env.BITBLTBUF.SPSM);
GSLocalMemory::PixelOffset* RESTRICT dpo = m_mem.GetPixelOffset(m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, m_env.BITBLTBUF.DPSM);
if(spsm.trbpp == dpsm.trbpp && spsm.trbpp >= 16)
{
int* soffset = spsm.rowOffset[0];
int* doffset = dpsm.rowOffset[0];
int* soffset = spo->col[0];
int* doffset = dpo->col[0];
if(spsm.trbpp == 32)
{
for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc * w, dx -= xinc * w)
{
uint32 sbase = spsm.pa(0, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW);
uint32 dbase = dpsm.pa(0, dy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW);
uint32 sbase = spo->row[sy];
uint32 dbase = dpo->row[dy];
for(int x = 0; x < w; x++, sx += xinc, dx += xinc)
{
@ -1107,8 +1110,8 @@ void GSState::Move()
{
for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc * w, dx -= xinc * w)
{
uint32 sbase = spsm.pa(0, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW);
uint32 dbase = dpsm.pa(0, dy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW);
uint32 sbase = spo->row[sy];
uint32 dbase = dpo->row[dy];
for(int x = 0; x < w; x++, sx += xinc, dx += xinc)
{
@ -1120,8 +1123,8 @@ void GSState::Move()
{
for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc * w, dx -= xinc * w)
{
uint32 sbase = spsm.pa(0, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW);
uint32 dbase = dpsm.pa(0, dy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW);
uint32 sbase = spo->row[sy];
uint32 dbase = dpo->row[dy];
for(int x = 0; x < w; x++, sx += xinc, dx += xinc)
{
@ -1134,11 +1137,11 @@ void GSState::Move()
{
for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc * w, dx -= xinc * w)
{
uint32 sbase = GSLocalMemory::PixelAddress8(0, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW);
int* soffset = spsm.rowOffset[sy & 7];
uint32 sbase = spo->row[sy];
uint32 dbase = dpo->row[dy];
uint32 dbase = GSLocalMemory::PixelAddress8(0, dy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW);
int* doffset = dpsm.rowOffset[dy & 7];
int* soffset = spo->col[sy & 7];
int* doffset = dpo->col[dy & 7];
for(int x = 0; x < w; x++, sx += xinc, dx += xinc)
{
@ -1150,11 +1153,11 @@ void GSState::Move()
{
for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc * w, dx -= xinc * w)
{
uint32 sbase = GSLocalMemory::PixelAddress4(0, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW);
int* soffset = spsm.rowOffset[sy & 7];
uint32 sbase = spo->row[sy];
uint32 dbase = dpo->row[dy];
uint32 dbase = GSLocalMemory::PixelAddress4(0, dy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW);
int* doffset = dpsm.rowOffset[dy & 7];
int* soffset = spo->col[sy & 7];
int* doffset = dpo->col[dy & 7];
for(int x = 0; x < w; x++, sx += xinc, dx += xinc)
{
@ -1166,11 +1169,11 @@ void GSState::Move()
{
for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc * w, dx -= xinc * w)
{
uint32 sbase = spsm.pa(0, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW);
int* soffset = spsm.rowOffset[sy & 7];
uint32 sbase = spo->row[sy];
uint32 dbase = dpo->row[dy];
uint32 dbase = dpsm.pa(0, dy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW);
int* doffset = dpsm.rowOffset[dy & 7];
int* soffset = spo->col[sy & 7];
int* doffset = dpo->col[dy & 7];
for(int x = 0; x < w; x++, sx += xinc, dx += xinc)
{

View File

@ -82,7 +82,7 @@ bool GSTexture7::Update(const GSVector4i& r, const void* data, int pitch)
uint8* src = (uint8*)data;
uint8* dst = (uint8*)desc.lpSurface;
int bytes = min(pitch, desc.lPitch);
int bytes = std::min<int>(pitch, desc.lPitch);
for(int i = 0, j = r.height(); i < j; i++, src += pitch, dst += desc.lPitch)
{

View File

@ -60,7 +60,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
{
Source* s = i->first;
if(((s->m_TEX0.u32[0] ^ TEX0.u32[0]) | ((s->m_TEX0.u32[1] ^ TEX0.u32[1]) & 3)) != 0) // TBP0 TBW PSM TW TH
if(((TEX0.u32[0] ^ s->m_TEX0.u32[0]) | ((TEX0.u32[1] ^ s->m_TEX0.u32[1]) & 3)) != 0) // TBP0 TBW PSM TW TH
{
continue;
}
@ -70,7 +70,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
continue;
}
if(psm.pal > 0 && !GSVector4i::compare(s->m_clut, clut, psm.pal * sizeof(clut[0])))
if(psm.pal > 0 && !GSVector4i::compare(clut, s->m_clut, psm.pal * sizeof(clut[0])))
{
continue;
}
@ -84,13 +84,16 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
if(src == NULL)
{
uint32 bp = TEX0.TBP0;
uint32 psm = TEX0.PSM;
for(int type = 0; type < 2 && dst == NULL; type++)
{
for(list<Target*>::iterator i = m_dst[type].begin(); i != m_dst[type].end(); i++)
{
Target* t = *i;
if(t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(t->m_TEX0.TBP0, t->m_TEX0.PSM, TEX0.TBP0, TEX0.PSM))
if(t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM))
{
dst = t;
@ -116,7 +119,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
memcpy(src->m_clut, clut, psm.pal * sizeof(clut[0]));
}
m_src.Add(src, TEX0);
m_src.Add(src, TEX0, m_renderer->m_mem);
}
if(psm.pal > 0)
@ -142,13 +145,15 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int w, int h, int type, bool used, bool fb)
{
uint32 bp = TEX0.TBP0;
Target* dst = NULL;
for(list<Target*>::iterator i = m_dst[type].begin(); i != m_dst[type].end(); i++)
{
Target* t = *i;
if(t->m_TEX0.TBP0 == TEX0.TBP0)
if(bp == t->m_TEX0.TBP0)
{
m_dst[type].splice(m_dst[type].begin(), m_dst[type], i);
@ -168,7 +173,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int
{
Target* t = *i;
if(t->m_TEX0.TBP0 <= TEX0.TBP0 && TEX0.TBP0 < t->m_TEX0.TBP0 + 0x700 && (!dst || t->m_TEX0.TBP0 >= dst->m_TEX0.TBP0))
if(t->m_TEX0.TBP0 <= bp && bp < t->m_TEX0.TBP0 + 0x700 && (!dst || t->m_TEX0.TBP0 >= dst->m_TEX0.TBP0))
{
dst = t;
}
@ -229,14 +234,13 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int
void GSTextureCache::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& rect, bool target)
{
bool found = false;
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[BITBLTBUF.DPSM];
uint32 bp = BITBLTBUF.DBP;
uint32 bw = BITBLTBUF.DBW;
uint32 psm = BITBLTBUF.DPSM;
GSVector2i bs = (bp & 31) == 0 ? psm.pgs : psm.bs;
const GSLocalMemory::BlockOffset* bo = m_renderer->m_mem.GetBlockOffset(bp, bw, psm);
GSVector2i bs = (bp & 31) == 0 ? GSLocalMemory::m_psm[psm].pgs : GSLocalMemory::m_psm[psm].bs;
GSVector4i r = rect.ralign<GSVector4i::Outside>(bs);
@ -250,20 +254,22 @@ void GSTextureCache::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const
Source* s = j->first;
if(GSUtil::HasSharedBits(bp, BITBLTBUF.DPSM, s->m_TEX0.TBP0, s->m_TEX0.PSM))
if(GSUtil::HasSharedBits(bp, psm, s->m_TEX0.TBP0, s->m_TEX0.PSM))
{
m_src.RemoveAt(s);
}
}
}
bool found = false;
for(int y = r.top; y < r.bottom; y += bs.y)
{
uint32 base = psm.bn(0, y, bp, bw);
uint32 base = bo->row[y >> 3];
for(int x = r.left; x < r.right; x += bs.x)
{
uint32 page = (base + psm.blockOffset[x >> 3]) >> 5;
uint32 page = (base + bo->col[x >> 3]) >> 5;
if(page < MAX_PAGES)
{
@ -275,20 +281,22 @@ void GSTextureCache::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const
Source* s = j->first;
if(GSUtil::HasSharedBits(BITBLTBUF.DPSM, s->m_TEX0.PSM))
if(GSUtil::HasSharedBits(psm, s->m_TEX0.PSM))
{
bool b = bp == s->m_TEX0.TBP0;
if(!s->m_target)
{
s->m_valid[page] = 0;
s->m_complete = false;
found = true;
found = b;
}
else
{
// TODO
if(s->m_TEX0.TBP0 == bp)
if(b)
{
m_src.RemoveAt(s);
}
@ -309,12 +317,12 @@ void GSTextureCache::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const
Target* t = *j;
if(GSUtil::HasSharedBits(BITBLTBUF.DBP, BITBLTBUF.DPSM, t->m_TEX0.TBP0, t->m_TEX0.PSM))
if(GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM))
{
if(!found && GSUtil::HasCompatibleBits(BITBLTBUF.DPSM, t->m_TEX0.PSM))
if(!found && GSUtil::HasCompatibleBits(psm, t->m_TEX0.PSM))
{
t->m_dirty.push_back(GSDirtyRect(r, BITBLTBUF.DPSM));
t->m_TEX0.TBW = BITBLTBUF.DBW;
t->m_dirty.push_back(GSDirtyRect(r, psm));
t->m_TEX0.TBW = bw;
}
else
{
@ -324,20 +332,20 @@ void GSTextureCache::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const
}
}
if(GSUtil::HasSharedBits(BITBLTBUF.DPSM, t->m_TEX0.PSM) && BITBLTBUF.DBP < t->m_TEX0.TBP0)
if(GSUtil::HasSharedBits(psm, t->m_TEX0.PSM) && bp < t->m_TEX0.TBP0)
{
uint32 rowsize = BITBLTBUF.DBW * 8192;
uint32 offset = (uint32)((t->m_TEX0.TBP0 - BITBLTBUF.DBP) * 256);
uint32 rowsize = bw * 8192;
uint32 offset = (uint32)((t->m_TEX0.TBP0 - bp) * 256);
if(rowsize > 0 && offset % rowsize == 0)
{
int y = GSLocalMemory::m_psm[BITBLTBUF.DPSM].pgs.y * offset / rowsize;
int y = GSLocalMemory::m_psm[psm].pgs.y * offset / rowsize;
if(r.bottom > y)
{
// TODO: do not add this rect above too
t->m_dirty.push_back(GSDirtyRect(GSVector4i(r.left, r.top - y, r.right, r.bottom - y), BITBLTBUF.DPSM));
t->m_TEX0.TBW = BITBLTBUF.DBW;
t->m_dirty.push_back(GSDirtyRect(GSVector4i(r.left, r.top - y, r.right, r.bottom - y), psm));
t->m_TEX0.TBW = bw;
continue;
}
}
@ -348,21 +356,24 @@ void GSTextureCache::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const
void GSTextureCache::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r)
{
uint32 bp = BITBLTBUF.SBP;
uint32 psm = BITBLTBUF.SPSM;
for(list<Target*>::iterator i = m_dst[RenderTarget].begin(); i != m_dst[RenderTarget].end(); )
{
list<Target*>::iterator j = i++;
Target* t = *j;
if(GSUtil::HasSharedBits(BITBLTBUF.SBP, BITBLTBUF.SPSM, t->m_TEX0.TBP0, t->m_TEX0.PSM))
if(GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM))
{
if(GSUtil::HasCompatibleBits(BITBLTBUF.SPSM, t->m_TEX0.PSM))
if(GSUtil::HasCompatibleBits(psm, t->m_TEX0.PSM))
{
t->Read(r);
return;
}
else if(BITBLTBUF.SPSM == PSM_PSMCT32 && (t->m_TEX0.PSM == PSM_PSMCT16 || t->m_TEX0.PSM == PSM_PSMCT16S))
else if(psm == PSM_PSMCT32 && (t->m_TEX0.PSM == PSM_PSMCT16 || t->m_TEX0.PSM == PSM_PSMCT16S))
{
// ffx-2 riku changing to her default (shoots some reflecting glass at the end), 16-bit rt read as 32-bit
@ -518,9 +529,7 @@ void GSTextureCache::Source::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TE
m_TEX0 = TEX0;
m_TEXA = TEXA;
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[m_TEX0.PSM];
GSVector2i bs = psm.bs;
GSVector2i bs = GSLocalMemory::m_psm[m_TEX0.PSM].bs;
GSVector4i r = rect.ralign<GSVector4i::Outside>(bs);
@ -529,20 +538,19 @@ void GSTextureCache::Source::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TE
m_complete = true; // lame, but better than nothing
}
uint32 bp = m_TEX0.TBP0;
uint32 bw = m_TEX0.TBW;
const GSLocalMemory::BlockOffset* bo = m_renderer->m_mem.GetBlockOffset(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM);
bool repeating = (1 << m_TEX0.TW) > (bw << 6); // TODO: bw == 0
bool repeating = m_TEX0.IsRepeating();
uint32 blocks = 0;
for(int y = r.top; y < r.bottom; y += bs.y)
{
uint32 base = psm.bn(0, y, bp, bw);
uint32 base = bo->row[y >> 3];
for(int x = r.left; x < r.right; x += bs.x)
{
uint32 block = base + psm.blockOffset[x >> 3];
uint32 block = base + bo->col[x >> 3];
if(block < MAX_BLOCKS)
{
@ -570,11 +578,11 @@ void GSTextureCache::Source::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TE
{
for(int y = r.top; y < r.bottom; y += bs.y)
{
uint32 base = psm.bn(0, y, bp, bw);
uint32 base = bo->row[y >> 3];
for(int x = r.left; x < r.right; x += bs.x)
{
uint32 block = base + psm.blockOffset[x >> 3];
uint32 block = base + bo->col[x >> 3];
if(block < MAX_BLOCKS)
{
@ -640,7 +648,7 @@ void GSTextureCache::Source::Flush(uint32 count)
int pitch = max(tw, psm.bs.x) * sizeof(uint32);
const GSLocalMemory& mem = m_renderer->m_mem;
GSLocalMemory& mem = m_renderer->m_mem;
GSLocalMemory::readTexture rtx = psm.rtx;
@ -767,42 +775,44 @@ void GSTextureCache::Target::Update()
{
// do the most likely thing a direct write would do, clear it
m_renderer->m_dev->ClearDepth(m_texture, 0);
if((m_renderer->m_game.flags & CRC::ZWriteMustNotClear) == 0)
{
m_renderer->m_dev->ClearDepth(m_texture, 0);
}
}
}
// GSTextureCache::SourceMap
void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0)
void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0, GSLocalMemory& mem)
{
m_surfaces[s] = true;
int tw = 1 << TEX0.TW;
int th = 1 << TEX0.TH;
uint32 bp = TEX0.TBP0;
uint32 bw = TEX0.TBW;
if(s->m_target)
{
// TODO
m_map[bp >> 5][s] = true;
m_map[TEX0.TBP0 >> 5][s] = true;
return;
}
const GSLocalMemory::BlockOffset* bo = mem.GetBlockOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
GSVector2i bs = (bp & 31) ? psm.pgs : psm.bs;
GSVector2i bs = (TEX0.TBP0 & 31) ? psm.pgs : psm.bs;
int tw = 1 << TEX0.TW;
int th = 1 << TEX0.TH;
for(int y = 0; y < th; y += bs.y)
{
uint32 base = psm.bn(0, y, bp, bw);
uint32 base = bo->row[y >> 3];
for(int x = 0; x < tw; x += bs.x)
{
uint32 page = (base + psm.blockOffset[x >> 3]) >> 5;
uint32 page = (base + bo->col[x >> 3]) >> 5;
if(page < MAX_PAGES)
{

View File

@ -100,7 +100,7 @@ protected:
SourceMap() : m_used(false) {memset(m_pages, 0, sizeof(m_pages));}
void Add(Source* s, const GIFRegTEX0& TEX0);
void Add(Source* s, const GIFRegTEX0& TEX0, GSLocalMemory& mem);
void RemoveAll();
void RemoveAt(Source* s);

View File

@ -45,7 +45,7 @@ const GSTextureCacheSW::GSTexture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TE
{
GSTexture* t2 = i->first;
if(((t2->m_TEX0.u32[0] ^ TEX0.u32[0]) | ((t2->m_TEX0.u32[1] ^ TEX0.u32[1]) & 3)) != 0) // TBP0 TBW PSM TW TH
if(((TEX0.u32[0] ^ t2->m_TEX0.u32[0]) | ((TEX0.u32[1] ^ t2->m_TEX0.u32[1]) & 3)) != 0) // TBP0 TBW PSM TW TH
{
continue;
}
@ -68,21 +68,20 @@ const GSTextureCacheSW::GSTexture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TE
m_textures[t] = true;
const GSLocalMemory::BlockOffset* bo = m_state->m_mem.GetBlockOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
GSVector2i bs = (TEX0.TBP0 & 31) == 0 ? psm.pgs : psm.bs;
int tw = 1 << TEX0.TW;
int th = 1 << TEX0.TH;
uint32 bp = TEX0.TBP0;
uint32 bw = TEX0.TBW;
GSVector2i s = (bp & 31) == 0 ? psm.pgs : psm.bs;
for(int y = 0; y < th; y += s.y)
for(int y = 0; y < th; y += bs.y)
{
uint32 base = psm.bn(0, y, bp, bw);
uint32 base = bo->row[y >> 3];
for(int x = 0; x < tw; x += s.x)
for(int x = 0; x < tw; x += bs.x)
{
uint32 page = (base + psm.blockOffset[x >> 3]) >> 5;
uint32 page = (base + bo->col[x >> 3]) >> 5;
if(page < MAX_PAGES)
{
@ -168,22 +167,23 @@ void GSTextureCacheSW::IncAge()
void GSTextureCacheSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& rect)
{
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[BITBLTBUF.DPSM];
uint32 bp = BITBLTBUF.DBP;
uint32 bw = BITBLTBUF.DBW;
uint32 psm = BITBLTBUF.DPSM;
GSVector2i s = (bp & 31) == 0 ? psm.pgs : psm.bs;
const GSLocalMemory::BlockOffset* bo = m_state->m_mem.GetBlockOffset(bp, bw, psm);
GSVector4i r = rect.ralign<GSVector4i::Outside>(s);
GSVector2i bs = (bp & 31) == 0 ? GSLocalMemory::m_psm[psm].pgs : GSLocalMemory::m_psm[psm].bs;
for(int y = r.top; y < r.bottom; y += s.y)
GSVector4i r = rect.ralign<GSVector4i::Outside>(bs);
for(int y = r.top; y < r.bottom; y += bs.y)
{
uint32 base = psm.bn(0, y, bp, bw);
uint32 base = bo->row[y >> 3];
for(int x = r.left; x < r.right; x += s.x)
for(int x = r.left; x < r.right; x += bs.x)
{
uint32 page = (base + psm.blockOffset[x >> 3]) >> 5;
uint32 page = (base + bo->col[x >> 3]) >> 5;
if(page < MAX_PAGES)
{
@ -193,7 +193,7 @@ void GSTextureCacheSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, cons
{
GSTexture* t = i->first;
if(GSUtil::HasSharedBits(BITBLTBUF.DPSM, t->m_TEX0.PSM))
if(GSUtil::HasSharedBits(psm, t->m_TEX0.PSM))
{
t->m_valid[page] = 0;
t->m_complete = false;
@ -236,10 +236,17 @@ bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEX
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
GSVector2i s = psm.bs;
GSVector2i bs = psm.bs;
int tw = max(1 << TEX0.TW, s.x);
int th = max(1 << TEX0.TH, s.y);
int tw = max(1 << TEX0.TW, bs.x);
int th = max(1 << TEX0.TH, bs.y);
GSVector4i r = rect.ralign<GSVector4i::Outside>(bs);
if(r.eq(GSVector4i(0, 0, tw, th)))
{
m_complete = true; // lame, but better than nothing
}
if(m_buff == NULL)
{
@ -250,25 +257,17 @@ bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEX
return false;
}
m_tw = max(psm.pal > 0 ? 5 : 3, TEX0.TW); // makes one row 32 bytes at least, matches the smallest block size that is allocated above for m_buff
m_tw = std::max<int>(psm.pal > 0 ? 5 : 3, TEX0.TW); // makes one row 32 bytes at least, matches the smallest block size that is allocated above for m_buff
}
GSVector4i r = rect.ralign<GSVector4i::Outside>(s);
if(r.eq(GSVector4i(0, 0, tw, th)))
{
m_complete = true; // lame, but better than nothing
}
uint32 bp = TEX0.TBP0;
uint32 bw = TEX0.TBW;
bool repeating = tw > (bw << 6); // TODO: bw == 0
uint32 blocks = 0;
GSLocalMemory& mem = m_state->m_mem;
const GSLocalMemory::BlockOffset* bo = mem.GetBlockOffset(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM);
bool repeating = m_TEX0.IsRepeating();
uint32 blocks = 0;
GSLocalMemory::readTextureBlock rtxb = psm.rtxbP;
int shift = psm.pal == 0 ? 2 : 0;
@ -277,13 +276,13 @@ bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEX
uint8* dst = (uint8*)m_buff + pitch * r.top;
for(int y = r.top, o = pitch * s.y; y < r.bottom; y += s.y, dst += o)
for(int y = r.top, o = pitch * bs.y; y < r.bottom; y += bs.y, dst += o)
{
uint32 base = psm.bn(0, y, bp, bw);
uint32 base = bo->row[y >> 3];
for(int x = r.left; x < r.right; x += s.x)
for(int x = r.left; x < r.right; x += bs.x)
{
uint32 block = base + psm.blockOffset[x >> 3];
uint32 block = base + bo->col[x >> 3];
if(block < MAX_BLOCKS)
{
@ -309,13 +308,13 @@ bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEX
{
if(repeating)
{
for(int y = r.top; y < r.bottom; y += s.y)
for(int y = r.top; y < r.bottom; y += bs.y)
{
uint32 base = psm.bn(0, y, bp, bw);
uint32 base = bo->row[y >> 3];
for(int x = r.left; x < r.right; x += s.x)
for(int x = r.left; x < r.right; x += bs.x)
{
uint32 block = base + psm.blockOffset[x >> 3];
uint32 block = base + bo->col[x >> 3];
if(block < MAX_BLOCKS)
{
@ -328,7 +327,7 @@ bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEX
}
}
m_state->m_perfmon.Put(GSPerfMon::Unswizzle, s.x * s.y * blocks << shift);
m_state->m_perfmon.Put(GSPerfMon::Unswizzle, bs.x * bs.y * blocks << shift);
}
return true;

View File

@ -28,8 +28,8 @@
static struct GSUtilMaps
{
uint8 PrimClassField[8];
bool CompatibleBitsField[64][64];
bool SharedBitsField[64][64];
uint32 CompatibleBitsField[64][2];
uint32 SharedBitsField[64][2];
struct GSUtilMaps()
{
@ -44,31 +44,36 @@ static struct GSUtilMaps
memset(CompatibleBitsField, 0, sizeof(CompatibleBitsField));
CompatibleBitsField[PSM_PSMCT32][PSM_PSMCT24] = true;
CompatibleBitsField[PSM_PSMCT24][PSM_PSMCT32] = true;
CompatibleBitsField[PSM_PSMCT16][PSM_PSMCT16S] = true;
CompatibleBitsField[PSM_PSMCT16S][PSM_PSMCT16] = true;
CompatibleBitsField[PSM_PSMZ32][PSM_PSMZ24] = true;
CompatibleBitsField[PSM_PSMZ24][PSM_PSMZ32] = true;
CompatibleBitsField[PSM_PSMZ16][PSM_PSMZ16S] = true;
CompatibleBitsField[PSM_PSMZ16S][PSM_PSMZ16] = true;
for(int i = 0; i < 64; i++)
{
CompatibleBitsField[i][i >> 5] |= 1 << (i & 0x1f);
}
memset(SharedBitsField, 1, sizeof(SharedBitsField));
CompatibleBitsField[PSM_PSMCT32][PSM_PSMCT24 >> 5] |= 1 << (PSM_PSMCT24 & 0x1f);
CompatibleBitsField[PSM_PSMCT24][PSM_PSMCT32 >> 5] |= 1 << (PSM_PSMCT32 & 0x1f);
CompatibleBitsField[PSM_PSMCT16][PSM_PSMCT16S >> 5] |= 1 << (PSM_PSMCT16S & 0x1f);
CompatibleBitsField[PSM_PSMCT16S][PSM_PSMCT16 >> 5] |= 1 << (PSM_PSMCT16 & 0x1f);
CompatibleBitsField[PSM_PSMZ32][PSM_PSMZ24 >> 5] |= 1 << (PSM_PSMZ24 & 0x1f);
CompatibleBitsField[PSM_PSMZ24][PSM_PSMZ32 >> 5] |= 1 << (PSM_PSMZ32 & 0x1f);
CompatibleBitsField[PSM_PSMZ16][PSM_PSMZ16S >> 5] |= 1 << (PSM_PSMZ16S & 0x1f);
CompatibleBitsField[PSM_PSMZ16S][PSM_PSMZ16 >> 5] |= 1 << (PSM_PSMZ16 & 0x1f);
SharedBitsField[PSM_PSMCT24][PSM_PSMT8H] = false;
SharedBitsField[PSM_PSMCT24][PSM_PSMT4HL] = false;
SharedBitsField[PSM_PSMCT24][PSM_PSMT4HH] = false;
SharedBitsField[PSM_PSMZ24][PSM_PSMT8H] = false;
SharedBitsField[PSM_PSMZ24][PSM_PSMT4HL] = false;
SharedBitsField[PSM_PSMZ24][PSM_PSMT4HH] = false;
SharedBitsField[PSM_PSMT8H][PSM_PSMCT24] = false;
SharedBitsField[PSM_PSMT8H][PSM_PSMZ24] = false;
SharedBitsField[PSM_PSMT4HL][PSM_PSMCT24] = false;
SharedBitsField[PSM_PSMT4HL][PSM_PSMZ24] = false;
SharedBitsField[PSM_PSMT4HL][PSM_PSMT4HH] = false;
SharedBitsField[PSM_PSMT4HH][PSM_PSMCT24] = false;
SharedBitsField[PSM_PSMT4HH][PSM_PSMZ24] = false;
SharedBitsField[PSM_PSMT4HH][PSM_PSMT4HL] = false;
memset(SharedBitsField, 0, sizeof(SharedBitsField));
SharedBitsField[PSM_PSMCT24][PSM_PSMT8H >> 5] |= 1 << (PSM_PSMT8H & 0x1f);
SharedBitsField[PSM_PSMCT24][PSM_PSMT4HL >> 5] |= 1 << (PSM_PSMT4HL & 0x1f);
SharedBitsField[PSM_PSMCT24][PSM_PSMT4HH >> 5] |= 1 << (PSM_PSMT4HH & 0x1f);
SharedBitsField[PSM_PSMZ24][PSM_PSMT8H >> 5] |= 1 << (PSM_PSMT8H & 0x1f);
SharedBitsField[PSM_PSMZ24][PSM_PSMT4HL >> 5] |= 1 << (PSM_PSMT4HL & 0x1f);
SharedBitsField[PSM_PSMZ24][PSM_PSMT4HH >> 5] |= 1 << (PSM_PSMT4HH & 0x1f);
SharedBitsField[PSM_PSMT8H][PSM_PSMCT24 >> 5] |= 1 << (PSM_PSMCT24 & 0x1f);
SharedBitsField[PSM_PSMT8H][PSM_PSMZ24 >> 5] |= 1 << (PSM_PSMZ24 & 0x1f);
SharedBitsField[PSM_PSMT4HL][PSM_PSMCT24 >> 5] |= 1 << (PSM_PSMCT24 & 0x1f);
SharedBitsField[PSM_PSMT4HL][PSM_PSMZ24 >> 5] |= 1 << (PSM_PSMZ24 & 0x1f);
SharedBitsField[PSM_PSMT4HL][PSM_PSMT4HH >> 5] |= 1 << (PSM_PSMT4HH & 0x1f);
SharedBitsField[PSM_PSMT4HH][PSM_PSMCT24 >> 5] |= 1 << (PSM_PSMCT24 & 0x1f);
SharedBitsField[PSM_PSMT4HH][PSM_PSMZ24 >> 5] |= 1 << (PSM_PSMZ24 & 0x1f);
SharedBitsField[PSM_PSMT4HH][PSM_PSMT4HL >> 5] |= 1 << (PSM_PSMT4HL & 0x1f);
}
} s_maps;
@ -80,21 +85,17 @@ GS_PRIM_CLASS GSUtil::GetPrimClass(uint32 prim)
// Looks up, in s_maps.SharedBitsField, whether pixel formats spsm and dpsm are
// recorded as sharing bits.
// NOTE(review): this text is a flattened diff, so both the pre-change and the
// post-change return statement appear below; only one belongs in the real file.
bool GSUtil::HasSharedBits(uint32 spsm, uint32 dpsm)
{
// Pre-change form: direct lookup in the bool [64][64] table.
return s_maps.SharedBitsField[spsm][dpsm];
// Post-change form: each row is a 64-entry bit mask stored in two uint32 words
// (word = spsm >> 5, bit = spsm & 0x1f). The table is zero-filled and a bit is
// set only for pairs that do NOT share bits, hence the comparison with 0.
return (s_maps.SharedBitsField[dpsm][spsm >> 5] & (1 << (spsm & 0x1f))) == 0;
}
// Address-aware overload: true only when the two base pointers (sbp, dbp) are
// equal AND the formats spsm/dpsm are recorded as sharing bits.
// NOTE(review): flattened diff — the first two statements are the pre-change
// body, the last statement is its post-change replacement.
bool GSUtil::HasSharedBits(uint32 sbp, uint32 spsm, uint32 dbp, uint32 dpsm)
{
// Pre-change form: early-out on differing base pointers, then defer to the
// two-argument overload.
if(sbp != dbp) return false;
return HasSharedBits(spsm, dpsm);
// Post-change form: (sbp ^ dbp) is zero only for equal base pointers and the
// masked table bit is zero only for format pairs that share bits; OR-ing the
// two lets a single compare against 0 test both conditions without a branch.
return ((sbp ^ dbp) | (s_maps.SharedBitsField[dpsm][spsm >> 5] & (1 << (spsm & 0x1f)))) == 0;
}
// Looks up, in s_maps.CompatibleBitsField, whether pixel formats spsm and dpsm
// are recorded as compatible.
// NOTE(review): flattened diff — the first two statements are the pre-change
// body, the last statement is its post-change replacement.
bool GSUtil::HasCompatibleBits(uint32 spsm, uint32 dpsm)
{
// Pre-change form: identical formats are compatible by definition, everything
// else comes from the bool [64][64] table.
if(spsm == dpsm) return true;
return s_maps.CompatibleBitsField[spsm][dpsm];
// Post-change form: row spsm is a bit mask in two uint32 words (word =
// dpsm >> 5, bit = dpsm & 0x1f). No explicit equality test is needed because
// the GSUtilMaps constructor sets the diagonal bit for every format.
return (s_maps.CompatibleBitsField[spsm][dpsm >> 5] & (1 << (dpsm & 0x1f))) != 0;
}
bool GSUtil::CheckDirectX()

View File

@ -1032,7 +1032,7 @@ public:
bool allfalse() const
{
#if _M_SSE >= 0x401
return _mm_testz_si128(m, m);
return _mm_testz_si128(m, m) != 0;
#else
return _mm_movemask_epi8(m) == 0;
#endif
@ -2485,15 +2485,15 @@ public:
// Caps every lane of this vector at `scale` (default 255) by taking the
// minimum with a vector built from `scale`. No lower bound is applied here.
// NOTE(review): flattened diff — the two return statements are the pre-change
// (minv) and post-change (min) spellings of the same call.
GSVector4 clamp(const float scale = 255) const
{
return minv(GSVector4(scale));
return min(GSVector4(scale));
}
// Returns the per-lane minimum of this vector and `a`, computed with
// _mm_min_ps.
// NOTE(review): flattened diff — the first signature line carries the old name
// (minv), the second the new name (min); only one belongs in the real file.
GSVector4 minv(const GSVector4& a) const
GSVector4 min(const GSVector4& a) const
{
return GSVector4(_mm_min_ps(m, a));
}
// Returns the per-lane maximum of this vector and `a`, computed with
// _mm_max_ps.
// NOTE(review): flattened diff — the first signature line carries the old name
// (maxv), the second the new name (max); only one belongs in the real file.
GSVector4 maxv(const GSVector4& a) const
GSVector4 max(const GSVector4& a) const
{
return GSVector4(_mm_max_ps(m, a));
}
@ -2550,7 +2550,7 @@ public:
{
#if _M_SSE >= 0x401
__m128i a = _mm_castps_si128(m);
return _mm_testz_si128(a, a);
return _mm_testz_si128(a, a) != 0;
#else
return _mm_movemask_ps(m) == 0;
#endif

View File

@ -1371,6 +1371,14 @@
AssemblerOutput="4"
/>
</FileConfiguration>
<FileConfiguration
Name="Debug SSE4|Win32"
>
<Tool
Name="VCCLCompilerTool"
AssemblerOutput="4"
/>
</FileConfiguration>
<FileConfiguration
Name="Release SSE4|Win32"
>

View File

@ -190,3 +190,6 @@ typedef signed long long int64;
#include <smmintrin.h>
#endif
// Drop any function-like min/max macros still defined at this point so that
// functions and members named min/max can be declared and called without being
// macro-expanded.
// NOTE(review): presumably these come from <windows.h> — confirm, and check
// that no later code relies on the macro forms.
#undef min
#undef max