diff --git a/plugins/GSdx/GPURenderer.cpp b/plugins/GSdx/GPURenderer.cpp index 4e2ba0d3a0..9e3f5e1607 100644 --- a/plugins/GSdx/GPURenderer.cpp +++ b/plugins/GSdx/GPURenderer.cpp @@ -113,7 +113,17 @@ void GPURenderer::VSync() Flush(); - if(!Merge()) return; + if(!m_dev->IsLost(true)) + { + if(!Merge()) + { + return; + } + } + else + { + ResetDevice(); + } // osd @@ -148,11 +158,6 @@ void GPURenderer::VSync() SetWindowText(m_hWnd, s.c_str()); } - if(m_dev->IsLost()) - { - ResetDevice(); - } - GSVector4i r; GetClientRect(m_hWnd, r); diff --git a/plugins/GSdx/GPURendererSW.cpp b/plugins/GSdx/GPURendererSW.cpp index c4b63199c2..0b5acbff97 100644 --- a/plugins/GSdx/GPURendererSW.cpp +++ b/plugins/GSdx/GPURendererSW.cpp @@ -50,36 +50,16 @@ GSTexture* GPURendererSW::GetOutput() r.right <<= m_scale.x; r.bottom <<= m_scale.y; - // TODO - static uint32* buff = (uint32*)_aligned_malloc(m_mem.GetWidth() * m_mem.GetHeight() * sizeof(uint32), 16); - - m_mem.ReadFrame32(r, buff, !!m_env.STATUS.ISRGB24); - - int w = r.width(); - int h = r.height(); - - if(m_texture) + if(m_dev->ResizeTexture(&m_texture, r.width(), r.height())) { - if(m_texture->GetWidth() != w || m_texture->GetHeight() != h) - { - delete m_texture; + // TODO + static uint32* buff = (uint32*)_aligned_malloc(m_mem.GetWidth() * m_mem.GetHeight() * sizeof(uint32), 16); - m_texture = NULL; - } + m_mem.ReadFrame32(r, buff, !!m_env.STATUS.ISRGB24); + + m_texture->Update(r.rsize(), buff, m_mem.GetWidth() * sizeof(uint32)); } - if(!m_texture) - { - m_texture = m_dev->CreateTexture(w, h); - - if(!m_texture) - { - return NULL; - } - } - - m_texture->Update(r.rsize(), buff, m_mem.GetWidth() * sizeof(uint32)); - return m_texture; } @@ -164,8 +144,8 @@ void GPURendererSW::Draw() { GSVector4 p = m_vertices[i].p; - tl = tl.minv(p); - br = br.maxv(p); + tl = tl.min(p); + br = br.max(p); } GSVector4i r = GSVector4i(tl.xyxy(br)).rintersect(data.scissor); diff --git a/plugins/GSdx/GS.h b/plugins/GSdx/GS.h index 3ef534c9bc..885ae92db8 100644 --- a/plugins/GSdx/GS.h +++ b/plugins/GSdx/GS.h @@ -801,7 +801,9 @@ union uint64 _PAD4:30; }; }; -REG_END +REG_END2 + __forceinline bool IsRepeating() {return ((uint32)1 << TW) > (TBW << 6);} +REG_END2 REG64_(GIFReg, TEX1) uint32 LCM:1; diff --git a/plugins/GSdx/GSCrc.cpp b/plugins/GSdx/GSCrc.cpp index f21c3799c4..f95cabb957 100644 --- a/plugins/GSdx/GSCrc.cpp +++ b/plugins/GSdx/GSCrc.cpp @@ -89,6 +89,7 @@ CRC::Game CRC::m_games[] = {0x7D8F539A, SoTC, EU, 0}, {0x3122B508, OnePieceGrandAdventure, US, 0}, {0x8DF14A24, OnePieceGrandAdventure, Unknown, 0}, + {0x5D02CC5B, OnePieceGrandBattle, Unknown, 0}, {0x6F8545DB, ICO, US, 0}, {0xB01A4C95, ICO, JP, 0}, {0x5C991F4E, ICO, Unknown, 0}, @@ -143,7 +144,9 @@ CRC::Game CRC::m_games[] = {0x23A97857, StarOcean3, JPUNDUB, 0}, {0xCC96CE93, ValkyrieProfile2, US, 0}, {0x774DE8E2, ValkyrieProfile2, JP, 0}, - {0x47B9B2FD, RadiataStories, US, 0}, + {0x04CCB600, ValkyrieProfile2, EU, 0}, + {0x47B9B2FD, RadiataStories, US, 0}, + {0xE8FCF8EC, SMTNocturne, US, ZWriteMustNotClear}, // saves/reloads z buffer around shadow drawing }; hash_map CRC::m_map; diff --git a/plugins/GSdx/GSCrc.h b/plugins/GSdx/GSCrc.h index 8247e4fec0..d6c1da82d9 100644 --- a/plugins/GSdx/GSCrc.h +++ b/plugins/GSdx/GSCrc.h @@ -48,6 +48,7 @@ public: BullyCC, SoTC, OnePieceGrandAdventure, + OnePieceGrandBattle, ICO, GT4, WildArms5, @@ -74,6 +75,7 @@ public: StarOcean3, ValkyrieProfile2, RadiataStories, + SMTNocturne, TitleCount, }; @@ -95,6 +97,7 @@ public: enum Flags { PointListPalette = 1, + ZWriteMustNotClear = 2, }; struct Game diff --git a/plugins/GSdx/GSDevice.cpp b/plugins/GSdx/GSDevice.cpp index 063aab8460..db34523630 100644 --- a/plugins/GSdx/GSDevice.cpp +++ b/plugins/GSdx/GSDevice.cpp @@ -44,7 +44,7 @@ bool GSDevice::Create(GSWnd* wnd, bool vsync) return true; } -bool GSDevice::Reset(int w, int h, bool fs) +bool GSDevice::Reset(int w, int h, int mode) { for(list::iterator i = m_pool.begin(); i != m_pool.end(); i++) { @@ -74,13 +74,15 @@ void GSDevice::Present(const GSVector4i& r, int shader) { GSVector4i cr = m_wnd->GetClientRect(); - // Skip Presentation if the surface is invisible (minimized or hidden); prevents DX null swapchain crashes. - if(cr.width() == 0 || cr.height() == 0) - return; + int w = std::max(cr.width(), 1); + int h = std::max(cr.height(), 1); - if(m_backbuffer->GetWidth() != cr.width() || m_backbuffer->GetHeight() != cr.height()) + if(!m_backbuffer || m_backbuffer->GetWidth() != w || m_backbuffer->GetHeight() != h) { - Reset(cr.width(), cr.height(), false); + if(!Reset(w, h, DontCare)) + { + return; + } } ClearRenderTarget(m_backbuffer, 0); @@ -225,4 +227,22 @@ void GSDevice::Interlace(const GSVector2i& ds, int field, int mode, float yoffse { m_current = m_merge; } -} \ No newline at end of file +} + +bool GSDevice::ResizeTexture(GSTexture** t, int w, int h) +{ + if(t == NULL) {ASSERT(0); return false;} + + GSTexture* t2 = *t; + + if(t2 == NULL || t2->GetWidth() != w || t2->GetHeight() != h) + { + delete t2; + + t2 = CreateTexture(w, h); + + *t = t2; + } + + return t2 != NULL; +} diff --git a/plugins/GSdx/GSDevice.h b/plugins/GSdx/GSDevice.h index a5d36775a3..cc374dabae 100644 --- a/plugins/GSdx/GSDevice.h +++ b/plugins/GSdx/GSDevice.h @@ -73,10 +73,11 @@ public: void Recycle(GSTexture* t); - virtual bool Create(GSWnd* wnd, bool vsync); - virtual bool Reset(int w, int h, bool fs); + enum {Windowed, Fullscreen, DontCare}; - virtual bool IsLost() {return false;} + virtual bool Create(GSWnd* wnd, bool vsync); + virtual bool Reset(int w, int h, int mode); + virtual bool IsLost(bool update = false) {return false;} virtual void Present(const GSVector4i& r, int shader); virtual void Flip() {} @@ -105,6 +106,8 @@ public: void Merge(GSTexture* st[2], GSVector4* sr, GSVector4* dr, const GSVector2i& fs, bool slbg, bool mmod, const GSVector4& c); void Interlace(const GSVector2i& ds, int field, int mode, float yoffset); + bool ResizeTexture(GSTexture** t, int w, int h); + template void PrepareShaderMacro(vector& dst, const T* src, const char* model) { dst.clear(); diff --git a/plugins/GSdx/GSDevice10.cpp b/plugins/GSdx/GSDevice10.cpp index 6d7d2b73ec..b5778f23f7 100644 --- a/plugins/GSdx/GSDevice10.cpp +++ b/plugins/GSdx/GSDevice10.cpp @@ -207,16 +207,16 @@ bool GSDevice10::Create(GSWnd* wnd, bool vsync) // - Reset(1, 1, false); + Reset(1, 1, Windowed); // return true; } -bool GSDevice10::Reset(int w, int h, bool fs) +bool GSDevice10::Reset(int w, int h, int mode) { - if(!__super::Reset(w, h, fs)) + if(!__super::Reset(w, h, mode)) return false; DXGI_SWAP_CHAIN_DESC scd; @@ -504,7 +504,7 @@ void GSDevice10::IASetVertexBuffer(const void* vertices, size_t stride, size_t c m_vertices.vb = NULL; m_vertices.start = 0; m_vertices.count = 0; - m_vertices.limit = max(count * 3 / 2, 10000); + m_vertices.limit = std::max(count * 3 / 2, 10000); } if(m_vertices.vb == NULL) diff --git a/plugins/GSdx/GSDevice10.h b/plugins/GSdx/GSDevice10.h index 5739532105..fd14220537 100644 --- a/plugins/GSdx/GSDevice10.h +++ b/plugins/GSdx/GSDevice10.h @@ -96,8 +96,7 @@ public: virtual ~GSDevice10(); bool Create(GSWnd* wnd, bool vsync); - bool Reset(int w, int h, bool fs); - + bool Reset(int w, int h, int mode); void Flip(); void BeginScene(); diff --git a/plugins/GSdx/GSDevice11.cpp b/plugins/GSdx/GSDevice11.cpp index c31c0a8d92..98a1bd96af 100644 --- a/plugins/GSdx/GSDevice11.cpp +++ b/plugins/GSdx/GSDevice11.cpp @@ -244,16 +244,16 @@ bool GSDevice11::Create(GSWnd* wnd, bool vsync) // - Reset(1, 1, false); + Reset(1, 1, Windowed); // return true; } -bool GSDevice11::Reset(int w, int h, bool fs) +bool GSDevice11::Reset(int w, int h, int mode) { - if(!__super::Reset(w, h, fs)) + if(!__super::Reset(w, h, mode)) return false; DXGI_SWAP_CHAIN_DESC scd; @@ -541,7 +541,7 @@ void GSDevice11::IASetVertexBuffer(const void* vertices, size_t stride, size_t c m_vertices.vb = NULL; m_vertices.start = 0; m_vertices.count = 0; - m_vertices.limit = max(count * 3 / 2, 10000); + m_vertices.limit = std::max(count * 3 / 2, 10000); } if(m_vertices.vb == NULL) diff --git a/plugins/GSdx/GSDevice11.h b/plugins/GSdx/GSDevice11.h index 2fa406e941..bb3304cc47 100644 --- a/plugins/GSdx/GSDevice11.h +++ b/plugins/GSdx/GSDevice11.h @@ -99,8 +99,7 @@ public: virtual ~GSDevice11(); bool Create(GSWnd* wnd, bool vsync); - bool Reset(int w, int h, bool fs); - + bool Reset(int w, int h, int mode); void Flip(); void BeginScene(); diff --git a/plugins/GSdx/GSDevice7.cpp b/plugins/GSdx/GSDevice7.cpp index 2481fe5c0f..255de0f331 100644 --- a/plugins/GSdx/GSDevice7.cpp +++ b/plugins/GSdx/GSDevice7.cpp @@ -25,6 +25,7 @@ #include "GSDevice7.h" GSDevice7::GSDevice7() + : m_lost(false) { } @@ -75,14 +76,14 @@ bool GSDevice7::Create(GSWnd* wnd, bool vsync) return false; } - Reset(1, 1, false); + Reset(1, 1, Windowed); return true; } -bool GSDevice7::Reset(int w, int h, bool fs) +bool GSDevice7::Reset(int w, int h, int mode) { - if(!__super::Reset(w, h, fs)) + if(!__super::Reset(w, h, mode)) return false; DDSURFACEDESC2 desc; @@ -130,6 +131,8 @@ bool GSDevice7::Reset(int w, int h, bool fs) } } + m_lost = false; + return true; } @@ -139,9 +142,15 @@ void GSDevice7::Present(const GSVector4i& r, int shader) GSVector4i cr = m_wnd->GetClientRect(); - if(m_backbuffer->GetWidth() != cr.width() || m_backbuffer->GetHeight() != cr.height()) + int w = std::max(cr.width(), 1); + int h = std::max(cr.height(), 1); + + if(!m_backbuffer || m_backbuffer->GetWidth() != w || m_backbuffer->GetHeight() != h) { - Reset(cr.width(), cr.height(), false); + if(!Reset(w, h, DontCare)) + { + return; + } } CComPtr backbuffer = *(GSTexture7*)m_backbuffer; @@ -157,7 +166,10 @@ void GSDevice7::Present(const GSVector4i& r, int shader) GSVector4i r2 = r; - hr = backbuffer->Blt(r2, *(GSTexture7*)m_merge, NULL, DDBLT_WAIT, NULL); + if(m_current) + { + hr = backbuffer->Blt(r2, *(GSTexture7*)m_current, NULL, DDBLT_WAIT, NULL); + } // if ClearRenderTarget was implemented the parent class could handle these tasks until this point @@ -174,6 +186,8 @@ void GSDevice7::Present(const GSVector4i& r, int shader) if(hr == DDERR_SURFACELOST) { + m_lost = true; + // TODO HRESULT hr = m_dd->TestCooperativeLevel(); diff --git a/plugins/GSdx/GSDevice7.h b/plugins/GSdx/GSDevice7.h index 3e6f4c37af..43bbf2a5b9 100644 --- a/plugins/GSdx/GSDevice7.h +++ b/plugins/GSdx/GSDevice7.h @@ -29,6 +29,7 @@ class GSDevice7 : public GSDevice private: CComPtr m_dd; CComPtr m_primary; + bool m_lost; GSTexture* Create(int type, int w, int h, int format); @@ -40,7 +41,7 @@ public: virtual ~GSDevice7(); bool Create(GSWnd* wnd, bool vsync); - bool Reset(int w, int h, bool fs); - + bool Reset(int w, int h, int mode); + bool IsLost(bool update) {return m_lost;} void Present(const GSVector4i& r, int shader); }; diff --git a/plugins/GSdx/GSDevice9.cpp b/plugins/GSdx/GSDevice9.cpp index 5aa0363098..99788516bd 100644 --- a/plugins/GSdx/GSDevice9.cpp +++ b/plugins/GSdx/GSDevice9.cpp @@ -42,6 +42,7 @@ GSDevice9::GSDevice9() , m_bf(0xffffffff) , m_rtv(NULL) , m_dsv(NULL) + , m_lost(false) { memset(&m_pp, 0, sizeof(m_pp)); memset(&m_ddcaps, 0, sizeof(m_ddcaps)); @@ -105,9 +106,7 @@ bool GSDevice9::Create(GSWnd* wnd, bool vsync) m_d3d->GetDeviceCaps(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, &m_d3dcaps); - bool fs = theApp.GetConfig("ModeWidth", 0) > 0; - - if(!Reset(1, 1, fs)) return false; + if(!Reset(1, 1, theApp.GetConfig("ModeWidth", 0) > 0 ? Fullscreen : Windowed)) return false; m_dev->Clear(0, NULL, D3DCLEAR_TARGET, 0, 1.0f, 0); @@ -188,35 +187,48 @@ bool GSDevice9::Create(GSWnd* wnd, bool vsync) return true; } -bool GSDevice9::Reset(int w, int h, bool fs) +bool GSDevice9::Reset(int w, int h, int mode) { - if(!__super::Reset(w, h, fs)) + if(!__super::Reset(w, h, mode)) return false; HRESULT hr; if(!m_d3d) return false; - if(m_swapchain && !fs && m_pp.Windowed) + if(mode == DontCare) { - m_swapchain = NULL; + mode = m_pp.Windowed ? Windowed : Fullscreen; + } - m_pp.BackBufferWidth = w; - m_pp.BackBufferHeight = h; + if(!m_lost) + { + if(m_swapchain && mode != Fullscreen && m_pp.Windowed) + { + m_swapchain = NULL; - hr = m_dev->CreateAdditionalSwapChain(&m_pp, &m_swapchain); + m_pp.BackBufferWidth = w; + m_pp.BackBufferHeight = h; - if(FAILED(hr)) return false; + hr = m_dev->CreateAdditionalSwapChain(&m_pp, &m_swapchain); - CComPtr backbuffer; - hr = m_swapchain->GetBackBuffer(0, D3DBACKBUFFER_TYPE_MONO, &backbuffer); - m_backbuffer = new GSTexture9(backbuffer); + if(FAILED(hr)) return false; - return true; + CComPtr backbuffer; + hr = m_swapchain->GetBackBuffer(0, D3DBACKBUFFER_TYPE_MONO, &backbuffer); + m_backbuffer = new GSTexture9(backbuffer); + + return true; + } } m_swapchain = NULL; - + + m_vertices.vb = NULL; + m_vertices.vb_old = NULL; + m_vertices.start = 0; + m_vertices.count = 0; + if(m_vs_cb) _aligned_free(m_vs_cb); if(m_ps_cb) _aligned_free(m_ps_cb); @@ -258,7 +270,7 @@ bool GSDevice9::Reset(int w, int h, bool fs) int mh = theApp.GetConfig("ModeHeight", 0); int mrr = theApp.GetConfig("ModeRefreshRate", 0); - if(fs && mw > 0 && mh > 0 && mrr >= 0) + if(mode == Fullscreen && mw > 0 && mh > 0 && mrr >= 0) { m_pp.Windowed = FALSE; m_pp.BackBufferWidth = mw; @@ -324,27 +336,39 @@ bool GSDevice9::Reset(int w, int h, bool fs) return true; } -bool GSDevice9::IsLost() +bool GSDevice9::IsLost(bool update) { - HRESULT hr = m_dev->TestCooperativeLevel(); - - return hr == D3DERR_DEVICELOST || hr == D3DERR_DEVICENOTRESET; + if(!m_lost || update) + { + HRESULT hr = m_dev->TestCooperativeLevel(); + + m_lost = hr == D3DERR_DEVICELOST || hr == D3DERR_DEVICENOTRESET; + } + + return m_lost; } void GSDevice9::Flip() { m_dev->EndScene(); + HRESULT hr; + if(m_swapchain) { - m_swapchain->Present(NULL, NULL, NULL, NULL, 0); + hr = m_swapchain->Present(NULL, NULL, NULL, NULL, 0); } else { - m_dev->Present(NULL, NULL, NULL, NULL); + hr = m_dev->Present(NULL, NULL, NULL, NULL); } m_dev->BeginScene(); + + if(FAILED(hr)) + { + m_lost = true; + } } void GSDevice9::BeginScene() @@ -663,7 +687,7 @@ void GSDevice9::IASetVertexBuffer(const void* vertices, size_t stride, size_t co m_vertices.vb = NULL; m_vertices.start = 0; m_vertices.count = 0; - m_vertices.limit = max(count * 3 / 2, 10000); + m_vertices.limit = std::max(count * 3 / 2, 10000); } if(m_vertices.vb == NULL) diff --git a/plugins/GSdx/GSDevice9.h b/plugins/GSdx/GSDevice9.h index 741c4c4296..4a34533ae3 100644 --- a/plugins/GSdx/GSDevice9.h +++ b/plugins/GSdx/GSDevice9.h @@ -95,6 +95,7 @@ private: CComPtr m_d3d; CComPtr m_dev; CComPtr m_swapchain; + bool m_lost; struct { @@ -132,9 +133,8 @@ public: virtual ~GSDevice9(); bool Create(GSWnd* wnd, bool vsync); - bool Reset(int w, int h, bool fs); - - bool IsLost(); + bool Reset(int w, int h, int mode); + bool IsLost(bool update); void Flip(); void BeginScene(); diff --git a/plugins/GSdx/GSDeviceNull.cpp b/plugins/GSdx/GSDeviceNull.cpp index a2bca4dd4f..473782856f 100644 --- a/plugins/GSdx/GSDeviceNull.cpp +++ b/plugins/GSdx/GSDeviceNull.cpp @@ -29,14 +29,14 @@ bool GSDeviceNull::Create(GSWnd* wnd, bool vsync) return false; } - Reset(1, 1, false); + Reset(1, 1, Windowed); return true; } -bool GSDeviceNull::Reset(int w, int h, bool fs) +bool GSDeviceNull::Reset(int w, int h, int mode) { - if(!__super::Reset(w, h, fs)) + if(!__super::Reset(w, h, mode)) return false; return true; diff --git a/plugins/GSdx/GSDeviceNull.h b/plugins/GSdx/GSDeviceNull.h index 3ce1c43885..b69841d70a 100644 --- a/plugins/GSdx/GSDeviceNull.h +++ b/plugins/GSdx/GSDeviceNull.h @@ -36,5 +36,5 @@ public: GSDeviceNull() {} bool Create(GSWnd* wnd, bool vsync); - bool Reset(int w, int h, bool fs); + bool Reset(int w, int h, int mode); }; diff --git a/plugins/GSdx/GSDeviceOGL.cpp b/plugins/GSdx/GSDeviceOGL.cpp index fd36ded466..db35e185ee 100644 --- a/plugins/GSdx/GSDeviceOGL.cpp +++ b/plugins/GSdx/GSDeviceOGL.cpp @@ -150,14 +150,14 @@ bool GSDeviceOGL::Create(GSWnd* wnd, bool vsync) */ GSVector4i r = wnd->GetClientRect(); - Reset(r.width(), r.height(), false); + Reset(r.width(), r.height(), Windowed); return true; } -bool GSDeviceOGL::Reset(int w, int h, bool fs) +bool GSDeviceOGL::Reset(int w, int h, int mode) { - if(!__super::Reset(w, h, fs)) + if(!__super::Reset(w, h, mode)) return false; glCullFace(GL_FRONT_AND_BACK); CheckError(); @@ -361,7 +361,7 @@ void GSDeviceOGL::IASetVertexBuffer(const void* vertices, size_t stride, size_t { m_vertices.start = 0; m_vertices.count = 0; - m_vertices.limit = max(count * 3 / 2, 10000); + m_vertices.limit = std::max(count * 3 / 2, 10000); growbuffer = true; } diff --git a/plugins/GSdx/GSDeviceOGL.h b/plugins/GSdx/GSDeviceOGL.h index 850056120c..35080854ad 100644 --- a/plugins/GSdx/GSDeviceOGL.h +++ b/plugins/GSdx/GSDeviceOGL.h @@ -110,8 +110,7 @@ public: virtual ~GSDeviceOGL(); bool Create(GSWnd* wnd, bool vsync); - bool Reset(int w, int h, bool fs); - + bool Reset(int w, int h, int mode); void Present(const GSVector4i& r, int shader); void Flip(); diff --git a/plugins/GSdx/GSDrawScanline.cpp b/plugins/GSdx/GSDrawScanline.cpp index 05cf43b328..96f1ed4371 100644 --- a/plugins/GSdx/GSDrawScanline.cpp +++ b/plugins/GSdx/GSDrawScanline.cpp @@ -48,8 +48,8 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f) m_env.vm = p->vm; m_env.fbr = p->fbo->row; m_env.zbr = p->zbo->row; - m_env.fbc = p->fbo->col; - m_env.zbc = p->zbo->col; + m_env.fbc = p->fbo->col[0]; + m_env.zbc = p->zbo->col[0]; m_env.fzbr = p->fzbo->row; m_env.fzbc = p->fzbo->col; m_env.fm = GSVector4i(p->fm); @@ -116,8 +116,8 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f) m_env.t.mask.u32[0] = 0; break; case CLAMP_REGION_CLAMP: - m_env.t.min.u16[0] = min(context->CLAMP.MINU, tw - 1); - m_env.t.max.u16[0] = min(context->CLAMP.MAXU, tw - 1); + m_env.t.min.u16[0] = std::min(context->CLAMP.MINU, tw - 1); + m_env.t.max.u16[0] = std::min(context->CLAMP.MAXU, tw - 1); m_env.t.mask.u32[0] = 0; break; case CLAMP_REGION_REPEAT: @@ -142,8 +142,8 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f) m_env.t.mask.u32[2] = 0; break; case CLAMP_REGION_CLAMP: - m_env.t.min.u16[4] = min(context->CLAMP.MINV, th - 1); - m_env.t.max.u16[4] = min(context->CLAMP.MAXV, th - 1); // ffx anima summon scene, when the anchor appears (th = 256, maxv > 256) + m_env.t.min.u16[4] = std::min(context->CLAMP.MINV, th - 1); + m_env.t.max.u16[4] = std::min(context->CLAMP.MAXV, th - 1); // ffx anima summon scene, when the anchor appears (th = 256, maxv > 256) m_env.t.mask.u32[2] = 0; break; case CLAMP_REGION_REPEAT: @@ -224,22 +224,22 @@ void GSDrawScanline::DrawSolidRect(const GSVector4i& r, const GSVertexSW& v) { if(m == 0) { - DrawSolidRectT(m_env.zbr, m_env.zbc[0], r, z, m); + DrawSolidRectT(m_env.zbr, m_env.zbc, r, z, m); } else { - DrawSolidRectT(m_env.zbr, m_env.zbc[0], r, z, m); + DrawSolidRectT(m_env.zbr, m_env.zbc, r, z, m); } } else { if(m == 0) { - DrawSolidRectT(m_env.zbr, m_env.zbc[0], r, z, m); + DrawSolidRectT(m_env.zbr, m_env.zbc, r, z, m); } else { - DrawSolidRectT(m_env.zbr, m_env.zbc[0], r, z, m); + DrawSolidRectT(m_env.zbr, m_env.zbc, r, z, m); } } } @@ -259,11 +259,11 @@ void GSDrawScanline::DrawSolidRect(const GSVector4i& r, const GSVertexSW& v) { if(m == 0) { - DrawSolidRectT(m_env.fbr, m_env.fbc[0], r, c, m); + DrawSolidRectT(m_env.fbr, m_env.fbc, r, c, m); } else { - DrawSolidRectT(m_env.fbr, m_env.fbc[0], r, c, m); + DrawSolidRectT(m_env.fbr, m_env.fbc, r, c, m); } } else @@ -272,18 +272,18 @@ void GSDrawScanline::DrawSolidRect(const GSVector4i& r, const GSVertexSW& v) if(m == 0) { - DrawSolidRectT(m_env.fbr, m_env.fbc[0], r, c, m); + DrawSolidRectT(m_env.fbr, m_env.fbc, r, c, m); } else { - DrawSolidRectT(m_env.fbr, m_env.fbc[0], r, c, m); + DrawSolidRectT(m_env.fbr, m_env.fbc, r, c, m); } } } } template -void GSDrawScanline::DrawSolidRectT(const GSVector4i* row, int* col, const GSVector4i& r, uint32 c, uint32 m) +void GSDrawScanline::DrawSolidRectT(const int* row, int* col, const GSVector4i& r, uint32 c, uint32 m) { if(m == 0xffffffff) return; @@ -320,13 +320,13 @@ void GSDrawScanline::DrawSolidRectT(const GSVector4i* row, int* col, const GSVec } template -void GSDrawScanline::FillRect(const GSVector4i* row, int* col, const GSVector4i& r, uint32 c, uint32 m) +void GSDrawScanline::FillRect(const int* row, int* col, const GSVector4i& r, uint32 c, uint32 m) { if(r.x >= r.z) return; for(int y = r.y; y < r.w; y++) { - uint32 base = row[y].x; + uint32 base = row[y]; for(int x = r.x; x < r.z; x++) { @@ -338,13 +338,13 @@ void GSDrawScanline::FillRect(const GSVector4i* row, int* col, const GSVector4i& } template -void GSDrawScanline::FillBlock(const GSVector4i* row, int* col, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m) +void GSDrawScanline::FillBlock(const int* row, int* col, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m) { if(r.x >= r.z) return; for(int y = r.y; y < r.w; y += 8) { - uint32 base = row[y].x; + uint32 base = row[y]; for(int x = r.x; x < r.z; x += 8 * 4 / sizeof(T)) { diff --git a/plugins/GSdx/GSDrawScanline.h b/plugins/GSdx/GSDrawScanline.h index 9f9a1ea3d1..9253f322bd 100644 --- a/plugins/GSdx/GSDrawScanline.h +++ b/plugins/GSdx/GSDrawScanline.h @@ -60,13 +60,13 @@ class GSDrawScanline : public GSAlignedClass<16>, public IDrawScanline void DrawSolidRect(const GSVector4i& r, const GSVertexSW& v); template - void DrawSolidRectT(const GSVector4i* row, int* col, const GSVector4i& r, uint32 c, uint32 m); + void DrawSolidRectT(const int* row, int* col, const GSVector4i& r, uint32 c, uint32 m); template - __forceinline void FillRect(const GSVector4i* row, int* col, const GSVector4i& r, uint32 c, uint32 m); + __forceinline void FillRect(const int* row, int* col, const GSVector4i& r, uint32 c, uint32 m); template - __forceinline void FillBlock(const GSVector4i* row, int* col, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m); + __forceinline void FillBlock(const int* row, int* col, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m); protected: GSState* m_state; diff --git a/plugins/GSdx/GSLocalMemory.cpp b/plugins/GSdx/GSLocalMemory.cpp index 7d5a78fbed..a83db870c6 100644 --- a/plugins/GSdx/GSLocalMemory.cpp +++ b/plugins/GSdx/GSLocalMemory.cpp @@ -28,19 +28,21 @@ #include "GSLocalMemory.h" #define ASSERT_BLOCK(r, w, h) \ - ASSERT((r).width() >= w && (r).height() >= h && !((r).left&(w-1)) && !((r).top&(h-1)) && !((r).right&(w-1)) && !((r).bottom&(h-1))); \ + ASSERT((r).width() >= w && (r).height() >= h && !((r).left & (w - 1)) && !((r).top & (h - 1)) && !((r).right & (w - 1)) && !((r).bottom & (h - 1))); \ -#define FOREACH_BLOCK_START(w, h, bpp, format) \ - const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[format]; \ - uint32 bp = TEX0.TBP0; \ - uint32 bw = TEX0.TBW; \ - int offset = dstpitch * h - r.width() * bpp / 8; \ - for(int y = r.top, ye = r.bottom; y < ye; y += h, dst += offset) \ - { ASSERT_BLOCK(r, w, h); \ - uint32 base = psm.bn(0, y, bp, bw); \ - for(int x = r.left, xe = r.right; x < xe; x += w, dst += w * bpp / 8) \ +#define FOREACH_BLOCK_START(r, w, h, bpp, psm) \ + ASSERT_BLOCK(r, w, h); \ + const GSLocalMemory::BlockOffset* RESTRICT _bo = GetBlockOffset(TEX0.TBP0, TEX0.TBW, psm); \ + GSVector4i _r = r >> 3; \ + uint8* _dst = dst - _r.left * bpp; \ + int _offset = dstpitch * h; \ + for(int y = _r.top; y < _r.bottom; y += h >> 3, _dst += _offset) \ + { \ + uint32 _base = _bo->row[y]; \ + for(int x = _r.left; x < _r.right; x += w >> 3) \ { \ - const uint8* src = BlockPtr(base + psm.blockOffset[x >> 3]); \ + const uint8* src = BlockPtr(_base + _bo->col[x]); \ + uint8* dst = &_dst[x * bpp]; \ #define FOREACH_BLOCK_END }} @@ -463,37 +465,65 @@ GSLocalMemory::~GSLocalMemory() { VirtualFree(m_vm8, 0, MEM_RELEASE); - for(hash_map::iterator i = m_omap.begin(); i != m_omap.end(); i++) + for(hash_map::iterator i = m_bomap.begin(); i != m_bomap.end(); i++) { - Offset* o = i->second; - - _aligned_free(o->col[0]); - - _aligned_free(o); + _aligned_free(i->second); } - for(hash_map::iterator i = m_o4map.begin(); i != m_o4map.end(); i++) + for(hash_map::iterator i = m_pomap.begin(); i != m_pomap.end(); i++) + { + _aligned_free(i->second); + } + + for(hash_map::iterator i = m_po4map.begin(); i != m_po4map.end(); i++) { _aligned_free(i->second); } } -GSLocalMemory::Offset* GSLocalMemory::GetOffset(uint32 bp, uint32 bw, uint32 psm) +GSLocalMemory::BlockOffset* GSLocalMemory::GetBlockOffset(uint32 bp, uint32 bw, uint32 psm) { - if(bw == 0) {ASSERT(0); return NULL;} - - ASSERT(m_psm[psm].bpp > 8); // only for 16/24/32/8h/4hh/4hl formats where all columns are the same - uint32 hash = bp | (bw << 14) | (psm << 20); - hash_map::iterator i = m_omap.find(hash); + hash_map::iterator i = m_bomap.find(hash); - if(i != m_omap.end()) + if(i != m_bomap.end()) { return i->second; } - Offset* o = (Offset*)_aligned_malloc(sizeof(Offset), 16); + BlockOffset* o = (BlockOffset*)_aligned_malloc(sizeof(BlockOffset), 16); + + o->hash = hash; + + pixelAddress bn = m_psm[psm].bn; + + for(int i = 0; i < 256; i++) + { + o->row[i] = (int)bn(0, i << 3, bp, bw); + } + + o->col = m_psm[psm].blockOffset; + + m_bomap[hash] = o; + + return o; +} + +GSLocalMemory::PixelOffset* GSLocalMemory::GetPixelOffset(uint32 bp, uint32 bw, uint32 psm) +{ + if(bw == 0) {ASSERT(0); return NULL;} + + uint32 hash = bp | (bw << 14) | (psm << 20); + + hash_map::iterator i = m_pomap.find(hash); + + if(i != m_pomap.end()) + { + return i->second; + } + + PixelOffset* o = (PixelOffset*)_aligned_malloc(sizeof(PixelOffset), 16); o->hash = hash; @@ -501,24 +531,20 @@ GSLocalMemory::Offset* GSLocalMemory::GetOffset(uint32 bp, uint32 bw, uint32 psm for(int i = 0; i < 2048; i++) { - o->row[i] = GSVector4i((int)pa(0, i, bp, bw)); + o->row[i] = (int)pa(0, i, bp, bw); } - int* p = (int*)_aligned_malloc(sizeof(int) * (2048 + 3) * 4, 16); - - for(int i = 0; i < 4; i++) + for(int i = 0; i < 8; i++) { - o->col[i] = &p[2048 * i + ((4 - (i & 3)) & 3)]; - - memcpy(o->col[i], m_psm[psm].rowOffset[0], sizeof(int) * 2048); + o->col[i] = m_psm[psm].rowOffset[i]; } - m_omap[hash] = o; + m_pomap[hash] = o; return o; } -GSLocalMemory::Offset4* GSLocalMemory::GetOffset4(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF) +GSLocalMemory::PixelOffset4* GSLocalMemory::GetPixelOffset4(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF) { uint32 fbp = FRAME.Block(); uint32 zbp = ZBUF.Block(); @@ -535,14 +561,14 @@ GSLocalMemory::Offset4* GSLocalMemory::GetOffset4(const GIFRegFRAME& FRAME, cons uint32 hash = (FRAME.FBP << 0) | (ZBUF.ZBP << 9) | (bw << 18) | (fpsm_hash << 24) | (zpsm_hash << 28); - hash_map::iterator i = m_o4map.find(hash); + hash_map::iterator i = m_po4map.find(hash); - if(i != m_o4map.end()) + if(i != m_po4map.end()) { return i->second; } - Offset4* o = (Offset4*)_aligned_malloc(sizeof(Offset4), 16); + PixelOffset4* o = (PixelOffset4*)_aligned_malloc(sizeof(PixelOffset4), 16); o->hash = hash; @@ -564,7 +590,7 @@ GSLocalMemory::Offset4* GSLocalMemory::GetOffset4(const GIFRegFRAME& FRAME, cons o->col[i].y = m_psm[zpsm].rowOffset[0][i * 4] << zs; } - m_o4map[hash] = o; + m_po4map[hash] = o; return o; } @@ -1436,20 +1462,20 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB /////////////////// -void GSLocalMemory::ReadTexture32(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const +void GSLocalMemory::ReadTexture32(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) { - FOREACH_BLOCK_START(8, 8, 32, PSM_PSMCT32) + FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMCT32) { ReadBlock32(src, dst, dstpitch); } FOREACH_BLOCK_END } -void GSLocalMemory::ReadTexture24(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const +void GSLocalMemory::ReadTexture24(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) { if(TEXA.AEM) { - FOREACH_BLOCK_START(8, 8, 32, PSM_PSMCT24) + FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMCT24) { ReadAndExpandBlock24(src, dst, dstpitch, TEXA); } @@ -1457,7 +1483,7 @@ void GSLocalMemory::ReadTexture24(const GSVector4i& r, uint8* dst, int dstpitch, } else { - FOREACH_BLOCK_START(8, 8, 32, PSM_PSMCT24) + FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMCT24) { ReadAndExpandBlock24(src, dst, dstpitch, TEXA); } @@ -1465,11 +1491,11 @@ void GSLocalMemory::ReadTexture24(const GSVector4i& r, uint8* dst, int dstpitch, } } -void GSLocalMemory::ReadTexture16(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const +void GSLocalMemory::ReadTexture16(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) { __declspec(align(16)) uint16 block[16 * 8]; - FOREACH_BLOCK_START(16, 8, 32, PSM_PSMCT16) + FOREACH_BLOCK_START(r, 16, 8, 32, PSM_PSMCT16) { ReadBlock16(src, (uint8*)block, sizeof(block) / 8); @@ -1478,11 +1504,11 @@ void GSLocalMemory::ReadTexture16(const GSVector4i& r, uint8* dst, int dstpitch, FOREACH_BLOCK_END } -void GSLocalMemory::ReadTexture16S(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const +void GSLocalMemory::ReadTexture16S(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) { __declspec(align(16)) uint16 block[16 * 8]; - FOREACH_BLOCK_START(16, 8, 32, PSM_PSMCT16S) + FOREACH_BLOCK_START(r, 16, 8, 32, PSM_PSMCT16S) { ReadBlock16(src, (uint8*)block, sizeof(block) / 8); @@ -1491,75 +1517,75 @@ void GSLocalMemory::ReadTexture16S(const GSVector4i& r, uint8* dst, int dstpitch FOREACH_BLOCK_END } -void GSLocalMemory::ReadTexture8(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const +void GSLocalMemory::ReadTexture8(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) { const uint32* pal = m_clut; - FOREACH_BLOCK_START(16, 16, 32, PSM_PSMT8) + FOREACH_BLOCK_START(r, 16, 16, 32, PSM_PSMT8) { ReadAndExpandBlock8_32(src, dst, dstpitch, pal); } FOREACH_BLOCK_END } -void GSLocalMemory::ReadTexture4(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const +void GSLocalMemory::ReadTexture4(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) { const uint64* pal = m_clut; - FOREACH_BLOCK_START(32, 16, 32, PSM_PSMT4) + FOREACH_BLOCK_START(r, 32, 16, 32, PSM_PSMT4) { ReadAndExpandBlock4_32(src, dst, dstpitch, pal); } FOREACH_BLOCK_END } -void GSLocalMemory::ReadTexture8H(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const +void GSLocalMemory::ReadTexture8H(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) { const uint32* pal = m_clut; - FOREACH_BLOCK_START(8, 8, 32, PSM_PSMT8H) + FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMT8H) { ReadAndExpandBlock8H_32(src, dst, dstpitch, pal); } FOREACH_BLOCK_END } -void GSLocalMemory::ReadTexture4HL(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const +void GSLocalMemory::ReadTexture4HL(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) { const uint32* pal = m_clut; - FOREACH_BLOCK_START(8, 8, 32, PSM_PSMT4HL) + FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMT4HL) { ReadAndExpandBlock4HL_32(src, dst, dstpitch, pal); } FOREACH_BLOCK_END } -void GSLocalMemory::ReadTexture4HH(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const +void GSLocalMemory::ReadTexture4HH(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) { const uint32* pal = m_clut; - FOREACH_BLOCK_START(8, 8, 32, PSM_PSMT4HH) + FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMT4HH) { ReadAndExpandBlock4HH_32(src, dst, dstpitch, pal); } FOREACH_BLOCK_END } -void GSLocalMemory::ReadTexture32Z(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const +void GSLocalMemory::ReadTexture32Z(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) { - FOREACH_BLOCK_START(8, 8, 32, PSM_PSMZ32) + FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMZ32) { ReadBlock32(src, dst, dstpitch); } FOREACH_BLOCK_END } -void GSLocalMemory::ReadTexture24Z(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const +void GSLocalMemory::ReadTexture24Z(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) { if(TEXA.AEM) { - FOREACH_BLOCK_START(8, 8, 32, PSM_PSMZ24) + FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMZ24) { ReadAndExpandBlock24(src, dst, dstpitch, TEXA); } @@ -1567,7 +1593,7 @@ void GSLocalMemory::ReadTexture24Z(const GSVector4i& r, uint8* dst, int dstpitch } else { - FOREACH_BLOCK_START(8, 8, 32, PSM_PSMZ24) + FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMZ24) { ReadAndExpandBlock24(src, dst, dstpitch, TEXA); } @@ -1575,11 +1601,11 @@ void GSLocalMemory::ReadTexture24Z(const GSVector4i& r, uint8* dst, int dstpitch } } -void GSLocalMemory::ReadTexture16Z(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const +void GSLocalMemory::ReadTexture16Z(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) { __declspec(align(16)) uint16 block[16 * 8]; - FOREACH_BLOCK_START(16, 8, 32, PSM_PSMZ16) + FOREACH_BLOCK_START(r, 16, 8, 32, PSM_PSMZ16) { ReadBlock16(src, (uint8*)block, sizeof(block) / 8); @@ -1588,11 +1614,11 @@ void GSLocalMemory::ReadTexture16Z(const GSVector4i& r, uint8* dst, int dstpitch FOREACH_BLOCK_END } -void GSLocalMemory::ReadTexture16SZ(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const +void GSLocalMemory::ReadTexture16SZ(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) { __declspec(align(16)) uint16 block[16 * 8]; - FOREACH_BLOCK_START(16, 8, 32, PSM_PSMZ16S) + FOREACH_BLOCK_START(r, 16, 8, 32, PSM_PSMZ16S) { ReadBlock16(src, (uint8*)block, sizeof(block) / 8); @@ -1737,31 +1763,31 @@ void GSLocalMemory::ReadTexture(const GSVector4i& r, uint8* dst, int dstpitch, c } /////////////////// -void GSLocalMemory::ReadTexture16NP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const +void GSLocalMemory::ReadTexture16NP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) { - FOREACH_BLOCK_START(16, 8, 16, PSM_PSMCT16) + FOREACH_BLOCK_START(r, 16, 8, 16, PSM_PSMCT16) { ReadBlock16(src, dst, dstpitch); } FOREACH_BLOCK_END } -void GSLocalMemory::ReadTexture16SNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const +void GSLocalMemory::ReadTexture16SNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) { - FOREACH_BLOCK_START(16, 8, 16, PSM_PSMCT16S) + FOREACH_BLOCK_START(r, 16, 8, 16, PSM_PSMCT16S) { ReadBlock16(src, dst, dstpitch); } FOREACH_BLOCK_END } -void GSLocalMemory::ReadTexture8NP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const +void GSLocalMemory::ReadTexture8NP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) { const uint32* pal = m_clut; if(TEX0.CPSM == PSM_PSMCT32 || TEX0.CPSM == PSM_PSMCT24) { - FOREACH_BLOCK_START(16, 16, 32, PSM_PSMT8) + FOREACH_BLOCK_START(r, 16, 16, 32, PSM_PSMT8) { ReadAndExpandBlock8_32(src, dst, dstpitch, pal); } @@ -1773,7 +1799,7 @@ void GSLocalMemory::ReadTexture8NP(const GSVector4i& r, uint8* dst, int dstpitch __declspec(align(16)) uint8 block[16 * 16]; - FOREACH_BLOCK_START(16, 16, 16, PSM_PSMT8) + FOREACH_BLOCK_START(r, 16, 16, 16, PSM_PSMT8) { ReadBlock8(src, (uint8*)block, sizeof(block) / 16); @@ -1783,13 +1809,13 @@ void GSLocalMemory::ReadTexture8NP(const GSVector4i& r, uint8* dst, int dstpitch } } -void GSLocalMemory::ReadTexture4NP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const +void GSLocalMemory::ReadTexture4NP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) { const uint64* pal = m_clut; if(TEX0.CPSM == PSM_PSMCT32 || TEX0.CPSM == PSM_PSMCT24) { - FOREACH_BLOCK_START(32, 16, 32, PSM_PSMT4) + FOREACH_BLOCK_START(r, 32, 16, 32, PSM_PSMT4) { ReadAndExpandBlock4_32(src, dst, dstpitch, pal); } @@ -1801,7 +1827,7 @@ void GSLocalMemory::ReadTexture4NP(const GSVector4i& r, uint8* dst, int dstpitch __declspec(align(16)) uint8 block[(32 / 2) * 16]; - FOREACH_BLOCK_START(32, 16, 16, PSM_PSMT4) + FOREACH_BLOCK_START(r, 32, 16, 16, PSM_PSMT4) { ReadBlock4(src, (uint8*)block, sizeof(block) / 16); @@ -1811,13 +1837,13 @@ void GSLocalMemory::ReadTexture4NP(const GSVector4i& r, uint8* dst, int dstpitch } } -void GSLocalMemory::ReadTexture8HNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const +void GSLocalMemory::ReadTexture8HNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) { const uint32* pal = m_clut; if(TEX0.CPSM == PSM_PSMCT32 || TEX0.CPSM == PSM_PSMCT24) { - FOREACH_BLOCK_START(8, 8, 32, PSM_PSMT8H) + FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMT8H) { ReadAndExpandBlock8H_32(src, dst, dstpitch, pal); } @@ -1829,7 +1855,7 @@ void GSLocalMemory::ReadTexture8HNP(const GSVector4i& r, uint8* dst, int dstpitc __declspec(align(16)) uint32 block[8 * 8]; - FOREACH_BLOCK_START(8, 8, 16, PSM_PSMT8H) + FOREACH_BLOCK_START(r, 8, 8, 16, PSM_PSMT8H) { ReadBlock32(src, (uint8*)block, sizeof(block) / 8); @@ -1839,13 +1865,13 @@ void GSLocalMemory::ReadTexture8HNP(const GSVector4i& r, uint8* dst, int dstpitc } } -void GSLocalMemory::ReadTexture4HLNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const +void GSLocalMemory::ReadTexture4HLNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) { const uint32* pal = m_clut; if(TEX0.CPSM == PSM_PSMCT32 || TEX0.CPSM == PSM_PSMCT24) { - FOREACH_BLOCK_START(8, 8, 32, PSM_PSMT4HL) + FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMT4HL) { ReadAndExpandBlock4HL_32(src, dst, dstpitch, pal); } @@ -1857,7 +1883,7 @@ void GSLocalMemory::ReadTexture4HLNP(const GSVector4i& r, uint8* dst, int dstpit __declspec(align(16)) uint32 block[8 * 8]; - FOREACH_BLOCK_START(8, 8, 16, PSM_PSMT4HL) + FOREACH_BLOCK_START(r, 8, 8, 16, PSM_PSMT4HL) { ReadBlock32(src, (uint8*)block, sizeof(block) / 8); @@ -1867,13 +1893,13 @@ void GSLocalMemory::ReadTexture4HLNP(const GSVector4i& r, uint8* dst, int dstpit } } -void GSLocalMemory::ReadTexture4HHNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const +void GSLocalMemory::ReadTexture4HHNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) { const uint32* pal = m_clut; if(TEX0.CPSM == PSM_PSMCT32 || TEX0.CPSM == PSM_PSMCT24) { - FOREACH_BLOCK_START(8, 8, 32, PSM_PSMT4HH) + FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMT4HH) { ReadAndExpandBlock4HH_32(src, dst, dstpitch, pal); } @@ -1885,7 +1911,7 @@ void GSLocalMemory::ReadTexture4HHNP(const GSVector4i& r, uint8* dst, int dstpit __declspec(align(16)) uint32 block[8 * 8]; - FOREACH_BLOCK_START(8, 8, 16, PSM_PSMT4HH) + FOREACH_BLOCK_START(r, 8, 8, 16, PSM_PSMT4HH) { ReadBlock32(src, (uint8*)block, sizeof(block) / 8); @@ -1895,18 +1921,18 @@ void GSLocalMemory::ReadTexture4HHNP(const GSVector4i& r, uint8* dst, int dstpit } } -void GSLocalMemory::ReadTexture16ZNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const +void GSLocalMemory::ReadTexture16ZNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) { - FOREACH_BLOCK_START(16, 8, 16, PSM_PSMZ16) + FOREACH_BLOCK_START(r, 16, 8, 16, PSM_PSMZ16) { ReadBlock16(src, dst, dstpitch); } FOREACH_BLOCK_END } -void GSLocalMemory::ReadTexture16SZNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const +void GSLocalMemory::ReadTexture16SZNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) { - FOREACH_BLOCK_START(16, 8, 16, PSM_PSMZ16S) + FOREACH_BLOCK_START(r, 16, 8, 16, PSM_PSMZ16S) { ReadBlock16(src, dst, dstpitch); } @@ -1959,45 +1985,45 @@ void GSLocalMemory::ReadTextureNP(const GSVector4i& r, uint8* dst, int dstpitch, // 32/8 -void GSLocalMemory::ReadTexture8P(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const +void GSLocalMemory::ReadTexture8P(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) { - FOREACH_BLOCK_START(16, 16, 8, PSM_PSMT8) + FOREACH_BLOCK_START(r, 16, 16, 8, PSM_PSMT8) { ReadBlock8(src, dst, dstpitch); } FOREACH_BLOCK_END } -void GSLocalMemory::ReadTexture4P(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const +void GSLocalMemory::ReadTexture4P(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) { - FOREACH_BLOCK_START(32, 16, 8, PSM_PSMT4) + FOREACH_BLOCK_START(r, 32, 16, 8, PSM_PSMT4) { ReadBlock4P(src, dst, dstpitch); } FOREACH_BLOCK_END } -void GSLocalMemory::ReadTexture8HP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const +void GSLocalMemory::ReadTexture8HP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) { - FOREACH_BLOCK_START(8, 8, 8, PSM_PSMT8H) + FOREACH_BLOCK_START(r, 8, 8, 8, PSM_PSMT8H) { ReadBlock8HP(src, dst, dstpitch); } FOREACH_BLOCK_END } -void GSLocalMemory::ReadTexture4HLP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const +void GSLocalMemory::ReadTexture4HLP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) { - FOREACH_BLOCK_START(8, 8, 8, PSM_PSMT4HL) + FOREACH_BLOCK_START(r, 8, 8, 8, PSM_PSMT4HL) { ReadBlock4HLP(src, dst, dstpitch); } FOREACH_BLOCK_END } -void GSLocalMemory::ReadTexture4HHP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const +void GSLocalMemory::ReadTexture4HHP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) { - FOREACH_BLOCK_START(8, 8, 8, PSM_PSMT4HH) + FOREACH_BLOCK_START(r, 8, 8, 8, PSM_PSMT4HH) { ReadBlock4HHP(src, dst, dstpitch); } diff --git a/plugins/GSdx/GSLocalMemory.h b/plugins/GSdx/GSLocalMemory.h index a09e972c8d..8bea5fa60f 100644 --- a/plugins/GSdx/GSLocalMemory.h +++ b/plugins/GSdx/GSLocalMemory.h @@ -43,7 +43,7 @@ public: typedef uint32 (GSLocalMemory::*readTexelAddr)(uint32 addr, const GIFRegTEXA& TEXA) const; typedef void (GSLocalMemory::*writeImage)(int& tx, int& ty, uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG); typedef void (GSLocalMemory::*readImage)(int& tx, int& ty, uint8* dst, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) const; - typedef void (GSLocalMemory::*readTexture)(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; + typedef void (GSLocalMemory::*readTexture)(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA); typedef void (GSLocalMemory::*readTextureBlock)(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const; typedef union @@ -81,14 +81,21 @@ public: GSClut m_clut; - struct Offset + struct BlockOffset { - GSVector4i row[2048]; // 0 | 0 | 0 | 0 - int* col[4]; // x | x+1 | x+2 | x+3 + int row[256]; // yn (n = 0 8 16 ...) + int* col; // blockOffset* uint32 hash; }; - struct Offset4 + struct PixelOffset + { + int row[2048]; // yn (n = 0 1 2 ...) + int* col[8]; // rowOffset* + uint32 hash; + }; + + struct PixelOffset4 { // 16 bit offsets (m_vm16[...]) @@ -141,15 +148,17 @@ protected: // - hash_map m_omap; - hash_map m_o4map; + hash_map m_bomap; + hash_map m_pomap; + hash_map m_po4map; public: GSLocalMemory(); virtual ~GSLocalMemory(); - Offset* GetOffset(uint32 bp, uint32 bw, uint32 psm); - Offset4* GetOffset4(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF); + BlockOffset* GetBlockOffset(uint32 bp, uint32 bw, uint32 psm); + PixelOffset* GetPixelOffset(uint32 bp, uint32 bw, uint32 psm); + PixelOffset4* GetPixelOffset4(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF); // address @@ -781,19 +790,19 @@ public: // * => 32 - void ReadTexture32(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; - void ReadTexture24(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; - void ReadTexture16(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; - void ReadTexture16S(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; - void ReadTexture8(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; - void ReadTexture4(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; - void ReadTexture8H(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; - void ReadTexture4HL(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; - void ReadTexture4HH(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; - void ReadTexture32Z(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; - void ReadTexture24Z(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; - void ReadTexture16Z(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; - void ReadTexture16SZ(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; + void ReadTexture32(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA); + void ReadTexture24(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA); + void ReadTexture16(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA); + void ReadTexture16S(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA); + void ReadTexture8(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA); + void ReadTexture4(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA); + void ReadTexture8H(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA); + void ReadTexture4HL(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA); + void ReadTexture4HH(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA); + void ReadTexture32Z(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA); + void ReadTexture24Z(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA); + void ReadTexture16Z(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA); + void ReadTexture16SZ(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA); void ReadTexture(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA); @@ -813,25 +822,25 @@ public: // * => 32/16 - void ReadTexture16NP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; - void ReadTexture16SNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; - void ReadTexture8NP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; - void ReadTexture4NP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; - void ReadTexture8HNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; - void ReadTexture4HLNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; - void ReadTexture4HHNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; - void ReadTexture16ZNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; - void ReadTexture16SZNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; + void ReadTexture16NP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA); + void ReadTexture16SNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA); + void ReadTexture8NP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA); + void ReadTexture4NP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA); + void ReadTexture8HNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA); + void ReadTexture4HLNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA); + void ReadTexture4HHNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA); + void ReadTexture16ZNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA); + void ReadTexture16SZNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA); void ReadTextureNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA); // pal ? 8 : 32 - void ReadTexture8P(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; - void ReadTexture4P(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; - void ReadTexture8HP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; - void ReadTexture4HLP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; - void ReadTexture4HHP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; + void ReadTexture8P(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA); + void ReadTexture4P(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA); + void ReadTexture8HP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA); + void ReadTexture4HLP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA); + void ReadTexture4HHP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA); void ReadTextureBlock8P(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const; void ReadTextureBlock4P(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const; diff --git a/plugins/GSdx/GSRasterizer.cpp b/plugins/GSdx/GSRasterizer.cpp index abd730281a..de1d0e5e10 100644 --- a/plugins/GSdx/GSRasterizer.cpp +++ b/plugins/GSdx/GSRasterizer.cpp @@ -290,8 +290,8 @@ void GSRasterizer::DrawTriangleTop(GSVertexSW* v, const GSVector4i& scissor) GSVector4 tb = l.p.upl(v[2].p).ceil(); - GSVector4 tbmax = tb.maxv(fscissor.yyyy()); - GSVector4 tbmin = tb.minv(fscissor.wwww()); + GSVector4 tbmax = tb.max(fscissor.yyyy()); + GSVector4 tbmin = tb.min(fscissor.wwww()); GSVector4i tbi = GSVector4i(tbmax.zzww(tbmin)); @@ -342,8 +342,8 @@ void GSRasterizer::DrawTriangleBottom(GSVertexSW* v, const GSVector4i& scissor) GSVector4 tb = l.p.upl(v[2].p).ceil(); - GSVector4 tbmax = tb.maxv(fscissor.yyyy()); - GSVector4 tbmin = tb.minv(fscissor.wwww()); + GSVector4 tbmax = tb.max(fscissor.yyyy()); + GSVector4 tbmin = tb.min(fscissor.wwww()); GSVector4i tbi = GSVector4i(tbmax.zzww(tbmin)); @@ -398,8 +398,8 @@ void GSRasterizer::DrawTriangleTopBottom(GSVertexSW* v, const GSVector4i& scisso GSVector4 tb = v[0].p.upl(v[1].p).zwzw(v[1].p.upl(v[2].p)).ceil(); - GSVector4 tbmax = tb.maxv(fscissor.yyyy()); - GSVector4 tbmin = tb.minv(fscissor.wwww()); + GSVector4 tbmax = tb.max(fscissor.yyyy()); + GSVector4 tbmin = tb.min(fscissor.wwww()); GSVector4i tbi = GSVector4i(tbmax.xzyw(tbmin)); @@ -462,8 +462,8 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const { GSVector4 lr = l.p.xyxy(r).ceil(); - GSVector4 lrmax = lr.maxv(fscissor.xxxx()); - GSVector4 lrmin = lr.minv(fscissor.zzzz()); + GSVector4 lrmax = lr.max(fscissor.xxxx()); + GSVector4 lrmin = lr.min(fscissor.zzzz()); GSVector4i lri = GSVector4i(lrmax.xxzz(lrmin)); @@ -503,8 +503,8 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const { GSVector4 lr = l.p.ceil(); - GSVector4 lrmax = lr.maxv(fscissor.xxxx()); - GSVector4 lrmin = lr.minv(fscissor.zzzz()); + GSVector4 lrmax = lr.max(fscissor.xxxx()); + GSVector4 lrmin = lr.min(fscissor.zzzz()); GSVector4i lri = GSVector4i(lrmax.xxyy(lrmin)); @@ -613,8 +613,8 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS if(orientation) { - GSVector4 tbmax = lrtb.maxv(fscissor.yyyy()); - GSVector4 tbmin = lrtb.minv(fscissor.wwww()); + GSVector4 tbmax = lrtb.max(fscissor.yyyy()); + GSVector4 tbmin = lrtb.min(fscissor.wwww()); GSVector4i tbi = GSVector4i(tbmax.zwzw(tbmin)); @@ -711,8 +711,8 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS } else { - GSVector4 lrmax = lrtb.maxv(fscissor.xxxx()); - GSVector4 lrmin = lrtb.minv(fscissor.zzzz()); + GSVector4 lrmax = lrtb.max(fscissor.xxxx()); + GSVector4 lrmin = lrtb.min(fscissor.zzzz()); GSVector4i lri = GSVector4i(lrmax.xyxy(lrmin)); diff --git a/plugins/GSdx/GSRenderer.cpp b/plugins/GSdx/GSRenderer.cpp index b83f9cdcf9..f442333eb7 100644 --- a/plugins/GSdx/GSRenderer.cpp +++ b/plugins/GSdx/GSRenderer.cpp @@ -259,9 +259,17 @@ void GSRenderer::VSync(int field) Flush(); - field = field ? 1 : 0; - - if(!Merge(field)) return; + if(!m_dev->IsLost(true)) + { + if(!Merge(field ? 1 : 0)) + { + return; + } + } + else + { + ResetDevice(); + } // osd @@ -311,11 +319,6 @@ void GSRenderer::VSync(int field) // present - if(m_dev->IsLost()) - { - ResetDevice(); - } - m_dev->Present(m_wnd.GetClientRect().fit(m_aspectratio), m_shader); // snapshot diff --git a/plugins/GSdx/GSRenderer.h b/plugins/GSdx/GSRenderer.h index 1817ce0786..e52e16d357 100644 --- a/plugins/GSdx/GSRenderer.h +++ b/plugins/GSdx/GSRenderer.h @@ -127,7 +127,10 @@ protected: { // FIXME: berserk fpsm = 27 (8H) - Draw(); + if(!m_dev->IsLost()) + { + Draw(); + } m_perfmon.Put(GSPerfMon::Draw, 1); } diff --git a/plugins/GSdx/GSRendererDX9.cpp b/plugins/GSdx/GSRendererDX9.cpp index c6e4be5a4f..b32a51c100 100644 --- a/plugins/GSdx/GSRendererDX9.cpp +++ b/plugins/GSdx/GSRendererDX9.cpp @@ -122,14 +122,14 @@ void GSRendererDX9::VertexKick(bool skip) case GS_LINELIST: case GS_LINESTRIP: case GS_SPRITE: - pmin = v[0].p.minv(v[1].p); - pmax = v[0].p.maxv(v[1].p); + pmin = v[0].p.min(v[1].p); + pmax = v[0].p.max(v[1].p); break; case GS_TRIANGLELIST: case GS_TRIANGLESTRIP: case GS_TRIANGLEFAN: - pmin = v[0].p.minv(v[1].p).minv(v[2].p); - pmax = v[0].p.maxv(v[1].p).maxv(v[2].p); + pmin = v[0].p.min(v[1].p).min(v[2].p); + pmax = v[0].p.max(v[1].p).max(v[2].p); break; } diff --git a/plugins/GSdx/GSRendererSW.cpp b/plugins/GSdx/GSRendererSW.cpp index 63bee1db97..f525192b49 100644 --- a/plugins/GSdx/GSRendererSW.cpp +++ b/plugins/GSdx/GSRendererSW.cpp @@ -87,51 +87,34 @@ GSTexture* GSRendererSW::GetOutput(int i) TEX0.TBW = DISPFB.FBW; TEX0.PSM = DISPFB.PSM; - GSVector4i r(0, 0, TEX0.TBW * 64, GetFrameRect(i).bottom); + int w = TEX0.TBW * 64; + int h = GetFrameRect(i).bottom; // TODO: round up bottom - int w = r.width(); - int h = r.height(); - - if(m_texture[i]) + if(m_dev->ResizeTexture(&m_texture[i], w, h)) { - if(m_texture[i]->GetWidth() != w || m_texture[i]->GetHeight() != h) + // TODO + static uint8* buff = (uint8*)_aligned_malloc(1024 * 1024 * 4, 16); + static int pitch = 1024 * 4; + + GSVector4i r(0, 0, w, h); + + m_mem.ReadTexture(r, buff, pitch, TEX0, m_env.TEXA); + + m_texture[i]->Update(r, buff, pitch); + + if(s_dump) { - delete m_texture[i]; + if(s_save) + { + m_texture[i]->Save(format("c:\\temp1\\_%05d_f%I64d_fr%d_%05x_%d.bmp", s_n, m_perfmon.GetFrame(), i, (int)TEX0.TBP0, (int)TEX0.PSM)); + } - m_texture[i] = NULL; + s_n++; } } - if(!m_texture[i]) - { - m_texture[i] = m_dev->CreateTexture(w, h); - - if(!m_texture[i]) - { - return NULL; - } - } - - // TODO - static uint8* buff = (uint8*)_aligned_malloc(1024 * 1024 * 4, 16); - static int pitch = 1024 * 4; - - m_mem.ReadTexture(r, buff, pitch, TEX0, m_env.TEXA); - - m_texture[i]->Update(r, buff, pitch); - - if(s_dump) - { - if(s_save) - { - m_texture[i]->Save(format("c:\\temp1\\_%05d_f%I64d_fr%d_%05x_%d.bmp", s_n, m_perfmon.GetFrame(), i, (int)TEX0.TBP0, (int)TEX0.PSM)); - } - - s_n++; - } - return m_texture[i]; } @@ -273,9 +256,9 @@ void GSRendererSW::GetScanlineParam(GSScanlineParam& p, GS_PRIM_CLASS primclass) p.vm = m_mem.m_vm8; - p.fbo = m_mem.GetOffset(context->FRAME.Block(), context->FRAME.FBW, context->FRAME.PSM); - p.zbo = m_mem.GetOffset(context->ZBUF.Block(), context->FRAME.FBW, context->ZBUF.PSM); - p.fzbo = m_mem.GetOffset4(context->FRAME, context->ZBUF); + p.fbo = m_mem.GetPixelOffset(context->FRAME.Block(), context->FRAME.FBW, context->FRAME.PSM); + p.zbo = m_mem.GetPixelOffset(context->ZBUF.Block(), context->FRAME.FBW, context->ZBUF.PSM); + p.fzbo = m_mem.GetPixelOffset4(context->FRAME, context->ZBUF); p.sel.key = 0; @@ -517,14 +500,14 @@ if(!m_dump) case GS_LINELIST: case GS_LINESTRIP: case GS_SPRITE: - pmin = v[0].p.minv(v[1].p); - pmax = v[0].p.maxv(v[1].p); + pmin = v[0].p.min(v[1].p); + pmax = v[0].p.max(v[1].p); break; case GS_TRIANGLELIST: case GS_TRIANGLESTRIP: case GS_TRIANGLEFAN: - pmin = v[0].p.minv(v[1].p).minv(v[2].p); - pmax = v[0].p.maxv(v[1].p).maxv(v[2].p); + pmin = v[0].p.min(v[1].p).min(v[2].p); + pmax = v[0].p.max(v[1].p).max(v[2].p); break; } diff --git a/plugins/GSdx/GSScanlineEnvironment.h b/plugins/GSdx/GSScanlineEnvironment.h index b7ee1cdb9a..bbe30b8eda 100644 --- a/plugins/GSdx/GSScanlineEnvironment.h +++ b/plugins/GSdx/GSScanlineEnvironment.h @@ -108,9 +108,9 @@ __declspec(align(16)) struct GSScanlineParam const uint32* clut; uint32 tw; - GSLocalMemory::Offset* fbo; - GSLocalMemory::Offset* zbo; - GSLocalMemory::Offset4* fzbo; + GSLocalMemory::PixelOffset* fbo; + GSLocalMemory::PixelOffset* zbo; + GSLocalMemory::PixelOffset4* fzbo; uint32 fm, zm; }; @@ -122,10 +122,10 @@ __declspec(align(16)) struct GSScanlineEnvironment const uint32* clut; uint32 tw; - GSVector4i* fbr; - GSVector4i* zbr; - int** fbc; - int** zbc; + int* fbr; + int* zbr; + int* fbc; + int* zbc; GSVector2i* fzbr; GSVector2i* fzbc; diff --git a/plugins/GSdx/GSState.cpp b/plugins/GSdx/GSState.cpp index d7de1efd3b..1c77edd590 100644 --- a/plugins/GSdx/GSState.cpp +++ b/plugins/GSdx/GSState.cpp @@ -948,7 +948,7 @@ void GSState::FlushWrite() r.left = m_env.TRXPOS.DSAX; r.top = y; r.right = r.left + m_env.TRXREG.RRW; - r.bottom = min(r.top + m_env.TRXREG.RRH, m_tr.x == r.left ? m_tr.y : m_tr.y + 1); + r.bottom = std::min(r.top + m_env.TRXREG.RRH, m_tr.x == r.left ? m_tr.y : m_tr.y + 1); InvalidateVideoMem(m_env.BITBLTBUF, r); /* @@ -1085,17 +1085,20 @@ void GSState::Move() // TODO: unroll inner loops (width has special size requirement, must be multiples of 1 << n, depending on the format) + GSLocalMemory::PixelOffset* RESTRICT spo = m_mem.GetPixelOffset(m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW, m_env.BITBLTBUF.SPSM); + GSLocalMemory::PixelOffset* RESTRICT dpo = m_mem.GetPixelOffset(m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, m_env.BITBLTBUF.DPSM); + if(spsm.trbpp == dpsm.trbpp && spsm.trbpp >= 16) { - int* soffset = spsm.rowOffset[0]; - int* doffset = dpsm.rowOffset[0]; + int* soffset = spo->col[0]; + int* doffset = dpo->col[0]; if(spsm.trbpp == 32) { for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc * w, dx -= xinc * w) { - uint32 sbase = spsm.pa(0, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW); - uint32 dbase = dpsm.pa(0, dy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW); + uint32 sbase = spo->row[sy]; + uint32 dbase = dpo->row[dy]; for(int x = 0; x < w; x++, sx += xinc, dx += xinc) { @@ -1107,8 +1110,8 @@ void GSState::Move() { for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc * w, dx -= xinc * w) { - uint32 sbase = spsm.pa(0, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW); - uint32 dbase = dpsm.pa(0, dy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW); + uint32 sbase = spo->row[sy]; + uint32 dbase = dpo->row[dy]; for(int x = 0; x < w; x++, sx += xinc, dx += xinc) { @@ -1120,8 +1123,8 @@ void GSState::Move() { for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc * w, dx -= xinc * w) { - uint32 sbase = spsm.pa(0, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW); - uint32 dbase = dpsm.pa(0, dy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW); + uint32 sbase = spo->row[sy]; + uint32 dbase = dpo->row[dy]; for(int x = 0; x < w; x++, sx += xinc, dx += xinc) { @@ -1134,11 +1137,11 @@ void GSState::Move() { for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc * w, dx -= xinc * w) { - uint32 sbase = GSLocalMemory::PixelAddress8(0, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW); - int* soffset = spsm.rowOffset[sy & 7]; + uint32 sbase = spo->row[sy]; + uint32 dbase = dpo->row[dy]; - uint32 dbase = GSLocalMemory::PixelAddress8(0, dy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW); - int* doffset = dpsm.rowOffset[dy & 7]; + int* soffset = spo->col[sy & 7]; + int* doffset = dpo->col[dy & 7]; for(int x = 0; x < w; x++, sx += xinc, dx += xinc) { @@ -1150,11 +1153,11 @@ void GSState::Move() { for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc * w, dx -= xinc * w) { - uint32 sbase = GSLocalMemory::PixelAddress4(0, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW); - int* soffset = spsm.rowOffset[sy & 7]; + uint32 sbase = spo->row[sy]; + uint32 dbase = dpo->row[dy]; - uint32 dbase = GSLocalMemory::PixelAddress4(0, dy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW); - int* doffset = dpsm.rowOffset[dy & 7]; + int* soffset = spo->col[sy & 7]; + int* doffset = dpo->col[dy & 7]; for(int x = 0; x < w; x++, sx += xinc, dx += xinc) { @@ -1166,11 +1169,11 @@ void GSState::Move() { for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc * w, dx -= xinc * w) { - uint32 sbase = spsm.pa(0, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW); - int* soffset = spsm.rowOffset[sy & 7]; + uint32 sbase = spo->row[sy]; + uint32 dbase = dpo->row[dy]; - uint32 dbase = dpsm.pa(0, dy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW); - int* doffset = dpsm.rowOffset[dy & 7]; + int* soffset = spo->col[sy & 7]; + int* doffset = dpo->col[dy & 7]; for(int x = 0; x < w; x++, sx += xinc, dx += xinc) { diff --git a/plugins/GSdx/GSTexture7.cpp b/plugins/GSdx/GSTexture7.cpp index 44dd81f7e9..8a9eb36691 100644 --- a/plugins/GSdx/GSTexture7.cpp +++ b/plugins/GSdx/GSTexture7.cpp @@ -82,7 +82,7 @@ bool GSTexture7::Update(const GSVector4i& r, const void* data, int pitch) uint8* src = (uint8*)data; uint8* dst = (uint8*)desc.lpSurface; - int bytes = min(pitch, desc.lPitch); + int bytes = std::min(pitch, desc.lPitch); for(int i = 0, j = r.height(); i < j; i++, src += pitch, dst += desc.lPitch) { diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index b7e7ebf06d..d1893e0eba 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -60,7 +60,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con { Source* s = i->first; - if(((s->m_TEX0.u32[0] ^ TEX0.u32[0]) | ((s->m_TEX0.u32[1] ^ TEX0.u32[1]) & 3)) != 0) // TBP0 TBW PSM TW TH + if(((TEX0.u32[0] ^ s->m_TEX0.u32[0]) | ((TEX0.u32[1] ^ s->m_TEX0.u32[1]) & 3)) != 0) // TBP0 TBW PSM TW TH { continue; } @@ -70,7 +70,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con continue; } - if(psm.pal > 0 && !GSVector4i::compare(s->m_clut, clut, psm.pal * sizeof(clut[0]))) + if(psm.pal > 0 && !GSVector4i::compare(clut, s->m_clut, psm.pal * sizeof(clut[0]))) { continue; } @@ -84,13 +84,16 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con if(src == NULL) { + uint32 bp = TEX0.TBP0; + uint32 psm = TEX0.PSM; + for(int type = 0; type < 2 && dst == NULL; type++) { for(list::iterator i = m_dst[type].begin(); i != m_dst[type].end(); i++) { Target* t = *i; - if(t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(t->m_TEX0.TBP0, t->m_TEX0.PSM, TEX0.TBP0, TEX0.PSM)) + if(t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM)) { dst = t; @@ -116,7 +119,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con memcpy(src->m_clut, clut, psm.pal * sizeof(clut[0])); } - m_src.Add(src, TEX0); + m_src.Add(src, TEX0, m_renderer->m_mem); } if(psm.pal > 0) @@ -142,13 +145,15 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int w, int h, int type, bool used, bool fb) { + uint32 bp = TEX0.TBP0; + Target* dst = NULL; for(list::iterator i = m_dst[type].begin(); i != m_dst[type].end(); i++) { Target* t = *i; - if(t->m_TEX0.TBP0 == TEX0.TBP0) + if(bp == t->m_TEX0.TBP0) { m_dst[type].splice(m_dst[type].begin(), m_dst[type], i); @@ -168,7 +173,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int { Target* t = *i; - if(t->m_TEX0.TBP0 <= TEX0.TBP0 && TEX0.TBP0 < t->m_TEX0.TBP0 + 0x700 && (!dst || t->m_TEX0.TBP0 >= dst->m_TEX0.TBP0)) + if(t->m_TEX0.TBP0 <= bp && bp < t->m_TEX0.TBP0 + 0x700 && (!dst || t->m_TEX0.TBP0 >= dst->m_TEX0.TBP0)) { dst = t; } @@ -229,14 +234,13 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int void GSTextureCache::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& rect, bool target) { - bool found = false; - - const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[BITBLTBUF.DPSM]; - uint32 bp = BITBLTBUF.DBP; uint32 bw = BITBLTBUF.DBW; + uint32 psm = BITBLTBUF.DPSM; - GSVector2i bs = (bp & 31) == 0 ? psm.pgs : psm.bs; + const GSLocalMemory::BlockOffset* bo = m_renderer->m_mem.GetBlockOffset(bp, bw, psm); + + GSVector2i bs = (bp & 31) == 0 ? GSLocalMemory::m_psm[psm].pgs : GSLocalMemory::m_psm[psm].bs; GSVector4i r = rect.ralign(bs); @@ -250,20 +254,22 @@ void GSTextureCache::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const Source* s = j->first; - if(GSUtil::HasSharedBits(bp, BITBLTBUF.DPSM, s->m_TEX0.TBP0, s->m_TEX0.PSM)) + if(GSUtil::HasSharedBits(bp, psm, s->m_TEX0.TBP0, s->m_TEX0.PSM)) { m_src.RemoveAt(s); } } } + bool found = false; + for(int y = r.top; y < r.bottom; y += bs.y) { - uint32 base = psm.bn(0, y, bp, bw); + uint32 base = bo->row[y >> 3]; for(int x = r.left; x < r.right; x += bs.x) { - uint32 page = (base + psm.blockOffset[x >> 3]) >> 5; + uint32 page = (base + bo->col[x >> 3]) >> 5; if(page < MAX_PAGES) { @@ -275,20 +281,22 @@ void GSTextureCache::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const Source* s = j->first; - if(GSUtil::HasSharedBits(BITBLTBUF.DPSM, s->m_TEX0.PSM)) + if(GSUtil::HasSharedBits(psm, s->m_TEX0.PSM)) { + bool b = bp == s->m_TEX0.TBP0; + if(!s->m_target) { s->m_valid[page] = 0; s->m_complete = false; - found = true; + found = b; } else { // TODO - if(s->m_TEX0.TBP0 == bp) + if(b) { m_src.RemoveAt(s); } @@ -309,12 +317,12 @@ void GSTextureCache::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const Target* t = *j; - if(GSUtil::HasSharedBits(BITBLTBUF.DBP, BITBLTBUF.DPSM, t->m_TEX0.TBP0, t->m_TEX0.PSM)) + if(GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM)) { - if(!found && GSUtil::HasCompatibleBits(BITBLTBUF.DPSM, t->m_TEX0.PSM)) + if(!found && GSUtil::HasCompatibleBits(psm, t->m_TEX0.PSM)) { - t->m_dirty.push_back(GSDirtyRect(r, BITBLTBUF.DPSM)); - t->m_TEX0.TBW = BITBLTBUF.DBW; + t->m_dirty.push_back(GSDirtyRect(r, psm)); + t->m_TEX0.TBW = bw; } else { @@ -324,20 +332,20 @@ void GSTextureCache::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const } } - if(GSUtil::HasSharedBits(BITBLTBUF.DPSM, t->m_TEX0.PSM) && BITBLTBUF.DBP < t->m_TEX0.TBP0) + if(GSUtil::HasSharedBits(psm, t->m_TEX0.PSM) && bp < t->m_TEX0.TBP0) { - uint32 rowsize = BITBLTBUF.DBW * 8192; - uint32 offset = (uint32)((t->m_TEX0.TBP0 - BITBLTBUF.DBP) * 256); + uint32 rowsize = bw * 8192; + uint32 offset = (uint32)((t->m_TEX0.TBP0 - bp) * 256); if(rowsize > 0 && offset % rowsize == 0) { - int y = GSLocalMemory::m_psm[BITBLTBUF.DPSM].pgs.y * offset / rowsize; + int y = GSLocalMemory::m_psm[psm].pgs.y * offset / rowsize; if(r.bottom > y) { // TODO: do not add this rect above too - t->m_dirty.push_back(GSDirtyRect(GSVector4i(r.left, r.top - y, r.right, r.bottom - y), BITBLTBUF.DPSM)); - t->m_TEX0.TBW = BITBLTBUF.DBW; + t->m_dirty.push_back(GSDirtyRect(GSVector4i(r.left, r.top - y, r.right, r.bottom - y), psm)); + t->m_TEX0.TBW = bw; continue; } } @@ -348,21 +356,24 @@ void GSTextureCache::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const void GSTextureCache::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r) { + uint32 bp = BITBLTBUF.SBP; + uint32 psm = BITBLTBUF.SPSM; + for(list::iterator i = m_dst[RenderTarget].begin(); i != m_dst[RenderTarget].end(); ) { list::iterator j = i++; Target* t = *j; - if(GSUtil::HasSharedBits(BITBLTBUF.SBP, BITBLTBUF.SPSM, t->m_TEX0.TBP0, t->m_TEX0.PSM)) + if(GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM)) { - if(GSUtil::HasCompatibleBits(BITBLTBUF.SPSM, t->m_TEX0.PSM)) + if(GSUtil::HasCompatibleBits(psm, t->m_TEX0.PSM)) { t->Read(r); return; } - else if(BITBLTBUF.SPSM == PSM_PSMCT32 && (t->m_TEX0.PSM == PSM_PSMCT16 || t->m_TEX0.PSM == PSM_PSMCT16S)) + else if(psm == PSM_PSMCT32 && (t->m_TEX0.PSM == PSM_PSMCT16 || t->m_TEX0.PSM == PSM_PSMCT16S)) { // ffx-2 riku changing to her default (shoots some reflecting glass at the end), 16-bit rt read as 32-bit @@ -518,9 +529,7 @@ void GSTextureCache::Source::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TE m_TEX0 = TEX0; m_TEXA = TEXA; - const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[m_TEX0.PSM]; - - GSVector2i bs = psm.bs; + GSVector2i bs = GSLocalMemory::m_psm[m_TEX0.PSM].bs; GSVector4i r = rect.ralign(bs); @@ -529,20 +538,19 @@ void GSTextureCache::Source::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TE m_complete = true; // lame, but better than nothing } - uint32 bp = m_TEX0.TBP0; - uint32 bw = m_TEX0.TBW; + const GSLocalMemory::BlockOffset* bo = m_renderer->m_mem.GetBlockOffset(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM); - bool repeating = (1 << m_TEX0.TW) > (bw << 6); // TODO: bw == 0 + bool repeating = m_TEX0.IsRepeating(); uint32 blocks = 0; for(int y = r.top; y < r.bottom; y += bs.y) { - uint32 base = psm.bn(0, y, bp, bw); + uint32 base = bo->row[y >> 3]; for(int x = r.left; x < r.right; x += bs.x) { - uint32 block = base + psm.blockOffset[x >> 3]; + uint32 block = base + bo->col[x >> 3]; if(block < MAX_BLOCKS) { @@ -570,11 +578,11 @@ void GSTextureCache::Source::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TE { for(int y = r.top; y < r.bottom; y += bs.y) { - uint32 base = psm.bn(0, y, bp, bw); + uint32 base = bo->row[y >> 3]; for(int x = r.left; x < r.right; x += bs.x) { - uint32 block = base + psm.blockOffset[x >> 3]; + uint32 block = base + bo->col[x >> 3]; if(block < MAX_BLOCKS) { @@ -640,7 +648,7 @@ void GSTextureCache::Source::Flush(uint32 count) int pitch = max(tw, psm.bs.x) * sizeof(uint32); - const GSLocalMemory& mem = m_renderer->m_mem; + GSLocalMemory& mem = m_renderer->m_mem; GSLocalMemory::readTexture rtx = psm.rtx; @@ -767,42 +775,44 @@ void GSTextureCache::Target::Update() { // do the most likely thing a direct write would do, clear it - m_renderer->m_dev->ClearDepth(m_texture, 0); + if((m_renderer->m_game.flags & CRC::ZWriteMustNotClear) == 0) + { + m_renderer->m_dev->ClearDepth(m_texture, 0); + } } } // GSTextureCache::SourceMap -void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0) +void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0, GSLocalMemory& mem) { m_surfaces[s] = true; - int tw = 1 << TEX0.TW; - int th = 1 << TEX0.TH; - - uint32 bp = TEX0.TBP0; - uint32 bw = TEX0.TBW; - if(s->m_target) { // TODO - m_map[bp >> 5][s] = true; + m_map[TEX0.TBP0 >> 5][s] = true; return; } + const GSLocalMemory::BlockOffset* bo = mem.GetBlockOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM); + const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM]; - GSVector2i bs = (bp & 31) ? psm.pgs : psm.bs; + GSVector2i bs = (TEX0.TBP0 & 31) ? psm.pgs : psm.bs; + + int tw = 1 << TEX0.TW; + int th = 1 << TEX0.TH; for(int y = 0; y < th; y += bs.y) { - uint32 base = psm.bn(0, y, bp, bw); + uint32 base = bo->row[y >> 3]; for(int x = 0; x < tw; x += bs.x) { - uint32 page = (base + psm.blockOffset[x >> 3]) >> 5; + uint32 page = (base + bo->col[x >> 3]) >> 5; if(page < MAX_PAGES) { diff --git a/plugins/GSdx/GSTextureCache.h b/plugins/GSdx/GSTextureCache.h index 3ac0392959..a09a129f80 100644 --- a/plugins/GSdx/GSTextureCache.h +++ b/plugins/GSdx/GSTextureCache.h @@ -100,7 +100,7 @@ protected: SourceMap() : m_used(false) {memset(m_pages, 0, sizeof(m_pages));} - void Add(Source* s, const GIFRegTEX0& TEX0); + void Add(Source* s, const GIFRegTEX0& TEX0, GSLocalMemory& mem); void RemoveAll(); void RemoveAt(Source* s); diff --git a/plugins/GSdx/GSTextureCacheSW.cpp b/plugins/GSdx/GSTextureCacheSW.cpp index 572097ba58..7b31f877c2 100644 --- a/plugins/GSdx/GSTextureCacheSW.cpp +++ b/plugins/GSdx/GSTextureCacheSW.cpp @@ -45,7 +45,7 @@ const GSTextureCacheSW::GSTexture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TE { GSTexture* t2 = i->first; - if(((t2->m_TEX0.u32[0] ^ TEX0.u32[0]) | ((t2->m_TEX0.u32[1] ^ TEX0.u32[1]) & 3)) != 0) // TBP0 TBW PSM TW TH + if(((TEX0.u32[0] ^ t2->m_TEX0.u32[0]) | ((TEX0.u32[1] ^ t2->m_TEX0.u32[1]) & 3)) != 0) // TBP0 TBW PSM TW TH { continue; } @@ -68,21 +68,20 @@ const GSTextureCacheSW::GSTexture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TE m_textures[t] = true; + const GSLocalMemory::BlockOffset* bo = m_state->m_mem.GetBlockOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM); + + GSVector2i bs = (TEX0.TBP0 & 31) == 0 ? psm.pgs : psm.bs; + int tw = 1 << TEX0.TW; int th = 1 << TEX0.TH; - uint32 bp = TEX0.TBP0; - uint32 bw = TEX0.TBW; - - GSVector2i s = (bp & 31) == 0 ? psm.pgs : psm.bs; - - for(int y = 0; y < th; y += s.y) + for(int y = 0; y < th; y += bs.y) { - uint32 base = psm.bn(0, y, bp, bw); + uint32 base = bo->row[y >> 3]; - for(int x = 0; x < tw; x += s.x) + for(int x = 0; x < tw; x += bs.x) { - uint32 page = (base + psm.blockOffset[x >> 3]) >> 5; + uint32 page = (base + bo->col[x >> 3]) >> 5; if(page < MAX_PAGES) { @@ -168,22 +167,23 @@ void GSTextureCacheSW::IncAge() void GSTextureCacheSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& rect) { - const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[BITBLTBUF.DPSM]; - uint32 bp = BITBLTBUF.DBP; uint32 bw = BITBLTBUF.DBW; + uint32 psm = BITBLTBUF.DPSM; - GSVector2i s = (bp & 31) == 0 ? psm.pgs : psm.bs; + const GSLocalMemory::BlockOffset* bo = m_state->m_mem.GetBlockOffset(bp, bw, psm); - GSVector4i r = rect.ralign(s); + GSVector2i bs = (bp & 31) == 0 ? GSLocalMemory::m_psm[psm].pgs : GSLocalMemory::m_psm[psm].bs; - for(int y = r.top; y < r.bottom; y += s.y) + GSVector4i r = rect.ralign(bs); + + for(int y = r.top; y < r.bottom; y += bs.y) { - uint32 base = psm.bn(0, y, bp, bw); + uint32 base = bo->row[y >> 3]; - for(int x = r.left; x < r.right; x += s.x) + for(int x = r.left; x < r.right; x += bs.x) { - uint32 page = (base + psm.blockOffset[x >> 3]) >> 5; + uint32 page = (base + bo->col[x >> 3]) >> 5; if(page < MAX_PAGES) { @@ -193,7 +193,7 @@ void GSTextureCacheSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, cons { GSTexture* t = i->first; - if(GSUtil::HasSharedBits(BITBLTBUF.DPSM, t->m_TEX0.PSM)) + if(GSUtil::HasSharedBits(psm, t->m_TEX0.PSM)) { t->m_valid[page] = 0; t->m_complete = false; @@ -236,10 +236,17 @@ bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEX const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM]; - GSVector2i s = psm.bs; + GSVector2i bs = psm.bs; - int tw = max(1 << TEX0.TW, s.x); - int th = max(1 << TEX0.TH, s.y); + int tw = max(1 << TEX0.TW, bs.x); + int th = max(1 << TEX0.TH, bs.y); + + GSVector4i r = rect.ralign(bs); + + if(r.eq(GSVector4i(0, 0, tw, th))) + { + m_complete = true; // lame, but better than nothing + } if(m_buff == NULL) { @@ -250,25 +257,17 @@ bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEX return false; } - m_tw = max(psm.pal > 0 ? 5 : 3, TEX0.TW); // makes one row 32 bytes at least, matches the smallest block size that is allocated above for m_buff + m_tw = std::max(psm.pal > 0 ? 5 : 3, TEX0.TW); // makes one row 32 bytes at least, matches the smallest block size that is allocated above for m_buff } - GSVector4i r = rect.ralign(s); - - if(r.eq(GSVector4i(0, 0, tw, th))) - { - m_complete = true; // lame, but better than nothing - } - - uint32 bp = TEX0.TBP0; - uint32 bw = TEX0.TBW; - - bool repeating = tw > (bw << 6); // TODO: bw == 0 - - uint32 blocks = 0; - GSLocalMemory& mem = m_state->m_mem; + const GSLocalMemory::BlockOffset* bo = mem.GetBlockOffset(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM); + + bool repeating = m_TEX0.IsRepeating(); + + uint32 blocks = 0; + GSLocalMemory::readTextureBlock rtxb = psm.rtxbP; int shift = psm.pal == 0 ? 2 : 0; @@ -277,13 +276,13 @@ bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEX uint8* dst = (uint8*)m_buff + pitch * r.top; - for(int y = r.top, o = pitch * s.y; y < r.bottom; y += s.y, dst += o) + for(int y = r.top, o = pitch * bs.y; y < r.bottom; y += bs.y, dst += o) { - uint32 base = psm.bn(0, y, bp, bw); + uint32 base = bo->row[y >> 3]; - for(int x = r.left; x < r.right; x += s.x) + for(int x = r.left; x < r.right; x += bs.x) { - uint32 block = base + psm.blockOffset[x >> 3]; + uint32 block = base + bo->col[x >> 3]; if(block < MAX_BLOCKS) { @@ -309,13 +308,13 @@ bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEX { if(repeating) { - for(int y = r.top; y < r.bottom; y += s.y) + for(int y = r.top; y < r.bottom; y += bs.y) { - uint32 base = psm.bn(0, y, bp, bw); + uint32 base = bo->row[y >> 3]; - for(int x = r.left; x < r.right; x += s.x) + for(int x = r.left; x < r.right; x += bs.x) { - uint32 block = base + psm.blockOffset[x >> 3]; + uint32 block = base + bo->col[x >> 3]; if(block < MAX_BLOCKS) { @@ -328,7 +327,7 @@ bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEX } } - m_state->m_perfmon.Put(GSPerfMon::Unswizzle, s.x * s.y * blocks << shift); + m_state->m_perfmon.Put(GSPerfMon::Unswizzle, bs.x * bs.y * blocks << shift); } return true; diff --git a/plugins/GSdx/GSUtil.cpp b/plugins/GSdx/GSUtil.cpp index 7699d9d6ee..4e9236228a 100644 --- a/plugins/GSdx/GSUtil.cpp +++ b/plugins/GSdx/GSUtil.cpp @@ -28,8 +28,8 @@ static struct GSUtilMaps { uint8 PrimClassField[8]; - bool CompatibleBitsField[64][64]; - bool SharedBitsField[64][64]; + uint32 CompatibleBitsField[64][2]; + uint32 SharedBitsField[64][2]; struct GSUtilMaps() { @@ -44,31 +44,36 @@ static struct GSUtilMaps memset(CompatibleBitsField, 0, sizeof(CompatibleBitsField)); - CompatibleBitsField[PSM_PSMCT32][PSM_PSMCT24] = true; - CompatibleBitsField[PSM_PSMCT24][PSM_PSMCT32] = true; - CompatibleBitsField[PSM_PSMCT16][PSM_PSMCT16S] = true; - CompatibleBitsField[PSM_PSMCT16S][PSM_PSMCT16] = true; - CompatibleBitsField[PSM_PSMZ32][PSM_PSMZ24] = true; - CompatibleBitsField[PSM_PSMZ24][PSM_PSMZ32] = true; - CompatibleBitsField[PSM_PSMZ16][PSM_PSMZ16S] = true; - CompatibleBitsField[PSM_PSMZ16S][PSM_PSMZ16] = true; + for(int i = 0; i < 64; i++) + { + CompatibleBitsField[i][i >> 5] |= 1 << (i & 0x1f); + } - memset(SharedBitsField, 1, sizeof(SharedBitsField)); + CompatibleBitsField[PSM_PSMCT32][PSM_PSMCT24 >> 5] |= 1 << (PSM_PSMCT24 & 0x1f); + CompatibleBitsField[PSM_PSMCT24][PSM_PSMCT32 >> 5] |= 1 << (PSM_PSMCT32 & 0x1f); + CompatibleBitsField[PSM_PSMCT16][PSM_PSMCT16S >> 5] |= 1 << (PSM_PSMCT16S & 0x1f); + CompatibleBitsField[PSM_PSMCT16S][PSM_PSMCT16 >> 5] |= 1 << (PSM_PSMCT16 & 0x1f); + CompatibleBitsField[PSM_PSMZ32][PSM_PSMZ24 >> 5] |= 1 << (PSM_PSMZ24 & 0x1f); + CompatibleBitsField[PSM_PSMZ24][PSM_PSMZ32 >> 5] |= 1 << (PSM_PSMZ32 & 0x1f); + CompatibleBitsField[PSM_PSMZ16][PSM_PSMZ16S >> 5] |= 1 << (PSM_PSMZ16S & 0x1f); + CompatibleBitsField[PSM_PSMZ16S][PSM_PSMZ16 >> 5] |= 1 << (PSM_PSMZ16 & 0x1f); - SharedBitsField[PSM_PSMCT24][PSM_PSMT8H] = false; - SharedBitsField[PSM_PSMCT24][PSM_PSMT4HL] = false; - SharedBitsField[PSM_PSMCT24][PSM_PSMT4HH] = false; - SharedBitsField[PSM_PSMZ24][PSM_PSMT8H] = false; - SharedBitsField[PSM_PSMZ24][PSM_PSMT4HL] = false; - SharedBitsField[PSM_PSMZ24][PSM_PSMT4HH] = false; - SharedBitsField[PSM_PSMT8H][PSM_PSMCT24] = false; - SharedBitsField[PSM_PSMT8H][PSM_PSMZ24] = false; - SharedBitsField[PSM_PSMT4HL][PSM_PSMCT24] = false; - SharedBitsField[PSM_PSMT4HL][PSM_PSMZ24] = false; - SharedBitsField[PSM_PSMT4HL][PSM_PSMT4HH] = false; - SharedBitsField[PSM_PSMT4HH][PSM_PSMCT24] = false; - SharedBitsField[PSM_PSMT4HH][PSM_PSMZ24] = false; - SharedBitsField[PSM_PSMT4HH][PSM_PSMT4HL] = false; + memset(SharedBitsField, 0, sizeof(SharedBitsField)); + + SharedBitsField[PSM_PSMCT24][PSM_PSMT8H >> 5] |= 1 << (PSM_PSMT8H & 0x1f); + SharedBitsField[PSM_PSMCT24][PSM_PSMT4HL >> 5] |= 1 << (PSM_PSMT4HL & 0x1f); + SharedBitsField[PSM_PSMCT24][PSM_PSMT4HH >> 5] |= 1 << (PSM_PSMT4HH & 0x1f); + SharedBitsField[PSM_PSMZ24][PSM_PSMT8H >> 5] |= 1 << (PSM_PSMT8H & 0x1f); + SharedBitsField[PSM_PSMZ24][PSM_PSMT4HL >> 5] |= 1 << (PSM_PSMT4HL & 0x1f); + SharedBitsField[PSM_PSMZ24][PSM_PSMT4HH >> 5] |= 1 << (PSM_PSMT4HH & 0x1f); + SharedBitsField[PSM_PSMT8H][PSM_PSMCT24 >> 5] |= 1 << (PSM_PSMCT24 & 0x1f); + SharedBitsField[PSM_PSMT8H][PSM_PSMZ24 >> 5] |= 1 << (PSM_PSMZ24 & 0x1f); + SharedBitsField[PSM_PSMT4HL][PSM_PSMCT24 >> 5] |= 1 << (PSM_PSMCT24 & 0x1f); + SharedBitsField[PSM_PSMT4HL][PSM_PSMZ24 >> 5] |= 1 << (PSM_PSMZ24 & 0x1f); + SharedBitsField[PSM_PSMT4HL][PSM_PSMT4HH >> 5] |= 1 << (PSM_PSMT4HH & 0x1f); + SharedBitsField[PSM_PSMT4HH][PSM_PSMCT24 >> 5] |= 1 << (PSM_PSMCT24 & 0x1f); + SharedBitsField[PSM_PSMT4HH][PSM_PSMZ24 >> 5] |= 1 << (PSM_PSMZ24 & 0x1f); + SharedBitsField[PSM_PSMT4HH][PSM_PSMT4HL >> 5] |= 1 << (PSM_PSMT4HL & 0x1f); } } s_maps; @@ -80,21 +85,17 @@ GS_PRIM_CLASS GSUtil::GetPrimClass(uint32 prim) bool GSUtil::HasSharedBits(uint32 spsm, uint32 dpsm) { - return s_maps.SharedBitsField[spsm][dpsm]; + return (s_maps.SharedBitsField[dpsm][spsm >> 5] & (1 << (spsm & 0x1f))) == 0; } bool GSUtil::HasSharedBits(uint32 sbp, uint32 spsm, uint32 dbp, uint32 dpsm) { - if(sbp != dbp) return false; - - return HasSharedBits(spsm, dpsm); + return ((sbp ^ dbp) | (s_maps.SharedBitsField[dpsm][spsm >> 5] & (1 << (spsm & 0x1f)))) == 0; } bool GSUtil::HasCompatibleBits(uint32 spsm, uint32 dpsm) { - if(spsm == dpsm) return true; - - return s_maps.CompatibleBitsField[spsm][dpsm]; + return (s_maps.CompatibleBitsField[spsm][dpsm >> 5] & (1 << (dpsm & 0x1f))) != 0; } bool GSUtil::CheckDirectX() diff --git a/plugins/GSdx/GSVector.h b/plugins/GSdx/GSVector.h index 3e13c5c879..b3cbfc261b 100644 --- a/plugins/GSdx/GSVector.h +++ b/plugins/GSdx/GSVector.h @@ -1032,7 +1032,7 @@ public: bool allfalse() const { #if _M_SSE >= 0x401 - return _mm_testz_si128(m, m); + return _mm_testz_si128(m, m) != 0; #else return _mm_movemask_epi8(m) == 0; #endif @@ -2485,15 +2485,15 @@ public: GSVector4 clamp(const float scale = 255) const { - return minv(GSVector4(scale)); + return min(GSVector4(scale)); } - GSVector4 minv(const GSVector4& a) const + GSVector4 min(const GSVector4& a) const { return GSVector4(_mm_min_ps(m, a)); } - GSVector4 maxv(const GSVector4& a) const + GSVector4 max(const GSVector4& a) const { return GSVector4(_mm_max_ps(m, a)); } @@ -2550,7 +2550,7 @@ public: { #if _M_SSE >= 0x401 __m128i a = _mm_castps_si128(m); - return _mm_testz_si128(a, a); + return _mm_testz_si128(a, a) != 0; #else return _mm_movemask_ps(m) == 0; #endif diff --git a/plugins/GSdx/GSdx_vs2008.vcproj b/plugins/GSdx/GSdx_vs2008.vcproj index 8de80b5287..b6ae9a9c48 100644 --- a/plugins/GSdx/GSdx_vs2008.vcproj +++ b/plugins/GSdx/GSdx_vs2008.vcproj @@ -1371,6 +1371,14 @@ AssemblerOutput="4" /> + + + diff --git a/plugins/GSdx/stdafx.h b/plugins/GSdx/stdafx.h index 375fb8d206..7d1c48d988 100644 --- a/plugins/GSdx/stdafx.h +++ b/plugins/GSdx/stdafx.h @@ -190,3 +190,6 @@ typedef signed long long int64; #include #endif + +#undef min +#undef max \ No newline at end of file