From 0b62c17d9c0c1f6771ccd6ec22fb4129faf81699 Mon Sep 17 00:00:00 2001 From: gabest11 Date: Tue, 20 Dec 2011 14:33:28 +0000 Subject: [PATCH] GSdx: Renamed the sw thread setting to "extra threads". - 0: no multi-threading - 1: gif packet processing and texture uploads run in parallel with rendering, the slowest decides the fps, dual-cores can still suffer from the spin loops, I'll check that when I compile pcsx2 on my notebook - 2: two rendering threads, on a decent cpu packet processing is going to be slower now, this is probably going to increase fps the most on quads - 3: small fps increase - 4+: even smaller. If you have a quad cpu with HT, 6 is the max, 1 + 1 is needed for pcsx2 and gsdx's basic tasks. Also hacked palette writes to not force a read-back in hw mode (added in previous rev), it hit render targets in a surprisingly large number of games. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4998 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/GSdx/GPU.cpp | 2 +- plugins/GSdx/GPURendererSW.h | 2 +- plugins/GSdx/GPUSettingsDlg.cpp | 6 +- plugins/GSdx/GS.cpp | 4 +- plugins/GSdx/GSLinuxDialog.cpp | 4 +- plugins/GSdx/GSLocalMemory.cpp | 12 ++- plugins/GSdx/GSRasterizer.cpp | 54 +++++------ plugins/GSdx/GSRasterizer.h | 29 +++--- plugins/GSdx/GSRendererHW.h | 4 +- plugins/GSdx/GSRendererSW.cpp | 96 +++++++++----------- plugins/GSdx/GSRendererSW.h | 4 +- plugins/GSdx/GSSettingsDlg.cpp | 6 +- plugins/GSdx/GSState.cpp | 108 +++++++++++----------- plugins/GSdx/GSState.h | 112 +++++++++++------------ plugins/GSdx/GSTextureCache.cpp | 2 +- plugins/GSdx/GSTextureCacheSW.cpp | 2 +- plugins/GSdx/GSThread.h | 143 ++++++++++++++++++++++++++---- plugins/GSdx/GSdx.rc | 41 ++++----- plugins/GSdx/resource.h | 2 +- plugins/GSdx/stdafx.h | 17 ++++ 20 files changed, 387 insertions(+), 263 deletions(-) diff --git a/plugins/GSdx/GPU.cpp b/plugins/GSdx/GPU.cpp index 5d8ab18c92..c2764016eb 100644 --- a/plugins/GSdx/GPU.cpp +++ b/plugins/GSdx/GPU.cpp @@ -109,7 +109,7 @@ EXPORT_C_(int32) 
GPUopen(void* hWnd) #endif int renderer = theApp.GetConfig("Renderer", 1); - int threads = theApp.GetConfig("swthreads", 1); + int threads = theApp.GetConfig("extrathreads", 0); switch(renderer) { diff --git a/plugins/GSdx/GPURendererSW.h b/plugins/GSdx/GPURendererSW.h index c9d4ccef00..dd39baa217 100644 --- a/plugins/GSdx/GPURendererSW.h +++ b/plugins/GSdx/GPURendererSW.h @@ -49,7 +49,7 @@ class GPURendererSW : public GPURendererT }; protected: - GSRasterizerList* m_rl; + IRasterizer* m_rl; GSTexture* m_texture; uint32* m_output; diff --git a/plugins/GSdx/GPUSettingsDlg.cpp b/plugins/GSdx/GPUSettingsDlg.cpp index a2fa7e9cca..2ef5aaaf7a 100644 --- a/plugins/GSdx/GPUSettingsDlg.cpp +++ b/plugins/GSdx/GPUSettingsDlg.cpp @@ -73,8 +73,8 @@ void GPUSettingsDlg::OnInit() CheckDlgButton(m_hWnd, IDC_WINDOWED, theApp.GetConfig("windowed", 1)); - SendMessage(GetDlgItem(m_hWnd, IDC_SWTHREADS), UDM_SETRANGE, 0, MAKELPARAM(16, 1)); - SendMessage(GetDlgItem(m_hWnd, IDC_SWTHREADS), UDM_SETPOS, 0, MAKELPARAM(theApp.GetConfig("swthreads", 1), 0)); + SendMessage(GetDlgItem(m_hWnd, IDC_SWTHREADS), UDM_SETRANGE, 0, MAKELPARAM(16, 0)); + SendMessage(GetDlgItem(m_hWnd, IDC_SWTHREADS), UDM_SETPOS, 0, MAKELPARAM(theApp.GetConfig("extrathreads", 0), 0)); UpdateControls(); } @@ -124,7 +124,7 @@ bool GPUSettingsDlg::OnCommand(HWND hWnd, UINT id, UINT code) theApp.SetConfig("scale_y", (data >> 2) & 3); } - theApp.SetConfig("swthreads", (int)SendMessage(GetDlgItem(m_hWnd, IDC_SWTHREADS), UDM_GETPOS, 0, 0)); + theApp.SetConfig("extrathreads", (int)SendMessage(GetDlgItem(m_hWnd, IDC_SWTHREADS), UDM_GETPOS, 0, 0)); theApp.SetConfig("windowed", (int)IsDlgButtonChecked(m_hWnd, IDC_WINDOWED)); } diff --git a/plugins/GSdx/GS.cpp b/plugins/GSdx/GS.cpp index f3ccaf104b..a44d987b18 100644 --- a/plugins/GSdx/GS.cpp +++ b/plugins/GSdx/GS.cpp @@ -190,7 +190,7 @@ static int _GSopen(void** dsp, char* title, int renderer, int threads = -1) if(threads == -1) { - threads = theApp.GetConfig("swthreads", 1); + 
threads = theApp.GetConfig("extrathreads", 0); } try @@ -769,6 +769,8 @@ EXPORT_C GSReplay(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow) GSvsync(1); + Sleep(100); + bool exit = false; while(!exit) diff --git a/plugins/GSdx/GSLinuxDialog.cpp b/plugins/GSdx/GSLinuxDialog.cpp index 271b8e2619..7e1f826208 100644 --- a/plugins/GSdx/GSLinuxDialog.cpp +++ b/plugins/GSdx/GSLinuxDialog.cpp @@ -102,10 +102,10 @@ bool RunLinuxDialog() gtk_container_add(GTK_CONTAINER(main_box), interlace_label); gtk_container_add(GTK_CONTAINER(main_box), interlace_combo_box); - swthreads_label = gtk_label_new("Software renderer threads:"); + swthreads_label = gtk_label_new("Extra sw renderer threads:"); swthreads_text = gtk_entry_new(); char buf[5]; - sprintf(buf, "%d", theApp.GetConfig("swthreads", 1)); + sprintf(buf, "%d", theApp.GetConfig("extrathreads", 0)); gtk_entry_set_text(GTK_ENTRY(swthreads_text), buf); gtk_container_add(GTK_CONTAINER(main_box), swthreads_label); diff --git a/plugins/GSdx/GSLocalMemory.cpp b/plugins/GSdx/GSLocalMemory.cpp index 379c05c117..499c056f63 100644 --- a/plugins/GSdx/GSLocalMemory.cpp +++ b/plugins/GSdx/GSLocalMemory.cpp @@ -547,6 +547,8 @@ GSPixelOffset4* GSLocalMemory::GetPixelOffset4(const GIFRegFRAME& FRAME, const G return o; } +static bool cmp_vec2x(const GSVector2i& a, const GSVector2i& b) {return a.x < b.x;} + list* GSLocalMemory::GetPage2TileMap(const GIFRegTEX0& TEX0) { uint32 hash = TEX0.TBP0 | (TEX0.TBW << 14) | (TEX0.PSM << 20) | (TEX0.TW << 26); @@ -613,10 +615,18 @@ list* GSLocalMemory::GetPage2TileMap(const GIFRegTEX0& TEX0) } } + // sort by x and flip the mask (it will be used to erase a lot of bits in a loop, [x] &= ~y) + + vector tmp; + for(hash_map::iterator j = m.begin(); j != m.end(); j++) { - p2t[page].push_back(GSVector2i(j->first, j->second)); + tmp.push_back(GSVector2i(j->first, ~j->second)); } + + std::sort(tmp.begin(), tmp.end(), cmp_vec2x); + + p2t[page].insert(p2t[page].end(), tmp.begin(), tmp.end()); } 
m_p2tmap[hash] = p2t; diff --git a/plugins/GSdx/GSRasterizer.cpp b/plugins/GSdx/GSRasterizer.cpp index 3c470fb6c7..3a32367d3b 100644 --- a/plugins/GSdx/GSRasterizer.cpp +++ b/plugins/GSdx/GSRasterizer.cpp @@ -24,8 +24,11 @@ #include "stdafx.h" #include "GSRasterizer.h" -#define THREAD_HEIGHT 5 -//#define THREAD_HEIGHT 1 +// - for more threads screen segments should be smaller to better distribute the pixels +// - but not too small to keep the threading overhead low +// - ideal value between 3 and 5, or log2(64 / number of threads) + +#define THREAD_HEIGHT 4 GSRasterizer::GSRasterizer(IDrawScanline* ds, int id, int threads) : m_ds(ds) @@ -764,7 +767,6 @@ GSRasterizerMT::GSRasterizerMT(IDrawScanline* ds, int id, int threads) : GSRasterizer(ds, id, threads) , m_exit(false) , m_break(true) - , m_ready(true) { CreateThread(); } @@ -790,8 +792,6 @@ void GSRasterizerMT::Queue(shared_ptr data) { m_break = false; - m_ready = false; - m_draw.Set(); } } @@ -801,8 +801,6 @@ void GSRasterizerMT::Sync() while(!m_queue.empty()) _mm_pause(); m_break = true; - - while(!m_ready) _mm_pause(); } void GSRasterizerMT::ThreadProc() @@ -815,35 +813,30 @@ void GSRasterizerMT::ThreadProc() { if(!m_queue.empty()) { - queue > queue; - + while(!m_queue.empty()) { - GSAutoLock l(&m_lock); + shared_ptr data; - // TODO: queue.swap(m_queue); // GCC - - while(!m_queue.empty()) { - queue.push(m_queue.front()); + GSAutoLock l(&m_lock); + + data = m_queue.front(); + } + + Draw(data); + + { + GSAutoLock l(&m_lock); m_queue.pop(); } } - - while(!queue.empty()) - { - Draw(queue.front()); - - queue.pop(); - } } else { _mm_pause(); } } - - m_ready = true; } } @@ -864,20 +857,15 @@ GSRasterizerList::~GSRasterizerList() } } -void GSRasterizerList::Draw(shared_ptr data) -{ - Sync(); - - front()->Draw(data); -} - void GSRasterizerList::Queue(shared_ptr data) { - if(size() > 1) + if(size() > 1 && data->solidrect) // TODO: clip to thread area and dispatch? 
{ - ASSERT(!data->solidrect); // should call Draw instead, but it will work anyway + Sync(); // complete previous drawings - data->solidrect = false; + front()->Draw(data); + + return; } GSVector4i bbox = data->bbox.rintersect(data->scissor); diff --git a/plugins/GSdx/GSRasterizer.h b/plugins/GSdx/GSRasterizer.h index e5caca4abd..67b02565c9 100644 --- a/plugins/GSdx/GSRasterizer.h +++ b/plugins/GSdx/GSRasterizer.h @@ -142,6 +142,7 @@ public: // IRasterizer void Queue(shared_ptr data); + void Sync() {} }; class GSRasterizerMT : public GSRasterizer, private GSThread @@ -149,8 +150,7 @@ class GSRasterizerMT : public GSRasterizer, private GSThread protected: volatile bool m_exit; volatile bool m_break; - volatile bool m_ready; - GSAutoResetEvent m_draw; + GSEvent m_draw; queue > m_queue; GSCritSec m_lock; @@ -177,24 +177,27 @@ protected: public: virtual ~GSRasterizerList(); - template static GSRasterizerList* Create(int threads) + template static IRasterizer* Create(int threads) { - GSRasterizerList* rl = new GSRasterizerList(); + threads = std::max(threads, 0); - threads = std::max(threads, 1); // TODO: min(threads, number of cpu cores) - - for(int i = 0; i < threads; i++) + if(threads == 0) { - rl->push_back(new GSRasterizerMT(new DS(), i, threads)); + return new GSRasterizer(new DS(), 0, 1); } + else + { + GSRasterizerList* rl = new GSRasterizerList(); - return rl; + for(int i = 0; i < threads; i++) + { + rl->push_back(new GSRasterizerMT(new DS(), i, threads)); + } + + return rl; + } } - bool IsMultiThreaded() const {return size() > 1;} - - void Draw(shared_ptr data); - // IRasterizer void Queue(shared_ptr data); diff --git a/plugins/GSdx/GSRendererHW.h b/plugins/GSdx/GSRendererHW.h index a408357bbd..158943ee2c 100644 --- a/plugins/GSdx/GSRendererHW.h +++ b/plugins/GSdx/GSRendererHW.h @@ -747,10 +747,12 @@ protected: m_tc->InvalidateVideoMem(m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM), r); } - void InvalidateLocalMem(const GIFRegBITBLTBUF& 
BITBLTBUF, const GSVector4i& r) + void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false) { // printf("[%d] InvalidateLocalMem %d,%d - %d,%d %05x (%d)\n", (int)m_perfmon.GetFrame(), r.left, r.top, r.right, r.bottom, (int)BITBLTBUF.SBP, (int)BITBLTBUF.SPSM); + if(clut) return; // FIXME + m_tc->InvalidateLocalMem(m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM), r); } diff --git a/plugins/GSdx/GSRendererSW.cpp b/plugins/GSdx/GSRendererSW.cpp index 02004cb3a8..6e5dff5064 100644 --- a/plugins/GSdx/GSRendererSW.cpp +++ b/plugins/GSdx/GSRendererSW.cpp @@ -71,7 +71,7 @@ void GSRendererSW::VSync(int field) Sync(); // IncAge might delete a cached texture in use - //printf("m_sync_count = %d\n", m_rl->m_sync_count); m_rl->m_sync_count = 0; + //printf("m_sync_count = %d\n", ((GSRasterizerList*)m_rl)->m_sync_count); ((GSRasterizerList*)m_rl)->m_sync_count = 0; m_tc->IncAge(); @@ -178,66 +178,58 @@ void GSRendererSW::Draw() m_tc->InvalidateVideoMem(m_context->offset.zb, r); } - if(!m_rl->IsMultiThreaded() || data->solidrect || s_dump) + if(s_dump) { - if(s_dump) + Sync(); + + uint64 frame = m_perfmon.GetFrame(); + + string s; + + if(s_save && s_n >= s_saven && PRIM->TME) { - uint64 frame = m_perfmon.GetFrame(); + s = format("c:\\temp1\\_%05d_f%lld_tex_%05x_%d.bmp", s_n, frame, (int)m_context->TEX0.TBP0, (int)m_context->TEX0.PSM); - string s; - - if(s_save && s_n >= s_saven && PRIM->TME) - { - s = format("c:\\temp1\\_%05d_f%lld_tex_%05x_%d.bmp", s_n, frame, (int)m_context->TEX0.TBP0, (int)m_context->TEX0.PSM); - - m_mem.SaveBMP(s, m_context->TEX0.TBP0, m_context->TEX0.TBW, m_context->TEX0.PSM, 1 << m_context->TEX0.TW, 1 << m_context->TEX0.TH); - } - - s_n++; - - if(s_save && s_n >= s_saven) - { - s = format("c:\\temp1\\_%05d_f%lld_rt0_%05x_%d.bmp", s_n, frame, m_context->FRAME.Block(), m_context->FRAME.PSM); - - m_mem.SaveBMP(s, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, GetFrameRect().width(), 
512); - } - - if(s_savez && s_n >= s_saven) - { - s = format("c:\\temp1\\_%05d_f%lld_rz0_%05x_%d.bmp", s_n, frame, m_context->ZBUF.Block(), m_context->ZBUF.PSM); - - m_mem.SaveBMP(s, m_context->ZBUF.Block(), m_context->FRAME.FBW, m_context->ZBUF.PSM, GetFrameRect().width(), 512); - } - - s_n++; + m_mem.SaveBMP(s, m_context->TEX0.TBP0, m_context->TEX0.TBW, m_context->TEX0.PSM, 1 << m_context->TEX0.TW, 1 << m_context->TEX0.TH); } - m_rl->Draw(data); + s_n++; + + if(s_save && s_n >= s_saven) + { + s = format("c:\\temp1\\_%05d_f%lld_rt0_%05x_%d.bmp", s_n, frame, m_context->FRAME.Block(), m_context->FRAME.PSM); + + m_mem.SaveBMP(s, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, GetFrameRect().width(), 512); + } + + if(s_savez && s_n >= s_saven) + { + s = format("c:\\temp1\\_%05d_f%lld_rz0_%05x_%d.bmp", s_n, frame, m_context->ZBUF.Block(), m_context->ZBUF.PSM); + + m_mem.SaveBMP(s, m_context->ZBUF.Block(), m_context->FRAME.FBW, m_context->ZBUF.PSM, GetFrameRect().width(), 512); + } + + s_n++; + + m_rl->Queue(data); Sync(); - if(s_dump) + if(s_save && s_n >= s_saven) { - uint64 frame = m_perfmon.GetFrame(); + s = format("c:\\temp1\\_%05d_f%lld_rt1_%05x_%d.bmp", s_n, frame, m_context->FRAME.Block(), m_context->FRAME.PSM); - string s; - - if(s_save && s_n >= s_saven) - { - s = format("c:\\temp1\\_%05d_f%lld_rt1_%05x_%d.bmp", s_n, frame, m_context->FRAME.Block(), m_context->FRAME.PSM); - - m_mem.SaveBMP(s, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, GetFrameRect().width(), 512); - } - - if(s_savez && s_n >= s_saven) - { - s = format("c:\\temp1\\_%05d_f%lld_rz1_%05x_%d.bmp", s_n, frame, m_context->ZBUF.Block(), m_context->ZBUF.PSM); - - m_mem.SaveBMP(s, m_context->ZBUF.Block(), m_context->FRAME.FBW, m_context->ZBUF.PSM, GetFrameRect().width(), 512); - } - - s_n++; + m_mem.SaveBMP(s, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, GetFrameRect().width(), 512); } + + if(s_savez && s_n >= s_saven) + { + s = 
format("c:\\temp1\\_%05d_f%lld_rz1_%05x_%d.bmp", s_n, frame, m_context->ZBUF.Block(), m_context->ZBUF.PSM); + + m_mem.SaveBMP(s, m_context->ZBUF.Block(), m_context->FRAME.FBW, m_context->ZBUF.PSM, GetFrameRect().width(), 512); + } + + s_n++; } else { @@ -252,8 +244,6 @@ void GSRendererSW::Draw() { InvalidatePages(m_context->offset.zb, r); } - - // Sync(); } // TODO: m_perfmon.Put(GSPerfMon::Prim, stats.prims); @@ -295,7 +285,7 @@ void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS } } -void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r) +void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut) { //printf("ilm %05x %d %d\n", BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM); diff --git a/plugins/GSdx/GSRendererSW.h b/plugins/GSdx/GSRendererSW.h index 86199b4587..4e0986be34 100644 --- a/plugins/GSdx/GSRendererSW.h +++ b/plugins/GSdx/GSRendererSW.h @@ -52,7 +52,7 @@ class GSRendererSW : public GSRendererT }; protected: - GSRasterizerList* m_rl; + IRasterizer* m_rl; GSTextureCacheSW* m_tc; GSTexture* m_texture[2]; uint8* m_output; @@ -69,7 +69,7 @@ protected: void Draw(); void Sync(); void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r); - void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r); + void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false); void InvalidatePages(const GSOffset* o, const GSVector4i& rect); void InvalidatePages(const GSTextureCacheSW::Texture* t); diff --git a/plugins/GSdx/GSSettingsDlg.cpp b/plugins/GSdx/GSSettingsDlg.cpp index 2d83ef4a8a..0a7af7d115 100644 --- a/plugins/GSdx/GSSettingsDlg.cpp +++ b/plugins/GSdx/GSSettingsDlg.cpp @@ -130,8 +130,8 @@ void GSSettingsDlg::OnInit() SendMessage(GetDlgItem(m_hWnd, IDC_MSAA), UDM_SETRANGE, 0, MAKELPARAM(16, 0)); SendMessage(GetDlgItem(m_hWnd, IDC_MSAA), UDM_SETPOS, 0, 
MAKELPARAM(theApp.GetConfig("msaa", 0), 0)); - SendMessage(GetDlgItem(m_hWnd, IDC_SWTHREADS), UDM_SETRANGE, 0, MAKELPARAM(16, 1)); - SendMessage(GetDlgItem(m_hWnd, IDC_SWTHREADS), UDM_SETPOS, 0, MAKELPARAM(theApp.GetConfig("swthreads", 1), 0)); + SendMessage(GetDlgItem(m_hWnd, IDC_SWTHREADS), UDM_SETRANGE, 0, MAKELPARAM(16, 0)); + SendMessage(GetDlgItem(m_hWnd, IDC_SWTHREADS), UDM_SETPOS, 0, MAKELPARAM(theApp.GetConfig("extrathreads", 0), 0)); UpdateControls(); } @@ -270,7 +270,7 @@ bool GSSettingsDlg::OnCommand(HWND hWnd, UINT id, UINT code) theApp.SetConfig("nativeres", (int)IsDlgButtonChecked(m_hWnd, IDC_NATIVERES)); theApp.SetConfig("resx", (int)SendMessage(GetDlgItem(m_hWnd, IDC_RESX), UDM_GETPOS, 0, 0)); theApp.SetConfig("resy", (int)SendMessage(GetDlgItem(m_hWnd, IDC_RESY), UDM_GETPOS, 0, 0)); - theApp.SetConfig("swthreads", (int)SendMessage(GetDlgItem(m_hWnd, IDC_SWTHREADS), UDM_GETPOS, 0, 0)); + theApp.SetConfig("extrathreads", (int)SendMessage(GetDlgItem(m_hWnd, IDC_SWTHREADS), UDM_GETPOS, 0, 0)); theApp.SetConfig("msaa", (int)SendMessage(GetDlgItem(m_hWnd, IDC_MSAA), UDM_GETPOS, 0, 0)); // Hacks theApp.SetConfig("UserHacks_AlphaHack", (int)IsDlgButtonChecked(m_hWnd, IDC_ALPHAHACK)); diff --git a/plugins/GSdx/GSState.cpp b/plugins/GSdx/GSState.cpp index 31380639f8..fbfa0303cf 100644 --- a/plugins/GSdx/GSState.cpp +++ b/plugins/GSdx/GSState.cpp @@ -386,12 +386,12 @@ float GSState::GetFPS() // GIFPackedRegHandler* -__forceinline void GSState::GIFPackedRegHandlerNull(const GIFPackedReg* r) +__forceinline void GSState::GIFPackedRegHandlerNull(const GIFPackedReg* RESTRICT r) { // ASSERT(0); } -__forceinline void GSState::GIFPackedRegHandlerRGBA(const GIFPackedReg* r) +__forceinline void GSState::GIFPackedRegHandlerRGBA(const GIFPackedReg* RESTRICT r) { #if _M_SSE >= 0x301 @@ -418,7 +418,7 @@ __forceinline void GSState::GIFPackedRegHandlerRGBA(const GIFPackedReg* r) m_v.RGBAQ.Q = m_q; } -__forceinline void GSState::GIFPackedRegHandlerSTQ(const GIFPackedReg* r) 
+__forceinline void GSState::GIFPackedRegHandlerSTQ(const GIFPackedReg* RESTRICT r) { #if defined(_M_AMD64) @@ -445,7 +445,7 @@ __forceinline void GSState::GIFPackedRegHandlerSTQ(const GIFPackedReg* r) #endif } -__forceinline void GSState::GIFPackedRegHandlerUV(const GIFPackedReg* r) +__forceinline void GSState::GIFPackedRegHandlerUV(const GIFPackedReg* RESTRICT r) { #if _M_SSE >= 0x200 @@ -465,7 +465,7 @@ __forceinline void GSState::GIFPackedRegHandlerUV(const GIFPackedReg* r) #endif } -__forceinline void GSState::GIFPackedRegHandlerXYZF2(const GIFPackedReg* r) +__forceinline void GSState::GIFPackedRegHandlerXYZF2(const GIFPackedReg* RESTRICT r) { m_v.XYZ.X = r->XYZF2.X; m_v.XYZ.Y = r->XYZF2.Y; @@ -475,7 +475,7 @@ __forceinline void GSState::GIFPackedRegHandlerXYZF2(const GIFPackedReg* r) VertexKick(r->XYZF2.ADC); } -__forceinline void GSState::GIFPackedRegHandlerXYZ2(const GIFPackedReg* r) +__forceinline void GSState::GIFPackedRegHandlerXYZ2(const GIFPackedReg* RESTRICT r) { m_v.XYZ.X = r->XYZ2.X; m_v.XYZ.Y = r->XYZ2.Y; @@ -484,23 +484,23 @@ __forceinline void GSState::GIFPackedRegHandlerXYZ2(const GIFPackedReg* r) VertexKick(r->XYZ2.ADC); } -__forceinline void GSState::GIFPackedRegHandlerFOG(const GIFPackedReg* r) +__forceinline void GSState::GIFPackedRegHandlerFOG(const GIFPackedReg* RESTRICT r) { m_v.FOG.F = r->FOG.F; } -__forceinline void GSState::GIFPackedRegHandlerA_D(const GIFPackedReg* r) +__forceinline void GSState::GIFPackedRegHandlerA_D(const GIFPackedReg* RESTRICT r) { (this->*m_fpGIFRegHandlers[r->A_D.ADDR])(&r->r); } -__forceinline void GSState::GIFPackedRegHandlerNOP(const GIFPackedReg* r) +__forceinline void GSState::GIFPackedRegHandlerNOP(const GIFPackedReg* RESTRICT r) { } // GIFRegHandler* -void GSState::GIFRegHandlerNull(const GIFReg* r) +void GSState::GIFRegHandlerNull(const GIFReg* RESTRICT r) { // ASSERT(0); } @@ -531,19 +531,19 @@ __forceinline void GSState::ApplyPRIM(const GIFRegPRIM& prim) ResetPrim(); } -void 
GSState::GIFRegHandlerPRIM(const GIFReg* r) +void GSState::GIFRegHandlerPRIM(const GIFReg* RESTRICT r) { ALIGN_STACK(32); ApplyPRIM(r->PRIM); } -__forceinline void GSState::GIFRegHandlerRGBAQ(const GIFReg* r) +__forceinline void GSState::GIFRegHandlerRGBAQ(const GIFReg* RESTRICT r) { m_v.RGBAQ = (GSVector4i)r->RGBAQ; } -__forceinline void GSState::GIFRegHandlerST(const GIFReg* r) +__forceinline void GSState::GIFRegHandlerST(const GIFReg* RESTRICT r) { m_v.ST = (GSVector4i)r->ST; @@ -554,7 +554,7 @@ __forceinline void GSState::GIFRegHandlerST(const GIFReg* r) #endif } -__forceinline void GSState::GIFRegHandlerUV(const GIFReg* r) +__forceinline void GSState::GIFRegHandlerUV(const GIFReg* RESTRICT r) { m_v.UV.u32[0] = r->UV.u32[0] & 0x3fff3fff; @@ -564,7 +564,7 @@ __forceinline void GSState::GIFRegHandlerUV(const GIFReg* r) #endif } -void GSState::GIFRegHandlerXYZF2(const GIFReg* r) +void GSState::GIFRegHandlerXYZF2(const GIFReg* RESTRICT r) { /* m_v.XYZ.X = r->XYZF.X; @@ -579,7 +579,7 @@ void GSState::GIFRegHandlerXYZF2(const GIFReg* r) VertexKick(false); } -void GSState::GIFRegHandlerXYZ2(const GIFReg* r) +void GSState::GIFRegHandlerXYZ2(const GIFReg* RESTRICT r) { m_v.XYZ = (GSVector4i)r->XYZ; @@ -624,13 +624,13 @@ void GSState::ApplyTEX0(int i, GIFRegTEX0& TEX0) r.right = GSLocalMemory::m_psm[TEX0.CPSM].pgs.x; r.bottom = GSLocalMemory::m_psm[TEX0.CPSM].pgs.y; - InvalidateLocalMem(BITBLTBUF, r); + InvalidateLocalMem(BITBLTBUF, r, true); m_mem.m_clut.Write(m_env.CTXT[i].TEX0, m_env.TEXCLUT); } } -template void GSState::GIFRegHandlerTEX0(const GIFReg* r) +template void GSState::GIFRegHandlerTEX0(const GIFReg* RESTRICT r) { GIFRegTEX0 TEX0 = r->TEX0; @@ -681,7 +681,7 @@ template void GSState::GIFRegHandlerTEX0(const GIFReg* r) } } -template void GSState::GIFRegHandlerCLAMP(const GIFReg* r) +template void GSState::GIFRegHandlerCLAMP(const GIFReg* RESTRICT r) { if(PRIM->CTXT == i && r->CLAMP != m_env.CTXT[i].CLAMP) { @@ -691,12 +691,12 @@ template void 
GSState::GIFRegHandlerCLAMP(const GIFReg* r) m_env.CTXT[i].CLAMP = (GSVector4i)r->CLAMP; } -void GSState::GIFRegHandlerFOG(const GIFReg* r) +void GSState::GIFRegHandlerFOG(const GIFReg* RESTRICT r) { m_v.FOG = (GSVector4i)r->FOG; } -void GSState::GIFRegHandlerXYZF3(const GIFReg* r) +void GSState::GIFRegHandlerXYZF3(const GIFReg* RESTRICT r) { /* m_v.XYZ.X = r->XYZF.X; @@ -711,18 +711,18 @@ void GSState::GIFRegHandlerXYZF3(const GIFReg* r) VertexKick(true); } -void GSState::GIFRegHandlerXYZ3(const GIFReg* r) +void GSState::GIFRegHandlerXYZ3(const GIFReg* RESTRICT r) { m_v.XYZ = (GSVector4i)r->XYZ; VertexKick(true); } -void GSState::GIFRegHandlerNOP(const GIFReg* r) +void GSState::GIFRegHandlerNOP(const GIFReg* RESTRICT r) { } -template void GSState::GIFRegHandlerTEX1(const GIFReg* r) +template void GSState::GIFRegHandlerTEX1(const GIFReg* RESTRICT r) { if(PRIM->CTXT == i && r->TEX1 != m_env.CTXT[i].TEX1) { @@ -732,7 +732,7 @@ template void GSState::GIFRegHandlerTEX1(const GIFReg* r) m_env.CTXT[i].TEX1 = (GSVector4i)r->TEX1; } -template void GSState::GIFRegHandlerTEX2(const GIFReg* r) +template void GSState::GIFRegHandlerTEX2(const GIFReg* RESTRICT r) { // m_env.CTXT[i].TEX2 = r->TEX2; // not used @@ -749,7 +749,7 @@ template void GSState::GIFRegHandlerTEX2(const GIFReg* r) ApplyTEX0(i, TEX0); } -template void GSState::GIFRegHandlerXYOFFSET(const GIFReg* r) +template void GSState::GIFRegHandlerXYOFFSET(const GIFReg* RESTRICT r) { GSVector4i o = (GSVector4i)r->XYOFFSET & GSVector4i::x0000ffff(); @@ -763,7 +763,7 @@ template void GSState::GIFRegHandlerXYOFFSET(const GIFReg* r) m_env.CTXT[i].UpdateScissor(); } -void GSState::GIFRegHandlerPRMODECONT(const GIFReg* r) +void GSState::GIFRegHandlerPRMODECONT(const GIFReg* RESTRICT r) { if(r->PRMODECONT != m_env.PRMODECONT) { @@ -781,7 +781,7 @@ void GSState::GIFRegHandlerPRMODECONT(const GIFReg* r) UpdateVertexKick(); } -void GSState::GIFRegHandlerPRMODE(const GIFReg* r) +void GSState::GIFRegHandlerPRMODE(const GIFReg* 
RESTRICT r) { if(!m_env.PRMODECONT.AC) { @@ -797,7 +797,7 @@ void GSState::GIFRegHandlerPRMODE(const GIFReg* r) UpdateVertexKick(); } -void GSState::GIFRegHandlerTEXCLUT(const GIFReg* r) +void GSState::GIFRegHandlerTEXCLUT(const GIFReg* RESTRICT r) { if(r->TEXCLUT != m_env.TEXCLUT) { @@ -807,7 +807,7 @@ void GSState::GIFRegHandlerTEXCLUT(const GIFReg* r) m_env.TEXCLUT = (GSVector4i)r->TEXCLUT; } -void GSState::GIFRegHandlerSCANMSK(const GIFReg* r) +void GSState::GIFRegHandlerSCANMSK(const GIFReg* RESTRICT r) { if(r->SCANMSK != m_env.SCANMSK) { @@ -817,7 +817,7 @@ void GSState::GIFRegHandlerSCANMSK(const GIFReg* r) m_env.SCANMSK = (GSVector4i)r->SCANMSK; } -template void GSState::GIFRegHandlerMIPTBP1(const GIFReg* r) +template void GSState::GIFRegHandlerMIPTBP1(const GIFReg* RESTRICT r) { if(PRIM->CTXT == i && r->MIPTBP1 != m_env.CTXT[i].MIPTBP1) { @@ -827,7 +827,7 @@ template void GSState::GIFRegHandlerMIPTBP1(const GIFReg* r) m_env.CTXT[i].MIPTBP1 = (GSVector4i)r->MIPTBP1; } -template void GSState::GIFRegHandlerMIPTBP2(const GIFReg* r) +template void GSState::GIFRegHandlerMIPTBP2(const GIFReg* RESTRICT r) { if(PRIM->CTXT == i && r->MIPTBP2 != m_env.CTXT[i].MIPTBP2) { @@ -837,7 +837,7 @@ template void GSState::GIFRegHandlerMIPTBP2(const GIFReg* r) m_env.CTXT[i].MIPTBP2 = (GSVector4i)r->MIPTBP2; } -void GSState::GIFRegHandlerTEXA(const GIFReg* r) +void GSState::GIFRegHandlerTEXA(const GIFReg* RESTRICT r) { if(r->TEXA != m_env.TEXA) { @@ -847,7 +847,7 @@ void GSState::GIFRegHandlerTEXA(const GIFReg* r) m_env.TEXA = (GSVector4i)r->TEXA; } -void GSState::GIFRegHandlerFOGCOL(const GIFReg* r) +void GSState::GIFRegHandlerFOGCOL(const GIFReg* RESTRICT r) { if(r->FOGCOL != m_env.FOGCOL) { @@ -857,12 +857,12 @@ void GSState::GIFRegHandlerFOGCOL(const GIFReg* r) m_env.FOGCOL = (GSVector4i)r->FOGCOL; } -void GSState::GIFRegHandlerTEXFLUSH(const GIFReg* r) +void GSState::GIFRegHandlerTEXFLUSH(const GIFReg* RESTRICT r) { // TRACE(_T("TEXFLUSH\n")); } -template void 
GSState::GIFRegHandlerSCISSOR(const GIFReg* r) +template void GSState::GIFRegHandlerSCISSOR(const GIFReg* RESTRICT r) { if(PRIM->CTXT == i && r->SCISSOR != m_env.CTXT[i].SCISSOR) { @@ -874,7 +874,7 @@ template void GSState::GIFRegHandlerSCISSOR(const GIFReg* r) m_env.CTXT[i].UpdateScissor(); } -template void GSState::GIFRegHandlerALPHA(const GIFReg* r) +template void GSState::GIFRegHandlerALPHA(const GIFReg* RESTRICT r) { ASSERT(r->ALPHA.A != 3); ASSERT(r->ALPHA.B != 3); @@ -893,7 +893,7 @@ template void GSState::GIFRegHandlerALPHA(const GIFReg* r) m_env.CTXT[i].ALPHA.u32[0] = ((~m_env.CTXT[i].ALPHA.u32[0] >> 1) | 0xAA) & m_env.CTXT[i].ALPHA.u32[0]; } -void GSState::GIFRegHandlerDIMX(const GIFReg* r) +void GSState::GIFRegHandlerDIMX(const GIFReg* RESTRICT r) { bool update = false; @@ -912,7 +912,7 @@ void GSState::GIFRegHandlerDIMX(const GIFReg* r) } } -void GSState::GIFRegHandlerDTHE(const GIFReg* r) +void GSState::GIFRegHandlerDTHE(const GIFReg* RESTRICT r) { if(r->DTHE != m_env.DTHE) { @@ -922,7 +922,7 @@ void GSState::GIFRegHandlerDTHE(const GIFReg* r) m_env.DTHE = (GSVector4i)r->DTHE; } -void GSState::GIFRegHandlerCOLCLAMP(const GIFReg* r) +void GSState::GIFRegHandlerCOLCLAMP(const GIFReg* RESTRICT r) { if(r->COLCLAMP != m_env.COLCLAMP) { @@ -935,7 +935,7 @@ void GSState::GIFRegHandlerCOLCLAMP(const GIFReg* r) #endif } -template void GSState::GIFRegHandlerTEST(const GIFReg* r) +template void GSState::GIFRegHandlerTEST(const GIFReg* RESTRICT r) { if(PRIM->CTXT == i && r->TEST != m_env.CTXT[i].TEST) { @@ -948,7 +948,7 @@ template void GSState::GIFRegHandlerTEST(const GIFReg* r) #endif } -void GSState::GIFRegHandlerPABE(const GIFReg* r) +void GSState::GIFRegHandlerPABE(const GIFReg* RESTRICT r) { if(r->PABE != m_env.PABE) { @@ -958,7 +958,7 @@ void GSState::GIFRegHandlerPABE(const GIFReg* r) m_env.PABE = (GSVector4i)r->PABE; } -template void GSState::GIFRegHandlerFBA(const GIFReg* r) +template void GSState::GIFRegHandlerFBA(const GIFReg* RESTRICT r) { 
if(PRIM->CTXT == i && r->FBA != m_env.CTXT[i].FBA) { @@ -968,7 +968,7 @@ template void GSState::GIFRegHandlerFBA(const GIFReg* r) m_env.CTXT[i].FBA = (GSVector4i)r->FBA; } -template void GSState::GIFRegHandlerFRAME(const GIFReg* r) +template void GSState::GIFRegHandlerFRAME(const GIFReg* RESTRICT r) { if(PRIM->CTXT == i && r->FRAME != m_env.CTXT[i].FRAME) { @@ -988,7 +988,7 @@ template void GSState::GIFRegHandlerFRAME(const GIFReg* r) #endif } -template void GSState::GIFRegHandlerZBUF(const GIFReg* r) +template void GSState::GIFRegHandlerZBUF(const GIFReg* RESTRICT r) { GIFRegZBUF ZBUF = r->ZBUF; @@ -1023,7 +1023,7 @@ template void GSState::GIFRegHandlerZBUF(const GIFReg* r) m_env.CTXT[i].ZBUF = (GSVector4i)ZBUF; } -void GSState::GIFRegHandlerBITBLTBUF(const GIFReg* r) +void GSState::GIFRegHandlerBITBLTBUF(const GIFReg* RESTRICT r) { if(r->BITBLTBUF != m_env.BITBLTBUF) { @@ -1043,7 +1043,7 @@ void GSState::GIFRegHandlerBITBLTBUF(const GIFReg* r) } } -void GSState::GIFRegHandlerTRXPOS(const GIFReg* r) +void GSState::GIFRegHandlerTRXPOS(const GIFReg* RESTRICT r) { if(r->TRXPOS != m_env.TRXPOS) { @@ -1053,7 +1053,7 @@ void GSState::GIFRegHandlerTRXPOS(const GIFReg* r) m_env.TRXPOS = (GSVector4i)r->TRXPOS; } -void GSState::GIFRegHandlerTRXREG(const GIFReg* r) +void GSState::GIFRegHandlerTRXREG(const GIFReg* RESTRICT r) { if(r->TRXREG != m_env.TRXREG) { @@ -1063,7 +1063,7 @@ void GSState::GIFRegHandlerTRXREG(const GIFReg* r) m_env.TRXREG = (GSVector4i)r->TRXREG; } -void GSState::GIFRegHandlerTRXDIR(const GIFReg* r) +void GSState::GIFRegHandlerTRXDIR(const GIFReg* RESTRICT r) { Flush(); @@ -1083,17 +1083,19 @@ void GSState::GIFRegHandlerTRXDIR(const GIFReg* r) case 3: ASSERT(0); break; + default: + __assume(0); } } -void GSState::GIFRegHandlerHWREG(const GIFReg* r) +void GSState::GIFRegHandlerHWREG(const GIFReg* RESTRICT r) { ASSERT(m_env.TRXDIR.XDIR == 0); // host => local Write((uint8*)r, 8); // haunting ground } -void GSState::GIFRegHandlerSIGNAL(const GIFReg* r) 
+void GSState::GIFRegHandlerSIGNAL(const GIFReg* RESTRICT r) { m_regs->SIGLBLID.SIGID = (m_regs->SIGLBLID.SIGID & ~r->SIGNAL.IDMSK) | (r->SIGNAL.ID & r->SIGNAL.IDMSK); @@ -1101,13 +1103,13 @@ void GSState::GIFRegHandlerSIGNAL(const GIFReg* r) if(!m_regs->IMR.SIGMSK && m_irq) m_irq(); } -void GSState::GIFRegHandlerFINISH(const GIFReg* r) +void GSState::GIFRegHandlerFINISH(const GIFReg* RESTRICT r) { if(m_regs->CSR.wFINISH) m_regs->CSR.rFINISH = 1; if(!m_regs->IMR.FINISHMSK && m_irq) m_irq(); } -void GSState::GIFRegHandlerLABEL(const GIFReg* r) +void GSState::GIFRegHandlerLABEL(const GIFReg* RESTRICT r) { m_regs->SIGLBLID.LBLID = (m_regs->SIGLBLID.LBLID & ~r->LABEL.IDMSK) | (r->LABEL.ID & r->LABEL.IDMSK); } diff --git a/plugins/GSdx/GSState.h b/plugins/GSdx/GSState.h index 8f92aa1917..4de179a7bb 100644 --- a/plugins/GSdx/GSState.h +++ b/plugins/GSdx/GSState.h @@ -37,70 +37,72 @@ class GSState : public GSAlignedClass<32> { - typedef void (GSState::*GIFPackedRegHandler)(const GIFPackedReg* r); + // RESTRICT prevents multiple loads of the same part of the register when accessing its bitfields (the compiler is happy to know that memory writes in-between will not go there) + + typedef void (GSState::*GIFPackedRegHandler)(const GIFPackedReg* RESTRICT r); GIFPackedRegHandler m_fpGIFPackedRegHandlers[16]; - void GIFPackedRegHandlerNull(const GIFPackedReg* r); - void GIFPackedRegHandlerRGBA(const GIFPackedReg* r); - void GIFPackedRegHandlerSTQ(const GIFPackedReg* r); - void GIFPackedRegHandlerUV(const GIFPackedReg* r); - void GIFPackedRegHandlerXYZF2(const GIFPackedReg* r); - void GIFPackedRegHandlerXYZ2(const GIFPackedReg* r); - void GIFPackedRegHandlerFOG(const GIFPackedReg* r); - void GIFPackedRegHandlerA_D(const GIFPackedReg* r); - void GIFPackedRegHandlerNOP(const GIFPackedReg* r); + void GIFPackedRegHandlerNull(const GIFPackedReg* RESTRICT r); + void GIFPackedRegHandlerRGBA(const GIFPackedReg* RESTRICT r); + void GIFPackedRegHandlerSTQ(const GIFPackedReg* RESTRICT r); + 
void GIFPackedRegHandlerUV(const GIFPackedReg* RESTRICT r); + void GIFPackedRegHandlerXYZF2(const GIFPackedReg* RESTRICT r); + void GIFPackedRegHandlerXYZ2(const GIFPackedReg* RESTRICT r); + void GIFPackedRegHandlerFOG(const GIFPackedReg* RESTRICT r); + void GIFPackedRegHandlerA_D(const GIFPackedReg* RESTRICT r); + void GIFPackedRegHandlerNOP(const GIFPackedReg* RESTRICT r); - typedef void (GSState::*GIFRegHandler)(const GIFReg* r); + typedef void (GSState::*GIFRegHandler)(const GIFReg* RESTRICT r); GIFRegHandler m_fpGIFRegHandlers[256]; void ApplyTEX0(int i, GIFRegTEX0& TEX0); void ApplyPRIM(const GIFRegPRIM& PRIM); - void GIFRegHandlerNull(const GIFReg* r); - void GIFRegHandlerPRIM(const GIFReg* r); - void GIFRegHandlerRGBAQ(const GIFReg* r); - void GIFRegHandlerST(const GIFReg* r); - void GIFRegHandlerUV(const GIFReg* r); - void GIFRegHandlerXYZF2(const GIFReg* r); - void GIFRegHandlerXYZ2(const GIFReg* r); - template void GIFRegHandlerTEX0(const GIFReg* r); - template void GIFRegHandlerCLAMP(const GIFReg* r); - void GIFRegHandlerFOG(const GIFReg* r); - void GIFRegHandlerXYZF3(const GIFReg* r); - void GIFRegHandlerXYZ3(const GIFReg* r); - void GIFRegHandlerNOP(const GIFReg* r); - template void GIFRegHandlerTEX1(const GIFReg* r); - template void GIFRegHandlerTEX2(const GIFReg* r); - template void GIFRegHandlerXYOFFSET(const GIFReg* r); - void GIFRegHandlerPRMODECONT(const GIFReg* r); - void GIFRegHandlerPRMODE(const GIFReg* r); - void GIFRegHandlerTEXCLUT(const GIFReg* r); - void GIFRegHandlerSCANMSK(const GIFReg* r); - template void GIFRegHandlerMIPTBP1(const GIFReg* r); - template void GIFRegHandlerMIPTBP2(const GIFReg* r); - void GIFRegHandlerTEXA(const GIFReg* r); - void GIFRegHandlerFOGCOL(const GIFReg* r); - void GIFRegHandlerTEXFLUSH(const GIFReg* r); - template void GIFRegHandlerSCISSOR(const GIFReg* r); - template void GIFRegHandlerALPHA(const GIFReg* r); - void GIFRegHandlerDIMX(const GIFReg* r); - void GIFRegHandlerDTHE(const GIFReg* r); - void 
GIFRegHandlerCOLCLAMP(const GIFReg* r); - template void GIFRegHandlerTEST(const GIFReg* r); - void GIFRegHandlerPABE(const GIFReg* r); - template void GIFRegHandlerFBA(const GIFReg* r); - template void GIFRegHandlerFRAME(const GIFReg* r); - template void GIFRegHandlerZBUF(const GIFReg* r); - void GIFRegHandlerBITBLTBUF(const GIFReg* r); - void GIFRegHandlerTRXPOS(const GIFReg* r); - void GIFRegHandlerTRXREG(const GIFReg* r); - void GIFRegHandlerTRXDIR(const GIFReg* r); - void GIFRegHandlerHWREG(const GIFReg* r); - void GIFRegHandlerSIGNAL(const GIFReg* r); - void GIFRegHandlerFINISH(const GIFReg* r); - void GIFRegHandlerLABEL(const GIFReg* r); + void GIFRegHandlerNull(const GIFReg* RESTRICT r); + void GIFRegHandlerPRIM(const GIFReg* RESTRICT r); + void GIFRegHandlerRGBAQ(const GIFReg* RESTRICT r); + void GIFRegHandlerST(const GIFReg* RESTRICT r); + void GIFRegHandlerUV(const GIFReg* RESTRICT r); + void GIFRegHandlerXYZF2(const GIFReg* RESTRICT r); + void GIFRegHandlerXYZ2(const GIFReg* RESTRICT r); + template void GIFRegHandlerTEX0(const GIFReg* RESTRICT r); + template void GIFRegHandlerCLAMP(const GIFReg* RESTRICT r); + void GIFRegHandlerFOG(const GIFReg* RESTRICT r); + void GIFRegHandlerXYZF3(const GIFReg* RESTRICT r); + void GIFRegHandlerXYZ3(const GIFReg* RESTRICT r); + void GIFRegHandlerNOP(const GIFReg* RESTRICT r); + template void GIFRegHandlerTEX1(const GIFReg* RESTRICT r); + template void GIFRegHandlerTEX2(const GIFReg* RESTRICT r); + template void GIFRegHandlerXYOFFSET(const GIFReg* RESTRICT r); + void GIFRegHandlerPRMODECONT(const GIFReg* RESTRICT r); + void GIFRegHandlerPRMODE(const GIFReg* RESTRICT r); + void GIFRegHandlerTEXCLUT(const GIFReg* RESTRICT r); + void GIFRegHandlerSCANMSK(const GIFReg* RESTRICT r); + template void GIFRegHandlerMIPTBP1(const GIFReg* RESTRICT r); + template void GIFRegHandlerMIPTBP2(const GIFReg* RESTRICT r); + void GIFRegHandlerTEXA(const GIFReg* RESTRICT r); + void GIFRegHandlerFOGCOL(const GIFReg* RESTRICT r); + void 
GIFRegHandlerTEXFLUSH(const GIFReg* RESTRICT r); + template void GIFRegHandlerSCISSOR(const GIFReg* RESTRICT r); + template void GIFRegHandlerALPHA(const GIFReg* RESTRICT r); + void GIFRegHandlerDIMX(const GIFReg* RESTRICT r); + void GIFRegHandlerDTHE(const GIFReg* RESTRICT r); + void GIFRegHandlerCOLCLAMP(const GIFReg* RESTRICT r); + template void GIFRegHandlerTEST(const GIFReg* RESTRICT r); + void GIFRegHandlerPABE(const GIFReg* RESTRICT r); + template void GIFRegHandlerFBA(const GIFReg* RESTRICT r); + template void GIFRegHandlerFRAME(const GIFReg* RESTRICT r); + template void GIFRegHandlerZBUF(const GIFReg* RESTRICT r); + void GIFRegHandlerBITBLTBUF(const GIFReg* RESTRICT r); + void GIFRegHandlerTRXPOS(const GIFReg* RESTRICT r); + void GIFRegHandlerTRXREG(const GIFReg* RESTRICT r); + void GIFRegHandlerTRXDIR(const GIFReg* RESTRICT r); + void GIFRegHandlerHWREG(const GIFReg* RESTRICT r); + void GIFRegHandlerSIGNAL(const GIFReg* RESTRICT r); + void GIFRegHandlerFINISH(const GIFReg* RESTRICT r); + void GIFRegHandlerLABEL(const GIFReg* RESTRICT r); int m_version; int m_sssize; @@ -206,7 +208,7 @@ public: virtual void FlushPrim() = 0; virtual void ResetPrim() = 0; virtual void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r) {} - virtual void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r) {} + virtual void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false) {} void Move(); void Write(const uint8* mem, int len); diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index 29319a5a54..3d66576948 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -351,7 +351,7 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset* o, const GSVector4i& rec for(list::iterator k = l.begin(); k != l.end(); k++) { - s->m_valid[k->x] &= ~k->y; + s->m_valid[k->x] &= k->y; } } else diff --git a/plugins/GSdx/GSTextureCacheSW.cpp 
b/plugins/GSdx/GSTextureCacheSW.cpp index 82423017d8..d6b2046b5b 100644 --- a/plugins/GSdx/GSTextureCacheSW.cpp +++ b/plugins/GSdx/GSTextureCacheSW.cpp @@ -131,7 +131,7 @@ void GSTextureCacheSW::InvalidateVideoMem(const GSOffset* o, const GSVector4i& r for(list::iterator j = l.begin(); j != l.end(); j++) { - t->m_valid[j->x] &= ~j->y; + t->m_valid[j->x] &= j->y; } } else diff --git a/plugins/GSdx/GSThread.h b/plugins/GSdx/GSThread.h index 6a20d73c62..23b5ef156e 100644 --- a/plugins/GSdx/GSThread.h +++ b/plugins/GSdx/GSThread.h @@ -54,32 +54,123 @@ public: void Unlock() {LeaveCriticalSection(&m_cs);} }; -class GSAutoResetEvent +class GSEvent { protected: HANDLE m_hEvent; public: - GSAutoResetEvent() {m_hEvent = CreateEvent(NULL, FALSE, FALSE, NULL);} - ~GSAutoResetEvent() {CloseHandle(m_hEvent);} + GSEvent(bool manual = false, bool initial = false) {m_hEvent = CreateEvent(NULL, manual, initial, NULL);} + ~GSEvent() {CloseHandle(m_hEvent);} void Set() {SetEvent(m_hEvent);} + void Reset() {ResetEvent(m_hEvent);} bool Wait() {return WaitForSingleObject(m_hEvent, INFINITE) == WAIT_OBJECT_0;} }; -/* -class GSAutoResetEvent + +// TODO: pthreads version (needs manual-reset event) + +template< + class T, + class ENQUEUE_EVENT = GSEvent, + class DEQUEUE_EVENT = GSEvent> +class GSQueue : public GSCritSec { -protected: - long m_sync; - + std::list m_queue; + HANDLE m_put; + HANDLE m_get; + ENQUEUE_EVENT m_enqueue; + DEQUEUE_EVENT m_dequeue; + long m_count; + public: - GSAutoResetEvent() {m_sync = 0;} - ~GSAutoResetEvent() {} + GSQueue(long count) + : m_enqueue(true) + , m_dequeue(true) + , m_count(count) + { + m_put = CreateSemaphore(NULL, count, count, NULL); + m_get = CreateSemaphore(NULL, 0, count, NULL); - void Set() {_interlockedbittestandset(&m_sync, 0);} - bool Wait() {while(!_interlockedbittestandreset(&m_sync, 0)) _mm_pause(); return true;} + m_dequeue.Set(); + } + + virtual ~GSQueue() + { + CloseHandle(m_put); + CloseHandle(m_get); + } + + size_t GetCount() const + { 
+ // GSAutoLock cAutoLock(this); + + return m_queue.size(); + } + + size_t GetMaxCount() const + { + // GSAutoLock cAutoLock(this); + + return (size_t)m_count; + } + + ENQUEUE_EVENT& GetEnqueueEvent() + { + return m_enqueue; + } + + DEQUEUE_EVENT& GetDequeueEvent() + { + return m_dequeue; + } + + void Enqueue(T item) + { + WaitForSingleObject(m_put, INFINITE); + + { + GSAutoLock cAutoLock(this); + + m_queue.push_back(item); + + m_enqueue.Set(); + m_dequeue.Reset(); + } + + ReleaseSemaphore(m_get, 1, NULL); + } + + T Dequeue() + { + T item; + + WaitForSingleObject(m_get, INFINITE); + + { + GSAutoLock cAutoLock(this); + + item = m_queue.front(); + + m_queue.pop_front(); + + if(m_queue.empty()) + { + m_enqueue.Reset(); + m_dequeue.Set(); + } + } + + ReleaseSemaphore(m_put, 1, NULL); + + return item; + } + + T Peek() // lock on "this" + { + return m_queue.front(); + } }; -*/ #else @@ -128,14 +219,14 @@ public: void Unlock() {pthread_mutex_unlock(&m_mutex);} }; -class GSAutoResetEvent +class GSEvent { protected: sem_t m_sem; public: - GSAutoResetEvent() {sem_init(&m_sem, 0, 0);} - ~GSAutoResetEvent() {sem_destroy(&m_sem);} + GSEvent() {sem_init(&m_sem, 0, 0);} + ~GSEvent() {sem_destroy(&m_sem);} void Set() {sem_post(&m_sem);} bool Wait() {return sem_wait(&m_sem) == 0;} @@ -152,3 +243,23 @@ public: GSAutoLock(GSCritSec* cs) {m_cs = cs; m_cs->Lock();} ~GSAutoLock() {m_cs->Unlock();} }; + +class GSEventSpin +{ +protected: + volatile long m_sync; + volatile bool m_manual; + +public: + GSEventSpin(bool manual = false, bool initial = false) {m_sync = initial ? 
1 : 0; m_manual = manual;} + ~GSEventSpin() {} + + void Set() {_interlockedbittestandset(&m_sync, 0);} + void Reset() {_interlockedbittestandreset(&m_sync, 0);} + bool Wait() + { + if(m_manual) while(!m_sync) _mm_pause(); + else while(!_interlockedbittestandreset(&m_sync, 0)) _mm_pause(); + return true; + } +}; diff --git a/plugins/GSdx/GSdx.rc b/plugins/GSdx/GSdx.rc index 135f0a41f0..9cc9fc1ec0 100644 --- a/plugins/GSdx/GSdx.rc +++ b/plugins/GSdx/GSdx.rc @@ -13,13 +13,11 @@ #undef APSTUDIO_READONLY_SYMBOLS ///////////////////////////////////////////////////////////////////////////// -// English (U.S.) resources +// English (United States) resources #if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU) -#ifdef _WIN32 LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US #pragma code_page(1252) -#endif //_WIN32 #ifdef APSTUDIO_INVOKED ///////////////////////////////////////////////////////////////////////////// @@ -58,7 +56,7 @@ IDR_CONVERT_FX RCDATA "res\\convert.fx" IDR_TFX_FX RCDATA "res\\tfx.fx" IDR_MERGE_FX RCDATA "res\\merge.fx" IDR_INTERLACE_FX RCDATA "res\\interlace.fx" -IDR_FXAA_FX RCDATA "res\\fxaa.fx" +IDR_FXAA_FX RCDATA "res\\fxaa.fx" ///////////////////////////////////////////////////////////////////////////// // @@ -104,9 +102,9 @@ BEGIN CONTROL "",IDC_MSAA,"msctls_updown32",UDS_SETBUDDYINT | UDS_ALIGNRIGHT | UDS_AUTOBUDDY | UDS_ARROWKEYS | UDS_NOTHOUSANDS,109,261,11,14 LTEXT "HW Anti Aliasing",IDC_STATIC_TEXT_HWAA,18,261,53,8 GROUPBOX "D3D Enhancements (can cause glitches)",IDC_STATIC,7,117,175,66 - LTEXT "SW rend. 
threads:",IDC_STATIC,7,189,60,8 - EDITTEXT IDC_SWTHREADS_EDIT,71,187,35,13,ES_AUTOHSCROLL | ES_NUMBER - CONTROL "",IDC_SWTHREADS,"msctls_updown32",UDS_SETBUDDYINT | UDS_ALIGNRIGHT | UDS_AUTOBUDDY | UDS_ARROWKEYS | UDS_NOTHOUSANDS,99,190,11,14 + LTEXT "Extra rendering threads:",IDC_STATIC,7,189,80,8 + EDITTEXT IDC_SWTHREADS_EDIT,89,187,35,13,ES_AUTOHSCROLL | ES_NUMBER + CONTROL "",IDC_SWTHREADS,"msctls_updown32",UDS_SETBUDDYINT | UDS_ALIGNRIGHT | UDS_AUTOBUDDY | UDS_ARROWKEYS | UDS_NOTHOUSANDS,117,190,11,14 CONTROL "Texture filtering",IDC_FILTER,"Button",BS_AUTO3STATE | WS_TABSTOP,7,203,67,10 CONTROL "Logarithmic Z",IDC_LOGZ,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,89,204,58,10 CONTROL "Allow 8-bit textures",IDC_PALTEX,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,217,82,10 @@ -144,23 +142,23 @@ FONT 8, "MS Shell Dlg", 400, 0, 0x1 BEGIN CONTROL 2021,IDC_LOGO9,"Static",SS_BITMAP,7,7,175,44 LTEXT "Resolution:",IDC_STATIC,7,59,37,8 - COMBOBOX IDC_RESOLUTION,78,57,104,125,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP + COMBOBOX IDC_RESOLUTION,80,57,102,125,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP LTEXT "Renderer:",IDC_STATIC,7,74,34,8 - COMBOBOX IDC_RENDERER,78,72,104,118,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP + COMBOBOX IDC_RENDERER,80,72,102,118,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP LTEXT "Texture Filter (Del):",IDC_STATIC,7,90,64,8 - COMBOBOX IDC_FILTER,78,87,104,98,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP + COMBOBOX IDC_FILTER,80,87,102,98,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP LTEXT "Dithering (End):",IDC_STATIC,7,105,52,8 - COMBOBOX IDC_DITHERING,78,102,104,98,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP + COMBOBOX IDC_DITHERING,80,102,102,98,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP LTEXT "Aspect Ratio (PgDn):",IDC_STATIC,7,120,68,8 - COMBOBOX IDC_ASPECTRATIO,78,117,104,98,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP - LTEXT "Rendering Threads:",IDC_STATIC,7,157,64,8 - EDITTEXT IDC_SWTHREADS_EDIT,78,155,35,13,ES_AUTOHSCROLL | ES_NUMBER + COMBOBOX 
IDC_ASPECTRATIO,80,117,102,98,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP + LTEXT "Extra Rend. Threads:",IDC_STATIC,7,157,70,8 + EDITTEXT IDC_SWTHREADS_EDIT,80,155,35,13,ES_AUTOHSCROLL | ES_NUMBER CONTROL "",IDC_SWTHREADS,"msctls_updown32",UDS_SETBUDDYINT | UDS_ALIGNRIGHT | UDS_AUTOBUDDY | UDS_ARROWKEYS | UDS_NOTHOUSANDS,99,161,11,14 DEFPUSHBUTTON "OK",IDOK,43,178,50,14 PUSHBUTTON "Cancel",IDCANCEL,96,178,50,14 CONTROL 2022,IDC_LOGO11,"Static",SS_BITMAP,7,7,173,42 LTEXT "Internal Resolution:",IDC_STATIC,7,135,64,8 - COMBOBOX IDC_SCALE,78,132,104,98,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP + COMBOBOX IDC_SCALE,80,132,102,98,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP CONTROL "Windowed",IDC_WINDOWED,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,129,157,49,10 END @@ -181,9 +179,9 @@ BEGIN EDITTEXT IDC_RESY_EDIT,130,132,35,13,ES_AUTOHSCROLL | ES_NUMBER CONTROL "",IDC_RESY,"msctls_updown32",UDS_SETBUDDYINT | UDS_ALIGNRIGHT | UDS_AUTOBUDDY | UDS_ARROWKEYS | UDS_NOTHOUSANDS,154,130,11,14 CONTROL "Native",IDC_NATIVERES,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,92,105,33,10 - LTEXT "Rendering threads:",IDC_STATIC,19,214,63,8 - EDITTEXT IDC_SWTHREADS_EDIT,87,212,35,13,ES_AUTOHSCROLL | ES_NUMBER - CONTROL "",IDC_SWTHREADS,"msctls_updown32",UDS_SETBUDDYINT | UDS_ALIGNRIGHT | UDS_AUTOBUDDY | UDS_ARROWKEYS | UDS_NOTHOUSANDS,115,215,11,14 + LTEXT "Extra rendering threads:",IDC_STATIC,19,214,80,8 + EDITTEXT IDC_SWTHREADS_EDIT,102,212,35,13,ES_AUTOHSCROLL | ES_NUMBER + CONTROL "",IDC_SWTHREADS,"msctls_updown32",UDS_SETBUDDYINT | UDS_ALIGNRIGHT | UDS_AUTOBUDDY | UDS_ARROWKEYS | UDS_NOTHOUSANDS,135,215,11,14 COMBOBOX IDC_UPSCALE_MULTIPLIER,92,117,74,98,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP LTEXT "Or use Scaling:",IDC_STATIC,38,120,49,8 LTEXT "Original PS2 resolution :",IDC_STATIC,10,105,80,8 @@ -215,7 +213,7 @@ END // #ifdef APSTUDIO_INVOKED -GUIDELINES DESIGNINFO +GUIDELINES DESIGNINFO BEGIN IDD_CONFIG, DIALOG BEGIN @@ -241,7 +239,7 @@ BEGIN BEGIN LEFTMARGIN, 7 RIGHTMARGIN, 182 - 
VERTGUIDE, 78 + VERTGUIDE, 80 VERTGUIDE, 182 TOPMARGIN, 7 BOTTOMMARGIN, 192 @@ -298,7 +296,7 @@ BEGIN END END -#endif // English (U.S.) resources +#endif // English (United States) resources ///////////////////////////////////////////////////////////////////////////// @@ -312,7 +310,6 @@ END #include "res/convert.fx" #include "res/interlace.fx" #include "res/merge.fx" -#include "res/fxaa.fx" ///////////////////////////////////////////////////////////////////////////// #endif // not APSTUDIO_INVOKED diff --git a/plugins/GSdx/resource.h b/plugins/GSdx/resource.h index 6744e7b03a..43d9df57fe 100644 --- a/plugins/GSdx/resource.h +++ b/plugins/GSdx/resource.h @@ -80,7 +80,7 @@ #define IDR_MERGE_FX 10002 #define IDR_INTERLACE_FX 10003 #define IDD_CONFIG2 10004 -#define IDR_FXAA_FX 10005 +#define IDR_FXAA_FX 10005 // Next default values for new objects // diff --git a/plugins/GSdx/stdafx.h b/plugins/GSdx/stdafx.h index f7d5de3879..3158713a6a 100644 --- a/plugins/GSdx/stdafx.h +++ b/plugins/GSdx/stdafx.h @@ -317,6 +317,23 @@ typedef signed long long int64; return retval; } + __forceinline long _InterlockedExchangeAdd(volatile long* const Addend, const long Value) + { + long retval = Value; + __asm__("lock; xaddl %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory"); + return retval; + } + + __forceinline long _InterlockedDecrement(volatile long* const lpAddend) + { + return _InterlockedExchangeAdd(lpAddend, -1) - 1; + } + + __forceinline long _InterlockedIncrement(volatile long* const lpAddend) + { + return _InterlockedExchangeAdd(lpAddend, 1) + 1; + } + #ifdef __GNUC__ __forceinline unsigned long long __rdtsc()