GSdx: fixing/breaking things again... Palettized texture lookup can now be done by the pixel shader (selectable, off by default). If you have a fast card it may help with texture-heavy games; otherwise it is only going to be slower.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1472 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2009-07-06 16:35:06 +00:00
parent 4acf6c6a8c
commit f9f056d581
38 changed files with 732 additions and 1110 deletions

View File

@ -23,7 +23,7 @@
#include "GSClut.h"
#include "GSLocalMemory.h"
GSClut::GSClut(const GSLocalMemory* mem)
GSClut::GSClut(GSLocalMemory* mem)
: m_mem(mem)
{
uint8* p = (uint8*)VirtualAlloc(NULL, 2 * 4096, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
@ -168,14 +168,16 @@ void GSClut::WriteCLUT16S_I4_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& T
template<int n> void GSClut::WriteCLUT32_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
{
uint16* RESTRICT clut = m_clut + (TEX0.CSA << 4);
GSLocalMemory::PixelOffset* po = m_mem->GetPixelOffset(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT32);
uint32 base = m_mem->PixelAddress32(0, TEXCLUT.COV, TEX0.CBP, TEXCLUT.CBW);
int* offset = &m_mem->rowOffset32[TEXCLUT.COU << 4];
uint32* RESTRICT s = &m_mem->m_vm32[po->row[TEXCLUT.COV]];
int* RESTRICT o = &po->col[0][TEXCLUT.COU << 4];
uint16* RESTRICT clut = m_clut + (TEX0.CSA << 4);
for(int i = 0; i < n; i++)
{
uint32 c = m_mem->ReadPixel32(base + offset[i]);
uint32 c = s[o[i]];
clut[i] = (uint16)(c & 0xffff);
clut[i + 256] = (uint16)(c >> 16);
@ -184,27 +186,31 @@ template<int n> void GSClut::WriteCLUT32_CSM2(const GIFRegTEX0& TEX0, const GIFR
template<int n> void GSClut::WriteCLUT16_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
{
uint16* RESTRICT clut = m_clut + (TEX0.CSA << 4);
GSLocalMemory::PixelOffset* po = m_mem->GetPixelOffset(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT16);
uint32 base = m_mem->PixelAddress16(0, TEXCLUT.COV, TEX0.CBP, TEXCLUT.CBW);
int* offset = &m_mem->rowOffset16[TEXCLUT.COU << 4];
uint16* RESTRICT s = &m_mem->m_vm16[po->row[TEXCLUT.COV]];
int* RESTRICT o = &po->col[0][TEXCLUT.COU << 4];
uint16* RESTRICT clut = m_clut + (TEX0.CSA << 4);
for(int i = 0; i < n; i++)
{
clut[i] = (uint16)m_mem->ReadPixel16(base + offset[i]);
clut[i] = s[o[i]];
}
}
template<int n> void GSClut::WriteCLUT16S_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
{
uint16* RESTRICT clut = m_clut + (TEX0.CSA << 4);
GSLocalMemory::PixelOffset* po = m_mem->GetPixelOffset(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT16S);
uint32 base = m_mem->PixelAddress16S(0, TEXCLUT.COV, TEX0.CBP, TEXCLUT.CBW);
int* offset = &m_mem->rowOffset16S[TEXCLUT.COU << 4];
uint16* RESTRICT s = &m_mem->m_vm16[po->row[TEXCLUT.COV]];
int* RESTRICT o = &po->col[0][TEXCLUT.COU << 4];
uint16* RESTRICT clut = m_clut + (TEX0.CSA << 4);
for(int i = 0; i < n; i++)
{
clut[i] = (uint16)m_mem->ReadPixel16(base + offset[i]);
clut[i] = s[o[i]];
}
}

View File

@ -30,7 +30,7 @@ class GSLocalMemory;
__declspec(align(16)) class GSClut : public GSAlignedClass<16>
{
const GSLocalMemory* m_mem;
GSLocalMemory* m_mem;
uint32 m_CBP[2];
uint16* m_clut;
@ -93,7 +93,7 @@ __declspec(align(16)) class GSClut : public GSAlignedClass<16>
static void Expand16(const uint16* RESTRICT src, uint32* RESTRICT dst, int w, const GIFRegTEXA& TEXA);
public:
GSClut(const GSLocalMemory* mem);
GSClut(GSLocalMemory* mem);
virtual ~GSClut();
void Invalidate();

View File

@ -24,6 +24,7 @@
GSDevice::GSDevice()
: m_wnd(NULL)
, m_rbswapped(false)
, m_backbuffer(NULL)
, m_merge(NULL)
, m_weavebob(NULL)

View File

@ -55,6 +55,7 @@ class GSDevice : public GSAlignedClass<16>
protected:
GSWnd* m_wnd;
bool m_vsync;
bool m_rbswapped;
GSTexture* m_backbuffer;
GSTexture* m_merge;
GSTexture* m_weavebob;
@ -97,17 +98,20 @@ public:
virtual GSTexture* CopyOffscreen(GSTexture* src, const GSVector4& sr, int w, int h, int format = 0) {return NULL;}
virtual void CopyRect(GSTexture* st, GSTexture* dt, const GSVector4i& r) {}
virtual void StretchRect(GSTexture* st, GSTexture* dt, const GSVector4& dr, int shader = 0, bool linear = true);
virtual void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, int shader = 0, bool linear = true) {}
GSTexture* GetCurrent();
virtual bool IsCurrentRGBA() {return true;}
void Merge(GSTexture* st[2], GSVector4* sr, GSVector4* dr, const GSVector2i& fs, bool slbg, bool mmod, const GSVector4& c);
void Interlace(const GSVector2i& ds, int field, int mode, float yoffset);
bool ResizeTexture(GSTexture** t, int w, int h);
bool IsRBSwapped() {return m_rbswapped;}
template<class T> void PrepareShaderMacro(vector<T>& dst, const T* src, const char* model)
{
dst.clear();

View File

@ -392,6 +392,13 @@ GSTexture* GSDevice10::CopyOffscreen(GSTexture* src, const GSVector4& sr, int w,
return dst;
}
// Copies the region r of texture st into texture dt through the D3D10 device.
//
// st - source texture (cast to GSTexture10)
// dt - destination texture (cast to GSTexture10)
// r  - source rectangle; left/top/right/bottom select the region, depth slice is [0, 1)
//
// NOTE(review): the destination offset passed below is (0, 0, 0), so the copied region lands
// at the origin of dt rather than at r.left/r.top - confirm callers only rely on r starting at (0, 0).
void GSDevice10::CopyRect(GSTexture* st, GSTexture* dt, const GSVector4i& r)
{
// box members are initialized in the order left, top, front, right, bottom, back
D3D10_BOX box = {r.left, r.top, 0, r.right, r.bottom, 1};
// subresource 0 of st -> subresource 0 of dt
m_dev->CopySubresourceRegion(*(GSTexture10*)dt, 0, 0, 0, 0, *(GSTexture10*)st, 0, &box);
}
void GSDevice10::StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, int shader, bool linear)
{
StretchRect(st, sr, dt, dr, m_convert.ps[shader], NULL, linear);

View File

@ -115,6 +115,8 @@ public:
GSTexture* CopyOffscreen(GSTexture* src, const GSVector4& sr, int w, int h, int format = 0);
void CopyRect(GSTexture* st, GSTexture* dt, const GSVector4i& r);
void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, int shader = 0, bool linear = true);
void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, ID3D10PixelShader* ps, ID3D10Buffer* ps_cb, bool linear = true);
void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, ID3D10PixelShader* ps, ID3D10Buffer* ps_cb, ID3D10BlendState* bs, bool linear = true);

View File

@ -92,8 +92,8 @@ bool GSDevice11::Create(GSWnd* wnd, bool vsync)
flags |= D3D11_CREATE_DEVICE_DEBUG;
#endif
hr = D3D11CreateDeviceAndSwapChain(NULL, D3D_DRIVER_TYPE_HARDWARE, NULL, 0, NULL, 0, D3D11_SDK_VERSION, &scd, &m_swapchain, &m_dev, &m_level, &m_ctx);
// hr = D3D11CreateDeviceAndSwapChain(NULL, D3D_DRIVER_TYPE_REFERENCE, NULL, 0, NULL, 0, D3D11_SDK_VERSION, &scd, &m_swapchain, &m_dev, &m_level, &m_ctx);
hr = D3D11CreateDeviceAndSwapChain(NULL, D3D_DRIVER_TYPE_HARDWARE, NULL, flags, NULL, 0, D3D11_SDK_VERSION, &scd, &m_swapchain, &m_dev, &m_level, &m_ctx);
// hr = D3D11CreateDeviceAndSwapChain(NULL, D3D_DRIVER_TYPE_REFERENCE, NULL, flags, NULL, 0, D3D11_SDK_VERSION, &scd, &m_swapchain, &m_dev, &m_level, &m_ctx);
if(FAILED(hr)) return false;
@ -429,6 +429,13 @@ GSTexture* GSDevice11::CopyOffscreen(GSTexture* src, const GSVector4& sr, int w,
return dst;
}
// Copies the region r of texture st into texture dt through the D3D11 immediate context.
//
// st - source texture (cast to GSTexture11)
// dt - destination texture (cast to GSTexture11)
// r  - source rectangle; left/top/right/bottom select the region, depth slice is [0, 1)
//
// NOTE(review): the destination offset passed below is (0, 0, 0), so the copied region lands
// at the origin of dt rather than at r.left/r.top - confirm callers only rely on r starting at (0, 0).
void GSDevice11::CopyRect(GSTexture* st, GSTexture* dt, const GSVector4i& r)
{
// box members are initialized in the order left, top, front, right, bottom, back
D3D11_BOX box = {r.left, r.top, 0, r.right, r.bottom, 1};
// subresource 0 of st -> subresource 0 of dt
m_ctx->CopySubresourceRegion(*(GSTexture11*)dt, 0, 0, 0, 0, *(GSTexture11*)st, 0, &box);
}
void GSDevice11::StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, int shader, bool linear)
{
StretchRect(st, sr, dt, dr, m_convert.ps[shader], NULL, linear);

View File

@ -118,6 +118,8 @@ public:
GSTexture* CopyOffscreen(GSTexture* src, const GSVector4& sr, int w, int h, int format = 0);
void CopyRect(GSTexture* st, GSTexture* dt, const GSVector4i& r);
void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, int shader = 0, bool linear = true);
void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, ID3D11PixelShader* ps, ID3D11Buffer* ps_cb, bool linear = true);
void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, ID3D11PixelShader* ps, ID3D11Buffer* ps_cb, ID3D11BlendState* bs, bool linear = true);

View File

@ -44,6 +44,8 @@ GSDevice9::GSDevice9()
, m_dsv(NULL)
, m_lost(false)
{
m_rbswapped = true;
memset(&m_pp, 0, sizeof(m_pp));
memset(&m_ddcaps, 0, sizeof(m_ddcaps));
memset(&m_d3dcaps, 0, sizeof(m_d3dcaps));
@ -571,6 +573,11 @@ GSTexture* GSDevice9::CopyOffscreen(GSTexture* src, const GSVector4& sr, int w,
return dst;
}
// Copies rectangle r of st into the same rectangle r of dt using the D3D9 device's
// StretchRect with point filtering (source and destination rectangles are identical,
// so no scaling is requested).
//
// st - source texture (cast to GSTexture9)
// dt - destination texture (cast to GSTexture9)
// r  - rectangle used for both the source and the destination
void GSDevice9::CopyRect(GSTexture* st, GSTexture* dt, const GSVector4i& r)
{
m_dev->StretchRect(*(GSTexture9*)st, r, *(GSTexture9*)dt, r, D3DTEXF_POINT);
}
void GSDevice9::StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, int shader, bool linear)
{
StretchRect(st, sr, dt, dr, m_convert.ps[shader], NULL, 0, linear);

View File

@ -153,7 +153,7 @@ public:
GSTexture* CopyOffscreen(GSTexture* src, const GSVector4& sr, int w, int h, int format = 0);
virtual bool IsCurrentRGBA() {return false;}
void CopyRect(GSTexture* st, GSTexture* dt, const GSVector4i& r);
void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, int shader = 0, bool linear = true);
void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, IDirect3DPixelShader9* ps, const float* ps_cb, int ps_cb_len, bool linear = true);

View File

@ -333,6 +333,11 @@ GSTexture* GSDeviceOGL::CopyOffscreen(GSTexture* src, const GSVector4& sr, int w
return NULL;
}
// Rectangle copy for the OpenGL device. Not implemented yet: the body is empty, so calling
// this leaves dt untouched.
void GSDeviceOGL::CopyRect(GSTexture* st, GSTexture* dt, const GSVector4i& r)
{
// TODO: implement the st -> dt copy of rectangle r
}
void GSDeviceOGL::StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, int shader, bool linear)
{
// TODO

View File

@ -130,6 +130,8 @@ public:
GSTexture* CopyOffscreen(GSTexture* src, const GSVector4& sr, int w, int h, int format = 0);
void CopyRect(GSTexture* st, GSTexture* dt, const GSVector4i& r);
void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, int shader = 0, bool linear = true);
void IASetVertexBuffer(const void* vertices, size_t stride, size_t count);

View File

@ -283,7 +283,7 @@ void GSDrawScanline::DrawSolidRect(const GSVector4i& r, const GSVertexSW& v)
}
template<class T, bool masked>
void GSDrawScanline::DrawSolidRectT(const int* row, int* col, const GSVector4i& r, uint32 c, uint32 m)
void GSDrawScanline::DrawSolidRectT(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m)
{
if(m == 0xffffffff) return;
@ -320,35 +320,33 @@ void GSDrawScanline::DrawSolidRectT(const int* row, int* col, const GSVector4i&
}
template<class T, bool masked>
void GSDrawScanline::FillRect(const int* row, int* col, const GSVector4i& r, uint32 c, uint32 m)
void GSDrawScanline::FillRect(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m)
{
if(r.x >= r.z) return;
for(int y = r.y; y < r.w; y++)
{
uint32 base = row[y];
T* RESTRICT d = &((T*)m_env.vm)[row[y]];
for(int x = r.x; x < r.z; x++)
{
T* p = &((T*)m_env.vm)[base + col[x]];
*p = (T)(!masked ? c : (c | (*p & m)));
d[col[x]] = (T)(!masked ? c : (c | (d[col[x]] & m)));
}
}
}
template<class T, bool masked>
void GSDrawScanline::FillBlock(const int* row, int* col, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m)
void GSDrawScanline::FillBlock(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m)
{
if(r.x >= r.z) return;
for(int y = r.y; y < r.w; y += 8)
{
uint32 base = row[y];
T* RESTRICT d = &((T*)m_env.vm)[row[y]];
for(int x = r.x; x < r.z; x += 8 * 4 / sizeof(T))
{
GSVector4i* p = (GSVector4i*)&((T*)m_env.vm)[base + col[x]];
GSVector4i* RESTRICT p = (GSVector4i*)&d[col[x]];
for(int i = 0; i < 16; i += 4)
{

View File

@ -59,13 +59,13 @@ class GSDrawScanline : public IDrawScanline
void DrawSolidRect(const GSVector4i& r, const GSVertexSW& v);
template<class T, bool masked>
void DrawSolidRectT(const int* row, int* col, const GSVector4i& r, uint32 c, uint32 m);
void DrawSolidRectT(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m);
template<class T, bool masked>
__forceinline void FillRect(const int* row, int* col, const GSVector4i& r, uint32 c, uint32 m);
__forceinline void FillRect(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m);
template<class T, bool masked>
__forceinline void FillBlock(const int* row, int* col, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m);
__forceinline void FillBlock(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m);
protected:
GSState* m_state;

View File

@ -512,8 +512,6 @@ GSLocalMemory::BlockOffset* GSLocalMemory::GetBlockOffset(uint32 bp, uint32 bw,
GSLocalMemory::PixelOffset* GSLocalMemory::GetPixelOffset(uint32 bp, uint32 bw, uint32 psm)
{
if(bw == 0) {ASSERT(0); return NULL;}
uint32 hash = bp | (bw << 14) | (psm << 20);
hash_map<uint32, PixelOffset*>::iterator i = m_pomap.find(hash);

View File

@ -635,6 +635,77 @@ public:
WriteFrame16(PixelAddress16SZ(x, y, bp, bw), c);
}
// Writes a rectangle of 32-bit pixels from a linear source buffer into m_vm32.
//
// src   - first source row; its first pixel corresponds to x == r.left
// pitch - distance between consecutive source rows, in bytes
// po    - precomputed offsets: po->row[y] is the base index for row y, po->col[0][x] the
//         per-column index added to it
// r     - destination rectangle, [left, right) x [top, bottom)
__forceinline void WritePixel32(uint8* RESTRICT src, uint32 pitch, PixelOffset* po, const GSVector4i& r)
{
// rebase the source so that it can be indexed with the absolute x coordinate below
src -= r.left * sizeof(uint32);
for(int y = r.top; y < r.bottom; y++, src += pitch)
{
uint32* RESTRICT s = (uint32*)src;
uint32* RESTRICT d = &m_vm32[po->row[y]];
int* RESTRICT o = po->col[0];
for(int x = r.left; x < r.right; x++)
{
d[o[x]] = s[x];
}
}
}
// Writes the low 24 bits of each 32-bit source pixel into m_vm32, preserving the top 8 bits
// already stored at the destination.
//
// src   - first source row of 32-bit pixels; its first pixel corresponds to x == r.left
// pitch - distance between consecutive source rows, in bytes
// po    - precomputed offsets: po->row[y] is the base index for row y, po->col[0][x] the
//         per-column index added to it
// r     - destination rectangle, [left, right) x [top, bottom)
__forceinline void WritePixel24(uint8* RESTRICT src, uint32 pitch, PixelOffset* po, const GSVector4i& r)
{
// rebase the source so that it can be indexed with the absolute x coordinate below
src -= r.left * sizeof(uint32);
for(int y = r.top; y < r.bottom; y++, src += pitch)
{
uint32* RESTRICT s = (uint32*)src;
uint32* RESTRICT d = &m_vm32[po->row[y]];
int* RESTRICT o = po->col[0];
for(int x = r.left; x < r.right; x++)
{
// keep bits 24-31 of the destination, take bits 0-23 from the source
d[o[x]] = (d[o[x]] & 0xff000000) | (s[x] & 0x00ffffff);
}
}
}
// Writes a rectangle of 16-bit pixels from a linear source buffer into m_vm16.
//
// src   - first source row of 16-bit pixels; its first pixel corresponds to x == r.left
// pitch - distance between consecutive source rows, in bytes
// po    - precomputed offsets: po->row[y] is the base index for row y, po->col[0][x] the
//         per-column index added to it
// r     - destination rectangle, [left, right) x [top, bottom)
__forceinline void WritePixel16(uint8* RESTRICT src, uint32 pitch, PixelOffset* po, const GSVector4i& r)
{
// rebase the source so that it can be indexed with the absolute x coordinate below
src -= r.left * sizeof(uint16);
for(int y = r.top; y < r.bottom; y++, src += pitch)
{
uint16* RESTRICT s = (uint16*)src;
uint16* RESTRICT d = &m_vm16[po->row[y]];
int* RESTRICT o = po->col[0];
for(int x = r.left; x < r.right; x++)
{
d[o[x]] = s[x];
}
}
}
// Converts a rectangle of 32-bit source pixels to 16 bits per pixel and writes them into m_vm16.
//
// Each output pixel is packed as 1:5:5:5 from the source word: bit 31 becomes bit 15, the top
// five bits of byte 2 become bits 10-14, the top five bits of byte 1 become bits 5-9 and the
// top five bits of byte 0 become bits 0-4.
//
// src   - first source row of 32-bit pixels; its first pixel corresponds to x == r.left
// pitch - distance between consecutive source rows, in bytes
// po    - precomputed offsets: po->row[y] is the base index for row y, po->col[0][x] the
//         per-column index added to it
// r     - destination rectangle, [left, right) x [top, bottom)
__forceinline void WriteFrame16(uint8* RESTRICT src, uint32 pitch, PixelOffset* po, const GSVector4i& r)
{
// rebase the source so that it can be indexed with the absolute x coordinate below
src -= r.left * sizeof(uint32);
for(int y = r.top; y < r.bottom; y++, src += pitch)
{
uint32* RESTRICT s = (uint32*)src;
uint16* RESTRICT d = &m_vm16[po->row[y]];
int* RESTRICT o = po->col[0];
for(int x = r.left; x < r.right; x++)
{
// rb: top 5 bits of bytes 0 and 2; ga: top 5 bits of byte 1 and the top bit of byte 3
uint32 rb = s[x] & 0x00f800f8;
uint32 ga = s[x] & 0x8000f800;
// bits shifted above bit 15 are discarded by the store into the 16-bit destination
d[o[x]] = (ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3);
}
}
}
__forceinline uint32 ReadTexel32(uint32 addr, const GIFRegTEXA& TEXA) const
{
return m_vm32[addr];

View File

@ -374,7 +374,7 @@ void GSRenderer::VSync(int field)
if(offscreen->Map(m))
{
m_capture.DeliverFrame(m.bits, m.pitch, m_dev->IsCurrentRGBA());
m_capture.DeliverFrame(m.bits, m.pitch, m_dev->IsRBSwapped());
offscreen->Unmap();
}

View File

@ -193,7 +193,6 @@ public:
ps_sel.fst = PRIM->FST;
ps_sel.wms = context->CLAMP.WMS;
ps_sel.wmt = context->CLAMP.WMT;
ps_sel.bpp = 0;
ps_sel.aem = env.TEXA.AEM;
ps_sel.tfx = context->TEX0.TFX;
ps_sel.tcc = context->TEX0.TCC;
@ -236,7 +235,7 @@ public:
if(tex)
{
ps_sel.bpp = tex->m_bpp;
ps_sel.fmt = tex->m_fmt;
ps_sel.rt = tex->m_target;
int w = tex->m_texture->GetWidth();

View File

@ -121,6 +121,7 @@ void GSSettingsDlg::OnInit()
ComboBoxInit(IDC_ASPECTRATIO, g_aspectratio, countof(g_aspectratio), theApp.GetConfig("AspectRatio", 1));
CheckDlgButton(m_hWnd, IDC_FILTER, theApp.GetConfig("filter", 1));
CheckDlgButton(m_hWnd, IDC_PALTEX, theApp.GetConfig("paltex", 0));
CheckDlgButton(m_hWnd, IDC_VSYNC, theApp.GetConfig("vsync", 0));
CheckDlgButton(m_hWnd, IDC_LOGZ, theApp.GetConfig("logz", 0));
CheckDlgButton(m_hWnd, IDC_FBA, theApp.GetConfig("fba", 1));
@ -179,6 +180,7 @@ bool GSSettingsDlg::OnCommand(HWND hWnd, UINT id, UINT code)
}
theApp.SetConfig("filter", (int)IsDlgButtonChecked(m_hWnd, IDC_FILTER));
theApp.SetConfig("paltex", (int)IsDlgButtonChecked(m_hWnd, IDC_PALTEX));
theApp.SetConfig("vsync", (int)IsDlgButtonChecked(m_hWnd, IDC_VSYNC));
theApp.SetConfig("logz", (int)IsDlgButtonChecked(m_hWnd, IDC_LOGZ));
theApp.SetConfig("fba", (int)IsDlgButtonChecked(m_hWnd, IDC_FBA));
@ -220,6 +222,7 @@ void GSSettingsDlg::UpdateControls()
EnableWindow(GetDlgItem(m_hWnd, IDC_RESY_EDIT), hw && !native);
EnableWindow(GetDlgItem(m_hWnd, IDC_NATIVERES), hw);
EnableWindow(GetDlgItem(m_hWnd, IDC_FILTER), hw && !native);
EnableWindow(GetDlgItem(m_hWnd, IDC_PALTEX), hw);
EnableWindow(GetDlgItem(m_hWnd, IDC_LOGZ), dx9 && hw);
EnableWindow(GetDlgItem(m_hWnd, IDC_FBA), dx9 && hw);
EnableWindow(GetDlgItem(m_hWnd, IDC_AA1), sw);

View File

@ -1088,62 +1088,92 @@ void GSState::Move()
if(spsm.trbpp == dpsm.trbpp && spsm.trbpp >= 16)
{
int* soffset = spo->col[0];
int* doffset = dpo->col[0];
int* RESTRICT scol = &spo->col[0][sx];
int* RESTRICT dcol = &dpo->col[0][dx];
if(spsm.trbpp == 32)
{
for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc * w, dx -= xinc * w)
if(xinc > 0)
{
uint32 sbase = spo->row[sy];
uint32 dbase = dpo->row[dy];
for(int y = 0; y < h; y++, sy += yinc, dy += yinc)
{
uint32* RESTRICT s = &m_mem.m_vm32[spo->row[sy]];
uint32* RESTRICT d = &m_mem.m_vm32[dpo->row[dy]];
for(int x = 0; x < w; x++, sx += xinc, dx += xinc)
for(int x = 0; x < w; x++) d[dcol[x]] = s[scol[x]];
}
}
else
{
m_mem.WritePixel32(dbase + doffset[dx], m_mem.ReadPixel32(sbase + soffset[sx]));
for(int y = 0; y < h; y++, sy += yinc, dy += yinc)
{
uint32* RESTRICT s = &m_mem.m_vm32[spo->row[sy]];
uint32* RESTRICT d = &m_mem.m_vm32[dpo->row[dy]];
for(int x = 0; x > -w; x--) d[dcol[x]] = s[scol[x]];
}
}
}
else if(spsm.trbpp == 24)
{
for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc * w, dx -= xinc * w)
if(xinc > 0)
{
uint32 sbase = spo->row[sy];
uint32 dbase = dpo->row[dy];
for(int y = 0; y < h; y++, sy += yinc, dy += yinc)
{
uint32* RESTRICT s = &m_mem.m_vm32[spo->row[sy]];
uint32* RESTRICT d = &m_mem.m_vm32[dpo->row[dy]];
for(int x = 0; x < w; x++, sx += xinc, dx += xinc)
for(int x = 0; x < w; x++) d[dcol[x]] = (d[dcol[x]] & 0xff000000) | (s[scol[x]] & 0x00ffffff);
}
}
else
{
m_mem.WritePixel24(dbase + doffset[dx], m_mem.ReadPixel24(sbase + soffset[sx]));
for(int y = 0; y < h; y++, sy += yinc, dy += yinc)
{
uint32* RESTRICT s = &m_mem.m_vm32[spo->row[sy]];
uint32* RESTRICT d = &m_mem.m_vm32[dpo->row[dy]];
for(int x = 0; x > -w; x--) d[dcol[x]] = (d[dcol[x]] & 0xff000000) | (s[scol[x]] & 0x00ffffff);
}
}
}
else // if(spsm.trbpp == 16)
{
for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc * w, dx -= xinc * w)
if(xinc > 0)
{
uint32 sbase = spo->row[sy];
uint32 dbase = dpo->row[dy];
for(int y = 0; y < h; y++, sy += yinc, dy += yinc)
{
uint16* RESTRICT s = &m_mem.m_vm16[spo->row[sy]];
uint16* RESTRICT d = &m_mem.m_vm16[dpo->row[dy]];
for(int x = 0; x < w; x++, sx += xinc, dx += xinc)
for(int x = 0; x < w; x++) d[dcol[x]] = s[scol[x]];
}
}
else
{
m_mem.WritePixel16(dbase + doffset[dx], m_mem.ReadPixel16(sbase + soffset[sx]));
for(int y = 0; y < h; y++, sy += yinc, dy += yinc)
{
uint16* RESTRICT s = &m_mem.m_vm16[spo->row[sy]];
uint16* RESTRICT d = &m_mem.m_vm16[dpo->row[dy]];
for(int x = 0; x > -w; x--) d[dcol[x]] = s[scol[x]];
}
}
}
}
else if(m_env.BITBLTBUF.SPSM == PSM_PSMT8 && m_env.BITBLTBUF.DPSM == PSM_PSMT8)
{
for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc * w, dx -= xinc * w)
for(int y = 0; y < h; y++, sy += yinc, dy += yinc)
{
uint32 sbase = spo->row[sy];
uint32 dbase = dpo->row[dy];
uint8* RESTRICT s = &m_mem.m_vm8[spo->row[sy]];
uint8* RESTRICT d = &m_mem.m_vm8[dpo->row[dy]];
int* soffset = spo->col[sy & 7];
int* doffset = dpo->col[dy & 7];
int* RESTRICT scol = &spo->col[sy & 7][sx];
int* RESTRICT dcol = &dpo->col[dy & 7][dx];
for(int x = 0; x < w; x++, sx += xinc, dx += xinc)
for(int x = 0; x < w; x++, scol += xinc, dcol += xinc)
{
m_mem.WritePixel8(dbase + doffset[dx], m_mem.ReadPixel8(sbase + soffset[sx]));
d[*dcol] = s[*scol];
}
}
}
@ -1151,15 +1181,23 @@ void GSState::Move()
{
for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc * w, dx -= xinc * w)
{
uint32 sbase = spo->row[sy];
uint32 dbase = dpo->row[dy];
uint8* RESTRICT s = &m_mem.m_vm8[spo->row[sy] >> 1];
uint8* RESTRICT d = &m_mem.m_vm8[dpo->row[dy] >> 1];
int* soffset = spo->col[sy & 7];
int* doffset = dpo->col[dy & 7];
int* RESTRICT scol = &spo->col[sy & 7][sx];
int* RESTRICT dcol = &dpo->col[dy & 7][dx];
for(int x = 0; x < w; x++, sx += xinc, dx += xinc)
for(int x = 0; x < w; x += 2)
{
m_mem.WritePixel4(dbase + doffset[dx], m_mem.ReadPixel4(sbase + soffset[sx]));
d[*dcol >> 1] = (d[*dcol >> 1] & 0xf0) | (s[*scol >> 1] & 0x0f);
scol += xinc;
dcol += xinc;
d[*dcol >> 1] = (d[*dcol >> 1] & 0x0f) | (s[*scol >> 1] & 0xf0);
scol += xinc;
dcol += xinc;
}
}
}
@ -1170,12 +1208,12 @@ void GSState::Move()
uint32 sbase = spo->row[sy];
uint32 dbase = dpo->row[dy];
int* soffset = spo->col[sy & 7];
int* doffset = dpo->col[dy & 7];
int* RESTRICT scol = spo->col[sy & 7];
int* RESTRICT dcol = dpo->col[dy & 7];
for(int x = 0; x < w; x++, sx += xinc, dx += xinc)
{
(m_mem.*dpsm.wpa)(dbase + doffset[dx], (m_mem.*spsm.rpa)(sbase + soffset[sx]));
(m_mem.*dpsm.wpa)(dbase + dcol[dx], (m_mem.*spsm.rpa)(sbase + scol[sx]));
}
}
}

View File

@ -87,7 +87,14 @@ bool GSTexture9::Update(const GSVector4i& r, const void* data, int pitch)
uint8* src = (uint8*)data;
uint8* dst = (uint8*)lr.pBits;
int bytes = r.width() << (m_desc.Format == D3DFMT_A1R5G5B5 ? 1 : 2);
int bytes = r.width() * sizeof(uint32);
switch(m_desc.Format)
{
case D3DFMT_A8: bytes >>= 2; break;
case D3DFMT_A1R5G5B5: bytes >>= 1; break;
default: ASSERT(m_desc.Format == D3DFMT_A8R8G8B8); break;
}
bytes = min(bytes, pitch);
bytes = min(bytes, lr.Pitch);

View File

@ -21,10 +21,12 @@
#include "StdAfx.h"
#include "GSTextureCache.h"
#include "GSTextureFX.h"
// Constructs the texture cache for renderer r and reads the "paltex" setting (default 0)
// into m_paltex; the value is later passed to Source::Create(bool) when new sources are created.
GSTextureCache::GSTextureCache(GSRenderer* r)
: m_renderer(r)
{
// !! normalizes the integer config value to a bool
m_paltex = !!theApp.GetConfig("paltex", 0);
}
GSTextureCache::~GSTextureCache()
@ -67,7 +69,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
continue;
}
if(psm.pal > 0 && !GSVector4i::compare(clut, s->m_clut, psm.pal * sizeof(clut[0])))
if(s->m_palette == NULL && psm.pal > 0 && !GSVector4i::compare(clut, s->m_clut, psm.pal * sizeof(clut[0])))
{
continue;
}
@ -104,7 +106,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
{
src = CreateSource();
if(!(dst ? src->Create(dst) : src->Create()))
if(!(dst ? src->Create(dst) : src->Create(m_paltex)))
{
delete src;
@ -125,7 +127,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
if(src->m_palette)
{
if(src->m_initpalette || GSVector4i::update(src->m_clut, clut, size))
if(src->m_initpalette || !GSVector4i::update(src->m_clut, clut, size))
{
src->m_palette->Update(GSVector4i(0, 0, psm.pal, 1), src->m_clut, size);
src->m_initpalette = false;
@ -490,8 +492,8 @@ void GSTextureCache::Surface::Update()
GSTextureCache::Source::Source(GSRenderer* r)
: Surface(r)
, m_palette(NULL)
, m_initpalette(false)
, m_bpp(0)
, m_initpalette(true)
, m_fmt(0)
, m_target(false)
, m_complete(false)
{
@ -514,6 +516,197 @@ GSTextureCache::Source::~Source()
_aligned_free(m_write.rect);
}
// Allocates the texture(s) backing this source from the renderer's current TEX0/TEXA state.
//
// paltex - when true and the current pixel format is palettized (psm.pal > 0), an 8-bit index
//          texture plus a 256x1 palette texture are created and m_fmt is set to FMT_8;
//          otherwise a regular texture is created and m_fmt is set to FMT_32.
//
// Returns true when the main texture was created. The palette texture is not checked.
bool GSTextureCache::Source::Create(bool paltex)
{
	m_TEX0 = m_renderer->m_context->TEX0;
	m_TEXA = m_renderer->m_env.TEXA;

	ASSERT(m_texture == NULL);

	// texture dimensions are stored as log2 values in TEX0
	const int width = 1 << m_TEX0.TW;
	const int height = 1 << m_TEX0.TH;

	const bool indexed = paltex && GSLocalMemory::m_psm[m_TEX0.PSM].pal > 0;

	if(!indexed)
	{
		m_fmt = GSTextureFX::FMT_32;

		m_texture = m_renderer->m_dev->CreateTexture(width, height);

		return m_texture != NULL;
	}

	m_fmt = GSTextureFX::FMT_8;

	// index texture in the device-specific 8-bit format, plus the palette texture
	m_texture = m_renderer->m_dev->CreateTexture(width, height, Get8bitFormat());
	m_palette = m_renderer->m_dev->CreateTexture(256, 1);

	return m_texture != NULL;
}
// Builds this source from an existing render target instead of from local memory.
//
// Steps, in order:
//  1. reject targets that are not render targets,
//  2. if the target's buffer width (TBW) differs from the current TEX0's, re-tile the target
//     block by block into a new render target ("pitch conversion"),
//  3. copy or stretch the result into a render target of the size the texture is expected to
//     have ("width/height conversion"),
//  4. derive m_fmt (and create a palette texture for the 8H/4HL/4HH formats) from TEX0.PSM.
//
// dst - the target to convert; dst->Update() is called before its texture is read.
//
// Returns false when the target type is unsupported or when the timesplitters special case
// below is hit; returns true otherwise. Texture creation results are not checked here.
bool GSTextureCache::Source::Create(Target* dst)
{
m_target = true;
if(dst->m_type != RenderTarget)
{
// TODO: only render targets can be converted so far
return false;
}
// TODO: clean up this mess
dst->Update();
// m_renderer->m_perfmon.Put(GSPerfMon::ConvertRT2T, 1);
m_TEX0 = m_renderer->m_context->TEX0;
m_TEXA = m_renderer->m_env.TEXA;
int tw = 1 << m_TEX0.TW;
int th = 1 << m_TEX0.TH;
// NOTE(review): tp is derived from TW (the log2 width), not from TBW - confirm this is intended
int tp = (int)m_TEX0.TW << 6;
// do not round here!!! if edge becomes a black pixel and addressing mode is clamp => everything outside the clamped area turns into black (kh2 shadows)
int w = (int)(dst->m_texture->m_scale.x * tw);
int h = (int)(dst->m_texture->m_scale.y * th);
GSVector2i dstsize = dst->m_texture->GetSize();
// pitch conversion
if(dst->m_TEX0.TBW != m_TEX0.TBW) // && dst->m_TEX0.PSM == m_TEX0.PSM
{
// sfex3 uses this trick (bw: 10 -> 5, wraps the right side below the left)
// ASSERT(dst->m_TEX0.TBW > m_TEX0.TBW); // otherwise scale.x need to be reduced to make the larger texture fit (TODO)
ASSERT(m_texture == NULL);
// intermediate render target with the same size as the target's texture
m_texture = m_renderer->m_dev->CreateRenderTarget(dstsize.x, dstsize.y);
GSVector4 size = GSVector4(dstsize).xyxy();
GSVector4 scale = GSVector4(dst->m_texture->m_scale).xyxy();
// block size used for the re-tiling: 64x32 for the 32/24-bit formats, 64x64 otherwise
int bw = 64;
int bh = m_TEX0.PSM == PSM_PSMCT32 || m_TEX0.PSM == PSM_PSMCT24 ? 32 : 64;
GSVector4i br(0, 0, bw, bh);
int sw = (int)dst->m_TEX0.TBW << 6;
int dw = (int)m_TEX0.TBW << 6;
int dh = 1 << m_TEX0.TH;
// sw is used as a divisor below, so the whole loop is skipped when it is zero
if(sw != 0)
for(int dy = 0; dy < dh; dy += bh)
{
for(int dx = 0; dx < dw; dx += bw)
{
// linear block offset in the destination layout, mapped back to a source position
int o = dy * dw / bh + dx;
int sx = o % sw;
int sy = o / sw;
// sr is divided by the texture size, dr is left in scaled pixels
GSVector4 sr = GSVector4(GSVector4i(sx, sy).xyxy() + br) * scale / size;
GSVector4 dr = GSVector4(GSVector4i(dx, dy).xyxy() + br) * scale;
m_renderer->m_dev->StretchRect(dst->m_texture, sr, m_texture, dr);
// TODO: this is quite a lot of StretchRect, do it with one Draw
}
}
}
else if(tw < tp)
{
// FIXME: timesplitters blurs the render target by blending itself over a couple of times
if(tw == 256 && th == 128 && tp == 512 && (m_TEX0.TBP0 == 0 || m_TEX0.TBP0 == 0x00e00))
{
return false;
}
}
// width/height conversion
GSVector2 scale = dst->m_texture->m_scale;
GSVector4 dr(0, 0, w, h);
// clamp the requested size to the target's texture size and adjust scale/dr to match
if(w > dstsize.x)
{
scale.x = (float)dstsize.x / tw;
dr.z = (float)dstsize.x * scale.x / dst->m_texture->m_scale.x;
w = dstsize.x;
}
if(h > dstsize.y)
{
scale.y = (float)dstsize.y / th;
dr.w = (float)dstsize.y * scale.y / dst->m_texture->m_scale.y;
h = dstsize.y;
}
GSVector4 sr(0, 0, w, h);
// read from the pitch-converted intermediate if one was created, otherwise from the target itself
GSTexture* st = m_texture ? m_texture : dst->m_texture;
GSTexture* dt = m_renderer->m_dev->CreateRenderTarget(w, h);
if(!m_texture)
{
m_texture = dt;
}
if((sr == dr).alltrue())
{
// same rectangle on both sides: a plain copy is enough
m_renderer->m_dev->CopyRect(st, dt, GSVector4i(0, 0, w, h));
}
else
{
// divide the source rectangle by the source texture size before stretching
sr.z /= st->GetWidth();
sr.w /= st->GetHeight();
m_renderer->m_dev->StretchRect(st, sr, dt, dr);
}
// if an intermediate texture was used, hand it back to the device and keep the final one
if(dt != m_texture)
{
m_renderer->m_dev->Recycle(m_texture);
m_texture = dt;
}
m_texture->m_scale = scale;
// select the shader-side format; the palettized formats also get a 256x1 palette texture
switch(m_TEX0.PSM)
{
default:
// unexpected format: asserts, then falls through and is treated as 32-bit
ASSERT(0);
case PSM_PSMCT32:
m_fmt = GSTextureFX::FMT_32;
break;
case PSM_PSMCT24:
m_fmt = GSTextureFX::FMT_24;
break;
case PSM_PSMCT16:
case PSM_PSMCT16S:
m_fmt = GSTextureFX::FMT_16;
break;
case PSM_PSMT8H:
m_fmt = GSTextureFX::FMT_8H;
m_palette = m_renderer->m_dev->CreateTexture(256, 1);
break;
case PSM_PSMT4HL:
m_fmt = GSTextureFX::FMT_4HL;
m_palette = m_renderer->m_dev->CreateTexture(256, 1);
break;
case PSM_PSMT4HH:
m_fmt = GSTextureFX::FMT_4HH;
m_palette = m_renderer->m_dev->CreateTexture(256, 1);
break;
}
return true;
}
void GSTextureCache::Source::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& rect)
{
__super::Update();
@ -649,6 +842,12 @@ void GSTextureCache::Source::Flush(uint32 count)
GSLocalMemory::readTexture rtx = psm.rtx;
if(m_fmt == GSTextureFX::FMT_8)
{
pitch >>= 2;
rtx = psm.rtxP;
}
for(uint32 i = 0; i < count; i++)
{
GSVector4i r = m_write.rect[i];
@ -798,7 +997,7 @@ void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0, GSLocalMe
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
GSVector2i bs = (TEX0.TBP0 & 31) ? psm.pgs : psm.bs;
GSVector2i bs = (TEX0.TBP0 & 31) == 0 ? psm.pgs : psm.bs;
int tw = 1 << TEX0.TW;
int th = 1 << TEX0.TH;

View File

@ -55,12 +55,15 @@ public:
void Write(const GSVector4i& r);
void Flush(uint32 count);
protected:
virtual int Get8bitFormat() = 0;
public:
GSTexture* m_palette;
bool m_initpalette;
uint32 m_valid[MAX_PAGES]; // each uint32 bits map to the 32 blocks of that page
uint32* m_clut;
int m_bpp;
int m_fmt;
bool m_target;
bool m_complete;
@ -68,8 +71,8 @@ public:
explicit Source(GSRenderer* renderer);
virtual ~Source();
virtual bool Create() = 0;
virtual bool Create(Target* dst) = 0;
virtual bool Create(bool paltex);
virtual bool Create(Target* dst);
virtual void Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& rect);
};
@ -90,6 +93,7 @@ public:
protected:
GSRenderer* m_renderer;
bool m_paltex;
struct SourceMap
{

View File

@ -31,193 +31,6 @@ GSTextureCache10::GSTextureCache10(GSRenderer* r)
// Source10
bool GSTextureCache10::Source10::Create()
{
// m_renderer->m_perfmon.Put(GSPerfMon::WriteTexture, 1);
m_TEX0 = m_renderer->m_context->TEX0;
m_TEXA = m_renderer->m_env.TEXA;
m_bpp = 0;
ASSERT(m_texture == NULL);
m_texture = m_renderer->m_dev->CreateTexture(1 << m_TEX0.TW, 1 << m_TEX0.TH);
return m_texture != NULL;
}
bool GSTextureCache10::Source10::Create(Target* dst)
{
m_target = true;
if(dst->m_type != RenderTarget)
{
// TODO
return false;
}
// TODO: clean up this mess
dst->Update();
// m_renderer->m_perfmon.Put(GSPerfMon::ConvertRT2T, 1);
m_TEX0 = m_renderer->m_context->TEX0;
m_TEXA = m_renderer->m_env.TEXA;
int tw = 1 << m_TEX0.TW;
int th = 1 << m_TEX0.TH;
int tp = (int)m_TEX0.TW << 6;
// do not round here!!! if edge becomes a black pixel and addressing mode is clamp => everything outside the clamped area turns into black (kh2 shadows)
int w = (int)(dst->m_texture->m_scale.x * tw);
int h = (int)(dst->m_texture->m_scale.y * th);
GSVector2i dstsize = dst->m_texture->GetSize();
// pitch conversion
if(dst->m_TEX0.TBW != m_TEX0.TBW) // && dst->m_TEX0.PSM == m_TEX0.PSM
{
// sfex3 uses this trick (bw: 10 -> 5, wraps the right side below the left)
// ASSERT(dst->m_TEX0.TBW > m_TEX0.TBW); // otherwise scale.x need to be reduced to make the larger texture fit (TODO)
ASSERT(m_texture == NULL);
m_texture = m_renderer->m_dev->CreateRenderTarget(dstsize.x, dstsize.y);
GSVector4 size = GSVector4(dstsize).xyxy();
GSVector4 scale = GSVector4(dst->m_texture->m_scale).xyxy();
int bw = 64;
int bh = m_TEX0.PSM == PSM_PSMCT32 || m_TEX0.PSM == PSM_PSMCT24 ? 32 : 64;
GSVector4i br(0, 0, bw, bh);
int sw = (int)dst->m_TEX0.TBW << 6;
int dw = (int)m_TEX0.TBW << 6;
int dh = 1 << m_TEX0.TH;
if(sw != 0)
for(int dy = 0; dy < dh; dy += bh)
{
for(int dx = 0; dx < dw; dx += bw)
{
int o = dy * dw / bh + dx;
int sx = o % sw;
int sy = o / sw;
GSVector4 sr = GSVector4(GSVector4i(sx, sy).xyxy() + br) * scale / size;
GSVector4 dr = GSVector4(GSVector4i(dx, dy).xyxy() + br) * scale;
m_renderer->m_dev->StretchRect(dst->m_texture, sr, m_texture, dr);
// TODO: this is quite a lot of StretchRect, do it with one Draw
}
}
}
else if(tw < tp)
{
// FIXME: timesplitters blurs the render target by blending itself over a couple of times
if(tw == 256 && th == 128 && tp == 512 && (m_TEX0.TBP0 == 0 || m_TEX0.TBP0 == 0x00e00))
{
return false;
}
}
// width/height conversion
GSVector2 scale = dst->m_texture->m_scale;
GSVector4 dr(0, 0, w, h);
if(w > dstsize.x)
{
scale.x = (float)dstsize.x / tw;
dr.z = (float)dstsize.x * scale.x / dst->m_texture->m_scale.x;
w = dstsize.x;
}
if(h > dstsize.y)
{
scale.y = (float)dstsize.y / th;
dr.w = (float)dstsize.y * scale.y / dst->m_texture->m_scale.y;
h = dstsize.y;
}
GSVector4 sr(0, 0, w, h);
GSTexture* st = m_texture ? m_texture : dst->m_texture;
GSTexture* dt = m_renderer->m_dev->CreateRenderTarget(w, h);
if(!m_texture)
{
m_texture = dt;
}
if((sr == dr).alltrue())
{
D3D10_BOX box = {0, 0, 0, w, h, 1};
(*(GSDevice10*)m_renderer->m_dev)->CopySubresourceRegion(*(GSTexture10*)dt, 0, 0, 0, 0, *(GSTexture10*)st, 0, &box);
}
else
{
sr.z /= st->GetWidth();
sr.w /= st->GetHeight();
m_renderer->m_dev->StretchRect(st, sr, dt, dr);
}
if(dt != m_texture)
{
m_renderer->m_dev->Recycle(m_texture);
m_texture = dt;
}
m_texture->m_scale = scale;
switch(m_TEX0.PSM)
{
case PSM_PSMCT32:
m_bpp = 0;
break;
case PSM_PSMCT24:
m_bpp = 1;
break;
case PSM_PSMCT16:
case PSM_PSMCT16S:
m_bpp = 2;
break;
case PSM_PSMT8H:
m_bpp = 3;
m_palette = m_renderer->m_dev->CreateTexture(256, 1);
m_initpalette = true;
break;
case PSM_PSMT4HL:
m_bpp = 4;
m_palette = m_renderer->m_dev->CreateTexture(256, 1);
m_initpalette = true;
break;
case PSM_PSMT4HH:
m_bpp = 5;
m_palette = m_renderer->m_dev->CreateTexture(256, 1);
m_initpalette = true;
break;
}
return true;
}
// Target10
void GSTextureCache10::Target10::Read(const GSVector4i& r)
@ -263,52 +76,21 @@ void GSTextureCache10::Target10::Read(const GSVector4i& r)
{
// TODO: block level write
uint32 bp = m_TEX0.TBP0;
uint32 bw = m_TEX0.TBW;
GSLocalMemory::PixelOffset* po = m_renderer->m_mem.GetPixelOffset(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM);
GSLocalMemory::pixelAddress pa = GSLocalMemory::m_psm[m_TEX0.PSM].pa;
if(m_TEX0.PSM == PSM_PSMCT32)
{
for(int y = r.top; y < r.bottom; y++, m.bits += m.pitch)
{
uint32 addr = pa(0, y, bp, bw);
int* offset = GSLocalMemory::m_psm[m_TEX0.PSM].rowOffset[y & 7];
for(int x = r.left, i = 0; x < r.right; x++, i++)
{
m_renderer->m_mem.WritePixel32(addr + offset[x], ((uint32*)m.bits)[i]);
}
}
}
else if(m_TEX0.PSM == PSM_PSMCT24)
{
for(int y = r.top; y < r.bottom; y++, m.bits += m.pitch)
{
uint32 addr = pa(0, y, bp, bw);
int* offset = GSLocalMemory::m_psm[m_TEX0.PSM].rowOffset[y & 7];
for(int x = r.left, i = 0; x < r.right; x++, i++)
{
m_renderer->m_mem.WritePixel24(addr + offset[x], ((uint32*)m.bits)[i]);
}
}
}
else if(m_TEX0.PSM == PSM_PSMCT16 || m_TEX0.PSM == PSM_PSMCT16S)
{
for(int y = r.top; y < r.bottom; y++, m.bits += m.pitch)
{
uint32 addr = pa(0, y, bp, bw);
int* offset = GSLocalMemory::m_psm[m_TEX0.PSM].rowOffset[y & 7];
for(int x = r.left, i = 0; x < r.right; x++, i++)
{
m_renderer->m_mem.WritePixel16(addr + offset[x], ((uint16*)m.bits)[i]);
}
}
}
else
switch(m_TEX0.PSM)
{
case PSM_PSMCT32:
m_renderer->m_mem.WritePixel32(m.bits, m.pitch, po, r);
break;
case PSM_PSMCT24:
m_renderer->m_mem.WritePixel24(m.bits, m.pitch, po, r);
break;
case PSM_PSMCT16:
case PSM_PSMCT16S:
m_renderer->m_mem.WritePixel16(m.bits, m.pitch, po, r);
break;
default:
ASSERT(0);
}

View File

@ -28,11 +28,11 @@ class GSTextureCache10 : public GSTextureCache
{
class Source10 : public Source
{
protected:
int Get8bitFormat() {return DXGI_FORMAT_A8_UNORM;}
public:
explicit Source10(GSRenderer* r) : Source(r) {}
bool Create();
bool Create(Target* dst);
};
class Target10 : public Target

View File

@ -31,194 +31,6 @@ GSTextureCache11::GSTextureCache11(GSRenderer* r)
// Source11
// Sets this source up as a plain texture: snapshots the renderer's current
// TEX0 / TEXA registers, resets m_bpp to 0 and allocates a texture of
// (1 << TW) x (1 << TH) texels from the device.
// Returns true when the device handed back a non-NULL texture.
bool GSTextureCache11::Source11::Create()
{
	// m_renderer->m_perfmon.Put(GSPerfMon::WriteTexture, 1);

	m_TEX0 = m_renderer->m_context->TEX0;
	m_TEXA = m_renderer->m_env.TEXA;
	m_bpp = 0;

	// A source must not already own a texture when it is created.
	ASSERT(m_texture == NULL);

	const int width = 1 << m_TEX0.TW;
	const int height = 1 << m_TEX0.TH;

	m_texture = m_renderer->m_dev->CreateTexture(width, height);

	if(m_texture == NULL)
	{
		return false;
	}

	return true;
}
// Builds this source from an existing cache target instead of a fresh texture.
//
// dst: the target to sample from; only RenderTarget-type targets are handled.
//
// Returns false for any other target type and for one hard-coded TEX0 pattern
// (see the FIXME below). Otherwise dst's texture is copied or stretched into a
// newly created render target that ends up in m_texture, its scale is stored in
// m_texture->m_scale, m_bpp is set from TEX0.PSM (a 256x1 palette texture is
// created for the 8H / 4HL / 4HH formats) and true is returned.
// Note that m_target is set to true even on the failure paths.
bool GSTextureCache11::Source11::Create(Target* dst)
{
m_target = true;
if(dst->m_type != RenderTarget)
{
// TODO
return false;
}
// TODO: clean up this mess
dst->Update();
// m_renderer->m_perfmon.Put(GSPerfMon::ConvertRT2T, 1);
m_TEX0 = m_renderer->m_context->TEX0;
m_TEXA = m_renderer->m_env.TEXA;
int tw = 1 << m_TEX0.TW;
int th = 1 << m_TEX0.TH;
// NOTE(review): tp is computed from TW (the log2 width), not TBW (the buffer width used
// for sw/dw below), so tw == 256 always implies tp == 512 — confirm this is intended.
int tp = (int)m_TEX0.TW << 6;
// do not round here!!! if edge becomes a black pixel and addressing mode is clamp => everything outside the clamped area turns into black (kh2 shadows)
int w = (int)(dst->m_texture->m_scale.x * tw);
int h = (int)(dst->m_texture->m_scale.y * th);
GSVector2i dstsize = dst->m_texture->GetSize();
// pitch conversion
if(dst->m_TEX0.TBW != m_TEX0.TBW) // && dst->m_TEX0.PSM == m_TEX0.PSM
{
// sfex3 uses this trick (bw: 10 -> 5, wraps the right side below the left)
// ASSERT(dst->m_TEX0.TBW > m_TEX0.TBW); // otherwise scale.x need to be reduced to make the larger texture fit (TODO)
ASSERT(m_texture == NULL);
// intermediate target, same size as dst's texture, that receives the re-arranged blocks
m_texture = m_renderer->m_dev->CreateRenderTarget(dstsize.x, dstsize.y);
GSVector4 size = GSVector4(dstsize).xyxy();
GSVector4 scale = GSVector4(dst->m_texture->m_scale).xyxy();
// block size used for the copy: 64x32 for the 32/24-bit formats, 64x64 otherwise
int bw = 64;
int bh = m_TEX0.PSM == PSM_PSMCT32 || m_TEX0.PSM == PSM_PSMCT24 ? 32 : 64;
GSVector4i br(0, 0, bw, bh);
// source and destination row widths in pixels (TBW * 64)
int sw = (int)dst->m_TEX0.TBW << 6;
int dw = (int)m_TEX0.TBW << 6;
int dh = 1 << m_TEX0.TH;
// sw is used as a divisor below, so nothing is copied when it is zero
if(sw != 0)
for(int dy = 0; dy < dh; dy += bh)
{
for(int dx = 0; dx < dw; dx += bw)
{
// o is the block's offset in the destination layout; it is re-split into (sx, sy) using the source row width
int o = dy * dw / bh + dx;
int sx = o % sw;
int sy = o / sw;
// the source rect is scaled and then divided by the texture size; the destination rect is only scaled
GSVector4 sr = GSVector4(GSVector4i(sx, sy).xyxy() + br) * scale / size;
GSVector4 dr = GSVector4(GSVector4i(dx, dy).xyxy() + br) * scale;
m_renderer->m_dev->StretchRect(dst->m_texture, sr, m_texture, dr);
// TODO: this is quite a lot of StretchRect, do it with one Draw
}
}
}
else if(tw < tp)
{
// FIXME: timesplitters blurs the render target by blending itself over a couple of times
if(tw == 256 && th == 128 && tp == 512 && (m_TEX0.TBP0 == 0 || m_TEX0.TBP0 == 0x00e00))
{
return false;
}
}
// width/height conversion
// when the scaled texture would be larger than dst's texture, clamp w/h to dst's size
// and recompute the scale and the destination rect for that axis
GSVector2 scale = dst->m_texture->m_scale;
GSVector4 dr(0, 0, w, h);
if(w > dstsize.x)
{
scale.x = (float)dstsize.x / tw;
dr.z = (float)dstsize.x * scale.x / dst->m_texture->m_scale.x;
w = dstsize.x;
}
if(h > dstsize.y)
{
scale.y = (float)dstsize.y / th;
dr.w = (float)dstsize.y * scale.y / dst->m_texture->m_scale.y;
h = dstsize.y;
}
GSVector4 sr(0, 0, w, h);
// read from the pitch-converted intermediate if one was made above, else straight from dst
GSTexture* st = m_texture ? m_texture : dst->m_texture;
GSTexture* dt = m_renderer->m_dev->CreateRenderTarget(w, h);
if(!m_texture)
{
m_texture = dt;
}
if((sr == dr).alltrue())
{
// source and destination rects are identical: plain w x h region copy through the D3D11 context
D3D11_BOX box = {0, 0, 0, w, h, 1};
ID3D11DeviceContext* ctx = *(GSDevice11*)m_renderer->m_dev;
ctx->CopySubresourceRegion(*(GSTexture11*)dt, 0, 0, 0, 0, *(GSTexture11*)st, 0, &box);
}
else
{
// rects differ: divide the source extent by the source texture size and stretch
sr.z /= st->GetWidth();
sr.w /= st->GetHeight();
m_renderer->m_dev->StretchRect(st, sr, dt, dr);
}
if(dt != m_texture)
{
// m_texture is still the pitch-conversion intermediate here; give it back to the device and keep dt
m_renderer->m_dev->Recycle(m_texture);
m_texture = dt;
}
m_texture->m_scale = scale;
// m_bpp encodes the texture format; formats not listed leave m_bpp unchanged
switch(m_TEX0.PSM)
{
case PSM_PSMCT32:
m_bpp = 0;
break;
case PSM_PSMCT24:
m_bpp = 1;
break;
case PSM_PSMCT16:
case PSM_PSMCT16S:
m_bpp = 2;
break;
case PSM_PSMT8H:
m_bpp = 3;
m_palette = m_renderer->m_dev->CreateTexture(256, 1);
m_initpalette = true;
break;
case PSM_PSMT4HL:
m_bpp = 4;
m_palette = m_renderer->m_dev->CreateTexture(256, 1);
m_initpalette = true;
break;
case PSM_PSMT4HH:
m_bpp = 5;
m_palette = m_renderer->m_dev->CreateTexture(256, 1);
m_initpalette = true;
break;
}
return true;
}
// Target11
void GSTextureCache11::Target11::Read(const GSVector4i& r)
@ -264,52 +76,21 @@ void GSTextureCache11::Target11::Read(const GSVector4i& r)
{
// TODO: block level write
uint32 bp = m_TEX0.TBP0;
uint32 bw = m_TEX0.TBW;
GSLocalMemory::PixelOffset* po = m_renderer->m_mem.GetPixelOffset(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM);
GSLocalMemory::pixelAddress pa = GSLocalMemory::m_psm[m_TEX0.PSM].pa;
if(m_TEX0.PSM == PSM_PSMCT32)
{
for(int y = r.top; y < r.bottom; y++, m.bits += m.pitch)
{
uint32 addr = pa(0, y, bp, bw);
int* offset = GSLocalMemory::m_psm[m_TEX0.PSM].rowOffset[y & 7];
for(int x = r.left, i = 0; x < r.right; x++, i++)
{
m_renderer->m_mem.WritePixel32(addr + offset[x], ((uint32*)m.bits)[i]);
}
}
}
else if(m_TEX0.PSM == PSM_PSMCT24)
{
for(int y = r.top; y < r.bottom; y++, m.bits += m.pitch)
{
uint32 addr = pa(0, y, bp, bw);
int* offset = GSLocalMemory::m_psm[m_TEX0.PSM].rowOffset[y & 7];
for(int x = r.left, i = 0; x < r.right; x++, i++)
{
m_renderer->m_mem.WritePixel24(addr + offset[x], ((uint32*)m.bits)[i]);
}
}
}
else if(m_TEX0.PSM == PSM_PSMCT16 || m_TEX0.PSM == PSM_PSMCT16S)
{
for(int y = r.top; y < r.bottom; y++, m.bits += m.pitch)
{
uint32 addr = pa(0, y, bp, bw);
int* offset = GSLocalMemory::m_psm[m_TEX0.PSM].rowOffset[y & 7];
for(int x = r.left, i = 0; x < r.right; x++, i++)
{
m_renderer->m_mem.WritePixel16(addr + offset[x], ((uint16*)m.bits)[i]);
}
}
}
else
switch(m_TEX0.PSM)
{
case PSM_PSMCT32:
m_renderer->m_mem.WritePixel32(m.bits, m.pitch, po, r);
break;
case PSM_PSMCT24:
m_renderer->m_mem.WritePixel24(m.bits, m.pitch, po, r);
break;
case PSM_PSMCT16:
case PSM_PSMCT16S:
m_renderer->m_mem.WritePixel16(m.bits, m.pitch, po, r);
break;
default:
ASSERT(0);
}

View File

@ -28,11 +28,11 @@ class GSTextureCache11 : public GSTextureCache
{
class Source11 : public Source
{
protected:
int Get8bitFormat() {return DXGI_FORMAT_A8_UNORM;}
public:
explicit Source11(GSRenderer* r) : Source(r) {}
bool Create();
bool Create(Target* dst);
};
class Target11 : public Target

View File

@ -31,193 +31,6 @@ GSTextureCache9::GSTextureCache9(GSRenderer* r)
// Source9
// Sets this source up as a plain texture: snapshots the renderer's current
// TEX0 / TEXA registers, resets m_bpp to 0 and allocates a texture of
// (1 << TW) x (1 << TH) texels from the device.
// Returns true when the device handed back a non-NULL texture.
bool GSTextureCache9::Source9::Create()
{
	// m_renderer->m_perfmon.Put(GSPerfMon::WriteTexture, 1);

	m_TEX0 = m_renderer->m_context->TEX0;
	m_TEXA = m_renderer->m_env.TEXA;
	m_bpp = 0;

	// A source must not already own a texture when it is created.
	ASSERT(m_texture == NULL);

	const int width = 1 << m_TEX0.TW;
	const int height = 1 << m_TEX0.TH;

	m_texture = m_renderer->m_dev->CreateTexture(width, height);

	if(m_texture == NULL)
	{
		return false;
	}

	return true;
}
// Builds this source from an existing cache target instead of a fresh texture.
//
// dst: the target to sample from; only RenderTarget-type targets are handled.
//
// Returns false for any other target type and for one hard-coded TEX0 pattern
// (see the FIXME below). Otherwise dst's texture is copied or stretched into a
// newly created render target that ends up in m_texture, its scale is stored in
// m_texture->m_scale, m_bpp is set from TEX0.PSM (a 256x1 palette texture is
// created for the 8H / 4HL / 4HH formats) and true is returned.
// Note that m_target is set to true even on the failure paths.
bool GSTextureCache9::Source9::Create(Target* dst)
{
m_target = true;
if(dst->m_type != RenderTarget)
{
// TODO
return false;
}
// TODO: clean up this mess
dst->Update();
// m_renderer->m_perfmon.Put(GSPerfMon::ConvertRT2T, 1);
m_TEX0 = m_renderer->m_context->TEX0;
m_TEXA = m_renderer->m_env.TEXA;
int tw = 1 << m_TEX0.TW;
int th = 1 << m_TEX0.TH;
// NOTE(review): tp is computed from TW (the log2 width), not TBW (the buffer width used
// for sw/dw below), so tw == 256 always implies tp == 512 — confirm this is intended.
int tp = (int)m_TEX0.TW << 6;
// do not round here!!! if edge becomes a black pixel and addressing mode is clamp => everything outside the clamped area turns into black (kh2 shadows)
int w = (int)(dst->m_texture->m_scale.x * tw);
int h = (int)(dst->m_texture->m_scale.y * th);
GSVector2i dstsize = dst->m_texture->GetSize();
// pitch conversion
if(dst->m_TEX0.TBW != m_TEX0.TBW) // && dst->m_TEX0.PSM == m_TEX0.PSM
{
// sfex3 uses this trick (bw: 10 -> 5, wraps the right side below the left)
// ASSERT(dst->m_TEX0.TBW > m_TEX0.TBW); // otherwise scale.x need to be reduced to make the larger texture fit (TODO)
ASSERT(m_texture == NULL);
// intermediate target, same size as dst's texture, that receives the re-arranged blocks
m_texture = m_renderer->m_dev->CreateRenderTarget(dstsize.x, dstsize.y);
GSVector4 size = GSVector4(dstsize).xyxy();
GSVector4 scale = GSVector4(dst->m_texture->m_scale).xyxy();
// block size used for the copy: 64x32 for the 32/24-bit formats, 64x64 otherwise
int bw = 64;
int bh = m_TEX0.PSM == PSM_PSMCT32 || m_TEX0.PSM == PSM_PSMCT24 ? 32 : 64;
GSVector4i br(0, 0, bw, bh);
// source and destination row widths in pixels (TBW * 64)
int sw = (int)dst->m_TEX0.TBW << 6;
int dw = (int)m_TEX0.TBW << 6;
int dh = 1 << m_TEX0.TH;
// sw is used as a divisor below, so nothing is copied when it is zero
if(sw != 0)
for(int dy = 0; dy < dh; dy += bh)
{
for(int dx = 0; dx < dw; dx += bw)
{
// o is the block's offset in the destination layout; it is re-split into (sx, sy) using the source row width
int o = dy * dw / bh + dx;
int sx = o % sw;
int sy = o / sw;
// the source rect is scaled and then divided by the texture size; the destination rect is only scaled
GSVector4 sr = GSVector4(GSVector4i(sx, sy).xyxy() + br) * scale / size;
GSVector4 dr = GSVector4(GSVector4i(dx, dy).xyxy() + br) * scale;
m_renderer->m_dev->StretchRect(dst->m_texture, sr, m_texture, dr);
// TODO: this is quite a lot of StretchRect, do it with one Draw
}
}
}
else if(tw < tp)
{
// FIXME: timesplitters blurs the render target by blending itself over a couple of times
if(tw == 256 && th == 128 && tp == 512 && (m_TEX0.TBP0 == 0 || m_TEX0.TBP0 == 0x00e00))
{
return false;
}
}
// width/height conversion
// when the scaled texture would be larger than dst's texture, clamp w/h to dst's size
// and recompute the scale and the destination rect for that axis
GSVector2 scale = dst->m_texture->m_scale;
GSVector4 dr(0, 0, w, h);
if(w > dstsize.x)
{
scale.x = (float)dstsize.x / tw;
dr.z = (float)dstsize.x * scale.x / dst->m_texture->m_scale.x;
w = dstsize.x;
}
if(h > dstsize.y)
{
scale.y = (float)dstsize.y / th;
dr.w = (float)dstsize.y * scale.y / dst->m_texture->m_scale.y;
h = dstsize.y;
}
GSVector4 sr(0, 0, w, h);
// read from the pitch-converted intermediate if one was made above, else straight from dst
GSTexture* st = m_texture ? m_texture : dst->m_texture;
GSTexture* dt = m_renderer->m_dev->CreateRenderTarget(w, h);
if(!m_texture)
{
m_texture = dt;
}
if((sr == dr).alltrue())
{
// source and destination rects are identical: 1:1 point-filtered StretchRect on the D3D9 device
GSVector4i r(0, 0, w, h);
(*(GSDevice9*)m_renderer->m_dev)->StretchRect(*(GSTexture9*)st, r, *(GSTexture9*)dt, r, D3DTEXF_POINT);
}
else
{
// rects differ: divide the source extent by the source texture size and stretch
sr.z /= st->GetWidth();
sr.w /= st->GetHeight();
m_renderer->m_dev->StretchRect(st, sr, dt, dr);
}
if(dt != m_texture)
{
// m_texture is still the pitch-conversion intermediate here; give it back to the device and keep dt
m_renderer->m_dev->Recycle(m_texture);
m_texture = dt;
}
m_texture->m_scale = scale;
// m_bpp encodes the texture format; formats not listed leave m_bpp unchanged
switch(m_TEX0.PSM)
{
case PSM_PSMCT32:
m_bpp = 0;
break;
case PSM_PSMCT24:
m_bpp = 1;
break;
case PSM_PSMCT16:
case PSM_PSMCT16S:
m_bpp = 2;
break;
case PSM_PSMT8H:
m_bpp = 3;
m_palette = m_renderer->m_dev->CreateTexture(256, 1);
m_initpalette = true;
break;
case PSM_PSMT4HL:
m_bpp = 4;
m_palette = m_renderer->m_dev->CreateTexture(256, 1);
m_initpalette = true;
break;
case PSM_PSMT4HH:
m_bpp = 5;
m_palette = m_renderer->m_dev->CreateTexture(256, 1);
m_initpalette = true;
break;
}
return true;
}
// Target9
void GSTextureCache9::Target9::Read(const GSVector4i& r)
@ -261,52 +74,21 @@ void GSTextureCache9::Target9::Read(const GSVector4i& r)
{
// TODO: block level write
uint32 bp = m_TEX0.TBP0;
uint32 bw = m_TEX0.TBW;
GSLocalMemory::PixelOffset* po = m_renderer->m_mem.GetPixelOffset(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM);
GSLocalMemory::pixelAddress pa = GSLocalMemory::m_psm[m_TEX0.PSM].pa;
if(m_TEX0.PSM == PSM_PSMCT32)
{
for(int y = r.top; y < r.bottom; y++, m.bits += m.pitch)
{
uint32 addr = pa(0, y, bp, bw);
int* offset = GSLocalMemory::m_psm[m_TEX0.PSM].rowOffset[y & 7];
for(int x = r.left, i = 0; x < r.right; x++, i++)
{
m_renderer->m_mem.WritePixel32(addr + offset[x], ((uint32*)m.bits)[i]);
}
}
}
else if(m_TEX0.PSM == PSM_PSMCT24)
{
for(int y = r.top; y < r.bottom; y++, m.bits += m.pitch)
{
uint32 addr = pa(0, y, bp, bw);
int* offset = GSLocalMemory::m_psm[m_TEX0.PSM].rowOffset[y & 7];
for(int x = r.left, i = 0; x < r.right; x++, i++)
{
m_renderer->m_mem.WritePixel24(addr + offset[x], ((uint32*)m.bits)[i]);
}
}
}
else if(m_TEX0.PSM == PSM_PSMCT16 || m_TEX0.PSM == PSM_PSMCT16S)
{
for(int y = r.top; y < r.bottom; y++, m.bits += m.pitch)
{
uint32 addr = pa(0, y, bp, bw);
int* offset = GSLocalMemory::m_psm[m_TEX0.PSM].rowOffset[y & 7];
for(int x = r.left, i = 0; x < r.right; x++, i++)
{
m_renderer->m_mem.WriteFrame16(addr + offset[x], ((uint32*)m.bits)[i]);
}
}
}
else
switch(m_TEX0.PSM)
{
case PSM_PSMCT32:
m_renderer->m_mem.WritePixel32(m.bits, m.pitch, po, r);
break;
case PSM_PSMCT24:
m_renderer->m_mem.WritePixel24(m.bits, m.pitch, po, r);
break;
case PSM_PSMCT16:
case PSM_PSMCT16S:
m_renderer->m_mem.WriteFrame16(m.bits, m.pitch, po, r);
break;
default:
ASSERT(0);
}

View File

@ -28,11 +28,11 @@ class GSTextureCache9 : public GSTextureCache
{
class Source9 : public Source
{
protected:
int Get8bitFormat() {return D3DFMT_A8;}
public:
explicit Source9(GSRenderer* r) : Source(r) {}
bool Create();
bool Create(Target* dst);
};
class Target9 : public Target

View File

@ -31,22 +31,6 @@ GSTextureCacheOGL::GSTextureCacheOGL(GSRenderer* r)
// SourceOGL
// Placeholder for the OpenGL backend: no texture is allocated and no state is
// touched; success is reported unconditionally.
bool GSTextureCacheOGL::SourceOGL::Create()
{
// TODO
return true;
}
// Placeholder for the OpenGL backend: only flags the source as target-backed
// (m_target = true); dst is not read and success is reported unconditionally.
bool GSTextureCacheOGL::SourceOGL::Create(Target* dst)
{
m_target = true;
// TODO
return true;
}
// TargetOGL
void GSTextureCacheOGL::TargetOGL::Read(const GSVector4i& r)

View File

@ -28,11 +28,11 @@ class GSTextureCacheOGL : public GSTextureCache
{
class SourceOGL : public Source
{
protected:
int Get8bitFormat() {return 0;} // TODO
public:
explicit SourceOGL(GSRenderer* r) : Source(r) {}
bool Create();
bool Create(Target* dst);
};
class TargetOGL : public Target

View File

@ -30,6 +30,17 @@ class GSTextureFX : public GSAlignedClass<16>
public:
#pragma pack(push, 1)
// Texture format codes for the pixel shader selector (the 3-bit `fmt` field,
// emitted as the PS_FMT shader macro). The implicit values 0..6 have to stay in
// step with the FMT_* #defines in the shader source.
enum
{
FMT_32, // 0
FMT_24, // 1
FMT_16, // 2
FMT_8H, // 3
FMT_4HL, // 4
FMT_4HH, // 5
FMT_8, // 6
};
__declspec(align(16)) struct VSConstantBuffer
{
GSVector4 VertexScale;
@ -154,7 +165,7 @@ public:
uint32 fst:1;
uint32 wms:2;
uint32 wmt:2;
uint32 bpp:3;
uint32 fmt:3;
uint32 aem:1;
uint32 tfx:3;
uint32 tcc:1;

View File

@ -204,7 +204,7 @@ void GSTextureFX10::UpdatePS(PSSelector sel, const PSConstantBuffer* cb, PSSampl
str[0] = format("%d", sel.fst);
str[1] = format("%d", sel.wms);
str[2] = format("%d", sel.wmt);
str[3] = format("%d", sel.bpp);
str[3] = format("%d", sel.fmt);
str[4] = format("%d", sel.aem);
str[5] = format("%d", sel.tfx);
str[6] = format("%d", sel.tcc);
@ -221,7 +221,7 @@ void GSTextureFX10::UpdatePS(PSSelector sel, const PSConstantBuffer* cb, PSSampl
{"PS_FST", str[0].c_str()},
{"PS_WMS", str[1].c_str()},
{"PS_WMT", str[2].c_str()},
{"PS_BPP", str[3].c_str()},
{"PS_FMT", str[3].c_str()},
{"PS_AEM", str[4].c_str()},
{"PS_TFX", str[5].c_str()},
{"PS_TCC", str[6].c_str()},
@ -256,7 +256,7 @@ void GSTextureFX10::UpdatePS(PSSelector sel, const PSConstantBuffer* cb, PSSampl
if(sel.tfx != 4)
{
if(!(sel.bpp < 3 && sel.wms < 3 && sel.wmt < 3))
if(!(sel.fmt < 3 && sel.wms < 3 && sel.wmt < 3))
{
ssel.ltf = 0;
}
@ -288,7 +288,7 @@ void GSTextureFX10::UpdatePS(PSSelector sel, const PSConstantBuffer* cb, PSSampl
m_ps_ss[ssel] = ss0;
}
if(sel.bpp >= 3)
if(sel.fmt >= 3)
{
ss1 = m_palette_ss;
}

View File

@ -206,7 +206,7 @@ void GSTextureFX11::UpdatePS(PSSelector sel, const PSConstantBuffer* cb, PSSampl
str[0] = format("%d", sel.fst);
str[1] = format("%d", sel.wms);
str[2] = format("%d", sel.wmt);
str[3] = format("%d", sel.bpp);
str[3] = format("%d", sel.fmt);
str[4] = format("%d", sel.aem);
str[5] = format("%d", sel.tfx);
str[6] = format("%d", sel.tcc);
@ -223,7 +223,7 @@ void GSTextureFX11::UpdatePS(PSSelector sel, const PSConstantBuffer* cb, PSSampl
{"PS_FST", str[0].c_str()},
{"PS_WMS", str[1].c_str()},
{"PS_WMT", str[2].c_str()},
{"PS_BPP", str[3].c_str()},
{"PS_FMT", str[3].c_str()},
{"PS_AEM", str[4].c_str()},
{"PS_TFX", str[5].c_str()},
{"PS_TCC", str[6].c_str()},
@ -260,7 +260,7 @@ void GSTextureFX11::UpdatePS(PSSelector sel, const PSConstantBuffer* cb, PSSampl
if(sel.tfx != 4)
{
if(!(sel.bpp < 3 && sel.wms < 3 && sel.wmt < 3))
if(!(sel.fmt < 3 && sel.wms < 3 && sel.wmt < 3))
{
ssel.ltf = 0;
}
@ -292,7 +292,7 @@ void GSTextureFX11::UpdatePS(PSSelector sel, const PSConstantBuffer* cb, PSSampl
m_ps_ss[ssel] = ss0;
}
if(sel.bpp >= 3)
if(sel.fmt >= 3)
{
ss1 = m_palette_ss;
}

View File

@ -189,7 +189,7 @@ void GSTextureFX9::UpdatePS(PSSelector sel, const PSConstantBuffer* cb, PSSample
str[0] = format("%d", sel.fst);
str[1] = format("%d", sel.wms);
str[2] = format("%d", sel.wmt);
str[3] = format("%d", sel.bpp);
str[3] = format("%d", sel.fmt);
str[4] = format("%d", sel.aem);
str[5] = format("%d", sel.tfx);
str[6] = format("%d", sel.tcc);
@ -205,7 +205,7 @@ void GSTextureFX9::UpdatePS(PSSelector sel, const PSConstantBuffer* cb, PSSample
{"PS_FST", str[0].c_str()},
{"PS_WMS", str[1].c_str()},
{"PS_WMT", str[2].c_str()},
{"PS_BPP", str[3].c_str()},
{"PS_FMT", str[3].c_str()},
{"PS_AEM", str[4].c_str()},
{"PS_TFX", str[5].c_str()},
{"PS_TCC", str[6].c_str()},
@ -233,7 +233,7 @@ void GSTextureFX9::UpdatePS(PSSelector sel, const PSConstantBuffer* cb, PSSample
if(sel.tfx != 4)
{
if(!(sel.bpp < 3 && sel.wms < 3 && sel.wmt < 3))
if(!(sel.fmt < 3 && sel.wms < 3 && sel.wmt < 3))
{
ssel.ltf = 0;
}

View File

@ -82,12 +82,12 @@ IDB_LOGO10 BITMAP "res\\logo10.bmp"
// Dialog
//
IDD_CONFIG DIALOGEX 0, 0, 189, 233
IDD_CONFIG DIALOGEX 0, 0, 189, 253
STYLE DS_SETFONT | DS_MODALFRAME | DS_FIXEDSYS | WS_POPUP | WS_CAPTION | WS_SYSMENU
CAPTION "Settings..."
FONT 8, "MS Shell Dlg", 400, 0, 0x1
BEGIN
CONTROL 2022,IDC_LOGO10,"Static",SS_BITMAP,7,7,175,44
CONTROL 2022,IDC_LOGO10,"Static",SS_BITMAP,7,7,175,42
CONTROL 2021,IDC_LOGO9,"Static",SS_BITMAP,7,7,175,44
LTEXT "Resolution:",IDC_STATIC,7,59,37,8
COMBOBOX IDC_RESOLUTION,71,57,111,125,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP
@ -107,13 +107,14 @@ BEGIN
EDITTEXT IDC_SWTHREADS_EDIT,71,132,35,13,ES_AUTOHSCROLL | ES_NUMBER
CONTROL "",IDC_SWTHREADS,"msctls_updown32",UDS_SETBUDDYINT | UDS_ALIGNRIGHT | UDS_AUTOBUDDY | UDS_ARROWKEYS | UDS_NOTHOUSANDS,99,135,11,14
CONTROL "Texture filtering",IDC_FILTER,"Button",BS_AUTO3STATE | WS_TABSTOP,7,152,67,10
CONTROL "Logarithmic Z",IDC_LOGZ,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,80,153,58,10
CONTROL "Wait vsync",IDC_VSYNC,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,165,51,10
CONTROL "Alpha correction (FBA)",IDC_FBA,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,80,166,102,10
CONTROL "Edge anti-aliasing (AA1, sw-mode only)",IDC_AA1,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,179,141,10
CONTROL "Enable output merger blur effect",IDC_BLUR,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,193,121,10
DEFPUSHBUTTON "OK",IDOK,43,212,50,14
PUSHBUTTON "Cancel",IDCANCEL,96,212,50,14
CONTROL "Logarithmic Z",IDC_LOGZ,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,89,153,58,10
CONTROL "Allow 8-bit textures",IDC_PALTEX,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,165,82,10
CONTROL "Alpha correction (FBA)",IDC_FBA,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,89,166,93,10
CONTROL "Wait vsync",IDC_VSYNC,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,177,51,10
CONTROL "Edge anti-aliasing (AA1, sw-mode only)",IDC_AA1,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,190,141,10
CONTROL "Enable output merger blur effect",IDC_BLUR,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,204,121,10
DEFPUSHBUTTON "OK",IDOK,43,232,50,14
PUSHBUTTON "Cancel",IDCANCEL,96,232,50,14
END
IDD_CAPTURE DIALOGEX 0, 0, 279, 71
@ -172,10 +173,11 @@ BEGIN
LEFTMARGIN, 7
RIGHTMARGIN, 182
VERTGUIDE, 71
VERTGUIDE, 80
VERTGUIDE, 89
VERTGUIDE, 182
TOPMARGIN, 7
BOTTOMMARGIN, 226
BOTTOMMARGIN, 246
HORZGUIDE, 49
END
IDD_CAPTURE, DIALOG

View File

@ -1,3 +1,11 @@
#define FMT_32 0
#define FMT_24 1
#define FMT_16 2
#define FMT_8H 3
#define FMT_4HL 4
#define FMT_4HH 5
#define FMT_8 6
#if SHADER_MODEL >= 0x400
#ifndef VS_BPPZ
@ -14,9 +22,9 @@
#ifndef PS_FST
#define PS_FST 0
#define PS_WMS 3
#define PS_WMT 3
#define PS_BPP 0
#define PS_WMS 0
#define PS_WMT 0
#define PS_FMT FMT_8
#define PS_AEM 0
#define PS_TFX 0
#define PS_TCC 1
@ -26,7 +34,7 @@
#define PS_CLR1 0
#define PS_FBA 0
#define PS_AOUT 0
#define PS_LTF 1
#define PS_LTF 0
#endif
struct VS_INPUT
@ -59,10 +67,10 @@ struct PS_OUTPUT
float4 c1 : SV_Target1;
};
Texture2D<float4> Texture;
Texture2D<float> Palette;
SamplerState TextureSampler;
SamplerState PaletteSampler;
Texture2D<float4> Texture : register(t0);
Texture2D<float4> Palette : register(t1);
SamplerState TextureSampler : register(s0);
SamplerState PaletteSampler : register(s1);
cbuffer cb0
{
@ -83,6 +91,16 @@ cbuffer cb1
uint4 MskFix;
};
// Shader model 4+ colour fetch: samples Texture through TextureSampler at uv.
float4 sample_c(float2 uv)
{
return Texture.Sample(TextureSampler, uv);
}
// Shader model 4+ palette fetch: samples Palette through PaletteSampler at u.
// NOTE(review): u is a scalar while Palette is a Texture2D, so this relies on
// HLSL's implicit scalar-to-vector conversion for the coordinate — confirm.
float4 sample_p(float u)
{
return Palette.Sample(PaletteSampler, u);
}
#elif SHADER_MODEL <= 0x300
#ifndef VS_BPPZ
@ -94,12 +112,12 @@ cbuffer cb1
#ifndef PS_FST
#define PS_FST 0
#define PS_WMS 3
#define PS_WMT 3
#define PS_BPP 0
#define PS_WMS 0
#define PS_WMT 0
#define PS_FMT FMT_8
#define PS_AEM 0
#define PS_TFX 0
#define PS_TCC 1
#define PS_TCC 0
#define PS_ATE 0
#define PS_ATST 4
#define PS_FOG 0
@ -130,7 +148,7 @@ struct PS_INPUT
};
sampler Texture : register(s0);
sampler1D Palette : register(s1);
sampler Palette : register(s1);
sampler1D UMSKFIX : register(s2);
sampler1D VMSKFIX : register(s3);
@ -148,8 +166,17 @@ float4 ps_params[5];
#define WH ps_params[2]
#define MinMax ps_params[3]
#define MinF ps_params[4].xy
#define TA0 ps_params[4].z
#define TA1 ps_params[4].w
#define TA ps_params[4].zw
// Shader model <= 3 colour fetch: tex2D lookup of the Texture sampler at uv.
float4 sample_c(float2 uv)
{
return tex2D(Texture, uv);
}
// Shader model <= 3 palette fetch: tex2D lookup of the Palette sampler at u.
// NOTE(review): u is a scalar passed where tex2D expects a 2D coordinate, so
// this relies on HLSL's implicit scalar-to-vector conversion — confirm.
float4 sample_p(float u)
{
return tex2D(Palette, u);
}
#endif
@ -249,35 +276,152 @@ float2 clampuv(float2 uv)
return uv;
}
// Fetches the four texels addressed by the coordinate pairs packed in uv and
// returns them as the rows of a matrix:
// row 0 = (x, y), row 1 = (z, y), row 2 = (x, w), row 3 = (z, w).
float4x4 sample_4c(float4 uv)
{
	float4x4 texels;

	texels[0] = sample_c(uv.xy);
	texels[1] = sample_c(uv.zy);
	texels[2] = sample_c(uv.xw);
	texels[3] = sample_c(uv.zw);

	return texels;
}
// Fetches only the alpha channel of the four texels addressed by uv, packed as
// x = (x, y), y = (z, y), z = (x, w), w = (z, w).
float4 sample_4a(float4 uv)
{
float4 c;
c.x = sample_c(uv.xy).a;
c.y = sample_c(uv.zy).a;
c.z = sample_c(uv.xw).a;
c.w = sample_c(uv.zw).a;
#if SHADER_MODEL <= 0x300
// shader model <= 3 only: alpha read from a render target (PS_RT) is halved
// NOTE(review): presumably this undoes a doubling applied when the target was written — confirm
if(PS_RT) c *= 0.5;
#endif
return c;
}
// Looks up four palette entries, one per component of u, and returns them as
// the rows of a matrix in x, y, z, w order.
float4x4 sample_4p(float4 u)
{
	float4x4 entries;

	entries[0] = sample_p(u.x);
	entries[1] = sample_p(u.y);
	entries[2] = sample_p(u.z);
	entries[3] = sample_p(u.w);

	return entries;
}
// Fetches the texel colour for texture coordinate tc. All PS_* conditions are
// preprocessor macros, so each shader permutation keeps only one path.
//
// tc: texture coordinate; divided by w first unless PS_FST is set.
// w:  divisor applied to tc when PS_FST is 0.
//
// Returns the texel colour, with alpha rewritten for the 24- and 16-bit formats.
float4 sample(float2 tc, float w)
{
if(!PS_FST)
{
tc /= w;
}
float4 t;
/*
if(PS_FMT <= FMT_16 && PS_WMS < 2 && PS_WMT < 2)
{
t = sample_c(tc);
}
*/
// direct-colour formats with both wrap modes below 3: a single fetch at the clamped coordinate
if(PS_FMT <= FMT_16 && PS_WMS < 3 && PS_WMT < 3)
{
t = sample_c(clampuv(tc));
}
else
{
// otherwise fetch four texels; dd holds the fractional weights used to blend them when PS_LTF is set
float4 uv2 = tc.xyxy + HalfTexel;
float2 dd = frac(uv2.xy * WH.zw);
float4 uv = wrapuv(uv2);
float4x4 c;
// paletted formats: the texture's alpha channel supplies the palette coordinate
if(PS_FMT == FMT_8H)
{
c = sample_4p(sample_4a(uv));
}
else if(PS_FMT == FMT_4HL)
{
// coordinate is alpha modulo 1/16
c = sample_4p(fmod(sample_4a(uv), 1.0f / 16));
}
else if(PS_FMT == FMT_4HH)
{
// coordinate is (alpha * 16) modulo 1/16
c = sample_4p(fmod(sample_4a(uv) * 16, 1.0f / 16));
}
else if(PS_FMT == FMT_8)
{
// same lookup as FMT_8H
c = sample_4p(sample_4a(uv));
}
else
{
c = sample_4c(uv);
}
if(PS_LTF)
{
// blend the four texels: first along x within each row pair, then along y
t = lerp(lerp(c[0], c[1], dd.x), lerp(c[2], c[3], dd.x), dd.y);
}
else
{
t = c[0];
}
}
// per-format alpha fix-up
if(PS_FMT == FMT_32)
{
#if SHADER_MODEL <= 0x300
// shader model <= 3 only: alpha read from a render target (PS_RT) is halved
if(PS_RT) t.a *= 0.5;
#endif
}
else if(PS_FMT == FMT_24)
{
// alpha becomes TA.x, or 0 when PS_AEM is set and rgb is all zero
t.a = !PS_AEM || any(t.rgb) ? TA.x : 0;
}
else if(PS_FMT == FMT_16)
{
// a bit incompatible with up-scaling because the 1 bit alpha is interpolated
// alpha >= 0.5 selects TA.y; otherwise the same rule as the 24-bit case
t.a = t.a >= 0.5 ? TA.y : !PS_AEM || any(t.rgb) ? TA.x : 0;
}
return t;
}
float4 tfx(float4 t, float4 c)
{
if(PS_TFX == 0)
{
if(PS_TCC == 0)
if(PS_TCC)
{
c.rgb = c.rgb * t.rgb * 255.0f / 128;
c = c * t * 255.0f / 128;
}
else
{
c = c * t * 255.0f / 128;
c.rgb = c.rgb * t.rgb * 255.0f / 128;
}
}
else if(PS_TFX == 1)
{
if(PS_TCC == 0)
if(PS_TCC)
{
c.rgb = t.rgb;
c = t;
}
else
{
c = t;
c.rgb = t.rgb;
}
}
else if(PS_TFX == 2)
{
c.rgb = c.rgb * t.rgb * 255.0f / 128 + c.a;
if(PS_TCC == 1)
if(PS_TCC)
{
c.a += t.a;
}
@ -286,7 +430,7 @@ float4 tfx(float4 t, float4 c)
{
c.rgb = c.rgb * t.rgb * 255.0f / 128 + c.a;
if(PS_TCC == 1)
if(PS_TCC)
{
c.a = t.a;
}
@ -297,7 +441,7 @@ float4 tfx(float4 t, float4 c)
void atst(float4 c)
{
if(PS_ATE == 1)
if(PS_ATE)
{
float a = trunc(c.a * 255);
@ -326,7 +470,7 @@ void atst(float4 c)
float4 fog(float4 c, float f)
{
if(PS_FOG == 1)
if(PS_FOG)
{
c.rgb = lerp(FogColor, c.rgb, f);
}
@ -334,6 +478,24 @@ float4 fog(float4 c, float f)
return c;
}
// Shared pixel-colour pipeline: fetch the texel, combine it with the
// interpolated vertex colour (tfx), run the alpha test, then apply fog.
// With PS_CLR1 set, the rgb result is forced to 1.
float4 ps_color(PS_INPUT input)
{
	float4 texel = sample(input.t.xy, input.t.w);

	float4 color = tfx(texel, input.c);

	atst(color);

	color = fog(color, input.t.z);

	if(PS_CLR1) // needed for Cd * (As/Ad/F + 1) blending modes
	{
		color.rgb = 1;
	}

	return color;
}
#if SHADER_MODEL >= 0x400
VS_OUTPUT vs_main(VS_INPUT input)
@ -363,9 +525,9 @@ VS_OUTPUT vs_main(VS_INPUT input)
output.p = p * VertexScale - VertexOffset;
if(VS_TME == 1)
if(VS_TME)
{
if(VS_FST == 1)
if(VS_FST)
{
output.t.xy = input.t * TextureScale;
output.t.w = 1.0f;
@ -454,139 +616,21 @@ void gs_main(line VS_OUTPUT input[2], inout TriangleStream<VS_OUTPUT> stream)
#endif
float4 sample(float2 tc, float w)
{
if(PS_FST == 0)
{
tc /= w;
}
float4 t;
/*
if(PS_BPP < 3 && PS_WMS < 2 && PS_WMT < 2)
{
t = Texture.Sample(TextureSampler, tc);
}
*/
if(PS_BPP < 3 && PS_WMS < 3 && PS_WMT < 3)
{
t = Texture.Sample(TextureSampler, clampuv(tc));
}
else
{
float w, h;
Texture.GetDimensions(w, h);
float4 uv2 = tc.xyxy + HalfTexel;
float2 dd = frac(uv2.xy * float2(w, h)); // * WH.zw
float4 uv = wrapuv(uv2);
float4 t00, t01, t10, t11;
if(PS_BPP == 3) // 8H
{
float4 a;
a.x = Texture.Sample(TextureSampler, uv.xy).a;
a.y = Texture.Sample(TextureSampler, uv.zy).a;
a.z = Texture.Sample(TextureSampler, uv.xw).a;
a.w = Texture.Sample(TextureSampler, uv.zw).a;
t00 = Palette.Sample(PaletteSampler, a.x);
t01 = Palette.Sample(PaletteSampler, a.y);
t10 = Palette.Sample(PaletteSampler, a.z);
t11 = Palette.Sample(PaletteSampler, a.w);
}
else if(PS_BPP == 4) // 4HL
{
float4 a;
a.x = Texture.Sample(TextureSampler, uv.xy).a;
a.y = Texture.Sample(TextureSampler, uv.zy).a;
a.z = Texture.Sample(TextureSampler, uv.xw).a;
a.w = Texture.Sample(TextureSampler, uv.zw).a;
a = fmod(a, 1.0f / 16);
t00 = Palette.Sample(PaletteSampler, a.x);
t01 = Palette.Sample(PaletteSampler, a.y);
t10 = Palette.Sample(PaletteSampler, a.z);
t11 = Palette.Sample(PaletteSampler, a.w);
}
else if(PS_BPP == 5) // 4HH
{
float4 a;
a.x = Texture.Sample(TextureSampler, uv.xy).a;
a.y = Texture.Sample(TextureSampler, uv.zy).a;
a.z = Texture.Sample(TextureSampler, uv.xw).a;
a.w = Texture.Sample(TextureSampler, uv.zw).a;
a = fmod(a * 16, 1.0f / 16);
t00 = Palette.Sample(PaletteSampler, a.x);
t01 = Palette.Sample(PaletteSampler, a.y);
t10 = Palette.Sample(PaletteSampler, a.z);
t11 = Palette.Sample(PaletteSampler, a.w);
}
else
{
t00 = Texture.Sample(TextureSampler, uv.xy);
t01 = Texture.Sample(TextureSampler, uv.zy);
t10 = Texture.Sample(TextureSampler, uv.xw);
t11 = Texture.Sample(TextureSampler, uv.zw);
}
if(PS_LTF)
{
t = lerp(lerp(t00, t01, dd.x), lerp(t10, t11, dd.x), dd.y);
}
else
{
t = t00;
}
}
if(PS_BPP == 1) // 24
{
t.a = PS_AEM == 0 || any(t.rgb) ? TA.x : 0;
}
else if(PS_BPP == 2) // 16
{
// a bit incompatible with up-scaling because the 1 bit alpha is interpolated
t.a = t.a >= 0.5 ? TA.y : PS_AEM == 0 || any(t.rgb) ? TA.x : 0;
}
return t;
}
PS_OUTPUT ps_main(PS_INPUT input)
{
float4 t = sample(input.t.xy, input.t.w);
float4 c = tfx(t, input.c);
atst(c);
c = fog(c, input.t.z);
if(PS_CLR1 == 1) // needed for Cd * (As/Ad/F + 1) blending modes
{
c.rgb = 1;
}
float4 c = ps_color(input);
PS_OUTPUT output;
output.c1 = c.a * 2; // used for alpha blending
if(PS_AOUT == 1) // 16 bit output
if(PS_AOUT) // 16 bit output
{
float a = 128.0f / 255; // alpha output will be 0x80
c.a = PS_FBA == 1 ? a : step(0.5, c.a) * a;
c.a = PS_FBA ? a : step(0.5, c.a) * a;
}
else if(PS_FBA == 1)
else if(PS_FBA)
{
if(c.a < 0.5) c.a += 0.5;
}
@ -620,14 +664,14 @@ VS_OUTPUT vs_main(VS_INPUT input)
output.p = p * VertexScale - VertexOffset;
if(VS_LOGZ == 1)
if(VS_LOGZ)
{
output.p.z = log2(1.0f + input.p.z) / 32;
}
if(VS_TME == 1)
if(VS_TME)
{
if(VS_FST == 1)
if(VS_FST)
{
output.t.xy = input.t * TextureScale;
output.t.w = 1.0f;
@ -650,134 +694,9 @@ VS_OUTPUT vs_main(VS_INPUT input)
return output;
}
float4 sample(float2 tc, float w)
{
if(PS_FST == 0)
{
tc /= w;
}
float4 t;
/*
if(PS_BPP < 3 && PS_WMS < 2 && PS_WMT < 2)
{
t = tex2D(Texture, tc);
}
*/
if(PS_BPP < 3 && PS_WMS < 3 && PS_WMT < 3)
{
t = tex2D(Texture, clampuv(tc));
}
else
{
float4 uv2 = tc.xyxy + HalfTexel;
float2 dd = frac(uv2.xy * WH.zw);
float4 uv = wrapuv(uv2);
float4 t00, t01, t10, t11;
if(PS_BPP == 3) // 8HP
{
float4 a;
a.x = tex2D(Texture, uv.xy).a;
a.y = tex2D(Texture, uv.zy).a;
a.z = tex2D(Texture, uv.xw).a;
a.w = tex2D(Texture, uv.zw).a;
if(PS_RT == 1) a *= 0.5;
t00 = tex1D(Palette, a.x);
t01 = tex1D(Palette, a.y);
t10 = tex1D(Palette, a.z);
t11 = tex1D(Palette, a.w);
}
else if(PS_BPP == 4) // 4HL
{
float4 a;
a.x = tex2D(Texture, uv.xy).a;
a.y = tex2D(Texture, uv.zy).a;
a.z = tex2D(Texture, uv.xw).a;
a.w = tex2D(Texture, uv.zw).a;
if(PS_RT == 1) a *= 0.5;
a = fmod(a, 1.0f / 16);
t00 = tex1D(Palette, a.x);
t01 = tex1D(Palette, a.y);
t10 = tex1D(Palette, a.z);
t11 = tex1D(Palette, a.w);
}
else if(PS_BPP == 5) // 4HH
{
float4 a;
a.x = tex2D(Texture, uv.xy).a;
a.y = tex2D(Texture, uv.zy).a;
a.z = tex2D(Texture, uv.xw).a;
a.w = tex2D(Texture, uv.zw).a;
if(PS_RT == 1) a *= 0.5;
a = fmod(a * 16, 1.0f / 16);
t00 = tex1D(Palette, a.x);
t01 = tex1D(Palette, a.y);
t10 = tex1D(Palette, a.z);
t11 = tex1D(Palette, a.w);
}
else
{
t00 = tex2D(Texture, uv.xy);
t01 = tex2D(Texture, uv.zy);
t10 = tex2D(Texture, uv.xw);
t11 = tex2D(Texture, uv.zw);
}
if(PS_LTF)
{
t = lerp(lerp(t00, t01, dd.x), lerp(t10, t11, dd.x), dd.y);
}
else
{
t = t00;
}
}
if(PS_BPP == 0) // 32
{
if(PS_RT == 1) t.a *= 0.5;
}
else if(PS_BPP == 1) // 24
{
t.a = PS_AEM == 0 || any(t.rgb) ? TA0 : 0;
}
else if(PS_BPP == 2) // 16
{
// a bit incompatible with up-scaling because the 1 bit alpha is interpolated
t.a = t.a >= 0.5 ? TA1 : PS_AEM == 0 || any(t.rgb) ? TA0 : 0;
}
return t;
}
float4 ps_main(PS_INPUT input) : COLOR
{
float4 t = sample(input.t.xy, input.t.w);
float4 c = tfx(t, input.c);
atst(c);
c = fog(c, input.t.z);
if(PS_CLR1 == 1) // needed for Cd * (As/Ad/F + 1) blending modes
{
c.rgb = 1;
}
float4 c = ps_color(input);
c.a *= 2;

View File

@ -4,6 +4,7 @@
//
#define IDC_NATIVERES 2001
#define IDC_VSYNC 2002
#define IDC_PALTEX 2002
#define IDC_CHECK3 2003
#define IDC_LOGZ 2004
#define IDC_CHECK6 2005