GSdx: moved 16-bit texture processing from the GPU to the CPU; with the pixel shader it was either done incorrectly or too slow. Please check what got slower/faster or broken :P

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1375 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2009-06-17 11:24:42 +00:00
parent fb4fcc7d29
commit 8e979f1b96
21 changed files with 271 additions and 205 deletions

View File

@ -211,6 +211,6 @@ LRESULT GPURenderer::OnMessage(UINT message, WPARAM wParam, LPARAM lParam)
}
}
return m_wndproc(m_hWnd, message, wParam, lParam);
return CallWindowProc(m_wndproc, m_hWnd, message, wParam, lParam);
}

View File

@ -78,7 +78,7 @@ GSTexture* GPURendererSW::GetOutput()
}
}
m_texture->Update(GSVector4i(0, 0, w, h), buff, m_mem.GetWidth() * sizeof(uint32));
m_texture->Update(r.rsize(), buff, m_mem.GetWidth() * sizeof(uint32));
return m_texture;
}

View File

@ -126,7 +126,7 @@ static INT32 GSopen(void* dsp, char* title, int mt, int renderer)
case 3: s_gs = new GSRendererHW10(s_basemem, !!mt, s_irq); break;
case 4: s_gs = new GSRendererSW(s_basemem, !!mt, s_irq, new GSDevice10()); break;
case 5: s_gs = new GSRendererNull(s_basemem, !!mt, s_irq, new GSDevice10()); break;
#if 0
#if 1
case 6: s_gs = new GSRendererOGL(s_basemem, !!mt, s_irq); break;
case 7: s_gs = new GSRendererSW(s_basemem, !!mt, s_irq, new GSDeviceOGL()); break;
case 8: s_gs = new GSRendererNull(s_basemem, !!mt, s_irq, new GSDeviceOGL()); break;

View File

@ -38,7 +38,6 @@ GSDevice9::GSDevice9()
, m_ps_ss(NULL)
, m_scissor(0, 0, 0, 0)
, m_dss(NULL)
, m_sref(0)
, m_bs(NULL)
, m_bf(0xffffffff)
, m_rtv(NULL)
@ -233,7 +232,6 @@ bool GSDevice9::Reset(int w, int h, bool fs)
m_ps_ss = NULL;
m_scissor = GSVector4i::zero();
m_dss = NULL;
m_sref = 0;
m_bs = NULL;
m_bf = 0xffffffff;
m_rtv = NULL;
@ -570,7 +568,7 @@ void GSDevice9::StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, c
// om
OMSetDepthStencilState(&m_convert.dss, 0);
OMSetDepthStencilState(&m_convert.dss);
OMSetBlendState(bs, 0);
OMSetRenderTargets(dt, NULL);
@ -817,6 +815,7 @@ void GSDevice9::PSSetSamplerState(Direct3DSamplerState9* ss)
{
if(ss && m_ps_ss != ss)
{
m_dev->SetSamplerState(0, D3DSAMP_ADDRESSU, ss->AddressU);
m_dev->SetSamplerState(0, D3DSAMP_ADDRESSV, ss->AddressV);
m_dev->SetSamplerState(1, D3DSAMP_ADDRESSU, ss->AddressU);
@ -850,9 +849,9 @@ void GSDevice9::RSSet(int width, int height, const GSVector4i* scissor)
}
}
void GSDevice9::OMSetDepthStencilState(Direct3DDepthStencilState9* dss, uint32 sref)
void GSDevice9::OMSetDepthStencilState(Direct3DDepthStencilState9* dss)
{
if(m_dss != dss || m_sref != sref)
if(m_dss != dss)
{
m_dev->SetRenderState(D3DRS_ZENABLE, dss->DepthEnable);
m_dev->SetRenderState(D3DRS_ZWRITEENABLE, dss->DepthWriteMask);
@ -872,11 +871,10 @@ void GSDevice9::OMSetDepthStencilState(Direct3DDepthStencilState9* dss, uint32 s
m_dev->SetRenderState(D3DRS_STENCILPASS, dss->StencilPassOp);
m_dev->SetRenderState(D3DRS_STENCILFAIL, dss->StencilFailOp);
m_dev->SetRenderState(D3DRS_STENCILZFAIL, dss->StencilDepthFailOp);
m_dev->SetRenderState(D3DRS_STENCILREF, sref);
m_dev->SetRenderState(D3DRS_STENCILREF, dss->StencilRef);
}
m_dss = dss;
m_sref = sref;
}
}

View File

@ -44,6 +44,7 @@ struct Direct3DDepthStencilState9
D3DSTENCILOP StencilDepthFailOp;
D3DSTENCILOP StencilPassOp;
D3DCMPFUNC StencilFunc;
uint32 StencilRef;
};
struct Direct3DBlendState9
@ -75,7 +76,6 @@ private:
Direct3DSamplerState9* m_ps_ss;
GSVector4i m_scissor;
Direct3DDepthStencilState9* m_dss;
uint32 m_sref;
Direct3DBlendState9* m_bs;
uint32 m_bf;
IDirect3DSurface9* m_rtv;
@ -168,7 +168,7 @@ public:
void PSSetShader(IDirect3DPixelShader9* ps, const float* ps_cb, int ps_cb_len);
void PSSetSamplerState(Direct3DSamplerState9* ss);
void RSSet(int width, int height, const GSVector4i* scissor = NULL);
void OMSetDepthStencilState(Direct3DDepthStencilState9* dss, uint32 sref);
void OMSetDepthStencilState(Direct3DDepthStencilState9* dss);
void OMSetBlendState(Direct3DBlendState9* bs, uint32 bf);
void OMSetRenderTargets(GSTexture* rt, GSTexture* ds);

View File

@ -29,7 +29,14 @@ GSDeviceOGL::GSDeviceOGL()
, m_hGLRC(NULL)
, m_vbo(0)
, m_fbo(0)
, m_context(0)
, m_topology(-1)
, m_ps_ss(NULL)
, m_scissor(0, 0, 0, 0)
, m_viewport(0, 0)
, m_dss(NULL)
, m_bs(NULL)
, m_bf(-1)
, m_rt((GLuint)-1)
, m_ds((GLuint)-1)
{
@ -41,6 +48,7 @@ GSDeviceOGL::GSDeviceOGL()
GSDeviceOGL::~GSDeviceOGL()
{
if(m_context) cgDestroyContext(m_context);
if(m_vbo) glDeleteBuffers(1, &m_vbo);
if(m_fbo) glDeleteFramebuffers(1, &m_fbo);
@ -100,11 +108,6 @@ bool GSDeviceOGL::Create(GSWnd* wnd, bool vsync)
return false;
}
if(WGLEW_EXT_swap_control)
{
wglSwapIntervalEXT(vsync ? 1 : 0);
}
#endif
if(glewInit() != GLEW_OK)
@ -134,6 +137,17 @@ bool GSDeviceOGL::Create(GSWnd* wnd, bool vsync)
glBindBuffer(GL_ARRAY_BUFFER, m_vbo); CheckError();
// TODO: setup layout?
m_context = cgCreateContext();
// cgGLSetDebugMode(CG_FALSE); CheckCgError();
cgSetParameterSettingMode(m_context, CG_DEFERRED_PARAMETER_SETTING); CheckCgError();
/*
struct {CGprofile vs, gs, ps;} m_profile;
m_profile.vs = cgGLGetLatestProfile(CG_GL_VERTEX); CheckCgError();
m_profile.gs = cgGLGetLatestProfile(CG_GL_GEOMETRY); CheckCgError();
m_profile.ps = cgGLGetLatestProfile(CG_GL_FRAGMENT); CheckCgError();
*/
GSVector4i r = wnd->GetClientRect();
Reset(r.width(), r.height(), false);
@ -387,6 +401,111 @@ void GSDeviceOGL::IASetPrimitiveTopology(int topology)
m_topology = topology;
}
// Applies a sampler state to texture units 0..3.
//
// Units 0 and 1 take their wrap modes and min/mag filter from *ss.
// Units 2 and 3 always get GL_REPEAT wrapping with nearest (point) filtering.
//
// The call is a no-op when ss is NULL or when ss is the state that was applied
// last (cached in m_ps_ss). The active texture unit is left at GL_TEXTURE3.
void GSDeviceOGL::PSSetSamplerState(SamplerStateOGL* ss)
{
	if(ss && m_ps_ss != ss)
	{
		// GL_TEXTUREi is defined as GL_TEXTURE0 + i, so the units can be indexed.

		for(int unit = 0; unit < 2; unit++)
		{
			glActiveTexture(GL_TEXTURE0 + unit);

			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, ss->wrap.s);
			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, ss->wrap.t);
			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, ss->filter);
			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, ss->filter);
		}

		for(int unit = 2; unit < 4; unit++)
		{
			glActiveTexture(GL_TEXTURE0 + unit);

			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);

			// GL_POINT is a polygon rasterization mode, not a texture filter;
			// passing it here raises GL_INVALID_ENUM and leaves the filter
			// unchanged. GL_NEAREST is the point-sampling filter.
			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
		}

		m_ps_ss = ss;
	}
}
void GSDeviceOGL::RSSet(int width, int height, const GSVector4i* scissor)
{
if(m_viewport.x != width || m_viewport.y != height)
{
glViewport(0, 0, width, height); CheckError();
m_viewport = GSVector2i(width, height);
}
GSVector4i r = scissor ? *scissor : GSVector4i(0, 0, width, height);
if(!m_scissor.eq(r))
{
glScissor(r.left, r.top, r.width(), r.height()); CheckError();
m_scissor = r;
}
}
// Applies a depth/stencil state object to the GL context. Nothing happens when
// dss is the state that was applied last (cached in m_dss).
//
// Depth func/write mask and the stencil func/op/write mask are only sent to GL
// when the corresponding test is enabled; otherwise the test is just disabled.
void GSDeviceOGL::OMSetDepthStencilState(DepthStencilStateOGL* dss)
{
	if(m_dss == dss)
	{
		return;
	}

	// depth

	if(!dss->depth.enable)
	{
		glDisable(GL_DEPTH_TEST);
		CheckError();
	}
	else
	{
		glEnable(GL_DEPTH_TEST);
		CheckError();

		glDepthFunc(dss->depth.func);
		CheckError();

		glDepthMask(dss->depth.write);
		CheckError();
	}

	// stencil

	if(!dss->stencil.enable)
	{
		glDisable(GL_STENCIL_TEST);
		CheckError();
	}
	else
	{
		glEnable(GL_STENCIL_TEST);
		CheckError();

		glStencilFunc(dss->stencil.func, dss->stencil.ref, dss->stencil.mask);
		CheckError();

		glStencilOp(dss->stencil.sfail, dss->stencil.dpfail, dss->stencil.dppass);
		CheckError();

		glStencilMask(dss->stencil.wmask);
		CheckError();
	}

	m_dss = dss;
}
// Applies a blend state object and blend factor to the GL context. Nothing
// happens when both bs and bf match the last applied pair (m_bs / m_bf).
//
// The blend equation, blend functions and constant color are only sent to GL
// when blending is enabled; the color write mask is always applied.
void GSDeviceOGL::OMSetBlendState(BlendStateOGL* bs, float bf)
{
	if(m_bs == bs && m_bf == bf)
	{
		return;
	}

	if(!bs->enable)
	{
		glDisable(GL_BLEND);
		CheckError();
	}
	else
	{
		glEnable(GL_BLEND);
		CheckError();

		glBlendEquationSeparate(bs->modeRGB, bs->modeAlpha);
		CheckError();

		glBlendFuncSeparate(bs->srcRGB, bs->dstRGB, bs->srcAlpha, bs->dstAlpha);
		CheckError();

		// bf is replicated to the RGB channels of the constant color; alpha is 0.
		glBlendColor(bf, bf, bf, 0);
		CheckError();
	}

	glColorMask(bs->mask.r, bs->mask.g, bs->mask.b, bs->mask.a);
	CheckError();

	m_bs = bs;
	m_bf = bf;
}
void GSDeviceOGL::OMSetRenderTargets(GSTexture* rt, GSTexture* ds)
{
GLuint rti = 0;
@ -395,17 +514,17 @@ void GSDeviceOGL::OMSetRenderTargets(GSTexture* rt, GSTexture* ds)
if(rt) rti = *(GSTextureOGL*)rt;
if(ds) dsi = *(GSTextureOGL*)ds;
// TODO: if(m_rt != rti)
if(m_rt != rti)
{
glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, rti); CheckError();
// TODO: m_rt = rti;
m_rt = rti;
}
// TODO: if(m_ds != dsi)
if(m_ds != dsi)
{
glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_DEPTH_COMPONENT, dsi); CheckError();
// TODO: m_ds = dsi;
m_ds = dsi;
}
}

View File

@ -24,6 +24,46 @@
#include "GSDevice.h"
#include "GSTextureOGL.h"
// Texture sampling parameters consumed by GSDeviceOGL::PSSetSamplerState.
struct SamplerStateOGL
{
	// Wrap modes, passed as GL_TEXTURE_WRAP_S / GL_TEXTURE_WRAP_T.
	struct
	{
		GLenum s;
		GLenum t;
	} wrap;

	// Used for both GL_TEXTURE_MIN_FILTER and GL_TEXTURE_MAG_FILTER.
	GLenum filter;
};
// Depth and stencil test parameters consumed by
// GSDeviceOGL::OMSetDepthStencilState.
struct DepthStencilStateOGL
{
// Depth test settings; func and write are only applied when enable is true.
struct
{
bool enable; // toggles GL_DEPTH_TEST
bool write; // passed to glDepthMask
GLenum func; // passed to glDepthFunc
} depth;
// Stencil test settings; everything but enable is only applied when enable is true.
struct
{
bool enable; // toggles GL_STENCIL_TEST
GLenum func; // glStencilFunc: comparison function
GLint ref; // glStencilFunc: reference value
GLuint mask; // glStencilFunc: comparison mask
GLenum sfail; // glStencilOp: first argument
GLenum dpfail; // glStencilOp: second argument
GLenum dppass; // glStencilOp: third argument
GLuint wmask; // passed to glStencilMask
} stencil;
};
// Blend parameters consumed by GSDeviceOGL::OMSetBlendState.
struct BlendStateOGL
{
	// Toggles GL_BLEND; the factors and equations below are only applied when set.
	bool enable;

	// Source/destination factors, passed to glBlendFuncSeparate.
	GLenum srcRGB;
	GLenum dstRGB;
	GLenum srcAlpha;
	GLenum dstAlpha;

	// Blend equations, passed to glBlendEquationSeparate.
	GLenum modeRGB;
	GLenum modeAlpha;

	// Per-channel color write mask, passed to glColorMask.
	//
	// This has to be a struct: in a union all four 1-bit fields would share the
	// same bit, so the channels could never be masked independently.
	struct
	{
		uint8 r:1;
		uint8 g:1;
		uint8 b:1;
		uint8 a:1;
	} mask;
};
class GSDeviceOGL : public GSDevice
{
#ifdef _WINDOWS
@ -42,11 +82,19 @@ class GSDeviceOGL : public GSDevice
} m_vertices;
int m_topology;
SamplerStateOGL* m_ps_ss;
GSVector4i m_scissor;
GSVector2i m_viewport;
DepthStencilStateOGL* m_dss;
BlendStateOGL* m_bs;
float m_bf;
GLuint m_rt;
GLuint m_ds;
//
CGcontext m_context;
static void OnStaticCgError(CGcontext ctx, CGerror err, void* p) {((GSDeviceOGL*)p)->OnCgError(ctx, err);}
void OnCgError(CGcontext ctx, CGerror err);
@ -89,6 +137,10 @@ public:
void IASetInputLayout(); // TODO
void IASetPrimitiveTopology(int topology);
void PSSetSamplerState(SamplerStateOGL* ss);
void RSSet(int width, int height, const GSVector4i* scissor);
void OMSetDepthStencilState(DepthStencilStateOGL* dss);
void OMSetBlendState(BlendStateOGL* bs, float bf);
void OMSetRenderTargets(GSTexture* rt, GSTexture* ds);
static void CheckError()
@ -114,4 +166,21 @@ public:
printf("%d\n", status);
}
}
void CheckCgError()
{
CGerror error;
const char* s = cgGetLastErrorString(&error);
if(error != CG_NO_ERROR)
{
printf("%s\n", s);
if(error == CG_COMPILER_ERROR)
{
printf("%s\n", cgGetLastListing(m_context));
}
}
}
};

View File

@ -72,11 +72,6 @@ int GSLocalMemory::blockOffset4[256];
//
uint32 GSLocalMemory::m_xtbl[1024];
uint32 GSLocalMemory::m_ytbl[1024];
//
GSLocalMemory::psm_t GSLocalMemory::m_psm[64];
//

View File

@ -841,8 +841,6 @@ public:
//
static uint32 m_xtbl[1024], m_ytbl[1024];
template<typename T> void ReadTexture(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, readTexel rt, readTexture rtx);
//

View File

@ -252,17 +252,11 @@ void GSRendererHW10::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache
GSTextureFX10::VSSelector vs_sel;
vs_sel.bpp = 0;
vs_sel.bppz = 0;
vs_sel.tme = PRIM->TME;
vs_sel.fst = PRIM->FST;
vs_sel.prim = prim;
if(tex)
{
vs_sel.bpp = tex->m_bpp2;
}
if(om_dssel.zte && om_dssel.ztst > 0 && om_dssel.zwe)
{
if(context->ZBUF.PSM == PSM_PSMZ24)
@ -349,7 +343,7 @@ void GSRendererHW10::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache
if(tex)
{
ps_sel.bpp = tex->m_bpp2;
ps_sel.bpp = tex->m_bpp;
int w = tex->m_texture->GetWidth();
int h = tex->m_texture->GetHeight();

View File

@ -50,6 +50,7 @@ bool GSRendererHW9::Create(const string& title)
m_date.dss.StencilWriteMask = 1;
m_date.dss.StencilFunc = D3DCMP_ALWAYS;
m_date.dss.StencilPassOp = D3DSTENCILOP_REPLACE;
m_date.dss.StencilRef = 1;
memset(&m_date.bs, 0, sizeof(m_date.bs));
@ -64,6 +65,7 @@ bool GSRendererHW9::Create(const string& title)
m_fba.dss.StencilPassOp = D3DSTENCILOP_ZERO;
m_fba.dss.StencilFailOp = D3DSTENCILOP_ZERO;
m_fba.dss.StencilDepthFailOp = D3DSTENCILOP_ZERO;
m_fba.dss.StencilRef = 2;
memset(&m_fba.bs, 0, sizeof(m_fba.bs));
@ -321,7 +323,7 @@ void GSRendererHW9::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache:
if(tex)
{
ps_sel.bpp = tex->m_bpp2;
ps_sel.bpp = tex->m_bpp;
int w = tex->m_texture->GetWidth();
int h = tex->m_texture->GetHeight();
@ -523,7 +525,7 @@ void GSRendererHW9::SetupDATE(GSTexture* rt, GSTexture* ds)
// om
dev->OMSetDepthStencilState(&m_date.dss, 1);
dev->OMSetDepthStencilState(&m_date.dss);
dev->OMSetBlendState(&m_date.bs, 0);
dev->OMSetRenderTargets(t, ds);
@ -575,7 +577,7 @@ void GSRendererHW9::UpdateFBA(GSTexture* rt)
// om
dev->OMSetDepthStencilState(&m_fba.dss, 2);
dev->OMSetDepthStencilState(&m_fba.dss);
dev->OMSetBlendState(&m_fba.bs, 0);
// ia

View File

@ -202,13 +202,14 @@ GSTextureCache::GSCachedTexture* GSTextureCache::GetTexture()
{
const GIFRegTEX0& TEX0 = m_renderer->m_context->TEX0;
const GIFRegCLAMP& CLAMP = m_renderer->m_context->CLAMP;
const GIFRegTEXA& TEXA = m_renderer->m_env.TEXA;
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
const uint32* clut = m_renderer->m_mem.m_clut;
const int pal = GSLocalMemory::m_psm[TEX0.PSM].pal;
if(pal > 0)
if(psm.pal > 0)
{
m_renderer->m_mem.m_clut.Read(TEX0);
m_renderer->m_mem.m_clut.Read32(TEX0, TEXA);
/*
POSITION pos = m_tex.GetHeadPosition();
@ -267,12 +268,17 @@ GSTextureCache::GSCachedTexture* GSTextureCache::GetTexture()
{
GSCachedTexture* t2 = *i;
if((((t2->m_TEX0.u32[0] ^ TEX0.u32[0]) & 0xffefffff) | ((t2->m_TEX0.u32[1] ^ TEX0.u32[1]) & 3)) != 0) // TBP0 TBW (PSM & ~1) TW TH
if(((t2->m_TEX0.u32[0] ^ TEX0.u32[0]) | ((t2->m_TEX0.u32[1] ^ TEX0.u32[1]) & 3)) != 0) // TBP0 TBW PSM TW TH
{
continue;
}
if(!(pal == 0 || t2->m_TEX0.CPSM == TEX0.CPSM && GSVector4i::compare(t2->m_clut, clut, pal * sizeof(clut[0]))))
if((psm.trbpp == 16 || psm.trbpp == 24) && TEX0.TCC && TEXA != t2->m_TEXA)
{
continue;
}
if(psm.pal > 0 && !(t2->m_TEX0.CPSM == TEX0.CPSM && GSVector4i::compare(t2->m_clut, clut, psm.pal * sizeof(clut[0]))))
{
continue;
}
@ -344,23 +350,23 @@ GSTextureCache::GSCachedTexture* GSTextureCache::GetTexture()
m_tex.push_front(t);
}
if(pal > 0)
if(psm.pal > 0)
{
int size = pal * sizeof(clut[0]);
int size = psm.pal * sizeof(clut[0]);
if(t->m_palette)
{
if(t->m_initpalette)
{
memcpy(t->m_clut, clut, size);
t->m_palette->Update(GSVector4i(0, 0, pal, 1), t->m_clut, size);
t->m_palette->Update(GSVector4i(0, 0, psm.pal, 1), t->m_clut, size);
t->m_initpalette = false;
}
else
{
if(GSVector4i::update(t->m_clut, clut, size))
{
t->m_palette->Update(GSVector4i(0, 0, pal, 1), t->m_clut, size);
t->m_palette->Update(GSVector4i(0, 0, psm.pal, 1), t->m_clut, size);
}
}
}
@ -728,11 +734,11 @@ void GSTextureCache::GSDepthStencil::Update()
GSTextureCache::GSCachedTexture::GSCachedTexture(GSRenderer* r)
: GSSurface(r)
, m_valid(0, 0, 0, 0)
, m_bpp(0)
, m_bpp2(0)
, m_rendered(false)
{
m_valid = GSVector4i::zero();
m_clut = (uint32*)_aligned_malloc(256 * sizeof(uint32), 16);
memset(m_clut, 0, sizeof(m_clut));
@ -767,7 +773,7 @@ void GSTextureCache::GSCachedTexture::Update()
{
// in dx9 managed textures can be written directly, less copying is faster, but still not as fast as dx10's UpdateResource
m_renderer->m_mem.ReadTextureNP(r, m.bits, m.pitch, m_renderer->m_context->TEX0, m_renderer->m_env.TEXA);
m_renderer->m_mem.ReadTexture(r, m.bits, m.pitch, m_renderer->m_context->TEX0, m_renderer->m_env.TEXA);
m_texture->Unmap();
}
@ -777,12 +783,12 @@ void GSTextureCache::GSCachedTexture::Update()
int pitch = ((r.width() + 3) & ~3) * 4;
m_renderer->m_mem.ReadTextureNP(r, buff, pitch, m_renderer->m_context->TEX0, m_renderer->m_env.TEXA);
m_renderer->m_mem.ReadTexture(r, buff, pitch, m_renderer->m_context->TEX0, m_renderer->m_env.TEXA);
m_texture->Update(r, buff, pitch);
}
m_renderer->m_perfmon.Put(GSPerfMon::Unswizzle, r.width() * r.height() * m_bpp >> 3);
m_renderer->m_perfmon.Put(GSPerfMon::Unswizzle, r.width() * r.height() * 4);
}
bool GSTextureCache::GSCachedTexture::GetDirtyRect(GSVector4i& rr)

View File

@ -38,6 +38,7 @@ public:
int m_age;
GSDirtyRectList m_dirty;
GIFRegTEX0 m_TEX0;
GIFRegTEXA m_TEXA;
explicit GSSurface(GSRenderer* r);
virtual ~GSSurface();
@ -79,7 +80,6 @@ public:
uint32* m_clut; // *
GSVector4i m_valid;
int m_bpp;
int m_bpp2;
bool m_rendered;
explicit GSCachedTexture(GSRenderer* renderer);

View File

@ -129,50 +129,13 @@ bool GSTextureCache10::GSCachedTextureHW10::Create()
// m_renderer->m_perfmon.Put(GSPerfMon::WriteTexture, 1);
m_TEX0 = m_renderer->m_context->TEX0;
m_TEXA = m_renderer->m_env.TEXA;
uint32 psm = m_TEX0.PSM;
switch(psm)
{
case PSM_PSMT8:
case PSM_PSMT8H:
case PSM_PSMT4:
case PSM_PSMT4HL:
case PSM_PSMT4HH:
psm = m_TEX0.CPSM;
break;
}
DXGI_FORMAT format;
switch(psm)
{
default:
// printf("Invalid TEX0.PSM/CPSM (%I64d, %I64d)\n", m_TEX0.PSM, m_TEX0.CPSM);
case PSM_PSMCT32:
m_bpp = 32;
m_bpp2 = 0;
format = DXGI_FORMAT_R8G8B8A8_UNORM;
break;
case PSM_PSMCT24:
m_bpp = 32;
m_bpp2 = 1;
format = DXGI_FORMAT_R8G8B8A8_UNORM;
break;
case PSM_PSMCT16:
case PSM_PSMCT16S:
m_bpp = 16;
m_bpp2 = 5;
format = DXGI_FORMAT_R16_UNORM;
break;
}
int w = 1 << m_TEX0.TW;
int h = 1 << m_TEX0.TH;
m_bpp = 0;
ASSERT(m_texture == NULL);
m_texture = m_renderer->m_dev->CreateTexture(w, h, format);
m_texture = m_renderer->m_dev->CreateTexture(1 << m_TEX0.TW, 1 << m_TEX0.TH);
return m_texture != NULL;
}
@ -186,6 +149,8 @@ bool GSTextureCache10::GSCachedTextureHW10::Create(GSRenderTarget* rt)
// m_renderer->m_perfmon.Put(GSPerfMon::ConvertRT2T, 1);
m_TEX0 = m_renderer->m_context->TEX0;
m_TEXA = m_renderer->m_env.TEXA;
m_rendered = true;
int tw = 1 << m_TEX0.TW;
@ -309,18 +274,18 @@ bool GSTextureCache10::GSCachedTextureHW10::Create(GSRenderTarget* rt)
switch(m_TEX0.PSM)
{
case PSM_PSMCT32:
m_bpp2 = 0;
m_bpp = 0;
break;
case PSM_PSMCT24:
m_bpp2 = 1;
m_bpp = 1;
break;
case PSM_PSMCT16:
case PSM_PSMCT16S:
m_bpp2 = 2;
m_bpp = 2;
break;
case PSM_PSMT8H:
m_bpp2 = 3;
m_palette = m_renderer->m_dev->CreateTexture(256, 1, m_TEX0.CPSM == PSM_PSMCT32 ? DXGI_FORMAT_R8G8B8A8_UNORM : DXGI_FORMAT_R16_UNORM); //
m_bpp = 3;
m_palette = m_renderer->m_dev->CreateTexture(256, 1);
m_initpalette = true;
break;
case PSM_PSMT4HL:

View File

@ -127,50 +127,13 @@ bool GSTextureCache9::GSCachedTexture9::Create()
// m_renderer->m_perfmon.Put(GSPerfMon::WriteTexture, 1);
m_TEX0 = m_renderer->m_context->TEX0;
m_TEXA = m_renderer->m_env.TEXA;
uint32 psm = m_TEX0.PSM;
switch(psm)
{
case PSM_PSMT8:
case PSM_PSMT8H:
case PSM_PSMT4:
case PSM_PSMT4HL:
case PSM_PSMT4HH:
psm = m_TEX0.CPSM;
break;
}
D3DFORMAT format;
switch(psm)
{
default:
// printf("Invalid TEX0.PSM/CPSM (%I64d, %I64d)\n", m_TEX0.PSM, m_TEX0.CPSM);
case PSM_PSMCT32:
m_bpp = 32;
m_bpp2 = 0;
format = D3DFMT_A8R8G8B8;
break;
case PSM_PSMCT24:
m_bpp = 32;
m_bpp2 = 1;
format = D3DFMT_A8R8G8B8;
break;
case PSM_PSMCT16:
case PSM_PSMCT16S:
m_bpp = 16;
m_bpp2 = 2;
format = D3DFMT_A1R5G5B5;
break;
}
int w = 1 << m_TEX0.TW;
int h = 1 << m_TEX0.TH;
m_bpp = 0;
ASSERT(m_texture == NULL);
m_texture = m_renderer->m_dev->CreateTexture(w, h, format);
m_texture = m_renderer->m_dev->CreateTexture(1 << m_TEX0.TW, 1 << m_TEX0.TH);
return m_texture != NULL;
}
@ -184,6 +147,8 @@ bool GSTextureCache9::GSCachedTexture9::Create(GSRenderTarget* rt)
// m_renderer->m_perfmon.Put(GSPerfMon::ConvertRT2T, 1);
m_TEX0 = m_renderer->m_context->TEX0;
m_TEXA = m_renderer->m_env.TEXA;
m_rendered = true;
int tw = 1 << m_TEX0.TW;
@ -307,18 +272,18 @@ bool GSTextureCache9::GSCachedTexture9::Create(GSRenderTarget* rt)
switch(m_TEX0.PSM)
{
case PSM_PSMCT32:
m_bpp2 = 0;
m_bpp = 0;
break;
case PSM_PSMCT24:
m_bpp2 = 1;
m_bpp = 1;
break;
case PSM_PSMCT16:
case PSM_PSMCT16S:
m_bpp2 = 2;
m_bpp = 2;
break;
case PSM_PSMT8H:
m_bpp2 = 3;
m_palette = m_renderer->m_dev->CreateTexture(256, 1, m_TEX0.CPSM == PSM_PSMCT32 ? D3DFMT_A8R8G8B8 : D3DFMT_A1R5G5B5);
m_bpp = 3;
m_palette = m_renderer->m_dev->CreateTexture(256, 1);
m_initpalette = true;
break;
case PSM_PSMT4HL:

View File

@ -51,7 +51,7 @@ const GSTextureCacheSW::GSTexture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TE
continue;
}
if((psm.trbpp == 16 || psm.trbpp == 24) && (t2->m_TEX0.TCC != TEX0.TCC || TEX0.TCC && TEXA != t2->m_TEXA))
if((psm.trbpp == 16 || psm.trbpp == 24) && TEX0.TCC && TEXA != t2->m_TEXA)
{
continue;
}

View File

@ -104,21 +104,19 @@ bool GSTextureFX10::SetupVS(VSSelector sel, const VSConstantBuffer* cb)
if(i == m_vs.end())
{
string str[5];
string str[4];
str[0] = format("%d", sel.bpp);
str[1] = format("%d", sel.bppz);
str[2] = format("%d", sel.tme);
str[3] = format("%d", sel.fst);
str[4] = format("%d", sel.prim);
str[0] = format("%d", sel.bppz);
str[1] = format("%d", sel.tme);
str[2] = format("%d", sel.fst);
str[3] = format("%d", sel.prim);
D3D10_SHADER_MACRO macro[] =
{
{"VS_BPP", str[0].c_str()},
{"VS_BPPZ", str[1].c_str()},
{"VS_TME", str[2].c_str()},
{"VS_FST", str[3].c_str()},
{"VS_PRIM", str[4].c_str()},
{"VS_BPPZ", str[0].c_str()},
{"VS_TME", str[1].c_str()},
{"VS_FST", str[2].c_str()},
{"VS_PRIM", str[3].c_str()},
{NULL, NULL},
};

View File

@ -63,7 +63,6 @@ public:
{
struct
{
uint32 bpp:3;
uint32 bppz:2;
uint32 tme:1;
uint32 fst:1;
@ -72,7 +71,7 @@ public:
uint32 key;
operator uint32() {return key & 0x3ff;}
operator uint32() {return key & 0x7f;}
};
__declspec(align(16)) struct PSConstantBuffer

View File

@ -293,6 +293,7 @@ void GSTextureFX9::UpdateOM(OMDepthStencilSelector dssel, OMBlendSelector bsel,
dss->StencilPassOp = dssel.fba ? D3DSTENCILOP_REPLACE : D3DSTENCILOP_KEEP;
dss->StencilFailOp = dssel.fba ? D3DSTENCILOP_ZERO : D3DSTENCILOP_KEEP;
dss->StencilDepthFailOp = dssel.fba ? D3DSTENCILOP_ZERO : D3DSTENCILOP_KEEP;
dss->StencilRef = 3;
}
if(!(dssel.zte && dssel.ztst == 1 && !dssel.zwe))
@ -315,7 +316,7 @@ void GSTextureFX9::UpdateOM(OMDepthStencilSelector dssel, OMBlendSelector bsel,
i = m_om_dss.find(dssel);
}
m_dev->OMSetDepthStencilState((*i).second, 3);
m_dev->OMSetDepthStencilState((*i).second);
hash_map<uint32, Direct3DBlendState9*>::const_iterator j = m_om_bs.find(bsel);

View File

@ -22,8 +22,7 @@ struct VS_OUTPUT
float4 c : COLOR0;
};
#ifndef VS_BPP
#define VS_BPP 0
#ifndef VS_BPPZ
#define VS_BPPZ 0
#define VS_TME 1
#define VS_FST 1
@ -193,23 +192,6 @@ struct PS_OUTPUT
#define LTF 1
#endif
float4 Normalize16(float4 f)
{
return f / float4(0x001f, 0x03e0, 0x7c00, 0x8000);
}
float4 Extract16(uint i)
{
float4 f;
f.r = i & 0x001f;
f.g = i & 0x03e0;
f.b = i & 0x7c00;
f.a = i & 0x8000;
return f;
}
float4 wrapuv(float4 uv)
{
if(WMS == WMT)
@ -318,7 +300,7 @@ float4 sample(float2 tc, float w)
float4 t00, t01, t10, t11;
if(BPP == 3) // 8HP + 32-bit palette
if(BPP == 3) // 8HP
{
float4 a;
@ -332,26 +314,6 @@ float4 sample(float2 tc, float w)
t10 = Palette.Sample(PaletteSampler, a.z);
t11 = Palette.Sample(PaletteSampler, a.w);
}
else if(BPP == 4) // 8HP + 16-bit palette
{
// TODO: yuck, just pre-convert the palette to 32-bit
}
else if(BPP == 5) // 16P
{
float4 r;
r.x = Texture.Sample(TextureSampler, uv.xy).r;
r.y = Texture.Sample(TextureSampler, uv.zy).r;
r.z = Texture.Sample(TextureSampler, uv.xw).r;
r.w = Texture.Sample(TextureSampler, uv.zw).r;
uint4 i = r * 65535;
t00 = Extract16(i.x);
t01 = Extract16(i.y);
t10 = Extract16(i.z);
t11 = Extract16(i.w);
}
else
{
t00 = Texture.Sample(TextureSampler, uv.xy);
@ -374,13 +336,8 @@ float4 sample(float2 tc, float w)
{
t.a = AEM == 0 || any(t.rgb) ? TA.x : 0;
}
else if(BPP == 2 || BPP == 5) // 16 || 16P
else if(BPP == 2) // 16
{
if(BPP == 5)
{
t = Normalize16(t);
}
// a bit incompatible with up-scaling because the 1 bit alpha is interpolated
t.a = t.a >= 0.5 ? TA.y : AEM == 0 || any(t.rgb) ? TA.x : 0;

View File

@ -219,7 +219,7 @@ float4 sample(float2 tc, float w)
float4 t00, t01, t10, t11;
if(BPP == 3) // 8HP ln
if(BPP == 3) // 8HP
{
float4 a;