mirror of https://github.com/PCSX2/pcsx2.git
gsdx: date/datm fix for 16-bit frame buffer in sw rendering mode
This commit is contained in:
parent
1f402b1b56
commit
a1a842b07f
|
@ -113,13 +113,13 @@ bool GPURenderer::Merge()
|
|||
|
||||
GSVector2i s = st[0]->GetSize();
|
||||
|
||||
GSVector4 sRect[2];
|
||||
GSVector4 dRect[2];
|
||||
GSVector4 sr[2];
|
||||
GSVector4 dr[2];
|
||||
|
||||
sRect[0] = GSVector4(0, 0, 1, 1);
|
||||
dRect[0] = GSVector4(0, 0, s.x, s.y);
|
||||
sr[0] = GSVector4(0, 0, 1, 1);
|
||||
dr[0] = GSVector4(0, 0, s.x, s.y);
|
||||
|
||||
m_dev->Merge(st, sRect, dRect, s, 1, 1, GSVector4(0, 0, 0, 1));
|
||||
m_dev->Merge(st, sr, dr, s, 1, 1, GSVector4(0, 0, 0, 1));
|
||||
|
||||
if(m_shadeboost)
|
||||
{
|
||||
|
|
|
@ -1148,7 +1148,8 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
|||
{
|
||||
if(sel.fpsm == 2)
|
||||
{
|
||||
test |= fd.srl32(15) == GSVector8i::zero();
|
||||
// test |= fd.srl32(15) == GSVector8i::zero();
|
||||
test |= fd.sll32(16).sra32(31) == GSVector8i::zero();
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -1159,7 +1160,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
|||
{
|
||||
if(sel.fpsm == 2)
|
||||
{
|
||||
test |= fd.sll32(16).sra32(31);
|
||||
test |= fd.sll32(16).sra32(31); // == GSVector8i::xffffffff();
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -2264,7 +2265,8 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
|||
{
|
||||
if(sel.fpsm == 2)
|
||||
{
|
||||
test |= fd.srl32(15) == GSVector4i::zero();
|
||||
// test |= fd.srl32(15) == GSVector4i::zero();
|
||||
test |= fd.sll32(16).sra32(31) == GSVector4i::zero();
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -2275,7 +2277,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
|||
{
|
||||
if(sel.fpsm == 2)
|
||||
{
|
||||
test |= fd.sll32(16).sra32(31);
|
||||
test |= fd.sll32(16).sra32(31); // == GSVector4i::xffffffff();
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
|
@ -1326,7 +1326,9 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha()
|
|||
if(m_sel.fpsm == 2)
|
||||
{
|
||||
vpxor(xmm0, xmm0);
|
||||
vpsrld(xmm1, xmm6, 15);
|
||||
//vpsrld(xmm1, xmm6, 15);
|
||||
vpslld(xmm1, xmm6, 16);
|
||||
vpsrad(xmm1, 31);
|
||||
vpcmpeqd(xmm1, xmm0);
|
||||
}
|
||||
else
|
||||
|
|
|
@ -2330,7 +2330,9 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha()
|
|||
if(m_sel.fpsm == 2)
|
||||
{
|
||||
vpxor(xmm0, xmm0);
|
||||
vpsrld(xmm1, xmm2, 15);
|
||||
//vpsrld(xmm1, xmm2, 15);
|
||||
vpslld(xmm1, xmm2, 16);
|
||||
vpsrad(xmm1, 31);
|
||||
vpcmpeqd(xmm1, xmm0);
|
||||
}
|
||||
else
|
||||
|
|
|
@ -2295,7 +2295,9 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha()
|
|||
if(m_sel.fpsm == 2)
|
||||
{
|
||||
vpxor(ymm0, ymm0);
|
||||
vpsrld(ymm1, ymm2, 15);
|
||||
//vpsrld(ymm1, ymm2, 15);
|
||||
vpslld(ymm1, ymm2, 16);
|
||||
vpsrad(ymm1, 31);
|
||||
vpcmpeqd(ymm1, ymm0);
|
||||
}
|
||||
else
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -108,6 +108,7 @@ class GSRendererCL : public GSRenderer
|
|||
uint32 noscissor:1; // 53
|
||||
uint32 tpsm:4; // 54
|
||||
uint32 aem:1; // 58
|
||||
uint32 merged:1; // 59
|
||||
// TODO
|
||||
};
|
||||
|
||||
|
@ -148,6 +149,7 @@ class GSRendererCL : public GSRenderer
|
|||
{
|
||||
GSVector4i scissor;
|
||||
GSVector4i dimx; // 4x4 signed char
|
||||
TFXSelector sel;
|
||||
uint32 fbp, zbp, bw;
|
||||
uint32 fm, zm;
|
||||
uint32 fog; // rgb
|
||||
|
@ -172,6 +174,7 @@ class GSRendererCL : public GSRenderer
|
|||
GSVector4i* src_pages; // read by any texture level
|
||||
GSVector4i* dst_pages; // f/z writes to it
|
||||
uint32 fbp, zbp, bw;
|
||||
uint32 fpsm, zpsm, tpsm;
|
||||
#ifdef DEBUG
|
||||
TFXParameter* pb;
|
||||
#endif
|
||||
|
@ -227,16 +230,23 @@ class GSRendererCL : public GSRenderer
|
|||
|
||||
void Enqueue();
|
||||
void EnqueueTFX(std::list<shared_ptr<TFXJob>>& jobs, uint32 bin_count, const cl_uchar4& bin_dim);
|
||||
void UpdateTextureCache(TFXJob* job);
|
||||
void JoinTFX(std::list<shared_ptr<TFXJob>>& jobs);
|
||||
bool UpdateTextureCache(TFXJob* job);
|
||||
void InvalidateTextureCache(TFXJob* job);
|
||||
void UsePages(uint32* pages);
|
||||
void ReleasePages(uint32* pages);
|
||||
|
||||
static void CL_CALLBACK ReleasePageEvent(cl_event event, cl_int event_command_exec_status, void* user_data);
|
||||
|
||||
protected:
|
||||
GSTexture* m_texture[2];
|
||||
uint8* m_output;
|
||||
|
||||
GSVector4i m_rw_pages[2][4]; // pages that may be read or modified by the rendering queue, f/z rw, tex r
|
||||
GSVector4i m_tc_pages[4]; // invalidated texture cache pages (split this into 8:24?)
|
||||
GSVector4i m_tmp_pages[4]; // TODO: this should be block level, too many overlaps inside pages with render targets
|
||||
GSVector4i m_tc_pages[4]; // invalidated texture cache pages (split this into 8:24?) // TODO: this should be block level, too many overlaps inside pages with render targets
|
||||
GSVector4i m_tmp_pages[4];
|
||||
uint32 m_tmp_pages2[MAX_PAGES + 1];
|
||||
uint32 m_rw_pages_rendering[512]; // pages that are currently in-use
|
||||
|
||||
void Reset();
|
||||
void VSync(int field);
|
||||
|
|
|
@ -66,6 +66,9 @@ GSState::GSState()
|
|||
//s_dump = 1;
|
||||
//s_save = 1;
|
||||
//s_savez = 1;
|
||||
//s_savet = 1;
|
||||
//s_savef = 1;
|
||||
//s_saven = 656;
|
||||
|
||||
UserHacks_WildHack = !!theApp.GetConfig("UserHacks", 0) ? theApp.GetConfig("UserHacks_WildHack", 0) : 0;
|
||||
m_crc_hack_level = theApp.GetConfig("crc_hack_level", 3);
|
||||
|
|
|
@ -224,7 +224,7 @@ bool GSUtil::CheckSSE()
|
|||
return true;
|
||||
}
|
||||
|
||||
#define OCL_PROGRAM_VERSION 1
|
||||
#define OCL_PROGRAM_VERSION 3
|
||||
|
||||
#ifdef ENABLE_OPENCL
|
||||
void GSUtil::GetDeviceDescs(list<OCLDeviceDesc>& dl)
|
||||
|
|
|
@ -1,3 +1,9 @@
|
|||
#if defined(CL_VERSION_2_0)
|
||||
|
||||
#error hello
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(CL_VERSION_1_1) || defined(CL_VERSION_1_2) // make safe to include in resource file to enforce dependency
|
||||
|
||||
#ifdef cl_amd_printf
|
||||
|
@ -26,6 +32,13 @@
|
|||
#error "MAX_PRIM_PER_BATCH != 32u OR 64u"
|
||||
#endif
|
||||
|
||||
#define TFX_ABA(sel) ((sel.x >> 24) & 3)
|
||||
#define TFX_ABB(sel) ((sel.x >> 26) & 3)
|
||||
#define TFX_ABC(sel) ((sel.x >> 28) & 3)
|
||||
#define TFX_ABD(sel) ((sel.x >> 30) & 3)
|
||||
#define TFX_WMS(sel) ((sel.y >> 8) & 3)
|
||||
#define TFX_WMT(sel) ((sel.y >> 10) & 3)
|
||||
|
||||
typedef struct
|
||||
{
|
||||
union {float4 p; struct {float x, y; uint z, f;};};
|
||||
|
@ -35,9 +48,9 @@ typedef struct
|
|||
typedef struct
|
||||
{
|
||||
gs_vertex v[3];
|
||||
uint zmin;
|
||||
uint zmin, zmax;
|
||||
uint pb_index;
|
||||
uint _pad[2];
|
||||
uint _pad;
|
||||
} gs_prim;
|
||||
|
||||
typedef struct
|
||||
|
@ -60,6 +73,7 @@ typedef struct
|
|||
{
|
||||
int4 scissor;
|
||||
char dimx[4][4];
|
||||
uint2 sel;
|
||||
int fbp, zbp, bw;
|
||||
uint fm, zm;
|
||||
uchar4 fog; // rgb
|
||||
|
@ -110,10 +124,10 @@ enum GS_TFX
|
|||
|
||||
enum GS_CLAMP
|
||||
{
|
||||
CLAMP_REPEAT = 0,
|
||||
CLAMP_CLAMP = 1,
|
||||
CLAMP_REGION_CLAMP = 2,
|
||||
CLAMP_REGION_REPEAT = 3,
|
||||
CLAMP_REGION_REPEAT = 0,
|
||||
CLAMP_REPEAT = 1,
|
||||
CLAMP_CLAMP = 2,
|
||||
CLAMP_REGION_CLAMP = 3,
|
||||
};
|
||||
|
||||
enum GS_ZTST
|
||||
|
@ -603,6 +617,7 @@ __kernel void KERNEL_PRIM(
|
|||
// only interpolate the relative to zmin and hopefully small values
|
||||
|
||||
uint zmin = min(min(v0->z, v1->z), v2->z);
|
||||
uint zmax = max(max(v0->z, v1->z), v2->z);
|
||||
|
||||
prim->v[0].p = (float4)(v0->p.x, v0->p.y, as_float(v0->z - zmin), v0->p.w);
|
||||
prim->v[0].tc = v0->tc;
|
||||
|
@ -612,6 +627,7 @@ __kernel void KERNEL_PRIM(
|
|||
prim->v[2].tc = v2->tc;
|
||||
|
||||
prim->zmin = zmin;
|
||||
prim->zmax = zmax;
|
||||
|
||||
float4 dp0 = v1->p - v0->p;
|
||||
float4 dp1 = v0->p - v2->p;
|
||||
|
@ -969,27 +985,34 @@ bool DestAlphaTest(uint fd)
|
|||
|
||||
int Wrap(int a, int b, int c, int mode)
|
||||
{
|
||||
switch(mode)
|
||||
if(MERGED)
|
||||
{
|
||||
case CLAMP_REPEAT:
|
||||
return a & b;
|
||||
case CLAMP_CLAMP:
|
||||
return clamp(a, 0, c);
|
||||
case CLAMP_REGION_CLAMP:
|
||||
return clamp(a, b, c);
|
||||
case CLAMP_REGION_REPEAT:
|
||||
return (a & b) | c;
|
||||
return select((a & b) | c, clamp(a, b, c), (mode & 2) != 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
switch(mode)
|
||||
{
|
||||
case CLAMP_REGION_REPEAT:
|
||||
return (a & b) | c;
|
||||
case CLAMP_REPEAT:
|
||||
return a & b;
|
||||
case CLAMP_CLAMP:
|
||||
return clamp(a, 0, c);
|
||||
case CLAMP_REGION_CLAMP:
|
||||
return clamp(a, b, c);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int4 AlphaBlend(int4 c, int afix, uint fd)
|
||||
int4 AlphaBlend(int4 c, uint fd, int afix, uint2 sel)
|
||||
{
|
||||
if(FWRITE && (ABE || AA1))
|
||||
{
|
||||
int4 cs = c;
|
||||
int4 cd;
|
||||
|
||||
if(ABA != ABB && (ABA == 1 || ABB == 1 || ABC == 1) || ABD == 1)
|
||||
if(ABA != ABB && (ABA == 1 || ABB == 1 || ABC == 1) || ABD == 1 || MERGED)
|
||||
{
|
||||
if(is32bit(FPSM) || is24bit(FPSM))
|
||||
{
|
||||
|
@ -1007,50 +1030,69 @@ int4 AlphaBlend(int4 c, int afix, uint fd)
|
|||
}
|
||||
}
|
||||
|
||||
if(ABA != ABB)
|
||||
if(MERGED)
|
||||
{
|
||||
switch(ABA)
|
||||
{
|
||||
case 0: break; // c.xyz = cs.xyz;
|
||||
case 1: c.xyz = cd.xyz; break;
|
||||
case 2: c.xyz = 0; break;
|
||||
}
|
||||
int aba = TFX_ABA(sel);
|
||||
int abb = TFX_ABB(sel);
|
||||
int abc = TFX_ABC(sel);
|
||||
int abd = TFX_ABD(sel);
|
||||
|
||||
switch(ABB)
|
||||
{
|
||||
case 0: c.xyz -= cs.xyz; break;
|
||||
case 1: c.xyz -= cd.xyz; break;
|
||||
case 2: break;
|
||||
}
|
||||
int ad = !is24bit(FPSM) ? cd.w : 0x80;
|
||||
|
||||
if(!(is24bit(FPSM) && ABC == 1))
|
||||
{
|
||||
int a = 0;
|
||||
int3 A = aba == 0 ? cs.xyz : aba == 1 ? cd.xyz : 0;
|
||||
int3 B = abb == 0 ? cs.xyz : abb == 1 ? cd.xyz : 0;
|
||||
int C = abc == 0 ? cs.w : abc == 1 ? ad : afix;
|
||||
int3 D = abd == 0 ? cs.xyz : abd == 1 ? cd.xyz : 0;
|
||||
|
||||
switch(ABC)
|
||||
{
|
||||
case 0: a = cs.w; break;
|
||||
case 1: a = cd.w; break;
|
||||
case 2: a = afix; break;
|
||||
}
|
||||
|
||||
c.xyz = c.xyz * a >> 7;
|
||||
}
|
||||
|
||||
switch(ABD)
|
||||
{
|
||||
case 0: c.xyz += cs.xyz; break;
|
||||
case 1: c.xyz += cd.xyz; break;
|
||||
case 2: break;
|
||||
}
|
||||
c.xyz = (mul24(A - B, C) >> 7) + D;
|
||||
}
|
||||
else
|
||||
{
|
||||
switch(ABD)
|
||||
if(ABA != ABB)
|
||||
{
|
||||
case 0: break;
|
||||
case 1: c.xyz = cd.xyz; break;
|
||||
case 2: c.xyz = 0; break;
|
||||
switch(ABA)
|
||||
{
|
||||
case 0: break; // c.xyz = cs.xyz;
|
||||
case 1: c.xyz = cd.xyz; break;
|
||||
case 2: c.xyz = 0; break;
|
||||
}
|
||||
|
||||
switch(ABB)
|
||||
{
|
||||
case 0: c.xyz -= cs.xyz; break;
|
||||
case 1: c.xyz -= cd.xyz; break;
|
||||
case 2: break;
|
||||
}
|
||||
|
||||
if(!(is24bit(FPSM) && ABC == 1))
|
||||
{
|
||||
int a = 0;
|
||||
|
||||
switch(ABC)
|
||||
{
|
||||
case 0: a = cs.w; break;
|
||||
case 1: a = cd.w; break;
|
||||
case 2: a = afix; break;
|
||||
}
|
||||
|
||||
c.xyz = c.xyz * a >> 7;
|
||||
}
|
||||
|
||||
switch(ABD)
|
||||
{
|
||||
case 0: c.xyz += cs.xyz; break;
|
||||
case 1: c.xyz += cd.xyz; break;
|
||||
case 2: break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
switch(ABD)
|
||||
{
|
||||
case 0: break;
|
||||
case 1: c.xyz = cd.xyz; break;
|
||||
case 2: c.xyz = 0; break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1150,8 +1192,6 @@ int4 SampleTexture(__global uchar* tex, __global gs_param* pb, float3 t)
|
|||
if(!FST)
|
||||
{
|
||||
uv = convert_int2_rte(t.xy * native_recip(t.z));
|
||||
|
||||
if(LTF) uv -= 0x0008;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -1167,15 +1207,17 @@ int4 SampleTexture(__global uchar* tex, __global gs_param* pb, float3 t)
|
|||
uv = convert_int2(t.xy);
|
||||
}
|
||||
|
||||
if(LTF) uv -= 0x0008;
|
||||
|
||||
int2 uvf = uv & 0x000f;
|
||||
|
||||
int2 uv0 = uv >> 4;
|
||||
int2 uv1 = uv0 + 1;
|
||||
|
||||
uv0.x = Wrap(uv0.x, pb->minu, pb->maxu, WMS);
|
||||
uv0.y = Wrap(uv0.y, pb->minv, pb->maxv, WMT);
|
||||
uv1.x = Wrap(uv1.x, pb->minu, pb->maxu, WMS);
|
||||
uv1.y = Wrap(uv1.y, pb->minv, pb->maxv, WMT);
|
||||
uv0.x = Wrap(uv0.x, pb->minu, pb->maxu, MERGED ? TFX_WMS(pb->sel) : WMS);
|
||||
uv0.y = Wrap(uv0.y, pb->minv, pb->maxv, MERGED ? TFX_WMT(pb->sel) : WMT);
|
||||
uv1.x = Wrap(uv1.x, pb->minu, pb->maxu, MERGED ? TFX_WMS(pb->sel) : WMS);
|
||||
uv1.y = Wrap(uv1.y, pb->minv, pb->maxv, MERGED ? TFX_WMT(pb->sel) : WMT);
|
||||
|
||||
int4 c00 = ReadTexel(tex, uv0.x, uv0.y, 0, pb);
|
||||
int4 c01 = ReadTexel(tex, uv1.x, uv0.y, 0, pb);
|
||||
|
@ -1356,6 +1398,11 @@ __kernel __attribute__((reqd_work_group_size(8, 8, 1))) void KERNEL_TFX(
|
|||
{
|
||||
// TODO: aa1: draw edge as a line
|
||||
|
||||
if(!ZTest(prim->zmax, zd))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
__global gs_barycentric* b = &barycentric[prim_index + i];
|
||||
|
||||
float3 f = b->dx.xyz * (pf.x - b->dx.w) + b->dy.xyz * (pf.y - b->dy.w) + (float3)(0, 0, 1);
|
||||
|
@ -1420,8 +1467,6 @@ __kernel __attribute__((reqd_work_group_size(8, 8, 1))) void KERNEL_TFX(
|
|||
|
||||
if(TFX != TFX_NONE)
|
||||
{
|
||||
tex = vm; // TODO: use the texture cache
|
||||
|
||||
ct = SampleTexture(tex, pb, t);
|
||||
}
|
||||
|
||||
|
@ -1515,7 +1560,7 @@ __kernel __attribute__((reqd_work_group_size(8, 8, 1))) void KERNEL_TFX(
|
|||
|
||||
// alpha blend
|
||||
|
||||
c = AlphaBlend(c, pb->afix, fd);
|
||||
c = AlphaBlend(c, fd, pb->afix, pb->sel);
|
||||
|
||||
// write frame
|
||||
|
||||
|
|
|
@ -123,8 +123,15 @@ using namespace std;
|
|||
#include <GL/wglext.h>
|
||||
#include "GLLoader.h"
|
||||
|
||||
#if _MSC_VER >= 1800
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#define hash_map unordered_map
|
||||
#define hash_set unordered_set
|
||||
#else
|
||||
#include <hash_map>
|
||||
#include <hash_set>
|
||||
#endif
|
||||
|
||||
using namespace stdext;
|
||||
|
||||
|
|
Loading…
Reference in New Issue