Merge pull request #628 from PCSX2/gsdx-depth-and-16-bits-support

Gsdx 1.0
This commit is contained in:
Gregory Hainaut 2015-07-01 20:50:09 +02:00
commit de644c5437
42 changed files with 1399 additions and 439 deletions

View File

@ -7,7 +7,7 @@ endif()
# plugin name
set(Output GSdx-0.1.16)
set(Output GSdx-1.0.0)
set(CommonFlags
-fno-operator-names # because Xbyak uses and()/xor()/or()/not() function

View File

@ -391,8 +391,7 @@ namespace GLLoader {
}
const char* vendor = (const char*)glGetString(GL_VENDOR);
fprintf(stderr, "Supported Opengl version: %s on GPU: %s. Vendor: %s\n", s, glGetString(GL_RENDERER), vendor);
fprintf(stderr, "Note: the maximum version supported by GSdx is 3.3 (even if you driver supports more)!\n");
fprintf(stderr, "OpenGL information. GPU: %s. Vendor: %s\n", glGetString(GL_RENDERER), vendor);
// Name change but driver is still bad!
if (strstr(vendor, "ATI") || strstr(vendor, "Advanced Micro Devices"))
@ -401,10 +400,14 @@ namespace GLLoader {
nvidia_buggy_driver = true;
if (strstr(vendor, "Intel"))
intel_buggy_driver = true;
if (strstr(vendor, "X.Org") || strstr(vendor, "nouveau")) // Note: it might actually catch nouveau too, but bug are likely to be the same anyway
if (strstr(vendor, "X.Org") || strstr(vendor, "nouveau")) // Note: it might actually catch nouveau too, but bugs are likely to be the same anyway
mesa_amd_buggy_driver = true;
if (strstr(vendor, "VMware")) // Assume worst case because I don't know the real status
mesa_amd_buggy_driver = intel_buggy_driver = true;
#ifdef _WINDOWS
if (intel_buggy_driver)
return false; // too much buggy no need to check anything.
#endif
GLuint dot = 0;
while (s[dot] != '\0' && s[dot] != '.') dot++;
@ -422,7 +425,7 @@ namespace GLLoader {
fprintf(stderr, "Overriding geometry shaders detection\n");
}
if ( (major_gl < major) || ( major_gl == major && minor_gl < minor ) ) {
fprintf(stderr, "OpenGL %d.%d is not supported\n", major, minor);
fprintf(stderr, "OpenGL %d.%d is not supported. Only OpenGL %d.%d\n was found", major, minor, major_gl, minor_gl);
return false;
}
@ -517,10 +520,10 @@ namespace GLLoader {
}
if (!found_GL_ARB_texture_barrier) {
if (theApp.GetConfig("accurate_blend", 1)) {
fprintf(stderr, "Error GL_ARB_texture_barrier is not supported by your driver so you can't enable accurate_blend! Sorry.\n");
theApp.SetConfig("accurate_blend", 0);
}
fprintf(stderr, "Error GL_ARB_texture_barrier is not supported by your driver. Accurate options will be disabled! Sorry!\n");
theApp.SetConfig("accurate_blend", 0);
theApp.SetConfig("accurate_colclip", 0);
theApp.SetConfig("accurate_fbmask", 0);
}
fprintf(stderr, "\n");

View File

@ -86,8 +86,8 @@ EXPORT_C_(const char*) PS2EgetLibName()
EXPORT_C_(uint32) PS2EgetLibVersion2(uint32 type)
{
const uint32 revision = 0;
const uint32 build = 1;
const uint32 revision = 1;
const uint32 build = 0;
return (build << 0) | (revision << 8) | (PS2E_GS_VERSION << 16) | (PLUGIN_VERSION << 24);
}
@ -194,13 +194,6 @@ EXPORT_C GSclose()
static int _GSopen(void** dsp, char* title, int renderer, int threads = -1)
{
// I really don't know the impact on windows! It could work
#ifdef __linux__
if (theApp.GetConfig("enable_nvidia_multi_thread", 1)) {
setenv("__GL_THREADED_OPTIMIZATIONS", "1", 0);
}
#endif
GSDevice* dev = NULL;
if(renderer == -1)

View File

@ -21,7 +21,7 @@
#pragma once
#define PLUGIN_VERSION 16
#define PLUGIN_VERSION 0
#define VM_SIZE 4194304
#define PAGE_SIZE 8192
@ -523,6 +523,7 @@ REG_END2
// opaque => output will be Cs/As
__forceinline bool IsOpaque() const {return ((A == B || (C == 2 && FIX == 0)) && D == 0) || (A == 0 && B == D && C == 2 && FIX == 0x80);}
__forceinline bool IsOpaque(int amin, int amax) const {return ((A == B || amax == 0) && D == 0) || (A == 0 && B == D && amin == 0x80 && amax == 0x80);}
__forceinline bool IsCd() { return (A == B) && (D == 1);}
REG_END2
REG64_(GIFReg, BITBLTBUF)

View File

@ -173,9 +173,6 @@ CRC::Game CRC::m_games[] =
{0xA32F7CD0, AceCombat4, US, 0},
{0x5ED8FB53, AceCombat4, JP, 0},
{0x1B9B7563, AceCombat4, EU, 0},
{0xEC432B24, Drakengard2, EU, 0},
{0x1648E3C9, Drakengard2, US, 0},
{0xB7ADB13A, Drakengard2, CH, 0},
{0xFC46EA61, Tekken5, JP, 0},
{0x1F88EE37, Tekken5, EU, 0},
{0x1F88BECD, Tekken5, EU, 0}, //language selector...
@ -296,9 +293,6 @@ CRC::Game CRC::m_games[] =
{0x90F0D852, BigMuthaTruckers, US, 0},
{0x5CC9BF81, TimeSplitters2, EU, 0},
{0x12532F1C, TimeSplitters2, US, 0},
{0xA33748AA, ReZ, US, 0},
{0xAE1152EB, ReZ, EU, 0},
{0xD2EA890A, ReZ, JP, 0},
{0xC818BEC2, LordOfTheRingsTwoTowers, US, 0},
{0xDC43F2B8, LordOfTheRingsTwoTowers, EU, 0},
{0x9ABF90FB, LordOfTheRingsTwoTowers, ES, 0},
@ -354,9 +348,6 @@ CRC::Game CRC::m_games[] =
// DMC(1)? {0x79B8A95F, DevilMayCry3, US, 0},
{0x7F3D692D, DevilMayCry3, CH, 0},
// {0x1A85E924, DevilMayCry3, CH, 0}, // same CRC as {GodOfWar, NoRegion}
{0x0a8ef911, ArctheLad, US, 0}, // cutie comment
{0x2C5E7DEA, ArctheLad, CH, 0},
{0xE69E7F58, ArctheLad, US, 0}, // cutie comment
{0xB1995E29, ShadowofRome, EU, 0}, // cutie comment
{0x958DCA28, ShadowofRome, EU, 0},
{0x57818AF6, ShadowofRome, US, 0},

View File

@ -61,7 +61,6 @@ public:
ResidentEvil4,
Spartan,
AceCombat4,
Drakengard2,
Tekken5,
IkkiTousen,
GodOfWar,
@ -97,11 +96,9 @@ public:
Sly3,
Sly2,
ShadowofRome,
ArctheLad,
DemonStone,
BigMuthaTruckers,
TimeSplitters2,
ReZ,
LordOfTheRingsTwoTowers,
LordOfTheRingsThirdAge,
RedDeadRevolver,

View File

@ -158,6 +158,15 @@ void GSDevice::Recycle(GSTexture* t)
{
if(t)
{
// FIXME: WARNING: The broken Texture Cache reuses render targets without any
// cleaning (or uploading of the correct GS memory data). Of course it is wrong. If
// blending is enabled, rendering would be completely broken. However,
// due to wrong invalidation of the TC, it is sometimes better to reuse
// (partially) wrong data...
//
// Invalidating the data might be even worse. I'm not sure invalidating data really
// helps performance, but people report better performance on BDG2 (memory intensive)
// on OpenGL. It could be the reason.
t->Invalidate();
t->last_frame_used = m_frame;

View File

@ -28,6 +28,14 @@
#pragma pack(push, 1)
// Constant buffer contents for the "convert" shaders.
// The constructor zero-fills the whole object, so every field starts at 0.
class ConvertConstantBuffer
{
public:
// Scaling factor made available to the convert shaders.
// NOTE(review): GSDeviceOGL::Create fills this with the upscale multiplier
// (1 when "nativeres" is set) and uploads it once — confirm whether other
// devices use it differently.
GSVector4i ScalingFactor;
// Clear every byte of the object (relies on the class being plain data).
ConvertConstantBuffer() {memset(this, 0, sizeof(*this));}
};
class MergeConstantBuffer
{
public:

View File

@ -179,6 +179,8 @@ public:
uint32 spritehack:1;
uint32 tcoffsethack:1;
uint32 point_sampler:1;
uint32 shuffle:1;
uint32 read_ba:1;
};
uint32 key;

View File

@ -39,6 +39,7 @@ static const uint32 g_merge_cb_index = 10;
static const uint32 g_interlace_cb_index = 11;
static const uint32 g_shadeboost_cb_index = 12;
static const uint32 g_fx_cb_index = 14;
static const uint32 g_convert_index = 15;
bool GSDeviceOGL::m_debug_gl_call = false;
int GSDeviceOGL::s_n = 0;
@ -103,6 +104,7 @@ GSDeviceOGL::~GSDeviceOGL()
delete m_convert.dss;
delete m_convert.dss_write;
delete m_convert.bs;
delete m_convert.cb;
// Clean m_fxaa
delete m_fxaa.cb;
@ -242,6 +244,12 @@ bool GSDeviceOGL::Create(GSWnd* wnd)
// ****************************************************************
// convert
// ****************************************************************
m_convert.cb = new GSUniformBufferOGL(g_convert_index, sizeof(ConvertConstantBuffer));
// Upload once and forget about it
ConvertConstantBuffer cb;
cb.ScalingFactor = GSVector4i(theApp.GetConfig("nativeres", 0) ? 1 : theApp.GetConfig("upscale_multiplier", 2));
m_convert.cb->upload(&cb);
m_convert.vs = m_shader->Compile("convert.glsl", "vs_main", GL_VERTEX_SHADER, convert_glsl);
for(size_t i = 0; i < countof(m_convert.ps); i++)
m_convert.ps[i] = m_shader->Compile("convert.glsl", format("ps_main%d", i), GL_FRAGMENT_SHADER, convert_glsl);
@ -647,6 +655,9 @@ GLuint GSDeviceOGL::CompilePS(PSSelector sel)
//+ format("#define PS_POINT_SAMPLER %d\n", sel.point_sampler)
+ format("#define PS_BLEND %d\n", sel.blend)
+ format("#define PS_IIP %d\n", sel.iip)
+ format("#define PS_SHUFFLE %d\n", sel.shuffle)
+ format("#define PS_READ_BA %d\n", sel.read_ba)
+ format("#define PS_FBMASK %d\n", sel.fbmask)
;
return m_shader->Compile("tfx.glsl", "ps_main", GL_FRAGMENT_SHADER, tfx_fs_all_glsl, macro);
@ -738,7 +749,7 @@ void GSDeviceOGL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture
return;
}
bool draw_in_depth = (ps == m_convert.ps[12]);
bool draw_in_depth = (ps == m_convert.ps[12] || ps == m_convert.ps[13]);
// Performance optimization. It might be faster to use a framebuffer blit for standard case
// instead to emulate it with shader
@ -996,7 +1007,6 @@ void GSDeviceOGL::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* ver
{
GL_PUSH("DATE First Pass");
GSTexture* t = NULL;
// sfex3 (after the capcom logo), vf4 (first menu fading in), ffxii shadows, rumble roses shadows, persona4 shadows
BeginScene();
@ -1016,7 +1026,7 @@ void GSDeviceOGL::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* ver
OMSetDepthStencilState(m_date.dss, 1);
OMSetBlendState(m_date.bs, 0);
// normally ok without any RT if GL_ARB_framebuffer_no_attachments is supported (minus driver bug)
OMSetRenderTargets(t, ds, &GLState::scissor);
OMSetRenderTargets(NULL, ds, &GLState::scissor);
OMSetColorMaskState(); // TODO: likely useless
// ia
@ -1035,9 +1045,7 @@ void GSDeviceOGL::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* ver
PSSetSamplerState(m_convert.pt);
}
OMSetWriteBuffer(GL_NONE);
DrawPrimitive();
OMSetWriteBuffer();
EndScene();
@ -1179,9 +1187,10 @@ void GSDeviceOGL::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVecto
if (rt == NULL || !RT->IsBackbuffer()) {
OMSetFBO(m_fbo);
if (rt) {
OMSetWriteBuffer();
OMAttachRt(RT);
} else {
// Note: NULL rt is only used in DATE so far.
OMSetWriteBuffer(GL_NONE);
OMAttachRt();
}

View File

@ -254,6 +254,7 @@ class GSDeviceOGL : public GSDevice
GSVector4 WH;
GSVector4 MinF_TA;
GSVector4i MskFix;
GSVector4i FbMask;
GSVector4 AlphaCoeff;
GSVector4 HalfTexel;
@ -263,13 +264,14 @@ class GSDeviceOGL : public GSDevice
PSConstantBuffer()
{
FogColor_AREF = GSVector4::zero();
HalfTexel = GSVector4::zero();
WH = GSVector4::zero();
MinMax = GSVector4::zero();
MinF_TA = GSVector4::zero();
MskFix = GSVector4i::zero();
AlphaCoeff = GSVector4::zero();
HalfTexel = GSVector4::zero();
WH = GSVector4::zero();
MinMax = GSVector4::zero();
MinF_TA = GSVector4::zero();
MskFix = GSVector4i::zero();
AlphaCoeff = GSVector4::zero();
TC_OffsetHack = GSVector4::zero();
FbMask = GSVector4i::zero();
}
__forceinline bool Update(const PSConstantBuffer* cb)
@ -279,7 +281,7 @@ class GSDeviceOGL : public GSDevice
// if WH matches both HalfTexel and TC_OffsetHack do too
// MinMax depends on WH and MskFix so no need to check it too
if(!((a[0] == b[0]) & (a[1] == b[1]) & (a[2] == b[2]) & (a[3] == b[3]) & (a[4] == b[4])).alltrue())
if(!((a[0] == b[0]) & (a[1] == b[1]) & (a[2] == b[2]) & (a[3] == b[3]) & (a[4] == b[4]) & (a[5] == b[5])).alltrue())
{
// Note previous check uses SSE already, a plain copy will be faster than any memcpy
a[0] = b[0];
@ -287,6 +289,7 @@ class GSDeviceOGL : public GSDevice
a[2] = b[2];
a[3] = b[3];
a[4] = b[4];
a[5] = b[5];
return true;
}
@ -322,14 +325,17 @@ class GSDeviceOGL : public GSDevice
uint32 wmt:2;
uint32 ltf:1;
uint32 ifmt:2;
uint32 shuffle:1;
uint32 read_ba:1;
uint32 _free1:2;
//uint32 _free1:0;
// Word 2
uint32 blend:8;
uint32 dfmt:2;
uint32 fbmask:1;
uint32 _free2:22;
uint32 _free2:21;
};
uint64 key;
@ -492,12 +498,13 @@ class GSDeviceOGL : public GSDevice
struct {
GLuint vs; // program object
GLuint ps[13]; // program object
GLuint ps[15]; // program object
GLuint ln; // sampler object
GLuint pt; // sampler object
GSDepthStencilOGL* dss;
GSDepthStencilOGL* dss_write;
GSBlendStateOGL* bs;
GSUniformBufferOGL* cb;
} m_convert;
struct {

View File

@ -295,22 +295,23 @@ void populate_hw_table(GtkWidget* hw_table)
GtkWidget* acc_blend_check = CreateCheckBox("Accurate Blend", "accurate_blend", true);
GtkWidget* acc_date_check = CreateCheckBox("Accurate Date", "accurate_date", false);
GtkWidget* acc_cclip_check = CreateCheckBox("Accurate Color Clipping", "accurate_colclip", false);
GtkWidget* MT_nvidia_check = CreateCheckBox("Nvidia Multi-Thread support", "enable_nvidia_multi_thread", true);
GtkWidget* acc_fbmsk_check = CreateCheckBox("Accurate FrameBuffer Mask", "accurate_fbmask", false);
GtkWidget* tc_depth_check = CreateCheckBox("Full Depth Emulation", "texture_cache_depth", true);
// Some helper string
gtk_widget_set_tooltip_text(paltex_check, dialog_message(IDC_PALTEX));
gtk_widget_set_tooltip_text(acc_blend_check, dialog_message(IDC_ACCURATE_BLEND));
gtk_widget_set_tooltip_text(acc_date_check, dialog_message(IDC_ACCURATE_DATE));
gtk_widget_set_tooltip_text(acc_cclip_check, dialog_message(IDC_ACCURATE_COLCLIP));
gtk_widget_set_tooltip_text(MT_nvidia_check, "Huge speedup on Nvidia binary driver! No effect otherwise.");
gtk_widget_set_tooltip_text(acc_fbmsk_check, dialog_message(IDC_ACCURATE_FBMASK));
gtk_widget_set_tooltip_text(crc_label, dialog_message(IDC_CRC_LEVEL));
gtk_widget_set_tooltip_text(crc_combo_box, dialog_message(IDC_CRC_LEVEL));
gtk_widget_set_tooltip_text(tc_depth_check, dialog_message(IDC_TC_DEPTH));
s_table_line = 0;
InsertWidgetInTable(hw_table, paltex_check, MT_nvidia_check);
InsertWidgetInTable(hw_table, paltex_check, tc_depth_check);
InsertWidgetInTable(hw_table, acc_blend_check, acc_date_check);
InsertWidgetInTable(hw_table, acc_cclip_check);
InsertWidgetInTable(hw_table, acc_cclip_check, acc_fbmsk_check);
InsertWidgetInTable(hw_table, filter_label, filter_combo_box);
InsertWidgetInTable(hw_table, af_label, af_combo_box);
InsertWidgetInTable(hw_table, crc_label, crc_combo_box);
@ -352,11 +353,12 @@ void populate_sw_table(GtkWidget* sw_table)
GtkWidget* threads_spin = CreateSpinButton(0, 32, "extrathreads", 0);
GtkWidget* aa_check = CreateCheckBox("Edge anti-aliasing (AA1)", "aa1");
GtkWidget* mipmap_check = CreateCheckBox("Mipmap", "mipmap", true);
GtkWidget* spin_thread_check= CreateCheckBox("Disable thread sleeping (6+ cores CPU)", "spin_thread");
s_table_line = 0;
InsertWidgetInTable(sw_table , threads_label , threads_spin);
InsertWidgetInTable(sw_table , aa_check);
InsertWidgetInTable(sw_table , aa_check, mipmap_check);
InsertWidgetInTable(sw_table , spin_thread_check , spin_thread_check);
}

View File

@ -50,6 +50,7 @@ protected:
bool m_shaderfx;
bool m_fxaa;
bool m_shadeboost;
bool m_texture_shuffle;
virtual GSTexture* GetOutput(int i) = 0;

View File

@ -47,8 +47,8 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
GSDrawingEnvironment& env = m_env;
GSDrawingContext* context = m_context;
const GSVector2i& rtsize = rt->GetSize();
const GSVector2& rtscale = rt->GetScale();
const GSVector2i& rtsize = ds->GetSize();
const GSVector2& rtscale = ds->GetScale();
bool DATE = m_context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24;
@ -199,7 +199,7 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
//The resulting shifted output aligns better with common blending / corona / blurring effects,
//but introduces a few bad pixels on the edges.
if(rt->LikelyOffset)
if(rt && rt->LikelyOffset)
{
// DX9 has pixelcenter set to 0.0, so give it some value here
@ -225,6 +225,87 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
GSDeviceDX::PSSamplerSelector ps_ssel;
GSDeviceDX::PSConstantBuffer ps_cb;
// Gregory: code is not yet ready so let's only enable it when
// CRC is below the FULL level
if (m_texture_shuffle && (m_crc_hack_level < 3)) {
ps_sel.shuffle = 1;
ps_sel.fmt = 0;
const GIFRegXYOFFSET& o = m_context->XYOFFSET;
GSVertex* v = &m_vertex.buff[0];
size_t count = m_vertex.next;
// vertex position is 8 to 16 pixels, therefore it is the 16-31 bits of the colors
int pos = (v[0].XYZ.X - o.OFX) & 0xFF;
bool write_ba = (pos > 112 && pos < 136);
// Read texture is 8 to 16 pixels (same as above)
int tex_pos = v[0].U & 0xFF;
ps_sel.read_ba = (tex_pos > 112 && tex_pos < 144);
GL_INS("Color shuffle %s => %s", ps_sel.read_ba ? "BA" : "RG", write_ba ? "BA" : "RG");
// Convert the vertex info to a 32 bits color format equivalent
for (size_t i = 0; i < count; i += 2) {
if (write_ba)
v[i].XYZ.X -= 128u;
else
v[i + 1].XYZ.X += 128u;
if (ps_sel.read_ba)
v[i].U -= 128u;
else
v[i + 1].U += 128u;
// Height is too big (2x).
int tex_offset = v[i].V & 0xF;
GSVector4i offset(o.OFY, tex_offset, o.OFY, tex_offset);
GSVector4i tmp(v[i].XYZ.Y, v[i].V, v[i + 1].XYZ.Y, v[i + 1].V);
tmp = GSVector4i(tmp - offset).srl32(1) + offset;
v[i].XYZ.Y = tmp.x;
v[i].V = tmp.y;
v[i + 1].XYZ.Y = tmp.z;
v[i + 1].V = tmp.w;
}
// Please bang my head against the wall!
// 1/ Reduce the frame mask to a 16 bit format
const uint32& m = context->FRAME.FBMSK;
uint32 fbmask = ((m >> 3) & 0x1F) | ((m >> 6) & 0x3E0) | ((m >> 9) & 0x7C00) | ((m >> 31) & 0x8000);
om_bsel.wrgba = 0;
// 2 Select the new mask (Please someone put SSE here)
if ((fbmask & 0xFF) == 0) {
if (write_ba)
om_bsel.wb = 1;
else
om_bsel.wr = 1;
}
else if ((fbmask & 0xFF) != 0xFF) {
fprintf(stderr, "Please fix me! wb %d wr %d\n", om_bsel.wb, om_bsel.wr);
//ASSERT(0);
}
fbmask >>= 8;
if ((fbmask & 0xFF) == 0) {
if (write_ba)
om_bsel.wa = 1;
else
om_bsel.wg = 1;
}
else if ((fbmask & 0xFF) != 0xFF) {
fprintf(stderr, "Please fix me! wa %d wg %d\n", om_bsel.wa, om_bsel.wg);
//ASSERT(0);
}
}
else {
//ps_sel.fmt = GSLocalMemory::m_psm[context->FRAME.PSM].fmt;
om_bsel.wrgba = ~GSVector4i::load((int)context->FRAME.FBMSK).eq8(GSVector4i::xffffffff()).mask();
}
if(DATE)
{
if(dev->HasStencil())
@ -245,7 +326,7 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
ps_sel.clr1 = om_bsel.IsCLR1();
ps_sel.fba = context->FBA.FBA;
ps_sel.aout = context->FRAME.PSM == PSM_PSMCT16 || context->FRAME.PSM == PSM_PSMCT16S || (context->FRAME.FBMSK & 0xff000000) == 0x7f000000 ? 1 : 0;
ps_sel.aout &= !ps_sel.shuffle;
if(UserHacks_AlphaHack) ps_sel.aout = 1;
if(PRIM->FGE)
@ -292,7 +373,11 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
ps_sel.wms = context->CLAMP.WMS;
ps_sel.wmt = context->CLAMP.WMT;
ps_sel.fmt = tex->m_palette? cpsm.fmt | 4 : cpsm.fmt;
if (ps_sel.shuffle) {
ps_sel.fmt = 0;
} else {
ps_sel.fmt = tex->m_palette ? cpsm.fmt | 4 : cpsm.fmt;
}
ps_sel.aem = env.TEXA.AEM;
ps_sel.tfx = context->TEX0.TFX;
ps_sel.tcc = context->TEX0.TCC;

View File

@ -231,6 +231,9 @@ void GSRendererDX9::SetupIA()
void GSRendererDX9::UpdateFBA(GSTexture* rt)
{
if (!rt)
return;
GSDevice9* dev = (GSDevice9*)m_dev;
dev->BeginScene();

View File

@ -161,7 +161,7 @@ GSTexture* GSRendererHW::GetOutput(int i)
GSTexture* t = NULL;
if(GSTextureCache::Target* rt = m_tc->LookupTarget(TEX0, m_width, m_height))
if(GSTextureCache::Target* rt = m_tc->LookupTarget(TEX0, m_width, m_height, GetFrameRect(i).bottom))
{
t = rt->m_texture;
@ -334,12 +334,18 @@ void GSRendererHW::Draw()
GSDrawingEnvironment& env = m_env;
GSDrawingContext* context = m_context;
// It is allowed to use the depth and rt at the same location. However at least 1 must
// be disabled. GoW uses a Cd blending on a 24 bits buffer (no alpha)
const bool no_rt = context->ALPHA.IsCd() && PRIM->ABE && (context->FRAME.PSM == 1);
GIFRegTEX0 TEX0;
TEX0.TBP0 = context->FRAME.Block();
TEX0.TBW = context->FRAME.FBW;
TEX0.PSM = context->FRAME.PSM;
GSTextureCache::Target* rt = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::RenderTarget, true);
GSTextureCache::Target* rt = no_rt ? NULL : m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::RenderTarget, true);
GSTexture* rt_tex = rt ? rt->m_texture : NULL;
TEX0.TBP0 = context->ZBUF.Block();
TEX0.TBW = context->FRAME.FBW;
@ -347,7 +353,7 @@ void GSRendererHW::Draw()
GSTextureCache::Target* ds = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, context->DepthWrite());
if(!rt || !ds)
if((!rt && !no_rt) || !ds)
{
GL_POP();
ASSERT(0);
@ -355,6 +361,7 @@ void GSRendererHW::Draw()
}
GSTextureCache::Source* tex = NULL;
m_texture_shuffle = false;
if(PRIM->TME)
{
@ -389,6 +396,13 @@ void GSRendererHW::Draw()
{
m_mem.m_clut.Read32(context->TEX0, env.TEXA);
}
if (rt) {
rt->m_32_bits_fmt |= tex->m_32_bits_fmt;
}
// Both input and output are 16 bits but texture was initially 32 bits!
m_texture_shuffle = ((context->FRAME.PSM & 0x2) && ((context->TEX0.PSM & 3) == 2) && (m_vt.m_primclass == GS_SPRITE_CLASS) && tex->m_32_bits_fmt);
ASSERT(!m_texture_shuffle || (context->CLAMP.WMS < 3 && context->CLAMP.WMT < 3));
}
if(s_dump)
@ -429,7 +443,8 @@ void GSRendererHW::Draw()
{
s = format("%05d_f%lld_rt0_%05x_%d.bmp", s_n, frame, context->FRAME.Block(), context->FRAME.PSM);
rt->m_texture->Save(root_hw+s);
if (rt)
rt->m_texture->Save(root_hw+s);
}
if(s_savez && s_n >= s_saven)
@ -447,7 +462,7 @@ void GSRendererHW::Draw()
#endif
}
if(m_hacks.m_oi && !(this->*m_hacks.m_oi)(rt->m_texture, ds->m_texture, tex))
if(m_hacks.m_oi && !(this->*m_hacks.m_oi)(rt_tex, ds->m_texture, tex))
{
s_n += 1; // keep counter sync
GL_POP();
@ -514,7 +529,7 @@ void GSRendererHW::Draw()
//
DrawPrims(rt->m_texture, ds->m_texture, tex);
DrawPrims(rt_tex, ds->m_texture, tex);
//
@ -526,11 +541,13 @@ void GSRendererHW::Draw()
GSVector4i r = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(GSVector4i(context->scissor.in));
if(fm != 0xffffffff)
if(fm != 0xffffffff && rt)
{
rt->m_valid = rt->m_valid.runion(r);
m_tc->InvalidateVideoMem(context->offset.fb, r, false);
m_tc->InvalidateVideoMemType(GSTextureCache::DepthStencil, context->FRAME.Block());
}
if(zm != 0xffffffff)
@ -538,6 +555,8 @@ void GSRendererHW::Draw()
ds->m_valid = ds->m_valid.runion(r);
m_tc->InvalidateVideoMem(context->offset.zb, r, false);
m_tc->InvalidateVideoMemType(GSTextureCache::RenderTarget, context->ZBUF.Block());
}
//
@ -557,7 +576,8 @@ void GSRendererHW::Draw()
{
s = format("%05d_f%lld_rt1_%05x_%d.bmp", s_n, frame, context->FRAME.Block(), context->FRAME.PSM);
rt->m_texture->Save(root_hw+s);
if (rt)
rt->m_texture->Save(root_hw+s);
}
if(s_savez && s_n >= s_saven)
@ -580,7 +600,8 @@ void GSRendererHW::Draw()
#ifdef DISABLE_HW_TEXTURE_CACHE
m_tc->Read(rt, r);
if (rt)
m_tc->Read(rt, r);
#endif
@ -632,12 +653,67 @@ void GSRendererHW::Hacks::SetGameCRC(const CRC::Game& game)
m_oo = m_oo_map[hash];
m_cu = m_cu_map[hash];
if(game.flags & CRC::PointListPalette)
{
if (game.flags & CRC::PointListPalette) {
ASSERT(m_oi == NULL);
m_oi = &GSRendererHW::OI_PointListPalette;
}
#if 0
// FIXME: Enable this code in the future. I think it could replace
// most of the "old" OI hack. So far code was tested on GoW2 & SimpsonsGame with
// success
if (m_oi == NULL) {
m_oi = &GSRendererHW::OI_DoubleHalfClear;
}
#endif
}
// OI hack: detects an untextured sprite "clear" where the frame buffer and the
// Z buffer are located close to each other in GS memory, and clears the buffer
// that sits at the lower address through the device.
//
// rt: render target texture, cleared when FRAME.FBP <= ZBUF.ZBP.
//     NOTE(review): Draw() may pass NULL here when no render target is used,
//     and ClearRenderTarget is called without a check — confirm it is safe.
// ds: depth texture, cleared when FRAME.FBP > ZBUF.ZBP.
// t:  source texture of the draw; not used by this hack.
//
// Always returns true, so the current draw call is never skipped.
bool GSRendererHW::OI_DoubleHalfClear(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
// Only consider untextured sprites with ZBUF.ZMSK cleared (presumably depth
// writes enabled — confirm) and a frame buffer width (FBW) of at least 7.
if (m_vt.m_primclass == GS_SPRITE_CLASS && !PRIM->TME && !m_context->ZBUF.ZMSK && (m_context->FRAME.FBW >= 7)) {
GSVertex* v = &m_vertex.buff[0];
//GL_INS("OI_DoubleHalfClear: psm:%x. Z:%d R:%d G:%d B:%d A:%d", m_context->FRAME.PSM,
// v[1].XYZ.Z, v[1].RGBAQ.R, v[1].RGBAQ.G, v[1].RGBAQ.B, v[1].RGBAQ.A);
// Check that it is a clear (Z and RGBA all zero). Only the second vertex
// of the first primitive is inspected.
if (v[1].XYZ.Z || v[1].RGBAQ.R || v[1].RGBAQ.G || v[1].RGBAQ.B || v[1].RGBAQ.A) {
return true;
}
// Only 32 bits formats are supported, otherwise it gets complicated.
// (Bit 1 of PSM is used as the "not a 32 bits format" test.)
if (m_context->FRAME.PSM & 2)
return true;
// FIXME might need some rounding
// In 32 bits, pages are 64x32 pixels. In theory, it must be something
// like FBW * 64 pixels * ratio / 32 pixels / 2 = FBW * ratio
// It is hard to predict the ratio, so I round it to 1. And I use
// a <= comparison below.
uint32 h_pages = m_context->FRAME.FBW;
uint32 base;
uint32 half;
// base = the lower of the two buffer pointers, half = the higher one.
if (m_context->FRAME.FBP > m_context->ZBUF.ZBP) {
base = m_context->ZBUF.ZBP;
half = m_context->FRAME.FBP;
} else {
base = m_context->FRAME.FBP;
half = m_context->ZBUF.ZBP;
}
// The higher buffer must start within h_pages * FBW of the lower one.
if (half <= (base + h_pages * m_context->FRAME.FBW)) {
//GL_INS("OI_DoubleHalfClear: base %x half %x. h_pages %d fbw %d", base, half, h_pages, m_context->FRAME.FBW);
// Clear whichever buffer is located at the lower address (base).
if (m_context->FRAME.FBP > m_context->ZBUF.ZBP) {
m_dev->ClearDepth(ds, 0);
} else {
m_dev->ClearRenderTarget(rt, 0);
}
// Don't return false, it will break the rendering. I guess that it misses texture
// invalidation
//return false;
}
}
return true;
}
// OI (others input?/implementation?) hacks replace current draw call

View File

@ -46,6 +46,7 @@ private:
typedef void (GSRendererHW::*OO_Ptr)();
typedef bool (GSRendererHW::*CU_Ptr)();
bool OI_DoubleHalfClear(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
bool OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
bool OI_FFX(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
bool OI_MetalSlug6(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);

View File

@ -32,6 +32,7 @@ GSRendererOGL::GSRendererOGL()
m_accurate_blend = theApp.GetConfig("accurate_blend", 1);
m_accurate_date = theApp.GetConfig("accurate_date", 0);
m_accurate_colclip = theApp.GetConfig("accurate_colclip", 0);
m_accurate_fbmask = theApp.GetConfig("accurate_fbmask", 0);
UserHacks_AlphaHack = theApp.GetConfig("UserHacks_AlphaHack", 0);
UserHacks_AlphaStencil = theApp.GetConfig("UserHacks_AlphaStencil", 0);
@ -217,13 +218,13 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
{
GL_PUSH("GL Draw from %d in %d (Depth %d)",
tex && tex->m_texture ? tex->m_texture->GetID() : 0,
rt->GetID(), ds->GetID());
rt ? rt->GetID() : -1, ds->GetID());
GSDrawingEnvironment& env = m_env;
GSDrawingContext* context = m_context;
const GSVector2i& rtsize = rt->GetSize();
const GSVector2& rtscale = rt->GetScale();
const GSVector2i& rtsize = ds->GetSize();
const GSVector2& rtscale = ds->GetScale();
bool DATE = m_context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24;
bool DATE_GL42 = false;
@ -247,8 +248,163 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
GSDeviceOGL::OMColorMaskSelector om_csel;
GSDeviceOGL::OMDepthStencilSelector om_dssel;
if (m_texture_shuffle) {
ps_sel.shuffle = 1;
ps_sel.dfmt = 0;
const GIFRegXYOFFSET& o = m_context->XYOFFSET;
GSVertex* v = &m_vertex.buff[0];
size_t count = m_vertex.next;
// vertex position is 8 to 16 pixels, therefore it is the 16-31 bits of the colors
int pos = (v[0].XYZ.X - o.OFX) & 0xFF;
bool write_ba = (pos > 112 && pos < 136);
// Read texture is 8 to 16 pixels (same as above)
float tw = (float)(1u << context->TEX0.TW);
int tex_pos = (PRIM->FST) ? v[0].U : tw * v[0].ST.S;
tex_pos &= 0xFF;
ps_sel.read_ba = (tex_pos > 112 && tex_pos < 144);
// Convert the vertex info to a 32 bits color format equivalent
if (PRIM->FST) {
GL_INS("First vertex is P: %d => %d T: %d => %d", v[0].XYZ.X, v[1].XYZ.X, v[0].U, v[1].U);
for(size_t i = 0; i < count; i += 2) {
if (write_ba)
v[i].XYZ.X -= 128u;
else
v[i+1].XYZ.X += 128u;
if (ps_sel.read_ba)
v[i].U -= 128u;
else
v[i+1].U += 128u;
// Height is too big (2x).
int tex_offset = v[i].V & 0xF;
GSVector4i offset(o.OFY, tex_offset, o.OFY, tex_offset);
GSVector4i tmp(v[i].XYZ.Y, v[i].V, v[i+1].XYZ.Y, v[i+1].V);
tmp = GSVector4i(tmp - offset).srl32(1) + offset;
v[i].XYZ.Y = tmp.x;
v[i].V = tmp.y;
v[i+1].XYZ.Y = tmp.z;
v[i+1].V = tmp.w;
}
} else {
const float offset_8pix = 8.0f / tw;
GL_INS("First vertex is P: %d => %d T: %f => %f (offset %f)", v[0].XYZ.X, v[1].XYZ.X, v[0].ST.S, v[1].ST.S, offset_8pix);
for(size_t i = 0; i < count; i += 2) {
if (write_ba)
v[i].XYZ.X -= 128u;
else
v[i+1].XYZ.X += 128u;
if (ps_sel.read_ba)
v[i].ST.S -= offset_8pix;
else
v[i+1].ST.S += offset_8pix;
// Height is too big (2x).
GSVector4i offset(o.OFY, o.OFY);
GSVector4i tmp(v[i].XYZ.Y, v[i+1].XYZ.Y);
tmp = GSVector4i(tmp - offset).srl32(1) + offset;
//fprintf(stderr, "Before %d, After %d\n", v[i+1].XYZ.Y, tmp.y);
v[i].XYZ.Y = tmp.x;
v[i].ST.T /= 2.0f;
v[i+1].XYZ.Y = tmp.y;
v[i+1].ST.T /= 2.0f;
}
}
// Please bang my head against the wall!
// 1/ Reduce the frame mask to a 16 bit format
const uint32& m = context->FRAME.FBMSK;
uint32 fbmask = ((m >> 3) & 0x1F) | ((m >> 6) & 0x3E0) | ((m >> 9) & 0x7C00) | ((m >> 31) & 0x8000);
// FIXME GSVector will be nice here
uint8 rg_mask = fbmask & 0xFF;
uint8 ba_mask = (fbmask >> 8) & 0xFF;
om_csel.wrgba = 0;
// 2 Select the new mask (Please someone put SSE here)
if (rg_mask != 0xFF) {
if (write_ba) {
GL_INS("Color shuffle %s => B", ps_sel.read_ba ? "B" : "R");
om_csel.wb = 1;
} else {
GL_INS("Color shuffle %s => R", ps_sel.read_ba ? "B" : "R");
om_csel.wr = 1;
}
if (rg_mask)
ps_sel.fbmask = 1;
}
if (ba_mask != 0xFF) {
if (write_ba) {
GL_INS("Color shuffle %s => A", ps_sel.read_ba ? "A" : "G");
om_csel.wa = 1;
} else {
GL_INS("Color shuffle %s => G", ps_sel.read_ba ? "A" : "G");
om_csel.wg = 1;
}
if (ba_mask)
ps_sel.fbmask = 1;
}
ps_sel.fbmask &= m_accurate_fbmask;
if (ps_sel.fbmask) {
GL_INS("FBMASK SW emulated fb_mask:%x on tex shuffle", fbmask);
ps_cb.FbMask.r = rg_mask;
ps_cb.FbMask.g = rg_mask;
ps_cb.FbMask.b = ba_mask;
ps_cb.FbMask.a = ba_mask;
require_barrier = true;
dev->PSSetShaderResource(3, rt);
}
} else {
ps_sel.dfmt = GSLocalMemory::m_psm[context->FRAME.PSM].fmt;
om_csel.wrgba = ~GSVector4i::load((int)context->FRAME.FBMSK).eq8(GSVector4i::xffffffff()).mask();
{
// FIXME GSVector will be nice here
uint8 r_mask = (context->FRAME.FBMSK >> 0) & 0xFF;
uint8 g_mask = (context->FRAME.FBMSK >> 8) & 0xFF;
uint8 b_mask = (context->FRAME.FBMSK >> 16) & 0xFF;
uint8 a_mask = (context->FRAME.FBMSK >> 24) & 0xFF;
if (r_mask != 0 && r_mask != 0xFF) {
ps_sel.fbmask = 1;
}
if (g_mask != 0 && g_mask != 0xFF) {
ps_sel.fbmask = 1;
}
if (b_mask != 0 && b_mask != 0xFF) {
ps_sel.fbmask = 1;
}
if (a_mask != 0 && a_mask != 0xFF) {
ps_sel.fbmask = 1;
}
ps_sel.fbmask &= m_accurate_fbmask;
if (ps_sel.fbmask) {
GL_INS("FBMASK SW emulated fb_mask:%x on %d bits format", context->FRAME.FBMSK,
(GSLocalMemory::m_psm[context->FRAME.PSM].fmt == 2) ? 16 : 32);
ps_cb.FbMask.r = r_mask;
ps_cb.FbMask.g = g_mask;
ps_cb.FbMask.b = b_mask;
ps_cb.FbMask.a = a_mask;
require_barrier = true;
dev->PSSetShaderResource(3, rt);
}
}
}
// Format of the output
ps_sel.dfmt = GSLocalMemory::m_psm[context->FRAME.PSM].fmt;
GIFRegALPHA ALPHA = context->ALPHA;
float afix = (float)context->ALPHA.FIX / 0x80;
@ -285,7 +441,6 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
}
}
om_csel.wrgba = ~GSVector4i::load((int)context->FRAME.FBMSK).eq8(GSVector4i::xffffffff()).mask();
if (ps_sel.dfmt == 1) {
if (ALPHA.C == 1) {
// 24 bits no alpha channel so use 1.0f fix factor as equivalent
@ -425,7 +580,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
//The resulting shifted output aligns better with common blending / corona / blurring effects,
//but introduces a few bad pixels on the edges.
if (rt->LikelyOffset)
if (rt && rt->LikelyOffset)
{
ox2 *= rt->OffsetHack_modx;
oy2 *= rt->OffsetHack_mody;
@ -471,7 +626,9 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
}
ps_sel.fba = context->FBA.FBA;
// TODO deprecate this stuff
ps_sel.aout = context->FRAME.PSM == PSM_PSMCT16 || context->FRAME.PSM == PSM_PSMCT16S || (context->FRAME.FBMSK & 0xff000000) == 0x7f000000 ? 1 : 0;
ps_sel.aout &= !ps_sel.shuffle;
if (UserHacks_AlphaHack) ps_sel.aout = 1;
@ -524,7 +681,10 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
ps_sel.wms = context->CLAMP.WMS;
ps_sel.wmt = context->CLAMP.WMT;
if (tex->m_palette) {
if (ps_sel.shuffle) {
ps_sel.fmt = 0;
} else if (tex->m_palette) {
ps_sel.fmt = cpsm.fmt | 4;
ps_sel.ifmt = !tex->m_target ? 0
: (context->TEX0.PSM == PSM_PSMT4HL) ? 2
@ -619,9 +779,9 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
int blend_sel = ((om_bsel.a * 3 + om_bsel.b) * 3 + om_bsel.c) * 3 + om_bsel.d;
int bogus_blend = GSDeviceOGL::m_blendMapD3D9[blend_sel].bogus;
bool all_sw = !( (ALPHA.A == ALPHA.B) || (ALPHA.C == 2 && afix <= 1.002f) ) && (m_accurate_blend > 1);
bool sw_blending = (m_accurate_blend && (bogus_blend & A_MAX)) || acc_colclip_wrap || all_sw;
bool sw_blending = (m_accurate_blend && (bogus_blend & A_MAX)) || acc_colclip_wrap || all_sw || ps_sel.fbmask;
if (sw_blending && om_bsel.abe) {
if (sw_blending && om_bsel.abe && rt) {
GL_INS("!!! SW blending effect used (0x%x from sel %d) !!!", bogus_blend, blend_sel);
// select a shader that support blending
@ -631,7 +791,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
// Requires the fixed alpha value
if (ALPHA.C == 2) {
ps_cb.AlphaCoeff = GSVector4(afix);
ps_cb.AlphaCoeff.a = afix;
}
// No need to flush for every primitive

View File

@ -34,6 +34,7 @@ class GSRendererOGL : public GSRendererHW
int m_accurate_blend;
bool m_accurate_date;
bool m_accurate_colclip;
bool m_accurate_fbmask;
bool UserHacks_AlphaHack;
bool UserHacks_AlphaStencil;

View File

@ -516,6 +516,9 @@ void GSRendererSW::Draw()
Sync(2);
uint64 frame = m_perfmon.GetFrame();
// Dump the texture in 32-bit format. It helps to debug the texture shuffle effect.
// It will break the few games that really use a 16-bit RT.
bool texture_shuffle = ((context->FRAME.PSM & 0x2) && ((context->TEX0.PSM & 3) == 2) && (m_vt.m_primclass == GS_SPRITE_CLASS));
string s;
@ -529,18 +532,34 @@ void GSRendererSW::Draw()
if(s_savet && s_n >= s_saven && PRIM->TME)
{
s = format("%05d_f%lld_tex_%05x_%d.bmp", s_n, frame, (int)m_context->TEX0.TBP0, (int)m_context->TEX0.PSM);
if (texture_shuffle) {
// Dump the RT in 32 bits format. It helps to debug texture shuffle effect
s = format("%05d_f%lld_tex_%05x_32bits.bmp", s_n, frame, (int)m_context->TEX0.TBP0);
m_mem.SaveBMP(root_sw+s, m_context->TEX0.TBP0, m_context->TEX0.TBW, 0, 1 << m_context->TEX0.TW, 1 << m_context->TEX0.TH);
} else {
s = format("%05d_f%lld_tex_%05x_%d.bmp", s_n, frame, (int)m_context->TEX0.TBP0, (int)m_context->TEX0.PSM);
m_mem.SaveBMP(root_sw+s, m_context->TEX0.TBP0, m_context->TEX0.TBW, m_context->TEX0.PSM, 1 << m_context->TEX0.TW, 1 << m_context->TEX0.TH);
}
m_mem.SaveBMP(root_sw+s, m_context->TEX0.TBP0, m_context->TEX0.TBW, m_context->TEX0.PSM, 1 << m_context->TEX0.TW, 1 << m_context->TEX0.TH);
}
s_n++;
if(s_save && s_n >= s_saven)
{
s = format("%05d_f%lld_rt0_%05x_%d.bmp", s_n, frame, m_context->FRAME.Block(), m_context->FRAME.PSM);
m_mem.SaveBMP(root_sw+s, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, GetFrameRect().width(), 512);
if (texture_shuffle) {
// Dump the RT in 32 bits format. It helps to debug texture shuffle effect
s = format("%05d_f%lld_rt0_%05x_32bits.bmp", s_n, frame, m_context->FRAME.Block());
m_mem.SaveBMP(root_sw+s, m_context->FRAME.Block(), m_context->FRAME.FBW, 0, GetFrameRect().width(), 512);
} else {
s = format("%05d_f%lld_rt0_%05x_%d.bmp", s_n, frame, m_context->FRAME.Block(), m_context->FRAME.PSM);
m_mem.SaveBMP(root_sw+s, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, GetFrameRect().width(), 512);
}
}
if(s_savez && s_n >= s_saven)
@ -558,9 +577,16 @@ void GSRendererSW::Draw()
if(s_save && s_n >= s_saven)
{
s = format("%05d_f%lld_rt1_%05x_%d.bmp", s_n, frame, m_context->FRAME.Block(), m_context->FRAME.PSM);
if (texture_shuffle) {
// Dump the RT in 32 bits format. It helps to debug texture shuffle effect
s = format("%05d_f%lld_rt1_%05x_32bits.bmp", s_n, frame, m_context->FRAME.Block());
m_mem.SaveBMP(root_sw+s, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, GetFrameRect().width(), 512);
m_mem.SaveBMP(root_sw+s, m_context->FRAME.Block(), m_context->FRAME.FBW, 0, GetFrameRect().width(), 512);
} else {
s = format("%05d_f%lld_rt1_%05x_%d.bmp", s_n, frame, m_context->FRAME.Block(), m_context->FRAME.PSM);
m_mem.SaveBMP(root_sw+s, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, GetFrameRect().width(), 512);
}
}
if(s_savez && s_n >= s_saven)

View File

@ -69,7 +69,8 @@ const char* dialog_message(int ID, bool* updateText) {
case IDC_MSAACB:
case IDC_STATIC_MSAA:
return "Multisample Anti-Aliasing\n\nEnables hardware Anti-Aliasing. Needs lots of memory."
" The Z-24 modes might need to have LogarithmicZ to compensate for the bits lost (only in DX9 mode).";
" The Z-24 modes might need to have LogarithmicZ to compensate for the bits lost (only in DX9 mode).\n\n"
" MSAA is not implemented on the OpenGL renderer";
case IDC_AGGRESSIVECRC:
return "Use more aggressive CRC hacks on some games\n\n"
"Only affects few games, removing some effects which might make the image sharper/clearer.\n"
@ -107,18 +108,23 @@ const char* dialog_message(int ID, bool* updateText) {
#ifdef __linux__
case IDC_PALTEX:
return "When checked 4/8 bits texture will be send to the GPU with a palette. GPU will be in charge of the conversion. "
"(Note it was never tested on openGL)\n\n"
"(Note it was never tested on OpenGL)\n\n"
"When uncheked the CPU will convert directly the texture to 32 bits\n\n"
"It is a basically a trade-off between GPU/CPU";
case IDC_ACCURATE_DATE:
return "Implement a more accurate algorithm to compute GS destination alpha testing.\n\n"
"It could be slower when the effects are used.\n\nNote: it requires the 4.2 openGL extension GL_ARB_shader_image_load_store";
"It could be slower when the effects are used.\n\nNote: it requires the 4.2 OpenGL extension GL_ARB_shader_image_load_store";
case IDC_ACCURATE_BLEND:
return "Allow to solve the impossible blending error message.\n\n"
"It could be slower when the effect are used.\n\nNote: it requires the 4.5 openGL extension GL_ARB_texture_barrier";
"It could be slower when the effect are used.\n\nNote: it requires the 4.5 OpenGL extension GL_ARB_texture_barrier";
case IDC_ACCURATE_COLCLIP:
return "Debug option to implement the wrapping of color after an overflow\n\n"
"It will be slow when the effect are used!\n\nNote: it requires the 4.5 openGL extension GL_ARB_texture_barrier";
return "Implement the wrapping of color after an overflow\n\n"
"It will be slow (half speed) when the effect are used!\n\nNote: it requires the 4.5 OpenGL extension GL_ARB_texture_barrier";
case IDC_ACCURATE_FBMASK:
return "Implement partial color masking\n\n"
"No status yet on the speed impact\n\nNote: it requires the 4.5 OpenGL extension GL_ARB_texture_barrier";
case IDC_TC_DEPTH:
return "Allow to convert Depth buffer from/to Color buffer. It is used for blur & depth of field effects";
#endif
default:
if (updateText)

View File

@ -66,6 +66,8 @@ enum {
IDC_ACCURATE_BLEND,
IDC_ACCURATE_DATE,
IDC_ACCURATE_COLCLIP,
IDC_ACCURATE_FBMASK,
IDC_TC_DEPTH,
IDC_CRC_LEVEL
};
#endif

View File

@ -197,6 +197,7 @@ void GSSettingsDlg::OnInit()
CheckDlgButton(m_hWnd, IDC_ACCURATE_BLEND, theApp.GetConfig("accurate_blend", 1));
CheckDlgButton(m_hWnd, IDC_ACCURATE_DATE, theApp.GetConfig("accurate_date", 0));
CheckDlgButton(m_hWnd, IDC_ACCURATE_COLCLIP, theApp.GetConfig("accurate_colclip", 0));
CheckDlgButton(m_hWnd, IDC_TC_DEPTH, theApp.GetConfig("texture_cache_depth", 0));
// Shade Boost
CheckDlgButton(m_hWnd, IDC_SHADEBOOST, theApp.GetConfig("ShadeBoost", 0));
@ -334,7 +335,8 @@ bool GSSettingsDlg::OnCommand(HWND hWnd, UINT id, UINT code)
theApp.SetConfig("accurate_blend", (int)IsDlgButtonChecked(m_hWnd, IDC_ACCURATE_BLEND));
theApp.SetConfig("accurate_date", (int)IsDlgButtonChecked(m_hWnd, IDC_ACCURATE_DATE));
theApp.SetConfig("accurate_colclip", (int)IsDlgButtonChecked(m_hWnd, IDC_ACCURATE_COLCLIP));
theApp.SetConfig("texture_cache_depth", (int)IsDlgButtonChecked(m_hWnd, IDC_TC_DEPTH));
// Shade Boost
theApp.SetConfig("ShadeBoost", (int)IsDlgButtonChecked(m_hWnd, IDC_SHADEBOOST));
@ -438,6 +440,8 @@ void GSSettingsDlg::UpdateControls()
EnableWindow(GetDlgItem(m_hWnd, IDC_ACCURATE_BLEND), ogl && hw);
EnableWindow(GetDlgItem(m_hWnd, IDC_ACCURATE_DATE), ogl && hw);
EnableWindow(GetDlgItem(m_hWnd, IDC_ACCURATE_COLCLIP), ogl && hw);
EnableWindow(GetDlgItem(m_hWnd, IDC_TC_DEPTH), ogl && hw);
//EnableWindow(GetDlgItem(m_hWnd, IDC_AA1), sw); // Let users set software params regardless of renderer used
//EnableWindow(GetDlgItem(m_hWnd, IDC_SWTHREADS_EDIT), sw);
//EnableWindow(GetDlgItem(m_hWnd, IDC_SWTHREADS), sw);

View File

@ -1486,6 +1486,11 @@ void GSState::Write(const uint8* mem, int len)
return;
}
GL_CACHE("Write! ... => 0x%x W:%d F:%d (DIR %d%d), dPos(%d %d) size(%d %d)",
m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, m_env.BITBLTBUF.DPSM,
m_env.TRXPOS.DIRX, m_env.TRXPOS.DIRY,
m_env.TRXPOS.DSAX, m_env.TRXPOS.DSAY, w, h);
if(PRIM->TME && (m_env.BITBLTBUF.DBP == m_context->TEX0.TBP0 || m_env.BITBLTBUF.DBP == m_context->TEX0.CBP)) // TODO: hmmmm
{
FlushPrim();
@ -1602,7 +1607,7 @@ void GSState::Move()
int w = m_env.TRXREG.RRW;
int h = m_env.TRXREG.RRH;
GL_CACHE("Move! %05x %d %d => %05x %d %d (DIR %d%d), sPos(%d %d) dPos(%d %d) size(%d %d)",
GL_CACHE("Move! 0x%x W:%d F:%d => 0x%x W:%d F:%d (DIR %d%d), sPos(%d %d) dPos(%d %d) size(%d %d)",
m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW, m_env.BITBLTBUF.SPSM,
m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, m_env.BITBLTBUF.DPSM,
m_env.TRXPOS.DIRX, m_env.TRXPOS.DIRY,
@ -3040,6 +3045,7 @@ bool GSC_Okami(const GSFrameInfo& fi, int& skip)
bool GSC_MetalGearSolid3(const GSFrameInfo& fi, int& skip)
{
// Game requires sub RT support (texture cache limitation)
if(skip == 0)
{
if(fi.TME && fi.FBP == 0x02000 && fi.FPSM == PSM_PSMCT32 && (fi.TBP0 == 0x00000 || fi.TBP0 == 0x01000) && fi.TPSM == PSM_PSMCT24)
@ -3112,7 +3118,11 @@ bool GSC_DBZBT3(const GSFrameInfo& fi, int& skip)
}
else if(fi.TME && (fi.FBP == 0x00000 || fi.FBP == 0x00e00 || fi.FBP == 0x01000) && fi.FPSM == PSM_PSMCT16 && fi.TPSM == PSM_PSMZ16)
{
skip = 5;
// Texture shuffling must work on openGL
if (Dx_only)
skip = 5;
else
return false;
}
else if(fi.TME && fi.FPSM == fi.TPSM && fi.TBP0 == 0x03f00 && fi.TPSM == PSM_PSMCT32)
{
@ -3273,6 +3283,7 @@ bool GSC_ICO(const GSFrameInfo& fi, int& skip)
bool GSC_GT4(const GSFrameInfo& fi, int& skip)
{
// Game requires to extract source from RT (block boundary) (texture cache limitation)
if(skip == 0)
{
if(fi.TME && fi.FBP >= 0x02f00 && fi.FPSM == PSM_PSMCT32 && (fi.TBP0 == 0x00000 || fi.TBP0 == 0x01180 /*|| fi.TBP0 == 0x01a40*/) && fi.TPSM == PSM_PSMT8) //TBP0 0x1a40 progressive
@ -3295,6 +3306,7 @@ bool GSC_GT4(const GSFrameInfo& fi, int& skip)
bool GSC_GT3(const GSFrameInfo& fi, int& skip)
{
// Same issue as GSC_GT4 ???
if(skip == 0)
{
if(fi.TME && fi.FBP >= 0x02de0 && fi.FPSM == PSM_PSMCT32 && (fi.TBP0 == 0x00000 || fi.TBP0 == 0x01180) && fi.TPSM == PSM_PSMT8)
@ -3308,6 +3320,7 @@ bool GSC_GT3(const GSFrameInfo& fi, int& skip)
bool GSC_GTConcept(const GSFrameInfo& fi, int& skip)
{
// Same issue as GSC_GT4 ???
if(skip == 0)
{
if(fi.TME && fi.FBP >= 0x03420 && fi.FPSM == PSM_PSMCT32 && (fi.TBP0 == 0x00000 || fi.TBP0 == 0x01400) && fi.TPSM == PSM_PSMT8)
@ -3409,7 +3422,7 @@ bool GSC_ResidentEvil4(const GSFrameInfo& fi, int& skip)
{
skip = 176;
}
else if(fi.TME && fi.FBP ==0x03100 && (fi.TBP0==0x2a00 ||fi.TBP0==0x3480) && fi.TPSM ==0 && fi.FBMSK == 0)
else if(fi.TME && fi.FBP ==0x03100 && (fi.TBP0==0x2a00 ||fi.TBP0==0x3480) && fi.TPSM == PSM_PSMCT32 && fi.FBMSK == 0)
{
skip = 1;
}
@ -3423,7 +3436,7 @@ bool GSC_SacredBlaze(const GSFrameInfo& fi, int& skip)
//Fix Sacred Blaze rendering glitches
if(skip == 0)
{
if(fi.TME && (fi.FBP==0x0000 || fi.FBP==0x0e00) && (fi.TBP0==0x2880 || fi.TBP0==0x2a80 ) && fi.FPSM==fi.TPSM && fi.TPSM == PSM_PSMCT32 && fi.TPSM ==0 && fi.FBMSK == 0x0)
if(fi.TME && (fi.FBP==0x0000 || fi.FBP==0x0e00) && (fi.TBP0==0x2880 || fi.TBP0==0x2a80 ) && fi.FPSM==fi.TPSM && fi.TPSM == PSM_PSMCT32 && fi.FBMSK == 0x0)
{
skip = 1;
}
@ -3499,25 +3512,6 @@ bool GSC_AceCombat4(const GSFrameInfo& fi, int& skip)
return true;
}
// Per-game frame hack for Drakengard 2.
// The skip logic is currently disabled (kept below inside a block comment), so
// this function never reads `fi`, never modifies `skip`, and always returns true.
bool GSC_Drakengard2(const GSFrameInfo& fi, int& skip)
{
// The hack below is disabled because it breaks the GUI
/*if(skip == 0)
{
if(g_crc_region == CRC::CH && fi.TME && fi.FBP == 0x026c0 && fi.TBP0 == 0x00a00 && fi.FPSM ==2)
{
skip =34;
}
if((g_crc_region == CRC::US || g_crc_region == CRC::EU) && fi.TME && fi.FBP == 0x026c0 && fi.FPSM == PSM_PSMCT32 && fi.TBP0 == 0x00a00 && fi.TPSM == PSM_PSMCT32)
{
skip = 64;
}
}*/
return true;
}
bool GSC_Tekken5(const GSFrameInfo& fi, int& skip)
{
if(skip == 0)
@ -3744,7 +3738,11 @@ bool GSC_Genji(const GSFrameInfo& fi, int& skip)
{
if(fi.TME && fi.FBP == 0x01500 && fi.FPSM == PSM_PSMCT16 && fi.TBP0 == 0x00e00 && fi.TPSM == PSM_PSMZ16)
{
skip = 6;
// likely fixed in openGL (texture shuffle)
if (Dx_only)
skip = 6;
else
return false;
}
else if(fi.TPSM == PSM_PSMCT24 && fi.TME ==0x0001 && fi.TBP0==fi.FBP)
{
@ -3832,7 +3830,7 @@ bool GSC_RadiataStories(const GSFrameInfo& fi, int& skip)
{
skip = 1;
}
else if(Dx_only && fi.TME && fi.FBP == fi.TBP0 && fi.FPSM == PSM_PSMCT32 && fi.TPSM == PSM_PSMT4HH)
else if(fi.TME && fi.FBP == fi.TBP0 && fi.FPSM == PSM_PSMCT32 && fi.TPSM == PSM_PSMT4HH)
{
// GH: Hack is quite similar to GSC_StarOcean3. It is potentially the same issue.
// Fixed on openGL
@ -3856,7 +3854,10 @@ bool GSC_HauntingGround(const GSFrameInfo& fi, int& skip)
{
if(fi.TME && fi.FPSM == fi.TPSM && fi.TPSM == PSM_PSMCT16S && fi.FBMSK == 0x03FFF)
{
skip = 1;
if (Dx_only)
skip = 1;
else
return false;
}
else if(fi.TME && fi.FBP == 0x3000 && fi.TBP0 == 0x3380)
{
@ -3989,6 +3990,7 @@ bool GSC_EternalPoison(const GSFrameInfo& fi, int& skip)
{
if(skip == 0)
{
// Texture shuffle ???
if(fi.TPSM == PSM_PSMCT16S && fi.TBP0 == 0x3200)
{
skip = 1;
@ -4173,18 +4175,6 @@ bool GSC_FFX(const GSFrameInfo& fi, int& skip)
return true;
}
// Per-game frame hack for Arc the Lad.
// The only check is commented out, so the `skip == 0` branch is empty:
// `skip` is never modified and the function always returns true.
bool GSC_ArctheLad(const GSFrameInfo& fi, int& skip)
{
if(skip == 0)
{
// Disabled check: would have set skip to 1 for PSMT8H textures with a (nearly) full FBMSK
/*if(fi.TPSM == PSM_PSMT8H && fi.FBMSK >= 0xFFFFFFF)
{
skip = 1;
}*/
}
return true;
}
bool GSC_DemonStone(const GSFrameInfo& fi, int& skip)
{
if(skip == 0)
@ -4231,20 +4221,6 @@ bool GSC_TimeSplitters2(const GSFrameInfo& fi, int& skip)
return true;
}
// Per-game frame hack for Rez.
// The whole body is commented out, so this function never reads `fi`,
// never modifies `skip`, and always returns true.
bool GSC_ReZ(const GSFrameInfo& fi, int& skip)
{
// Not needed anymore
/*if(skip == 0)
{
if(fi.TME && (fi.FBP == 0x00000 || fi.FBP == 0x008c0 || fi.FBP == 0x00a00) && fi.FPSM == fi.TPSM && fi.TPSM == PSM_PSMCT32)
{
skip = 1;
}
}*/
return true;
}
bool GSC_LordOfTheRingsTwoTowers(const GSFrameInfo& fi, int& skip)
{
if(skip == 0)
@ -4379,6 +4355,7 @@ bool GSC_Black(const GSFrameInfo& fi, int& skip)
{
if(skip == 0)
{
// Note: the first part of the hack must be fixed in openGL (texture shuffle). Remains the 2nd part (HasSharedBits)
if(fi.TME /*&& (fi.FBP == 0x00000 || fi.FBP == 0x008c0)*/ && fi.FPSM == PSM_PSMCT16 && (fi.TBP0 == 0x01a40 || fi.TBP0 == 0x01b80 || fi.TBP0 == 0x030c0) && fi.TPSM == PSM_PSMZ16 || (GSUtil::HasSharedBits(fi.FBP, fi.FPSM, fi.TBP0, fi.TPSM)))
{
skip = 5;
@ -4511,7 +4488,7 @@ bool GSC_DevilMayCry3(const GSFrameInfo& fi, int& skip)
if(skip == 0)
{
if(fi.TME && fi.FBP == 0x01800 && fi.FPSM == PSM_PSMCT16 && fi.TBP0 == 0x01000 && fi.TPSM == PSM_PSMZ16)
if(Dx_only && fi.TME && fi.FBP == 0x01800 && fi.FPSM == PSM_PSMCT16 && fi.TBP0 == 0x01000 && fi.TPSM == PSM_PSMZ16)
{
skip = 32;
}
@ -4583,7 +4560,7 @@ bool GSC_BlackHawkDown(const GSFrameInfo& fi, int& skip)
{
if(skip == 0)
{
if(fi.TME && fi.FBP == 0x00800 && fi.FPSM == PSM_PSMCT16 && fi.TBP0 == 0x01800 && fi.TPSM == PSM_PSMZ16)
if(Dx_only && fi.TME && fi.FBP == 0x00800 && fi.FPSM == PSM_PSMCT16 && fi.TBP0 == 0x01800 && fi.TPSM == PSM_PSMZ16)
{
skip = 2; //wall of fog
}
@ -4606,6 +4583,8 @@ bool GSC_Burnout(const GSFrameInfo& fi, int& skip)
}
else if(fi.TME && fi.FPSM == PSM_PSMCT16 && fi.TPSM == PSM_PSMZ16) //fog
{
if (!Dx_only) return false;
if(fi.FBP == 0x00a00 && fi.TBP0 == 0x01e00)
{
skip = 4; //pal
@ -4829,6 +4808,8 @@ bool GSC_ZettaiZetsumeiToshi2(const GSFrameInfo& fi, int& skip)
}
else if((fi.FBP | fi.TBP0)&& fi.FPSM == fi.TPSM && fi.TPSM == PSM_PSMCT16 && fi.FBMSK == 0x3FFF)
{
// Note: the start of the effect (texture shuffle) is fixed in OpenGL, but maybe not the extra draw
// call....
skip = 1000;
}
@ -5425,131 +5406,137 @@ bool GSState::IsBadFrame(int& skip, int UserHacks_SkipDraw)
memset(map, 0, sizeof(map));
if (s_crc_hack_level > 1) {
map[CRC::Okami] = GSC_Okami;
map[CRC::MetalGearSolid3] = GSC_MetalGearSolid3;
map[CRC::AceCombat4] = GSC_AceCombat4;
map[CRC::AlpineRacer3] = GSC_AlpineRacer3;
map[CRC::Black] = GSC_Black;
map[CRC::BlackHawkDown] = GSC_BlackHawkDown;
map[CRC::BleachBladeBattlers] = GSC_BleachBladeBattlers;
map[CRC::BullyCC] = GSC_BullyCC; // Bully is fixed, maybe this one too?
map[CRC::BurnoutDominator] = GSC_Burnout;
map[CRC::BurnoutRevenge] = GSC_Burnout;
map[CRC::BurnoutTakedown] = GSC_Burnout;
map[CRC::CaptainTsubasa] = GSC_CaptainTsubasa;
map[CRC::CrashBandicootWoC] = GSC_CrashBandicootWoC;
map[CRC::CrashNburn] = GSC_CrashNburn;
map[CRC::DBZBT2] = GSC_DBZBT2;
map[CRC::DBZBT3] = GSC_DBZBT3;
map[CRC::SFEX3] = GSC_SFEX3;
map[CRC::Bully] = GSC_Bully;
map[CRC::BullyCC] = GSC_BullyCC;
map[CRC::SoTC] = GSC_SoTC;
map[CRC::OnePieceGrandAdventure] = GSC_OnePieceGrandAdventure;
map[CRC::OnePieceGrandBattle] = GSC_OnePieceGrandBattle;
map[CRC::ICO] = GSC_ICO;
map[CRC::GT4] = GSC_GT4;
map[CRC::GT3] = GSC_GT3;
map[CRC::GTConcept] = GSC_GTConcept;
map[CRC::WildArms4] = GSC_WildArms4;
map[CRC::WildArms5] = GSC_WildArms5;
map[CRC::Manhunt2] = GSC_Manhunt2;
map[CRC::CrashBandicootWoC] = GSC_CrashBandicootWoC;
map[CRC::ResidentEvil4] = GSC_ResidentEvil4;
map[CRC::Spartan] = GSC_Spartan;
map[CRC::AceCombat4] = GSC_AceCombat4;
map[CRC::Drakengard2] = GSC_Drakengard2;
map[CRC::Tekken5] = GSC_Tekken5;
map[CRC::IkkiTousen] = GSC_IkkiTousen;
map[CRC::GodOfWar] = GSC_GodOfWar;
map[CRC::GodOfWar2] = GSC_GodOfWar2;
map[CRC::GiTS] = GSC_GiTS;
map[CRC::Onimusha3] = GSC_Onimusha3;
map[CRC::TalesOfAbyss] = GSC_TalesOfAbyss;
map[CRC::SonicUnleashed] = GSC_SonicUnleashed;
map[CRC::SimpsonsGame] = GSC_SimpsonsGame;
map[CRC::Genji] = GSC_Genji;
map[CRC::RadiataStories] = GSC_RadiataStories;
map[CRC::HauntingGround] = GSC_HauntingGround;
map[CRC::DeathByDegreesTekkenNinaWilliams] = GSC_DeathByDegreesTekkenNinaWilliams;
map[CRC::DevilMayCry3] = GSC_DevilMayCry3;
map[CRC::EternalPoison] = GSC_EternalPoison;
map[CRC::EvangelionJo] = GSC_EvangelionJo;
map[CRC::SuikodenTactics] = GSC_SuikodenTactics;
map[CRC::CaptainTsubasa] = GSC_CaptainTsubasa;
map[CRC::Oneechanbara2Special] = GSC_Oneechanbara2Special;
map[CRC::FFVIIDoC] = GSC_FFVIIDoC;
map[CRC::FightingBeautyWulong] = GSC_FightingBeautyWulong;
map[CRC::FinalFightStreetwise] = GSC_FinalFightStreetwise;
map[CRC::FrontMission5] = GSC_FrontMission5;
map[CRC::Genji] = GSC_Genji;
map[CRC::GetaWayBlackMonday] = GSC_GetaWay;
map[CRC::GetaWay] = GSC_GetaWay;
map[CRC::GodHand] = GSC_GodHand;
map[CRC::GodOfWar2] = GSC_GodOfWar2;
map[CRC::GT3] = GSC_GT3;
map[CRC::GT4] = GSC_GT4;
map[CRC::GTASanAndreas] = GSC_GTASanAndreas;
map[CRC::GTConcept] = GSC_GTConcept;
map[CRC::HauntingGround] = GSC_HauntingGround;
map[CRC::HeavyMetalThunder] = GSC_HeavyMetalThunder;
map[CRC::HummerBadlands] = GSC_HummerBadlands;
map[CRC::ICO] = GSC_ICO;
map[CRC::IkkiTousen] = GSC_IkkiTousen;
map[CRC::JamesBondEverythingOrNothing] = GSC_JamesBondEverythingOrNothing;
map[CRC::KnightsOfTheTemple2] = GSC_KnightsOfTheTemple2;
map[CRC::Kunoichi] = GSC_Kunoichi;
map[CRC::LordOfTheRingsThirdAge] = GSC_LordOfTheRingsThirdAge;
map[CRC::Manhunt2] = GSC_Manhunt2;
map[CRC::MetalGearSolid3] = GSC_MetalGearSolid3;
map[CRC::MidnightClub3] = GSC_MidnightClub3;
map[CRC::NanoBreaker] = GSC_NanoBreaker;
map[CRC::NarutimateAccel] = GSC_NarutimateAccel;
map[CRC::Naruto] = GSC_Naruto;
map[CRC::EternalPoison] = GSC_EternalPoison;
map[CRC::LegoBatman] = GSC_LegoBatman;
map[CRC::SakuraTaisen] = GSC_SakuraTaisen;
map[CRC::TenchuWoH] = GSC_Tenchu;
map[CRC::TenchuFS] = GSC_Tenchu;
map[CRC::Sly3] = GSC_Sly3;
map[CRC::Sly2] = GSC_Sly2;
map[CRC::ShadowofRome] = GSC_ShadowofRome;
map[CRC::FFXII] = GSC_FFXII;
map[CRC::FFX2] = GSC_FFX2;
map[CRC::FFX] = GSC_FFX;
map[CRC::ArctheLad] = GSC_ArctheLad;
map[CRC::DemonStone] = GSC_DemonStone;
map[CRC::BigMuthaTruckers] = GSC_BigMuthaTruckers;
map[CRC::TimeSplitters2] = GSC_TimeSplitters2;
map[CRC::ReZ] = GSC_ReZ;
map[CRC::LordOfTheRingsTwoTowers] = GSC_LordOfTheRingsTwoTowers;
map[CRC::LordOfTheRingsThirdAge] = GSC_LordOfTheRingsThirdAge;
map[CRC::Oneechanbara2Special] = GSC_Oneechanbara2Special;
map[CRC::Onimusha3] = GSC_Onimusha3;
map[CRC::RedDeadRevolver] = GSC_RedDeadRevolver;
map[CRC::HeavyMetalThunder] = GSC_HeavyMetalThunder;
map[CRC::BleachBladeBattlers] = GSC_BleachBladeBattlers;
map[CRC::CrashNburn] = GSC_CrashNburn;
map[CRC::TombRaiderUnderworld] = GSC_TombRaiderUnderWorld;
map[CRC::TombRaiderAnniversary] = GSC_TombRaider;
map[CRC::TombRaiderLegend] = GSC_TombRaiderLegend;
map[CRC::SSX3] = GSC_SSX3;
map[CRC::Black] = GSC_Black;
map[CRC::FFVIIDoC] = GSC_FFVIIDoC;
map[CRC::StarWarsForceUnleashed] = GSC_StarWarsForceUnleashed;
map[CRC::StarWarsBattlefront] = GSC_StarWarsBattlefront;
map[CRC::StarWarsBattlefront2] = GSC_StarWarsBattlefront2;
map[CRC::BlackHawkDown] = GSC_BlackHawkDown;
map[CRC::DevilMayCry3] = GSC_DevilMayCry3;
map[CRC::BurnoutTakedown] = GSC_Burnout;
map[CRC::BurnoutRevenge] = GSC_Burnout;
map[CRC::BurnoutDominator] = GSC_Burnout;
map[CRC::MidnightClub3] = GSC_MidnightClub3;
map[CRC::SpyroNewBeginning] = GSC_SpyroNewBeginning;
map[CRC::SpyroEternalNight] = GSC_SpyroEternalNight;
map[CRC::TalesOfLegendia] = GSC_TalesOfLegendia;
map[CRC::NanoBreaker] = GSC_NanoBreaker;
map[CRC::Kunoichi] = GSC_Kunoichi;
map[CRC::Yakuza] = GSC_Yakuza;
map[CRC::Yakuza2] = GSC_Yakuza2;
map[CRC::SkyGunner] = GSC_SkyGunner;
map[CRC::JamesBondEverythingOrNothing] = GSC_JamesBondEverythingOrNothing;
map[CRC::ZettaiZetsumeiToshi2] = GSC_ZettaiZetsumeiToshi2;
map[CRC::ShinOnimusha] = GSC_ShinOnimusha;
map[CRC::XE3] = GSC_XE3;
map[CRC::GetaWay] = GSC_GetaWay;
map[CRC::GetaWayBlackMonday] = GSC_GetaWay;
map[CRC::ResidentEvil4] = GSC_ResidentEvil4;
map[CRC::SacredBlaze] = GSC_SacredBlaze;
map[CRC::SakuraTaisen] = GSC_SakuraTaisen;
map[CRC::SakuraWarsSoLongMyLove] = GSC_SakuraWarsSoLongMyLove;
map[CRC::FightingBeautyWulong] = GSC_FightingBeautyWulong;
map[CRC::TouristTrophy] = GSC_TouristTrophy;
map[CRC::GTASanAndreas] = GSC_GTASanAndreas;
map[CRC::FrontMission5] = GSC_FrontMission5;
map[CRC::GodHand] = GSC_GodHand;
map[CRC::KnightsOfTheTemple2] = GSC_KnightsOfTheTemple2;
map[CRC::UltramanFightingEvolution] = GSC_UltramanFightingEvolution;
map[CRC::DeathByDegreesTekkenNinaWilliams] = GSC_DeathByDegreesTekkenNinaWilliams;
map[CRC::AlpineRacer3] = GSC_AlpineRacer3;
map[CRC::HummerBadlands] = GSC_HummerBadlands;
map[CRC::SengokuBasara] = GSC_SengokuBasara;
map[CRC::Grandia3] = GSC_Grandia3;
map[CRC::FinalFightStreetwise] = GSC_FinalFightStreetwise;
map[CRC::TalesofSymphonia] = GSC_TalesofSymphonia;
map[CRC::ShadowofRome] = GSC_ShadowofRome;
map[CRC::ShinOnimusha] = GSC_ShinOnimusha;
map[CRC::Simple2000Vol114] = GSC_Simple2000Vol114;
map[CRC::SkyGunner] = GSC_SkyGunner;
map[CRC::SoulCalibur2] = GSC_SoulCalibur2;
map[CRC::SoulCalibur3] = GSC_SoulCalibur3;
map[CRC::Simple2000Vol114] = GSC_Simple2000Vol114;
map[CRC::UrbanReign] = GSC_UrbanReign;
map[CRC::Spartan] = GSC_Spartan;
map[CRC::StarWarsBattlefront2] = GSC_StarWarsBattlefront2;
map[CRC::StarWarsBattlefront] = GSC_StarWarsBattlefront;
map[CRC::StarWarsForceUnleashed] = GSC_StarWarsForceUnleashed;
map[CRC::SteambotChronicles] = GSC_SteambotChronicles;
map[CRC::SacredBlaze] = GSC_SacredBlaze;
map[CRC::SMTNocturne] = GSC_SMTNocturneDDS<0x2054E870>;
map[CRC::SuikodenTactics] = GSC_SuikodenTactics;
map[CRC::TalesOfAbyss] = GSC_TalesOfAbyss;
map[CRC::TalesOfLegendia] = GSC_TalesOfLegendia;
map[CRC::TalesofSymphonia] = GSC_TalesofSymphonia;
map[CRC::Tekken5] = GSC_Tekken5;
map[CRC::TimeSplitters2] = GSC_TimeSplitters2;
map[CRC::TombRaiderAnniversary] = GSC_TombRaider;
map[CRC::TombRaiderLegend] = GSC_TombRaiderLegend;
map[CRC::TombRaiderUnderworld] = GSC_TombRaiderUnderWorld;
map[CRC::TouristTrophy] = GSC_TouristTrophy;
map[CRC::UltramanFightingEvolution] = GSC_UltramanFightingEvolution;
map[CRC::UrbanReign] = GSC_UrbanReign;
map[CRC::WildArms4] = GSC_WildArms4;
map[CRC::WildArms5] = GSC_WildArms5;
map[CRC::XE3] = GSC_XE3;
map[CRC::Yakuza2] = GSC_Yakuza2;
map[CRC::Yakuza] = GSC_Yakuza;
map[CRC::ZettaiZetsumeiToshi2] = GSC_ZettaiZetsumeiToshi2;
// Only Aggressive
map[CRC::FFX2] = GSC_FFX2;
map[CRC::FFX] = GSC_FFX;
map[CRC::FFXII] = GSC_FFXII;
map[CRC::SMTDDS1] = GSC_SMTNocturneDDS<0x203BA820>;
map[CRC::SMTDDS2] = GSC_SMTNocturneDDS<0x20435BF0>;
map[CRC::SMTNocturne] = GSC_SMTNocturneDDS<0x2054E870>;
map[CRC::SoTC] = GSC_SoTC;
map[CRC::SSX3] = GSC_SSX3;
}
// Hacks that were fixed on OpenGL
if (Dx_only) {
// This one requires accurate_colclip
map[CRC::Bully] = GSC_Bully;
map[CRC::LordOfTheRingsTwoTowers] = GSC_LordOfTheRingsTwoTowers;
map[CRC::Okami] = GSC_Okami;
map[CRC::SimpsonsGame] = GSC_SimpsonsGame;
// Not tested but must be fixed with texture shuffle
map[CRC::BigMuthaTruckers] = GSC_BigMuthaTruckers;
map[CRC::DemonStone] = GSC_DemonStone;
map[CRC::GiTS] = GSC_GiTS;
map[CRC::LegoBatman] = GSC_LegoBatman;
map[CRC::OnePieceGrandAdventure] = GSC_OnePieceGrandAdventure;
map[CRC::OnePieceGrandBattle] = GSC_OnePieceGrandBattle;
map[CRC::SFEX3] = GSC_SFEX3;
map[CRC::SpyroEternalNight] = GSC_SpyroEternalNight;
map[CRC::SpyroNewBeginning] = GSC_SpyroNewBeginning;
map[CRC::SonicUnleashed] = GSC_SonicUnleashed;
map[CRC::TenchuFS] = GSC_Tenchu;
map[CRC::TenchuWoH] = GSC_Tenchu;
// Those games might require accurate fbmask
map[CRC::Sly2] = GSC_Sly2;
map[CRC::Sly3] = GSC_Sly3;
// Those games require accurate_colclip (perf)
map[CRC::CastlevaniaCoD] = GSC_Castlevania;
map[CRC::CastlevaniaLoI] = GSC_Castlevania;
map[CRC::GodOfWar] = GSC_GodOfWar;
// Those games emulate a stencil buffer with the alpha channel of the RT (Slow)
map[CRC::RadiataStories] = GSC_RadiataStories;
map[CRC::StarOcean3] = GSC_StarOcean3;
map[CRC::ValkyrieProfile2] = GSC_ValkyrieProfile2;
// Deprecated hack could be removed (Cutie)
map[CRC::Grandia3] = GSC_Grandia3;
}
}

View File

@ -26,10 +26,13 @@ GSTextureCache::GSTextureCache(GSRenderer* r)
: m_renderer(r)
{
m_spritehack = !!theApp.GetConfig("UserHacks", 0) ? theApp.GetConfig("UserHacks_SpriteHack", 0) : 0;
UserHacks_HalfPixelOffset = !!theApp.GetConfig("UserHacks", 0) && !!theApp.GetConfig("UserHacks_HalfPixelOffset", 0);
m_paltex = !!theApp.GetConfig("paltex", 0);
m_paltex = !!theApp.GetConfig("paltex", 0);
m_preload_frame = theApp.GetConfig("preload_frame_with_gs_data", 0);
m_can_convert_depth = IsOpenGL() ? theApp.GetConfig("texture_cache_depth", 1) : 0;
m_crc_hack_level = theApp.GetConfig("crc_hack_level", 3);
m_temp = (uint8*)_aligned_malloc(1024 * 1024 * sizeof(uint32), 32);
}
@ -121,28 +124,68 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
// (Simply not doing this code at all makes a lot of previously missing stuff show (but breaks pretty much everything
// else.)
//for(int type = 0; type < 2 && dst == NULL; type++)
for(int type = 0; type < 1 && dst == NULL; type++) // Only look for render target, no depth stencil
for(list<Target*>::iterator i = m_dst[RenderTarget].begin(); i != m_dst[RenderTarget].end(); i++)
{
for(list<Target*>::iterator i = m_dst[type].begin(); i != m_dst[type].end(); i++)
{
Target* t = *i;
if(t->m_used && t->m_dirty.empty()) {
// Typical bug (MGS3 blue cloud):
// 1/ RT used as 32 bits => alpha channel written
// 2/ RT used as 24 bits => no update of alpha channel
// 3/ Lookup of texture that used alpha channel as index, HasSharedBits will return false
// because of the previous draw call format
//
// Solution: consider the RT as 32 bits if the alpha was used in the past
uint32 t_psm = (t->m_dirty_alpha) ? t->m_TEX0.PSM & ~0x1 : t->m_TEX0.PSM;
if (GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t_psm)) {
if (!IsOpenGL() && (psm == PSM_PSMT8)) {
// OpenGL can convert the texture directly in the GPU. Not sure we want to keep this
// code for DX. It fixes effect but it is slow (MGS3)
// It is complex to convert the code to a shader. As a reference, let's do it on the CPU; it will
// be slow but
// 1/ it just works :)
// 2/ even with upscaling
// 3/ for both DX and OpenGL
// Gregory: to avoid a massive slow down for nothing, let's only enable
// this code when CRC is below the FULL level
if (m_crc_hack_level < 3)
Read(t, t->m_valid);
else
dst = t;
} else {
dst = t;
}
break;
} else if ((t->m_TEX0.TBW >= 16) && GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0 + t->m_TEX0.TBW * 0x10, t->m_TEX0.PSM)) {
// Detect half of the render target (fix snow engine game)
// Target Page (8KB) have always a width of 64 pixels
// Half of the Target is TBW/2 pages * 8KB / (1 block * 256B) = 0x10
half_right = true;
dst = t;
break;
}
}
}
if (dst == NULL && CanConvertDepth()) {
// Let's try a trick to avoid wrongly using a depth buffer
// Unfortunately, I don't have any Arc the Lad testcase
//
// 1/ Check only current frame, I guess it is only used as a postprocessing effect
for(list<Target*>::iterator i = m_dst[DepthStencil].begin(); i != m_dst[DepthStencil].end(); i++) {
Target* t = *i;
if(t->m_used && t->m_dirty.empty()) {
if (GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM)) {
dst = t;
break;
} else if ((t->m_TEX0.TBW >= 16) && GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0 + t->m_TEX0.TBW * 0x10, t->m_TEX0.PSM)) {
// Detect half of the render target (fix snow engine game)
// Target Page (8KB) have always a width of 64 pixels
// Half of the Target is TBW/2 pages * 8KB / (1 block * 256B) = 0x10
half_right = true;
dst = t;
break;
}
if(!t->m_age && t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM))
{
dst = t;
break;
}
}
}
@ -152,11 +195,11 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
{
#ifdef ENABLE_OGL_DEBUG
if (dst) {
GL_CACHE("TC: dst hit (%s): %d (0x%x)", half_right ? "half" : "full",
GL_CACHE("TC: dst %s hit (%s): %d (0x%x, F:0x%x)", to_string(dst->m_type), half_right ? "half" : "full",
dst->m_texture ? dst->m_texture->GetID() : 0,
TEX0.TBP0);
TEX0.TBP0, TEX0.PSM);
} else {
GL_CACHE("TC: src miss (0x%x)", TEX0.TBP0);
GL_CACHE("TC: src miss (0x%x, F:0x%x)", TEX0.TBP0, TEX0.PSM);
}
#endif
src = CreateSource(TEX0, TEXA, dst, half_right);
@ -165,12 +208,11 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
{
return NULL;
}
#ifdef ENABLE_OGL_DEBUG
} else {
GL_CACHE("TC: src hit: %d (0x%x)",
GL_CACHE("TC: src hit: %d (0x%x F:0x%x)",
src->m_texture ? src->m_texture->GetID() : 0,
TEX0.TBP0);
#endif
TEX0.TBP0, TEX0.PSM);
}
if (src->m_palette)
@ -207,35 +249,80 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int
dst = t;
#ifdef ENABLE_OGL_DEBUG
// Likely the root cause of tons and tons of bugs
if (dst->m_TEX0.PSM != TEX0.PSM) {
GL_INS("TC: ERROR: use a target with format 0x%x as 0x%x without any conversion", dst->m_TEX0.PSM, TEX0.PSM);
}
#endif
dst->m_32_bits_fmt |= !(TEX0.PSM & 2);
dst->m_TEX0 = TEX0;
break;
}
}
if (dst) {
GL_CACHE("TC: Lookup Target(%s) %dx%d, hit: %d (0x%x, F:0x%x)", to_string(type), w, h, dst->m_texture->GetID(), bp, TEX0.PSM);
dst->Update();
dst->m_dirty_alpha |= (TEX0.PSM != PSM_PSMCT24) && (TEX0.PSM != PSM_PSMZ24);
} else if (CanConvertDepth()) {
int rev_type = (type == DepthStencil) ? RenderTarget : DepthStencil;
GSVector4 sRect(0, 0, 1.0, 1.0);
GSVector4 dRect(0, 0, w, h);
// Depth stencil/RT can be an older RT/DS but only check recent RT/DS to avoid picking
// some bad data.
for(list<Target*>::iterator i = m_dst[rev_type].begin(); i != m_dst[rev_type].end(); i++)
{
Target* t = *i;
if(!t->m_age && bp == t->m_TEX0.TBP0)
{
dst = CreateTarget(TEX0, w, h, type);
if (type == DepthStencil) {
GL_CACHE("TC: Lookup Target(Depth) %dx%d, hit Color (0x%x, F:0x%x)", w, h, bp, TEX0.PSM);
int shader = (TEX0.PSM & 1) ? 13 : 12;
m_renderer->m_dev->StretchRect(t->m_texture, sRect, dst->m_texture, dRect, shader, false);
} else {
GL_CACHE("TC: Lookup Target(Color) %dx%d, hit Depth (0x%x, F:0x%x)", w, h, bp, TEX0.PSM);
m_renderer->m_dev->StretchRect(t->m_texture, sRect, dst->m_texture, dRect, 11, false);
}
break;
}
}
}
if(dst == NULL)
{
GL_CACHE("TC: Lookup Target(T%d) %dx%d, miss (0x%x)", type, w, h, bp);
GL_CACHE("TC: Lookup Target(%s) %dx%d, miss (0x%x, F:0x%x)", to_string(type), w, h, bp, TEX0.PSM);
dst = CreateTarget(TEX0, w, h, type);
if(dst == NULL)
{
return NULL;
}
}
else
{
GL_CACHE("TC: Lookup Target(T%d) %dx%d, hit: %d (0x%x)", type, w, h, dst->m_texture->GetID(), bp);
dst->Update();
#ifdef ENABLE_OGL_DEBUG
// In theory, new textures contain invalidated data. Still in theory, a new target
// must contain the content of the GS memory.
// In practice, TC will wrongly invalidate some RTs, for example due to a write on the alpha
// channel while the colors are still valid. Unfortunately TC doesn't support the upload of data
// into a target.
//
// Clearing the new target here will likely break several games. However, it might reduce
// the noise in draw call debugging. It is the main reason to enable it on debug builds.
//
// From a performance point of view, it might cost a little with big upscaling,
// but normally few RTs are missed so it must remain reasonable.
if (IsOpenGL()) {
switch (type) {
case RenderTarget: m_renderer->m_dev->ClearRenderTarget(dst->m_texture, 0); break;
case DepthStencil: m_renderer->m_dev->ClearDepth(dst->m_texture, 0); break;
default:break;
}
}
#endif
}
if(m_renderer->CanUpscale())
@ -258,6 +345,10 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int
hh *= 2;
}
// Gregory: I'm sure this silliness is related to the usage of a 32 bits
// buffer as a 16 bits format. In this case the height of the buffer is
// multiplied by 2 (hence a scissor bigger than the RT).
// This vp2 fix doesn't work most of the time
if(hh < 512 && m_renderer->m_context->SCISSOR.SCAY1 == 511) // vp2
@ -280,7 +371,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int
return dst;
}
GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int w, int h)
GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int w, int h, int real_h)
{
uint32 bp = TEX0.TBP0;
@ -316,13 +407,23 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int
dst = CreateTarget(TEX0, w, h, RenderTarget);
if(dst == NULL)
{
return NULL;
}
m_renderer->m_dev->ClearRenderTarget(dst->m_texture, 0); // new frame buffers after reset should be cleared, don't display memory garbage
if (m_preload_frame) {
// Load GS data into the frame. Games can directly upload a background or the full image in
// the "CTRC" buffer. It will also avoid various black screen issues in gs dumps.
//
// Code is more or less an equivalent of the SW renderer
//
// Option is hidden and not enabled by default to avoid any regression
dst->m_dirty.push_back(GSDirtyRect(GSVector4i(0, 0, TEX0.TBW * 64, real_h), TEX0.PSM));
dst->Update();
}
}
else
{
@ -334,6 +435,33 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int
return dst;
}
// Goal: Depth And Target at the same address is not possible. On GS it is
// the same memory but not on the Dx/GL. Therefore a write to the Depth/Target
// must invalidate the Target/Depth respectively
void GSTextureCache::InvalidateVideoMemType(int type, uint32 bp)
{
if (!CanConvertDepth())
return;
for(list<Target*>::iterator i = m_dst[type].begin(); i != m_dst[type].end(); i++)
{
Target* t = *i;
if(bp == t->m_TEX0.TBP0)
{
GL_CACHE("TC: InvalidateVideoMemType: Remove Target(%s) %d (0x%x)", to_string(type),
t->m_texture ? t->m_texture->GetID() : 0,
t->m_TEX0.TBP0);
m_dst[type].erase(i);
delete t;
break;
}
}
}
// Goal: invalidate data sent to the GPU when the source (GS memory) is modified
// Called each time you want to write to the GS memory
void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, bool target)
@ -455,17 +583,27 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, b
Target* t = *j;
// GH: (I think) this code is completely broken. Typical issue:
// EE write an alpha channel into 32 bits texture
// Results: the target is deleted (because HasCompatibleBits is false)
//
// Major issues are expected if the game try to reuse the target
// If we dirty the RT, it will likely upload partially invalid data.
// (The color on the previous example)
if(GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM))
{
if(!found && GSUtil::HasCompatibleBits(psm, t->m_TEX0.PSM))
{
GL_CACHE("TC: Dirty Target(%s) %d (0x%x)", to_string(type),
t->m_texture ? t->m_texture->GetID() : 0,
t->m_TEX0.TBP0);
t->m_dirty.push_back(GSDirtyRect(r, psm));
t->m_TEX0.TBW = bw;
}
else
{
m_dst[type].erase(j);
GL_CACHE("TC: Remove Target(%d) %d (0x%x)", type,
GL_CACHE("TC: Remove Target(%s) %d (0x%x)", to_string(type),
t->m_texture ? t->m_texture->GetID() : 0,
t->m_TEX0.TBP0);
delete t;
@ -473,6 +611,7 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, b
}
}
// GH: Try to detect texture write that will overlap with a target buffer
if(GSUtil::HasSharedBits(psm, t->m_TEX0.PSM) && bp < t->m_TEX0.TBP0)
{
uint32 rowsize = bw * 8192;
@ -484,6 +623,9 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, b
if(r.bottom > y)
{
GL_CACHE("TC: Dirty After Target(%s) %d (0x%x)", to_string(type),
t->m_texture ? t->m_texture->GetID() : 0,
t->m_TEX0.TBP0);
// TODO: do not add this rect above too
t->m_dirty.push_back(GSDirtyRect(GSVector4i(r.left, r.top - y, r.right, r.bottom - y), psm));
t->m_TEX0.TBW = bw;
@ -645,7 +787,7 @@ void GSTextureCache::IncAge()
if(++t->m_age > maxage)
{
m_dst[type].erase(j);
GL_CACHE("TC: Remove Target(T%d): %d (0x%x) due to age", type,
GL_CACHE("TC: Remove Target(%s): %d (0x%x) due to age", to_string(type),
t->m_texture ? t->m_texture->GetID() : 0,
t->m_TEX0.TBP0);
@ -681,34 +823,46 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
{
// TODO: clean up this mess
src->m_target = true;
// Shader 11 convert depth to color
// Shader 14 convert 32 bits color to 8 bits color
int shader = dst->m_type != RenderTarget ? 11 : 0;
bool is_8bits = TEX0.PSM == PSM_PSMT8 && IsOpenGL();
if(dst->m_type != RenderTarget)
{
GL_CACHE("TC: Remove dst because not a RT %d (0x%x)",
dst->m_texture ? dst->m_texture->GetID() : 0,
dst->m_TEX0.TBP0);
// TODO
delete src;
return NULL;
if (is_8bits) {
GL_INS("Reading RT as a packed-indexed 8 bits format");
shader = 14; // ask a conversion to 8 bits format
}
#ifdef ENABLE_OGL_DEBUG
if (TEX0.PSM == PSM_PSMT4) {
GL_INS("ERROR: Reading RT as a packed-indexed 4 bits format is not supported");
}
#endif
src->m_32_bits_fmt = dst->m_32_bits_fmt;
src->m_target = true;
dst->Update();
GSTexture* tmp = NULL;
if(dst->m_texture->IsMSAA())
if (dst->m_texture->IsMSAA())
{
tmp = dst->m_texture;
dst->m_texture = m_renderer->m_dev->Resolve(dst->m_texture);
}
// do not round here!!! if edge becomes a black pixel and addressing mode is clamp => everything outside the clamped area turns into black (kh2 shadows)
int w = (int)(dst->m_texture->GetScale().x * tw);
int h = (int)(dst->m_texture->GetScale().y * th);
if (is_8bits) {
// Unscale 8 bits textures, quality won't be nice but format is really awful
w = tw;
h = th;
}
GSVector2i dstsize = dst->m_texture->GetSize();
@ -776,42 +930,89 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
GSVector4 dRect(0, 0, w, h);
if(w > dstsize.x)
{
scale.x = (float)dstsize.x / tw;
dRect.z = (float)dstsize.x * scale.x / dst->m_texture->GetScale().x;
w = dstsize.x;
}
// Lengthy explanation of the rescaling code.
// Here an example in 2x:
// RT is 1280x1024 but only contains 512x448 valid data (so 256x224 pixels without upscaling)
//
// PS2 want to read it back as a 1024x1024 pixels (they don't care about the extra pixels)
// So in theory we need to shrink a 2048x2048 RT into a 1024x1024 texture. Obviously the RT is
// too small.
//
// So we will only limit the resize to the available data in RT.
// Therefore we will resize the RT from 1280x1024 to 1280x1024/2048x2048 % of the new texture
// size (which is 1280x1024) (i.e. 800x512)
// From the rendering point of view. UV coordinate will be normalized on the real GS texture size
// This way it can be used on an upscaled texture without extra scaling factor (only requirement is
// to have same proportion)
//
// FIXME: The scaling will create a bad offset. For example, if the texture coordinate starts at 0.5 (pixel 0),
// at 2x it will become 0.5/128 * 256 = 1 (pixel 1).
// I think it is the purpose of the UserHacks_HalfPixelOffset below. However, the implementation is less
// than ideal.
// 1/ It supposes games have a half pixel offset on texture coordinates, which could be wrong
// 2/ It doesn't support rescaling of the RT (tw = 1024)
// Maybe it would be easier to just round the UV value in the Vertex Shader
if(h > dstsize.y)
{
scale.y = (float)dstsize.y / th;
dRect.w = (float)dstsize.y * scale.y / dst->m_texture->GetScale().y;
h = dstsize.y;
if (!is_8bits) {
// 8 bits handling is special due to unscaling. It is better to not execute this code
if (w > dstsize.x)
{
scale.x = (float)dstsize.x / tw;
dRect.z = (float)dstsize.x * scale.x / dst->m_texture->GetScale().x;
w = dstsize.x;
}
if (h > dstsize.y)
{
scale.y = (float)dstsize.y / th;
dRect.w = (float)dstsize.y * scale.y / dst->m_texture->GetScale().y;
h = dstsize.y;
}
}
GSVector4 sRect(0, 0, w, h);
GSTexture* sTex = src->m_texture ? src->m_texture : dst->m_texture;
GSTexture* dTex = m_renderer->m_dev->CreateRenderTarget(w, h, false);
// GH: by default (m_paltex == 0) GSdx converts texture to the 32 bit format
// However it is different here. We want to reuse a Render Target as a texture.
// Because the texture is already on the GPU, CPU can't convert it.
bool linear = true;
if (psm.pal > 0) {
src->m_palette = m_renderer->m_dev->CreateTexture(256, 1);
// Palette is used to interpret the alpha channel of the RT as an index.
// Star Ocean 3 uses it to emulate a stencil buffer.
// It is a very bad idea to force bilinear filtering on it.
linear = false;
}
// Disable linear filtering for various GS post-processing effect
// 1/ Palette is used to interpret the alpha channel of the RT as an index.
// Star Ocean 3 uses it to emulate a stencil buffer.
// 2/ Z formats are a bad idea to interpolate (discontinuities).
// 3/ 16 bits buffer is used to move data from a channel to another.
//
// I keep linear filtering for standard color even if I'm not sure that it is
// working correctly.
// Indeed, texture is reduced so you need to read all covered pixels (9 in 3x)
// to correctly interpolate the value. Linear interpolation is likely acceptable
// only in 2x scaling
//
// Src texture will still be bilinear interpolated so I'm really not sure
// that we need to do it here too.
//
// Future note: instead to do
// RT 2048x2048 -> T 1024x1024 -> RT 2048x2048
// We can maybe sample directly a bigger texture
// RT 2048x2048 -> T 2048x2048 -> RT 2048x2048
// Pro: better quality. Copy instead of StretchRect (must be faster)
// Cons: consume more memory
//
// In distant future: investigate to reuse the RT directly without any
// copy. Likely a speed boost and memory usage reduction.
bool linear = (TEX0.PSM == PSM_PSMCT32 || TEX0.PSM == PSM_PSMCT24);
if(!src->m_texture)
{
src->m_texture = dTex;
}
if((sRect == dRect).alltrue())
if ((sRect == dRect).alltrue() && !shader)
{
if (half_right) {
// You typically hit this code in snow engine games. Dstsize is the size of the Dx/GL RT
@ -825,6 +1026,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
}
else
{
// Different size or not the same format
sRect.z /= sTex->GetWidth();
sRect.w /= sTex->GetHeight();
@ -832,7 +1034,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
sRect.x = sRect.z/2.0f;
}
m_renderer->m_dev->StretchRect(sTex, sRect, dTex, dRect, 0, linear);
m_renderer->m_dev->StretchRect(sTex, sRect, dTex, dRect, shader, linear);
}
if(dTex != src->m_texture)
@ -849,7 +1051,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
if(tmp != NULL)
{
// tmp is texture before a MultiSample resolve
// tmp is the texture before a MultiSample resolve
m_renderer->m_dev->Recycle(dst->m_texture);
dst->m_texture = tmp;
@ -907,7 +1109,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
GSTextureCache::Target* GSTextureCache::CreateTarget(const GIFRegTEX0& TEX0, int w, int h, int type)
{
Target* t = new Target(m_renderer, TEX0, m_temp);
Target* t = new Target(m_renderer, TEX0, m_temp, CanConvertDepth());
// FIXME: initial data should be unswizzled from local mem in Update() if dirty
@ -943,6 +1145,7 @@ GSTextureCache::Surface::Surface(GSRenderer* r, uint8* temp)
, m_texture(NULL)
, m_age(0)
, m_temp(temp)
, m_32_bits_fmt(false)
{
m_TEX0.TBP0 = 0x3fff;
}
@ -1195,12 +1398,15 @@ void GSTextureCache::Source::Flush(uint32 count)
// GSTextureCache::Target
GSTextureCache::Target::Target(GSRenderer* r, const GIFRegTEX0& TEX0, uint8* temp)
GSTextureCache::Target::Target(GSRenderer* r, const GIFRegTEX0& TEX0, uint8* temp, bool depth_supported)
: Surface(r, temp)
, m_type(-1)
, m_used(false)
, m_depth_supported(depth_supported)
{
m_TEX0 = TEX0;
m_32_bits_fmt |= !(TEX0.PSM & 2);
m_dirty_alpha = (TEX0.PSM != PSM_PSMCT24) && (TEX0.PSM != PSM_PSMZ24);
m_valid = GSVector4i::zero();
}
@ -1210,59 +1416,76 @@ void GSTextureCache::Target::Update()
Surface::Update();
// FIXME: the union of the rects may also update wrong parts of the render target (but a lot faster :)
// GH: it must be doable
// 1/ rescale the new t to the good size
// 2/ copy each rectangle (rescale the rectangle) (use CopyRect or multiple vertex)
// Alternate
// 1/ uses multiple vertex rectangle
GSVector4i r = m_dirty.GetDirtyRectAndClear(m_TEX0, m_texture->GetSize());
if(r.rempty()) return;
if (r.rempty()) return;
int w = r.width();
int h = r.height();
GIFRegTEXA TEXA;
TEXA.AEM = 1;
TEXA.TA0 = 0;
TEXA.TA1 = 0x80;
GSTexture* t = m_renderer->m_dev->CreateTexture(w, h);
if (t == NULL) return;
// No handling please
if ((m_type == DepthStencil) && !m_depth_supported) {
// do the most likely thing a direct write would do, clear it
GL_INS("ERROR: Update DepthStencil dummy");
if((m_renderer->m_game.flags & CRC::ZWriteMustNotClear) == 0)
m_renderer->m_dev->ClearDepth(m_texture, 0);
return;
}
const GSOffset* off = m_renderer->m_mem.GetOffset(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM);
GSTexture::GSMap m;
if(t->Map(m))
{
m_renderer->m_mem.ReadTexture(off, r, m.bits, m.pitch, TEXA);
t->Unmap();
}
else
{
int pitch = ((w + 3) & ~3) * 4;
m_renderer->m_mem.ReadTexture(off, r, m_temp, pitch, TEXA);
t->Update(r.rsize(), m_temp, pitch);
}
// m_renderer->m_perfmon.Put(GSPerfMon::Unswizzle, w * h * 4);
// Copy the new GS memory content into the destination texture.
if(m_type == RenderTarget)
{
int w = r.width();
int h = r.height();
GL_INS("ERROR: Update RenderTarget");
if(GSTexture* t = m_renderer->m_dev->CreateTexture(w, h))
{
const GSOffset* off = m_renderer->m_mem.GetOffset(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM);
GIFRegTEXA TEXA;
TEXA.AEM = 1;
TEXA.TA0 = 0;
TEXA.TA1 = 0x80;
GSTexture::GSMap m;
if(t->Map(m))
{
m_renderer->m_mem.ReadTexture(off, r, m.bits, m.pitch, TEXA);
t->Unmap();
}
else
{
int pitch = ((w + 3) & ~3) * 4;
m_renderer->m_mem.ReadTexture(off, r, m_temp, pitch, TEXA);
t->Update(r.rsize(), m_temp, pitch);
}
// m_renderer->m_perfmon.Put(GSPerfMon::Unswizzle, w * h * 4);
m_renderer->m_dev->StretchRect(t, m_texture, GSVector4(r) * GSVector4(m_texture->GetScale()).xyxy());
m_renderer->m_dev->Recycle(t);
}
m_renderer->m_dev->StretchRect(t, m_texture, GSVector4(r) * GSVector4(m_texture->GetScale()).xyxy());
}
else if(m_type == DepthStencil)
{
// do the most likely thing a direct write would do, clear it
GL_INS("ERROR: Update DepthStencil");
if((m_renderer->m_game.flags & CRC::ZWriteMustNotClear) == 0)
{
m_renderer->m_dev->ClearDepth(m_texture, 0);
}
// FIXME linear or not?
m_renderer->m_dev->StretchRect(t, m_texture, GSVector4(r) * GSVector4(m_texture->GetScale()).xyxy(), 12);
}
m_renderer->m_dev->Recycle(t);
}
// GSTextureCache::SourceMap

View File

@ -40,6 +40,7 @@ public:
GIFRegTEXA m_TEXA;
int m_age;
uint8* m_temp;
bool m_32_bits_fmt; // Allow to detect the casting of 32 bits as 16 bits texture
public:
Surface(GSRenderer* r, uint8* temp);
@ -80,9 +81,11 @@ public:
bool m_used;
GSDirtyRectList m_dirty;
GSVector4i m_valid;
bool m_depth_supported;
bool m_dirty_alpha;
public:
Target(GSRenderer* r, const GIFRegTEX0& TEX0, uint8* temp);
Target(GSRenderer* r, const GIFRegTEX0& TEX0, uint8* temp, bool depth_supported);
virtual void Update();
};
@ -109,7 +112,10 @@ protected:
list<Target*> m_dst[2];
bool m_paltex;
int m_spritehack;
bool m_preload_frame;
uint8* m_temp;
bool m_can_convert_depth;
int m_crc_hack_level;
virtual Source* CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* t = NULL, bool half_right = false);
virtual Target* CreateTarget(const GIFRegTEX0& TEX0, int w, int h, int type);
@ -122,6 +128,9 @@ protected:
virtual void Read(Target* t, const GSVector4i& r) = 0;
#endif
virtual bool CanConvertDepth() { return m_can_convert_depth; }
virtual bool IsOpenGL() { return false; }
public:
GSTextureCache(GSRenderer* r);
virtual ~GSTextureCache();
@ -133,11 +142,16 @@ public:
Source* LookupSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r);
Target* LookupTarget(const GIFRegTEX0& TEX0, int w, int h, int type, bool used);
Target* LookupTarget(const GIFRegTEX0& TEX0, int w, int h);
Target* LookupTarget(const GIFRegTEX0& TEX0, int w, int h, int real_h);
void InvalidateVideoMemType(int type, uint32 bp);
void InvalidateVideoMem(GSOffset* off, const GSVector4i& r, bool target = true);
void InvalidateLocalMem(GSOffset* off, const GSVector4i& r);
void IncAge();
bool UserHacks_HalfPixelOffset;
// Human-readable name of a target type for the cache log messages:
// "Depth" for DepthStencil, "Color" for anything else.
const char* to_string(int type) {
	if (type == DepthStencil)
		return "Depth";

	return "Color";
}
};

View File

@ -50,7 +50,7 @@ void GSTextureCache11::Read(Target* t, const GSVector4i& r)
return;
}
if(!t->m_dirty.empty())
if (!t->m_dirty.empty() || (r.width() == 0 && r.height() == 0))
{
return;
}

View File

@ -31,6 +31,8 @@ protected:
void Read(Target* t, const GSVector4i& r);
virtual bool CanConvertDepth() { return false; }
public:
GSTextureCache11(GSRenderer* r);
};

View File

@ -50,7 +50,7 @@ void GSTextureCache9::Read(Target* t, const GSVector4i& r)
return;
}
if(!t->m_dirty.empty())
if (!t->m_dirty.empty() || (r.width() == 0 && r.height() == 0))
{
return;
}

View File

@ -31,6 +31,8 @@ protected:
void Read(Target* t, const GSVector4i& r);
virtual bool CanConvertDepth() { return false; }
public:
GSTextureCache9(GSRenderer* r);
};

View File

@ -30,10 +30,8 @@ GSTextureCacheOGL::GSTextureCacheOGL(GSRenderer* r)
void GSTextureCacheOGL::Read(Target* t, const GSVector4i& r)
{
if(!t->m_dirty.empty())
{
if (!t->m_dirty.empty() || (r.width() == 0 && r.height() == 0))
return;
}
const GIFRegTEX0& TEX0 = t->m_TEX0;
@ -77,10 +75,8 @@ void GSTextureCacheOGL::Read(Target* t, const GSVector4i& r)
// Yes lots of logging, but I'm not confident with this code
GL_PUSH("Texture Cache Read. Format(0x%x)", TEX0.PSM);
GL_CACHE("TC: Read Back Target: %d (0x%x)[fmt: 0x%x]",
t->m_texture->GetID(), TEX0.TBP0, TEX0.PSM);
GL_PERF("Read texture from GPU. Format(0x%x)", TEX0.PSM);
GL_PERF("TC: Read Back Target: %d (0x%x)[fmt: 0x%x]. Size %dx%d",
t->m_texture->GetID(), TEX0.TBP0, TEX0.PSM, r.width(), r.height());
GSVector4 src = GSVector4(r) * GSVector4(t->m_texture->GetScale()).xyxy() / GSVector4(t->m_texture->GetSize()).xyxy();

View File

@ -28,10 +28,12 @@
class GSTextureCacheOGL : public GSTextureCache
{
protected:
int Get8bitFormat() { return GL_R8; /* TODO return DXGI_FORMAT_A8_UNORM;*/}
int Get8bitFormat() { return GL_R8;}
void Read(Target* t, const GSVector4i& r);
virtual bool IsOpenGL() { return true; }
public:
GSTextureCacheOGL(GSRenderer* r);
};

View File

@ -178,7 +178,7 @@ void GSDevice11::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSe
if(i == m_ps.end())
{
string str[18];
string str[20];
str[0] = format("%d", sel.fst);
str[1] = format("%d", sel.wms);
@ -198,6 +198,8 @@ void GSDevice11::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSe
str[15] = format("%d", sel.spritehack);
str[16] = format("%d", sel.tcoffsethack);
str[17] = format("%d", sel.point_sampler);
str[18] = format("%d", sel.shuffle);
str[19] = format("%d", sel.read_ba);
D3D11_SHADER_MACRO macro[] =
{
@ -219,6 +221,8 @@ void GSDevice11::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSe
{"PS_SPRITEHACK", str[15].c_str()},
{"PS_TCOFFSETHACK", str[16].c_str()},
{"PS_POINT_SAMPLER", str[17].c_str()},
{"PS_SHUFFLE", str[18].c_str() },
{"PS_READ_BA", str[19].c_str() },
{NULL, NULL},
};

View File

@ -64,7 +64,7 @@ namespace PboPool {
gl_GenBuffers(countof(m_pool), m_pool);
m_texture_storage = GLLoader::found_GL_ARB_buffer_storage;
// Code is really faster on MT driver. So far only nvidia support it
if (!(GLLoader::nvidia_buggy_driver && theApp.GetConfig("enable_nvidia_multi_thread", 1)))
if (!GLLoader::nvidia_buggy_driver)
m_texture_storage &= (theApp.GetConfig("ogl_texture_storage", 0) == 1);
for (size_t i = 0; i < countof(m_pool); i++) {

View File

@ -268,9 +268,9 @@ BEGIN
CONTROL "Allow 8-Bit Textures",IDC_PALTEX,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,10,243,82,10
CONTROL "Texture Filtering",IDC_FILTER,"Button",BS_AUTO3STATE | WS_TABSTOP,10,227,67,10
CONTROL "Enable Shade Boost",IDC_SHADEBOOST,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,10,186,79,10
CONTROL "Accurate Blend",IDC_ACCURATE_BLEND,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,10,259,67,10
CONTROL "Accurate Blend",IDC_ACCURATE_BLEND,"Button",BS_AUTO3STATE | WS_TABSTOP,10,259,67,10
CONTROL "Accurate Date",IDC_ACCURATE_DATE,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,92,259,67,10
CONTROL "Accurate color clipping",IDC_ACCURATE_COLCLIP,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,10,275,87,10
CONTROL "Accurate color clip",IDC_ACCURATE_COLCLIP,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,10,275,74,10
PUSHBUTTON "Settings...",IDC_SHADEBUTTON,92,183,75,14
CONTROL "Enable HW Hacks",IDC_HACKS_ENABLED,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,10,291,71,10
PUSHBUTTON "Configure...",IDC_HACKSBUTTON,92,288,75,14
@ -282,6 +282,7 @@ BEGIN
COMBOBOX IDC_AFCOMBO,93,304,35,30,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP
LTEXT "OpenCL Device:",IDC_STATIC,6,86,52,8
COMBOBOX IDC_OPENCL_DEVICE,70,84,111,118,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP
CONTROL "HW OGL Depth",IDC_TC_DEPTH,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,92,273,72,13
END

View File

@ -2078,6 +2078,9 @@
<ItemGroup>
<ResourceCompile Include="GSdx.rc" />
</ItemGroup>
<ItemGroup>
<Image Include="res\logo-ogl.bmp" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
@ -2086,4 +2089,4 @@
<UserProperties RESOURCE_FILE="GSdx.rc" />
</VisualStudio>
</ProjectExtensions>
</Project>
</Project>

View File

@ -720,9 +720,6 @@
</None>
<None Include="res\logo9.bmp">
<Filter>Resource Files</Filter>
</None>
<None Include="res\logo-ogl.bmp">
<Filter>Resource Files</Filter>
</None>
<None Include="res\convert.fx">
<Filter>Shaders</Filter>
@ -755,10 +752,18 @@
<None Include="res\tfx.cl">
<Filter>Shaders</Filter>
</None>
<None Include="res\logo_ogl.bmp">
<Filter>Resource Files</Filter>
</None>
</ItemGroup>
<ItemGroup>
<ResourceCompile Include="GSdx.rc">
<Filter>Resource Files</Filter>
</ResourceCompile>
</ItemGroup>
</Project>
<ItemGroup>
<Image Include="res\logo-ogl.bmp">
<Filter>Resource Files</Filter>
</Image>
</ItemGroup>
</Project>

View File

@ -70,6 +70,11 @@ layout(bindless_sampler, location = 0) uniform sampler2D TextureSampler;
layout(binding = 0) uniform sampler2D TextureSampler;
#endif
// Constant buffer for the conversion shaders (std140 layout, binding point 15).
// ScalingFactor.x / .y are used by ps_main14 as integer multipliers on the
// source texel coordinates before texelFetch.
// NOTE(review): .z / .w are not read by the shaders visible here -- confirm they are unused.
layout(std140, binding = 15) uniform cb15
{
ivec4 ScalingFactor;
};
vec4 sample_c()
{
return texture(TextureSampler, PSin_t );
@ -183,6 +188,101 @@ void ps_main12()
}
#endif
#ifdef ps_main13
out float gl_FragDepth;
// Color -> depth conversion: writes gl_FragDepth from the sampled RGB channels.
void ps_main13()
{
// Same as above but without the alpha channel
// Convert a RGBA texture into a float depth texture
// FIXME: I'm afraid of the accuracy
// Weights: R * 2^-24, G * 2^-16, B * 2^-8, A * 0 (alpha is ignored), all scaled by 255/256.
const vec4 bitSh = vec4(1.0/(256.0*256.0*256.0), 1.0/(256.0*256.0), 1.0/256.0, 0.0) * vec4(255.0/256.0);
gl_FragDepth = dot(sample_c(), bitSh);
}
#endif
#ifdef ps_main14
// Convert a 32 bits RGBA render target into a packed 8 bits index texture.
// For each output fragment, one channel of one source texel is selected and
// replicated on the 4 output channels.
void ps_main14()
{
// Potential speed optimization. There is a high probability that
// games only want to extract a single channel (blue). It would allow
// removing most of the conditional operations and yield a +2/3 fps
// boost on MGS3
//
// Hypothesis wrong in Prince of Persia ... Seriously WTF !
//#define ONLY_BLUE;
// Convert a RGBA texture into a 8 bits packed texture
// Input column: 8x2 RGBA pixels
// 0: 8 RGBA
// 1: 8 RGBA
// Output column: 16x4 Index pixels
// 0: 8 R | 8 B
// 1: 8 R | 8 B
// 2: 8 G | 8 A
// 3: 8 G | 8 A
float c;
// Position of the fragment inside its 16x16 output tile
uvec2 sel = uvec2(gl_FragCoord.xy) % uvec2(16u, 16u);
// Base source coordinate: drop the low 4 bits of x and the low 2 bits of y, then halve
ivec2 tb = ((ivec2(gl_FragCoord.xy) & ~ivec2(15, 3)) >> 1u);
// Source row: base row plus the parity of the output row
int ty = tb.y | (int(gl_FragCoord.y) & 1);
// Source x: txN uses the low 3 bits of the output x as is, txH uses them
// shifted by 4 (modulo 8), i.e. the other half of the 8-texel group
int txN = tb.x | (int(gl_FragCoord.x) & 7);
int txH = tb.x | ((int(gl_FragCoord.x) + 4) & 7);
// Scale the coordinates by the integer factors from cb15 before fetching
txN *= ScalingFactor.x;
txH *= ScalingFactor.x;
ty *= ScalingFactor.y;
// TODO investigate texture gather
vec4 cN = texelFetch(TextureSampler, ivec2(txN, ty), 0);
vec4 cH = texelFetch(TextureSampler, ivec2(txH, ty), 0);
// Bit 2 of sel.y selects which of the two fetched texels feeds which rows
if ((sel.y & 4u) == 0u) {
// Column 0 and 2
#ifdef ONLY_BLUE
c = cN.b;
#else
if ((sel.y & 3u) < 2u) {
// first 2 lines of the col
if (sel.x < 8u)
c = cN.r;
else
c = cN.b;
} else {
// last 2 lines of the col: G/A come from the shifted texel
if (sel.x < 8u)
c = cH.g;
else
c = cH.a;
}
#endif
} else {
#ifdef ONLY_BLUE
c = cH.b;
#else
// Column 1 and 3
if ((sel.y & 3u) < 2u) {
// first 2 lines of the col
if (sel.x < 8u)
c = cH.r;
else
c = cH.b;
} else {
// last 2 lines of the col: G/A come from the unshifted texel
if (sel.x < 8u)
c = cN.g;
else
c = cN.a;
}
#endif
}
// Replicate the selected channel on the 4 output channels
SV_Target0 = vec4(c);
}
#endif
#ifdef ps_main7
void ps_main7()
{

View File

@ -48,7 +48,10 @@ layout(binding = 3) uniform sampler2D RtSampler; // note 2 already use by the im
#if PS_DATE > 0
// FIXME how to declare memory access
layout(r32i, binding = 2) coherent uniform iimage2D img_prim_min;
layout(early_fragment_tests) in;
// Don't enable it. Discard fragment can still write in the depth buffer
// it breaks shadow in Shin Megami Tensei Nocturne
//layout(early_fragment_tests) in;
// I don't remember why I set this parameter but it is surely useless
//layout(pixel_center_integer) in vec4 gl_FragCoord;
#endif
@ -65,10 +68,12 @@ layout(std140, binding = 21) uniform cb21
vec2 MinF;
vec2 TA;
uvec4 MskFix;
vec4 Af;
uvec4 FbMask;
vec3 _not_yet_used;
float Af;
vec4 HalfTexel;
vec4 MinMax;
vec4 TC_OffsetHack;
vec2 TC_OffsetHack;
};
#ifdef SUBROUTINE_GL40
@ -392,6 +397,18 @@ vec4 ps_color()
return c;
}
// Apply the frame buffer write mask to the output color `c` (in/out).
// The body is only compiled when PS_FBMASK is non-zero, otherwise `c` is left untouched.
void ps_fbmask(inout vec4 c)
{
// FIXME do I need special case for 16 bits
#if PS_FBMASK
// Current content of the render target at this fragment position
vec4 rt = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0);
// Convert both colors to 0-255 integers (the +0.5 rounds to nearest)
uvec4 denorm_rt = uvec4(rt * 255.0f + 0.5f);
uvec4 denorm_c = uvec4(c * 255.0f + 0.5f);
// Bits set in FbMask keep the render target value, the others take the new color
c = vec4((denorm_c & ~FbMask) | (denorm_rt & FbMask)) / 255.0f;
#endif
}
#if PS_BLEND > 0
void ps_blend(inout vec4 c, in float As)
{
vec4 rt = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0);
@ -428,7 +445,7 @@ void ps_blend(inout vec4 c, in float As)
#elif PS_BLEND == 6
// 6 => *0120: (Cs - Cd)*F + Cs ==> Cs*(F + 1) - Cd*F
c.rgb = Cs * (Af.x + 1.0f) - Cd * Af.x;
c.rgb = Cs * (Af + 1.0f) - Cd * Af;
#elif PS_BLEND == 7
// 7 => *0200: (Cs - 0)*As + Cs ==> Cs*(As + 1)
@ -440,7 +457,7 @@ void ps_blend(inout vec4 c, in float As)
#elif PS_BLEND == 9
// 9 => *0220: (Cs - 0)*F + Cs ==> Cs*(F + 1)
c.rgb = Cs * (Af.x + 1.0f);
c.rgb = Cs * (Af + 1.0f);
#elif PS_BLEND == 10
// 10 => *1001: (Cd - Cs)*As + Cd ==> Cd*(As + 1) - Cs*As
@ -452,7 +469,7 @@ void ps_blend(inout vec4 c, in float As)
#elif PS_BLEND == 12
// 12 => *1021: (Cd - Cs)*F + Cd ==> Cd*(F + 1) - Cs*F
c.rgb = Cd * (Af.x + 1.0f) - Cs * Af.x;
c.rgb = Cd * (Af + 1.0f) - Cs * Af;
#elif PS_BLEND == 13
// 13 => 0101: (Cs - Cd)*As + Cd ==> Cs*As + Cd*(1 - As)
@ -472,11 +489,11 @@ void ps_blend(inout vec4 c, in float As)
#elif PS_BLEND == 17
// 17 => 0121: (Cs - Cd)*F + Cd ==> Cs*F + Cd*(1 - F)
c.rgb = Cs * Af.x + Cd * (1.0f - Af.x);
c.rgb = Cs * Af + Cd * (1.0f - Af);
#elif PS_BLEND == 18
// 18 => 0122: (Cs - Cd)*F + 0 ==> Cs*F - Cd*F
c.rgb = Cs * Af.x - Cd * Af.x;
c.rgb = Cs * Af - Cd * Af;
#elif PS_BLEND == 19
// 19 => 0201: (Cs - 0)*As + Cd ==> Cs*As + Cd
@ -496,11 +513,11 @@ void ps_blend(inout vec4 c, in float As)
#elif PS_BLEND == 23
// 23 => 0221: (Cs - 0)*F + Cd ==> Cs*F + Cd
c.rgb = Cs * Af.x + Cd;
c.rgb = Cs * Af + Cd;
#elif PS_BLEND == 24
// 24 => 0222: (Cs - 0)*F + 0 ==> Cs*F
c.rgb = Cs * Af.x;
c.rgb = Cs * Af;
#elif PS_BLEND == 25
// 25 => 1000: (Cd - Cs)*As + Cs ==> Cd*As + Cs*(1 - As)
@ -520,11 +537,11 @@ void ps_blend(inout vec4 c, in float As)
#elif PS_BLEND == 29
// 29 => 1020: (Cd - Cs)*F + Cs ==> Cd*F + Cs*(1 - F)
c.rgb = Cd * Af.x + Cs * (1.0f - Af.x);
c.rgb = Cd * Af + Cs * (1.0f - Af);
#elif PS_BLEND == 30
// 30 => 1022: (Cd - Cs)*F + 0 ==> Cd*F - Cs*F
c.rgb = Cd * Af.x - Cs * Af.x;
c.rgb = Cd * Af - Cs * Af;
#elif PS_BLEND == 31
// 31 => 1200: (Cd - 0)*As + Cs ==> Cs + Cd*As
@ -552,15 +569,15 @@ void ps_blend(inout vec4 c, in float As)
#elif PS_BLEND == 35
// 35 => 1220: (Cd - 0)*F + Cs ==> Cs + Cd*F
c.rgb = Cs + Cd * Af.x;
c.rgb = Cs + Cd * Af;
#elif PS_BLEND == 57
// C_CLR | 57 => #1221: (Cd - 0)*F + Cd ==> Cd*(1 + F)
c.rgb = Cd * (1.0f + Af.x);
c.rgb = Cd * (1.0f + Af);
#elif PS_BLEND == 36
// 36 => 1222: (Cd - 0)*F + 0 ==> Cd*F
c.rgb = Cd * Af.x;
c.rgb = Cd * Af;
#elif PS_BLEND == 37
// 37 => 2000: (0 - Cs)*As + Cs ==> Cs*(1 - As)
@ -588,15 +605,15 @@ void ps_blend(inout vec4 c, in float As)
#elif PS_BLEND == 43
// 43 => 2020: (0 - Cs)*F + Cs ==> Cs*(1 - F)
c.rgb = Cs * (1.0f - Af.x);
c.rgb = Cs * (1.0f - Af);
#elif PS_BLEND == 44
// 44 => 2021: (0 - Cs)*F + Cd ==> Cd - Cs*F
c.rgb = Cd - Cs * Af.x;
c.rgb = Cd - Cs * Af;
#elif PS_BLEND == 45
// 45 => 2022: (0 - Cs)*F + 0 ==> 0 - Cs*F
c.rgb = - Cs * Af.x;
c.rgb = - Cs * Af;
#elif PS_BLEND == 46
// 46 => 2100: (0 - Cd)*As + Cs ==> Cs - Cd*As
@ -624,15 +641,15 @@ void ps_blend(inout vec4 c, in float As)
#elif PS_BLEND == 52
// 52 => 2120: (0 - Cd)*F + Cs ==> Cs - Cd*F
c.rgb = Cs - Cd * Af.x;
c.rgb = Cs - Cd * Af;
#elif PS_BLEND == 53
// 53 => 2121: (0 - Cd)*F + Cd ==> Cd*(1 - F)
c.rgb = Cd * (1.0f - Af.x);
c.rgb = Cd * (1.0f - Af);
#elif PS_BLEND == 54
// 54 => 2122: (0 - Cd)*F + 0 ==> 0 - Cd*F
c.rgb = - Cd * Af.x;
c.rgb = - Cd * Af;
#endif
@ -644,19 +661,23 @@ void ps_blend(inout vec4 c, in float As)
c.rgb = clamp(c.rgb, vec3(0.0f), vec3(1.0f));
#endif
// Warning: normally blending equation is mult(A, B) = A * B >> 7. GPU have the full accuracy
// GS: Color = 1, Alpha = 255 => output 1
// GPU: Color = 1/255, Alpha = 255/255 * 255/128 => output 1.9921875
#if PS_DFMT == FMT_16
// In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania
// Basically we want to do 'c.rgb &= 0xF8' in denormalized mode
c.rgb = vec3(uvec3((c.rgb * 255.0f) + 256.5f) & uvec3(0xF8)) / 255.0f;
c.rgb = vec3(uvec3(c.rgb * 255.0f) & uvec3(0xF8)) / 255.0f;
#elif PS_COLCLIP == 3
// Basically we want to do 'c.rgb &= 0xFF' in denormalized mode
c.rgb = vec3(uvec3((c.rgb * 255.0f) + 256.5f) & uvec3(0xFF)) / 255.0f;
c.rgb = vec3(uvec3(c.rgb * 255.0f) & uvec3(0xFF)) / 255.0f;
#endif
// Don't compile => unable to find compatible overloaded function "mod(vec3)"
//c.rgb = mod((c.rgb * 255.0f) + 256.5f) / 255.0f;
}
#endif
void ps_main()
{
@ -687,7 +708,7 @@ void ps_main()
#endif
#if PS_DATE == 3 && !defined(DISABLE_GL42_image)
int stencil_ceil = imageLoad(img_prim_min, ivec2(gl_FragCoord.xy));
int stencil_ceil = imageLoad(img_prim_min, ivec2(gl_FragCoord.xy)).r;
// Note gl_PrimitiveID == stencil_ceil will be the primitive that will update
// the bad alpha value so we must keep it.
@ -708,6 +729,32 @@ void ps_main()
#endif
#if (APITRACE_DEBUG & 8) == 8
c.a = 0.5f;
#endif
#if PS_SHUFFLE
uvec4 denorm_c = uvec4(c * 255.0f + 0.5f);
uvec2 denorm_TA = uvec2(vec2(TA.xy) * 255.0f + 0.5f);
// Write RB part. Mask will take care of the correct destination
#if PS_READ_BA
c.rb = c.bb;
#else
c.rb = c.rr;
#endif
// Write GA part. Mask will take care of the correct destination
#if PS_READ_BA
if (bool(denorm_c.a & 0x80u))
c.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)) / 255.0f);
else
c.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)) / 255.0f);
#else
if (bool(denorm_c.g & 0x80u))
c.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)) / 255.0f);
else
c.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)) / 255.0f);
#endif
#endif
// Must be done before alpha correction
@ -743,6 +790,8 @@ void ps_main()
ps_blend(c, alpha);
#endif
ps_fbmask(c);
SV_Target0 = c;
SV_Target1 = vec4(alpha, alpha, alpha, alpha);
}

View File

@ -95,6 +95,11 @@ static const char* convert_glsl =
"layout(binding = 0) uniform sampler2D TextureSampler;\n"
"#endif\n"
"\n"
"layout(std140, binding = 15) uniform cb15\n"
"{\n"
" ivec4 ScalingFactor;\n"
"};\n"
"\n"
"vec4 sample_c()\n"
"{\n"
" return texture(TextureSampler, PSin_t );\n"
@ -208,6 +213,101 @@ static const char* convert_glsl =
"}\n"
"#endif\n"
"\n"
"#ifdef ps_main13\n"
"out float gl_FragDepth;\n"
"void ps_main13()\n"
"{\n"
" // Same as above but without the alpha channel\n"
"\n"
" // Convert a RRGBA texture into a float depth texture\n"
" // FIXME: I'm afraid of the accuracy\n"
" const vec4 bitSh = vec4(1.0/(256.0*256.0*256.0), 1.0/(256.0*256.0), 1.0/256.0, 0.0) * vec4(255.0/256.0);\n"
" gl_FragDepth = dot(sample_c(), bitSh);\n"
"}\n"
"#endif\n"
"\n"
"#ifdef ps_main14\n"
"void ps_main14()\n"
"{\n"
"\n"
" // Potential speed optimization. There is a high probability that\n"
" // game only want to extract a single channel (blue). It will allow\n"
" // to remove most of the conditional operation and yield a +2/3 fps\n"
" // boost on MGS3\n"
" //\n"
" // Hypothesis wrong in Prince of Persia ... Seriously WTF !\n"
"//#define ONLY_BLUE;\n"
"\n"
" // Convert a RGBA texture into a 8 bits packed texture\n"
" // Input column: 8x2 RGBA pixels\n"
" // 0: 8 RGBA\n"
" // 1: 8 RGBA\n"
" // Output column: 16x4 Index pixels\n"
" // 0: 8 R | 8 B\n"
" // 1: 8 R | 8 B\n"
" // 2: 8 G | 8 A\n"
" // 3: 8 G | 8 A\n"
" float c;\n"
"\n"
" uvec2 sel = uvec2(gl_FragCoord.xy) % uvec2(16u, 16u);\n"
" ivec2 tb = ((ivec2(gl_FragCoord.xy) & ~ivec2(15, 3)) >> 1u);\n"
"\n"
" int ty = tb.y | (int(gl_FragCoord.y) & 1);\n"
" int txN = tb.x | (int(gl_FragCoord.x) & 7);\n"
" int txH = tb.x | ((int(gl_FragCoord.x) + 4) & 7);\n"
"\n"
" txN *= ScalingFactor.x;\n"
" txH *= ScalingFactor.x;\n"
" ty *= ScalingFactor.y;\n"
"\n"
" // TODO investigate texture gather\n"
" vec4 cN = texelFetch(TextureSampler, ivec2(txN, ty), 0);\n"
" vec4 cH = texelFetch(TextureSampler, ivec2(txH, ty), 0);\n"
"\n"
"\n"
" if ((sel.y & 4u) == 0u) {\n"
" // Column 0 and 2\n"
"#ifdef ONLY_BLUE\n"
" c = cN.b;\n"
"#else\n"
" if ((sel.y & 3u) < 2u) {\n"
" // first 2 lines of the col\n"
" if (sel.x < 8u)\n"
" c = cN.r;\n"
" else\n"
" c = cN.b;\n"
" } else {\n"
" if (sel.x < 8u)\n"
" c = cH.g;\n"
" else\n"
" c = cH.a;\n"
" }\n"
"#endif\n"
" } else {\n"
"#ifdef ONLY_BLUE\n"
" c = cH.b;\n"
"#else\n"
" // Column 1 and 3\n"
" if ((sel.y & 3u) < 2u) {\n"
" // first 2 lines of the col\n"
" if (sel.x < 8u)\n"
" c = cH.r;\n"
" else\n"
" c = cH.b;\n"
" } else {\n"
" if (sel.x < 8u)\n"
" c = cN.g;\n"
" else\n"
" c = cN.a;\n"
" }\n"
"#endif\n"
" }\n"
"\n"
"\n"
" SV_Target0 = vec4(c);\n"
"}\n"
"#endif\n"
"\n"
"#ifdef ps_main7\n"
"void ps_main7()\n"
"{\n"
@ -827,7 +927,10 @@ static const char* tfx_fs_all_glsl =
"#if PS_DATE > 0\n"
"// FIXME how to declare memory access\n"
"layout(r32i, binding = 2) coherent uniform iimage2D img_prim_min;\n"
"layout(early_fragment_tests) in;\n"
"// Don't enable it. Discard fragment can still write in the depth buffer\n"
"// it breaks shadow in Shin Megami Tensei Nocturne\n"
"//layout(early_fragment_tests) in;\n"
"\n"
"// I don't remember why I set this parameter but it is surely useless\n"
"//layout(pixel_center_integer) in vec4 gl_FragCoord;\n"
"#endif\n"
@ -844,10 +947,12 @@ static const char* tfx_fs_all_glsl =
" vec2 MinF;\n"
" vec2 TA;\n"
" uvec4 MskFix;\n"
" vec4 Af;\n"
" uvec4 FbMask;\n"
" vec3 _not_yet_used;\n"
" float Af;\n"
" vec4 HalfTexel;\n"
" vec4 MinMax;\n"
" vec4 TC_OffsetHack;\n"
" vec2 TC_OffsetHack;\n"
"};\n"
"\n"
"#ifdef SUBROUTINE_GL40\n"
@ -1171,6 +1276,18 @@ static const char* tfx_fs_all_glsl =
" return c;\n"
"}\n"
"\n"
"void ps_fbmask(inout vec4 c)\n"
"{\n"
" // FIXME do I need special case for 16 bits\n"
"#if PS_FBMASK\n"
" vec4 rt = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0);\n"
" uvec4 denorm_rt = uvec4(rt * 255.0f + 0.5f);\n"
" uvec4 denorm_c = uvec4(c * 255.0f + 0.5f);\n"
" c = vec4((denorm_c & ~FbMask) | (denorm_rt & FbMask)) / 255.0f;\n"
"#endif\n"
"}\n"
"\n"
"#if PS_BLEND > 0\n"
"void ps_blend(inout vec4 c, in float As)\n"
"{\n"
" vec4 rt = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0);\n"
@ -1207,7 +1324,7 @@ static const char* tfx_fs_all_glsl =
"\n"
"#elif PS_BLEND == 6\n"
" // 6 => *0120: (Cs - Cd)*F + Cs ==> Cs*(F + 1) - Cd*F\n"
" c.rgb = Cs * (Af.x + 1.0f) - Cd * Af.x;\n"
" c.rgb = Cs * (Af + 1.0f) - Cd * Af;\n"
"\n"
"#elif PS_BLEND == 7\n"
" // 7 => *0200: (Cs - 0)*As + Cs ==> Cs*(As + 1)\n"
@ -1219,7 +1336,7 @@ static const char* tfx_fs_all_glsl =
"\n"
"#elif PS_BLEND == 9\n"
" // 9 => *0220: (Cs - 0)*F + Cs ==> Cs*(F + 1)\n"
" c.rgb = Cs * (Af.x + 1.0f);\n"
" c.rgb = Cs * (Af + 1.0f);\n"
"\n"
"#elif PS_BLEND == 10\n"
" // 10 => *1001: (Cd - Cs)*As + Cd ==> Cd*(As + 1) - Cs*As\n"
@ -1231,7 +1348,7 @@ static const char* tfx_fs_all_glsl =
"\n"
"#elif PS_BLEND == 12\n"
" // 12 => *1021: (Cd - Cs)*F + Cd ==> Cd*(F + 1) - Cs*F\n"
" c.rgb = Cd * (Af.x + 1.0f) - Cs * Af.x;\n"
" c.rgb = Cd * (Af + 1.0f) - Cs * Af;\n"
"\n"
"#elif PS_BLEND == 13\n"
" // 13 => 0101: (Cs - Cd)*As + Cd ==> Cs*As + Cd*(1 - As)\n"
@ -1251,11 +1368,11 @@ static const char* tfx_fs_all_glsl =
"\n"
"#elif PS_BLEND == 17\n"
" // 17 => 0121: (Cs - Cd)*F + Cd ==> Cs*F + Cd*(1 - F)\n"
" c.rgb = Cs * Af.x + Cd * (1.0f - Af.x);\n"
" c.rgb = Cs * Af + Cd * (1.0f - Af);\n"
"\n"
"#elif PS_BLEND == 18\n"
" // 18 => 0122: (Cs - Cd)*F + 0 ==> Cs*F - Cd*F\n"
" c.rgb = Cs * Af.x - Cd * Af.x;\n"
" c.rgb = Cs * Af - Cd * Af;\n"
"\n"
"#elif PS_BLEND == 19\n"
" // 19 => 0201: (Cs - 0)*As + Cd ==> Cs*As + Cd\n"
@ -1275,11 +1392,11 @@ static const char* tfx_fs_all_glsl =
"\n"
"#elif PS_BLEND == 23\n"
" // 23 => 0221: (Cs - 0)*F + Cd ==> Cs*F + Cd\n"
" c.rgb = Cs * Af.x + Cd;\n"
" c.rgb = Cs * Af + Cd;\n"
"\n"
"#elif PS_BLEND == 24\n"
" // 24 => 0222: (Cs - 0)*F + 0 ==> Cs*F\n"
" c.rgb = Cs * Af.x;\n"
" c.rgb = Cs * Af;\n"
"\n"
"#elif PS_BLEND == 25\n"
" // 25 => 1000: (Cd - Cs)*As + Cs ==> Cd*As + Cs*(1 - As)\n"
@ -1299,11 +1416,11 @@ static const char* tfx_fs_all_glsl =
"\n"
"#elif PS_BLEND == 29\n"
" // 29 => 1020: (Cd - Cs)*F + Cs ==> Cd*F + Cs*(1 - F)\n"
" c.rgb = Cd * Af.x + Cs * (1.0f - Af.x);\n"
" c.rgb = Cd * Af + Cs * (1.0f - Af);\n"
"\n"
"#elif PS_BLEND == 30\n"
" // 30 => 1022: (Cd - Cs)*F + 0 ==> Cd*F - Cs*F\n"
" c.rgb = Cd * Af.x - Cs * Af.x;\n"
" c.rgb = Cd * Af - Cs * Af;\n"
"\n"
"#elif PS_BLEND == 31\n"
" // 31 => 1200: (Cd - 0)*As + Cs ==> Cs + Cd*As\n"
@ -1331,15 +1448,15 @@ static const char* tfx_fs_all_glsl =
"\n"
"#elif PS_BLEND == 35\n"
" // 35 => 1220: (Cd - 0)*F + Cs ==> Cs + Cd*F\n"
" c.rgb = Cs + Cd * Af.x;\n"
" c.rgb = Cs + Cd * Af;\n"
"\n"
"#elif PS_BLEND == 57\n"
" // C_CLR | 57 => #1221: (Cd - 0)*F + Cd ==> Cd*(1 + F)\n"
" c.rgb = Cd * (1.0f + Af.x);\n"
" c.rgb = Cd * (1.0f + Af);\n"
"\n"
"#elif PS_BLEND == 36\n"
" // 36 => 1222: (Cd - 0)*F + 0 ==> Cd*F\n"
" c.rgb = Cd * Af.x;\n"
" c.rgb = Cd * Af;\n"
"\n"
"#elif PS_BLEND == 37\n"
" // 37 => 2000: (0 - Cs)*As + Cs ==> Cs*(1 - As)\n"
@ -1367,15 +1484,15 @@ static const char* tfx_fs_all_glsl =
"\n"
"#elif PS_BLEND == 43\n"
" // 43 => 2020: (0 - Cs)*F + Cs ==> Cs*(1 - F)\n"
" c.rgb = Cs * (1.0f - Af.x);\n"
" c.rgb = Cs * (1.0f - Af);\n"
"\n"
"#elif PS_BLEND == 44\n"
" // 44 => 2021: (0 - Cs)*F + Cd ==> Cd - Cs*F\n"
" c.rgb = Cd - Cs * Af.x;\n"
" c.rgb = Cd - Cs * Af;\n"
"\n"
"#elif PS_BLEND == 45\n"
" // 45 => 2022: (0 - Cs)*F + 0 ==> 0 - Cs*F\n"
" c.rgb = - Cs * Af.x;\n"
" c.rgb = - Cs * Af;\n"
"\n"
"#elif PS_BLEND == 46\n"
" // 46 => 2100: (0 - Cd)*As + Cs ==> Cs - Cd*As\n"
@ -1403,15 +1520,15 @@ static const char* tfx_fs_all_glsl =
"\n"
"#elif PS_BLEND == 52\n"
" // 52 => 2120: (0 - Cd)*F + Cs ==> Cs - Cd*F\n"
" c.rgb = Cs - Cd * Af.x;\n"
" c.rgb = Cs - Cd * Af;\n"
"\n"
"#elif PS_BLEND == 53\n"
" // 53 => 2121: (0 - Cd)*F + Cd ==> Cd*(1 - F)\n"
" c.rgb = Cd * (1.0f - Af.x);\n"
" c.rgb = Cd * (1.0f - Af);\n"
"\n"
"#elif PS_BLEND == 54\n"
" // 54 => 2122: (0 - Cd)*F + 0 ==> 0 - Cd*F\n"
" c.rgb = - Cd * Af.x;\n"
" c.rgb = - Cd * Af;\n"
"\n"
"#endif\n"
"\n"
@ -1423,19 +1540,23 @@ static const char* tfx_fs_all_glsl =
" c.rgb = clamp(c.rgb, vec3(0.0f), vec3(1.0f));\n"
"#endif\n"
"\n"
" // Warning: normally blending equation is mult(A, B) = A * B >> 7. GPU have the full accuracy\n"
" // GS: Color = 1, Alpha = 255 => output 1\n"
" // GPU: Color = 1/255, Alpha = 255/255 * 255/128 => output 1.9921875\n"
"#if PS_DFMT == FMT_16\n"
" // In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania\n"
"\n"
" // Basically we want to do 'c.rgb &= 0xF8' in denormalized mode\n"
" c.rgb = vec3(uvec3((c.rgb * 255.0f) + 256.5f) & uvec3(0xF8)) / 255.0f;\n"
" c.rgb = vec3(uvec3(c.rgb * 255.0f) & uvec3(0xF8)) / 255.0f;\n"
"#elif PS_COLCLIP == 3\n"
" // Basically we want to do 'c.rgb &= 0xFF' in denormalized mode\n"
" c.rgb = vec3(uvec3((c.rgb * 255.0f) + 256.5f) & uvec3(0xFF)) / 255.0f;\n"
" c.rgb = vec3(uvec3(c.rgb * 255.0f) & uvec3(0xFF)) / 255.0f;\n"
"#endif\n"
"\n"
" // Don't compile => unable to find compatible overloaded function \"mod(vec3)\"\n"
" //c.rgb = mod((c.rgb * 255.0f) + 256.5f) / 255.0f;\n"
"}\n"
"#endif\n"
"\n"
"void ps_main()\n"
"{\n"
@ -1466,7 +1587,7 @@ static const char* tfx_fs_all_glsl =
"#endif\n"
"\n"
"#if PS_DATE == 3 && !defined(DISABLE_GL42_image)\n"
" int stencil_ceil = imageLoad(img_prim_min, ivec2(gl_FragCoord.xy));\n"
" int stencil_ceil = imageLoad(img_prim_min, ivec2(gl_FragCoord.xy)).r;\n"
" // Note gl_PrimitiveID == stencil_ceil will be the primitive that will update\n"
" // the bad alpha value so we must keep it.\n"
"\n"
@ -1489,6 +1610,32 @@ static const char* tfx_fs_all_glsl =
" c.a = 0.5f;\n"
"#endif\n"
"\n"
"#if PS_SHUFFLE\n"
" uvec4 denorm_c = uvec4(c * 255.0f + 0.5f);\n"
" uvec2 denorm_TA = uvec2(vec2(TA.xy) * 255.0f + 0.5f);\n"
"\n"
" // Write RB part. Mask will take care of the correct destination\n"
"#if PS_READ_BA\n"
" c.rb = c.bb;\n"
"#else\n"
" c.rb = c.rr;\n"
"#endif\n"
"\n"
" // Write GA part. Mask will take care of the correct destination\n"
"#if PS_READ_BA\n"
" if (bool(denorm_c.a & 0x80u))\n"
" c.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)) / 255.0f);\n"
" else\n"
" c.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)) / 255.0f);\n"
"#else\n"
" if (bool(denorm_c.g & 0x80u))\n"
" c.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)) / 255.0f);\n"
" else\n"
" c.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)) / 255.0f);\n"
"#endif\n"
"\n"
"#endif\n"
"\n"
" // Must be done before alpha correction\n"
" float alpha = c.a * 255.0f / 128.0f;\n"
"\n"
@ -1522,6 +1669,8 @@ static const char* tfx_fs_all_glsl =
" ps_blend(c, alpha);\n"
"#endif\n"
"\n"
" ps_fbmask(c);\n"
"\n"
" SV_Target0 = c;\n"
" SV_Target1 = vec4(alpha, alpha, alpha, alpha);\n"
"}\n"

View File

@ -39,6 +39,8 @@
#define PS_SPRITEHACK 0
#define PS_TCOFFSETHACK 0
#define PS_POINT_SAMPLER 0
#define PS_SHUFFLE 0
#define PS_READ_BA 0
#endif
struct VS_INPUT
@ -712,6 +714,38 @@ PS_OUTPUT ps_main(PS_INPUT input)
PS_OUTPUT output;
// Texture shuffle: rebuild the output colour from one pair of sampled
// channels. The R/B outputs both receive the "low" channel of the pair and the
// G/A outputs both receive the "high" channel with its top bit replaced from
// TA, matching the PS_SHUFFLE path of the OpenGL tfx shader.
// The write mask takes care of the correct destination channels.
if (PS_SHUFFLE)
{
	// Work on 0..255 integers so individual bits can be masked.
	uint4 denorm_c = uint4(c * 255.0f + 0.5f);
	uint2 denorm_TA = uint2(float2(TA.xy) * 255.0f + 0.5f);

	// Select the source pair: (B, A) when PS_READ_BA is set, (R, G) otherwise.
	uint src_hi;
	if (PS_READ_BA)
	{
		c.rb = c.bb;
		src_hi = denorm_c.a;
	}
	else
	{
		c.rb = c.rr;
		src_hi = denorm_c.g;
	}

	// Keep the low 7 bits of the high channel; its top bit picks TA.y (set)
	// or TA.x (clear), whose own top bit becomes the new top bit.
	uint ta_bit = (((src_hi & 0x80u) != 0) ? denorm_TA.y : denorm_TA.x) & 0x80u;
	float ga = float((src_hi & 0x7Fu) | ta_bit) / 255.0f;

	// Write the same value to G and A. Previously G was set to the untouched
	// sampled alpha in both paths, so G never carried the TA-patched value and
	// was taken from the wrong channel when PS_READ_BA is 0.
	c.g = ga;
	c.a = ga;
}
output.c1 = c.a * 2; // used for alpha blending
if(PS_AOUT) // 16 bit output

View File

@ -79,6 +79,8 @@
#define IDC_ROUND_SPRITE 2095
#define IDC_ALIGN_SPRITE 2096
#define IDC_CRC_LEVEL 2097
#define IDC_CHECK1 2098
#define IDC_TC_DEPTH 2099
#define IDC_COLORSPACE 3000
#define IDR_CONVERT_FX 10000
#define IDR_TFX_FX 10001
@ -99,7 +101,7 @@
#ifndef APSTUDIO_READONLY_SYMBOLS
#define _APS_NEXT_RESOURCE_VALUE 10013
#define _APS_NEXT_COMMAND_VALUE 32771
#define _APS_NEXT_CONTROL_VALUE 2099
#define _APS_NEXT_CONTROL_VALUE 2100
#define _APS_NEXT_SYMED_VALUE 5000
#endif
#endif