GS/HW: Texture cache improvements

Change texture scale from vector to scalar

 - Independent X and Y scaling is long gone.
 - Also separate size and scale in TC lookup

Move clear value for texture to base class

Align heights to page size

 - Since FRAME and Z are in page units, we can't have two targets
   overlapping within the same page.
 - Stops some small resizes too.
 - Test cases: Genji and Spider-Man 2 shadows.

Don't modify target TEX0 on shuffle/clear

Move upscale multiplier to uniform

Make P8 conversion page-aware

Fix incorrect depth preload shader

Improve HLE of texture shuffles

When a texture shuffle is split into two half-screen draws, we skip the
first, and draw the whole thing in the second, taking care of when both
the texture and framebuffer are offset.
This commit is contained in:
Stenzek 2023-03-12 21:05:25 +10:00 committed by refractionpcsx2
parent 2fdea258fa
commit 435e73d838
50 changed files with 967 additions and 646 deletions

View File

@ -518,7 +518,6 @@ SCAJ-20010:
name: "Bakusou Dekotora Densetsu - Otoko Hanamichi Yume Roman"
region: "NTSC-Unk"
gsHWFixes:
getSkipCount: "GSC_BigMuthaTruckers"
beforeDraw: "OI_BigMuthaTruckers"
SCAJ-20011:
name: "Armored Core 3 - Silent Line"
@ -12696,7 +12695,6 @@ SLES-51355:
region: "PAL-M5"
compat: 5
gsHWFixes:
getSkipCount: "GSC_BigMuthaTruckers"
beforeDraw: "OI_BigMuthaTruckers"
SLES-51356:
name: "Road Trip Adventure"
@ -28745,7 +28743,6 @@ SLPM-65234:
name: "Bakusou Dekotora Densetsu - Otoko Hanamichi Yume Roman"
region: "NTSC-J"
gsHWFixes:
getSkipCount: "GSC_BigMuthaTruckers"
beforeDraw: "OI_BigMuthaTruckers"
SLPM-65235:
name: "New Roommania - Porori Seishun"
@ -42385,7 +42382,6 @@ SLUS-20291:
region: "NTSC-U"
compat: 5
gsHWFixes:
getSkipCount: "GSC_BigMuthaTruckers"
beforeDraw: "OI_BigMuthaTruckers"
SLUS-20292:
name: "Tsugunai - Atonement"
@ -43904,7 +43900,6 @@ SLUS-20605:
name: "Big Mutha Truckers"
region: "NTSC-U"
gsHWFixes:
getSkipCount: "GSC_BigMuthaTruckers"
beforeDraw: "OI_BigMuthaTruckers"
SLUS-20606:
name: "Bounty Hunter - Seek & Destroy"

View File

@ -1,9 +1,5 @@
#ifdef SHADER_MODEL // make safe to include in resource file to enforce dependency
#ifndef PS_SCALE_FACTOR
#define PS_SCALE_FACTOR 1
#endif
struct VS_INPUT
{
float4 p : POSITION;
@ -24,7 +20,6 @@ cbuffer cb0 : register(b0)
int EMODA;
int EMODC;
int DOFFSET;
int cb0_pad;
};
static const float3x3 rgb2yuv =
@ -274,16 +269,25 @@ PS_OUTPUT ps_convert_rgba_8i(PS_INPUT input)
uint2 subblock = pos & uint2(7u, 1u);
uint2 coord = block | subblock;
// Compensate for potentially differing page pitch.
uint SBW = uint(EMODA);
uint DBW = uint(EMODC);
uint2 block_xy = coord / uint2(64, 32);
uint block_num = (block_xy.y * (DBW / 128)) + block_xy.x;
uint2 block_offset = uint2((block_num % (SBW / 64)) * 64, (block_num / (SBW / 64)) * 32);
coord = (coord % uint2(64, 32)) + block_offset;
// Apply offset to cols 1 and 2
uint is_col23 = pos.y & 4u;
uint is_col13 = pos.y & 2u;
uint is_col12 = is_col23 ^ (is_col13 << 1);
coord.x ^= is_col12; // If cols 1 or 2, flip bit 3 of x
if (floor(PS_SCALE_FACTOR) != PS_SCALE_FACTOR)
coord = uint2(float2(coord) * PS_SCALE_FACTOR);
float ScaleFactor = BGColor.x;
if (floor(ScaleFactor) != ScaleFactor)
coord = uint2(float2(coord) * ScaleFactor);
else
coord *= PS_SCALE_FACTOR;
coord *= uint(ScaleFactor);
float4 pixel = Texture.Load(int3(int2(coord), 0));
float2 sel0 = (pos.y & 2u) == 0u ? pixel.rb : pixel.ga;
@ -295,7 +299,7 @@ PS_OUTPUT ps_convert_rgba_8i(PS_INPUT input)
PS_OUTPUT ps_convert_clut_4(PS_INPUT input)
{
// Borrowing the YUV constant buffer.
float2 scale = BGColor.xy;
float scale = BGColor.x;
uint2 offset = uint2(uint(EMODA), uint(EMODC)) + uint(DOFFSET);
// CLUT4 is easy, just two rows of 8x8.
@ -310,7 +314,7 @@ PS_OUTPUT ps_convert_clut_4(PS_INPUT input)
PS_OUTPUT ps_convert_clut_8(PS_INPUT input)
{
float2 scale = BGColor.xy;
float scale = BGColor.x;
uint2 offset = uint2(uint(EMODA), uint(EMODC));
uint index = min(uint(input.p.x) + uint(DOFFSET), 255u);

View File

@ -46,7 +46,6 @@
#define PS_CHANNEL_FETCH 0
#define PS_TALES_OF_ABYSS_HLE 0
#define PS_URBAN_CHAOS_HLE 0
#define PS_SCALE_FACTOR 1.0
#define PS_HDR 0
#define PS_COLCLIP 0
#define PS_BLEND_A 0
@ -171,6 +170,8 @@ cbuffer cb1
float2 TC_OffsetHack;
float2 STScale;
float4x4 DitherMatrix;
float ScaledScaleFactor;
float RcpScaleFactor;
};
float4 sample_c(float2 uv, float uv_w)
@ -402,7 +403,7 @@ int2 clamp_wrap_uv_depth(int2 uv)
float4 sample_depth(float2 st, float2 pos)
{
float2 uv_f = (float2)clamp_wrap_uv_depth(int2(st)) * (float2)PS_SCALE_FACTOR * (float2)(1.0f / 16.0f);
float2 uv_f = (float2)clamp_wrap_uv_depth(int2(st)) * (float2)ScaledScaleFactor;
int2 uv = (int2)uv_f;
float4 t = (float4)(0.0f);
@ -742,7 +743,7 @@ void ps_dither(inout float3 C, float2 pos_xy)
if (PS_DITHER == 2)
fpos = int2(pos_xy);
else
fpos = int2(pos_xy / (float)PS_SCALE_FACTOR);
fpos = int2(pos_xy * RcpScaleFactor);
float value = DitherMatrix[fpos.x & 3][fpos.y & 3];
if (PS_ROUND_INV)

View File

@ -88,6 +88,9 @@ layout(std140, binding = 0) uniform cb21
vec2 STScale;
mat4 DitherMatrix;
float ScaledScaleFactor;
float RcpScaleFactor;
};
#endif

View File

@ -234,6 +234,10 @@ void ps_convert_rgb5a1_float16_biln()
#endif
#ifdef ps_convert_rgba_8i
uniform uint SBW;
uniform uint DBW;
uniform float ScaleFactor;
void ps_convert_rgba_8i()
{
// Convert a RGBA texture into a 8 bits packed texture
@ -252,16 +256,22 @@ void ps_convert_rgba_8i()
uvec2 subblock = pos & uvec2(7u, 1u);
uvec2 coord = block | subblock;
// Compensate for potentially differing page pitch.
uvec2 block_xy = coord / uvec2(64u, 32u);
uint block_num = (block_xy.y * (DBW / 128u)) + block_xy.x;
uvec2 block_offset = uvec2((block_num % (SBW / 64u)) * 64u, (block_num / (SBW / 64u)) * 32u);
coord = (coord % uvec2(64u, 32u)) + block_offset;
// Apply offset to cols 1 and 2
uint is_col23 = pos.y & 4u;
uint is_col13 = pos.y & 2u;
uint is_col12 = is_col23 ^ (is_col13 << 1);
coord.x ^= is_col12; // If cols 1 or 2, flip bit 3 of x
if (floor(PS_SCALE_FACTOR) != PS_SCALE_FACTOR)
coord = uvec2(vec2(coord) * PS_SCALE_FACTOR);
if (floor(ScaleFactor) != ScaleFactor)
coord = uvec2(vec2(coord) * ScaleFactor);
else
coord *= uvec2(PS_SCALE_FACTOR);
coord *= uvec2(ScaleFactor);
vec4 pixel = texelFetch(TextureSampler, ivec2(coord), 0);
vec2 sel0 = (pos.y & 2u) == 0u ? pixel.rb : pixel.ga;
@ -316,7 +326,7 @@ void ps_hdr_resolve()
#ifdef ps_convert_clut_4
uniform uvec3 offset;
uniform vec2 scale;
uniform float scale;
void ps_convert_clut_4()
{
@ -324,14 +334,14 @@ void ps_convert_clut_4()
uint index = uint(gl_FragCoord.x) + offset.z;
uvec2 pos = uvec2(index % 8u, index / 8u);
ivec2 final = ivec2(floor(vec2(offset.xy + pos) * scale));
ivec2 final = ivec2(floor(vec2(offset.xy + pos) * vec2(scale)));
SV_Target0 = texelFetch(TextureSampler, final, 0);
}
#endif
#ifdef ps_convert_clut_8
uniform uvec3 offset;
uniform vec2 scale;
uniform float scale;
void ps_convert_clut_8()
{
@ -344,7 +354,7 @@ void ps_convert_clut_8()
pos.x = (index % 8u) + ((subgroup >= 2u) ? 8u : 0u);
pos.y = ((index / 32u) * 2u) + (subgroup % 2u);
ivec2 final = ivec2(floor(vec2(offset.xy + pos) * scale));
ivec2 final = ivec2(floor(vec2(offset.xy + pos) * vec2(scale)));
SV_Target0 = texelFetch(TextureSampler, final, 0);
}
#endif

View File

@ -328,7 +328,7 @@ ivec2 clamp_wrap_uv_depth(ivec2 uv)
vec4 sample_depth(vec2 st)
{
vec2 uv_f = vec2(clamp_wrap_uv_depth(ivec2(st))) * vec2(float(PS_SCALE_FACTOR)) * vec2(1.0f/16.0f);
vec2 uv_f = vec2(clamp_wrap_uv_depth(ivec2(st))) * vec2(ScaledScaleFactor);
ivec2 uv = ivec2(uv_f);
vec4 t = vec4(0.0f);
@ -652,7 +652,7 @@ void ps_dither(inout vec3 C)
#if PS_DITHER == 2
ivec2 fpos = ivec2(gl_FragCoord.xy);
#else
ivec2 fpos = ivec2(gl_FragCoord.xy / float(PS_SCALE_FACTOR));
ivec2 fpos = ivec2(gl_FragCoord.xy * RcpScaleFactor);
#endif
float value = DitherMatrix[fpos.y&3][fpos.x&3];
#if PS_ROUND_INV

View File

@ -77,7 +77,7 @@ void vs_main()
VSout.t_float.z = i_f.x; // pack for with texture
#if VS_POINT_SIZE
gl_PointSize = float(VS_POINT_SIZE_VALUE);
gl_PointSize = PointSize.x;
#endif
}

View File

@ -1,7 +1,3 @@
#ifndef PS_SCALE_FACTOR
#define PS_SCALE_FACTOR 1.0
#endif
#ifdef VERTEX_SHADER
layout(location = 0) in vec4 a_pos;
@ -244,6 +240,15 @@ void ps_convert_rgb5a1_float16_biln()
#endif
#ifdef ps_convert_rgba_8i
layout(push_constant) uniform cb10
{
uint SBW;
uint DBW;
uvec2 cb_pad1;
float ScaleFactor;
vec3 cb_pad2;
};
void ps_convert_rgba_8i()
{
// Convert a RGBA texture into a 8 bits packed texture
@ -255,37 +260,45 @@ void ps_convert_rgba_8i()
// 1: 8 R | 8 B
// 2: 8 G | 8 A
// 3: 8 G | 8 A
uvec2 pos = uvec2(gl_FragCoord.xy);
uvec2 pos = uvec2(gl_FragCoord.xy);
// Collapse separate R G B A areas into their base pixel
uvec2 block = (pos & ~uvec2(15u, 3u)) >> 1;
uvec2 subblock = pos & uvec2(7u, 1u);
uvec2 coord = block | subblock;
// Collapse separate R G B A areas into their base pixel
uvec2 block = (pos & ~uvec2(15u, 3u)) >> 1;
uvec2 subblock = pos & uvec2(7u, 1u);
uvec2 coord = block | subblock;
// Apply offset to cols 1 and 2
uint is_col23 = pos.y & 4u;
uint is_col13 = pos.y & 2u;
uint is_col12 = is_col23 ^ (is_col13 << 1);
coord.x ^= is_col12; // If cols 1 or 2, flip bit 3 of x
// Compensate for potentially differing page pitch.
uvec2 block_xy = coord / uvec2(64u, 32u);
uint block_num = (block_xy.y * (DBW / 128u)) + block_xy.x;
uvec2 block_offset = uvec2((block_num % (SBW / 64u)) * 64u, (block_num / (SBW / 64u)) * 32u);
coord = (coord % uvec2(64u, 32u)) + block_offset;
if (floor(PS_SCALE_FACTOR) != PS_SCALE_FACTOR)
coord = uvec2(vec2(coord) * PS_SCALE_FACTOR);
else
coord *= uvec2(PS_SCALE_FACTOR);
// Apply offset to cols 1 and 2
uint is_col23 = pos.y & 4u;
uint is_col13 = pos.y & 2u;
uint is_col12 = is_col23 ^ (is_col13 << 1);
coord.x ^= is_col12; // If cols 1 or 2, flip bit 3 of x
vec4 pixel = texelFetch(samp0, ivec2(coord), 0);
vec2 sel0 = (pos.y & 2u) == 0u ? pixel.rb : pixel.ga;
float sel1 = (pos.x & 8u) == 0u ? sel0.x : sel0.y;
o_col0 = vec4(sel1); // Divide by something here?
if (floor(ScaleFactor) != ScaleFactor)
coord = uvec2(vec2(coord) * ScaleFactor);
else
coord *= uvec2(ScaleFactor);
vec4 pixel = texelFetch(samp0, ivec2(coord), 0);
vec2 sel0 = (pos.y & 2u) == 0u ? pixel.rb : pixel.ga;
float sel1 = (pos.x & 8u) == 0u ? sel0.x : sel0.y;
o_col0 = vec4(sel1); // Divide by something here?
}
#endif
#ifdef ps_convert_clut_4
layout(push_constant) uniform cb10
{
vec2 scale;
uvec2 offset;
uint doffset;
uint cb_pad1;
float scale;
vec3 cb_pad2;
};
void ps_convert_clut_4()
@ -294,7 +307,7 @@ void ps_convert_clut_4()
uint index = uint(gl_FragCoord.x) + doffset;
uvec2 pos = uvec2(index % 8u, index / 8u);
ivec2 final = ivec2(floor(vec2(offset + pos) * scale));
ivec2 final = ivec2(floor(vec2(offset + pos) * vec2(scale)));
o_col0 = texelFetch(samp0, final, 0);
}
#endif
@ -302,9 +315,11 @@ void ps_convert_clut_4()
#ifdef ps_convert_clut_8
layout(push_constant) uniform cb10
{
vec2 scale;
uvec2 offset;
uint doffset;
uint cb_pad1;
float scale;
vec3 cb_pad2;
};
void ps_convert_clut_8()
@ -318,7 +333,7 @@ void ps_convert_clut_8()
pos.x = (index % 8u) + ((subgroup >= 2u) ? 8u : 0u);
pos.y = ((index / 32u) * 2u) + (subgroup % 2u);
ivec2 final = ivec2(floor(vec2(offset + pos) * scale));
ivec2 final = ivec2(floor(vec2(offset + pos) * vec2(scale)));
o_col0 = texelFetch(samp0, final, 0);
}
#endif

View File

@ -78,7 +78,7 @@ void main()
#endif
#if VS_POINT_SIZE
gl_PointSize = float(VS_POINT_SIZE_VALUE);
gl_PointSize = PointSize.x;
#endif
vsOut.c = a_c;
@ -336,7 +336,6 @@ void main()
#define PS_CHANNEL_FETCH 0
#define PS_TALES_OF_ABYSS_HLE 0
#define PS_URBAN_CHAOS_HLE 0
#define PS_SCALE_FACTOR 1.0
#define PS_HDR 0
#define PS_COLCLIP 0
#define PS_BLEND_A 0
@ -373,6 +372,8 @@ layout(std140, set = 0, binding = 1) uniform cb1
vec2 TC_OffsetHack;
vec2 STScale;
mat4 DitherMatrix;
float ScaledScaleFactor;
float RcpScaleFactor;
};
layout(location = 0) in VSOutput
@ -592,7 +593,11 @@ vec4 fetch_raw_color(ivec2 xy)
// Fetches one texel for the integer coordinate uv.
// With PS_TEX_IS_FB enabled the result comes from sample_from_rt() and uv is ignored
// (presumably the sampled texture is the current render target - confirm against
// sample_from_rt()). Otherwise mip level 0 of Texture is read with texelFetch.
vec4 fetch_c(ivec2 uv)
{
#if PS_TEX_IS_FB
return sample_from_rt();
#else
return texelFetch(Texture, uv, 0);
#endif
}
//////////////////////////////////////////////////////////////////////
@ -641,7 +646,7 @@ ivec2 clamp_wrap_uv_depth(ivec2 uv)
vec4 sample_depth(vec2 st, ivec2 pos)
{
vec2 uv_f = vec2(clamp_wrap_uv_depth(ivec2(st))) * vec2(PS_SCALE_FACTOR) * vec2(1.0f / 16.0f);
vec2 uv_f = vec2(clamp_wrap_uv_depth(ivec2(st))) * vec2(ScaledScaleFactor);
ivec2 uv = ivec2(uv_f);
vec4 t = vec4(0.0f);
@ -969,7 +974,7 @@ void ps_dither(inout vec3 C)
#if PS_DITHER == 2
fpos = ivec2(gl_FragCoord.xy);
#else
fpos = ivec2(gl_FragCoord.xy / float(PS_SCALE_FACTOR));
fpos = ivec2(gl_FragCoord.xy * RcpScaleFactor);
#endif
float value = DitherMatrix[fpos.y & 3][fpos.x & 3];

View File

@ -725,7 +725,6 @@ void GSUpdateConfig(const Pcsx2Config::GSOptions& new_config)
// Options which aren't using the global struct yet, so we need to recreate all GS objects.
if (
GSConfig.UpscaleMultiplier != old_config.UpscaleMultiplier ||
GSConfig.SWExtraThreads != old_config.SWExtraThreads ||
GSConfig.SWExtraThreadsHeight != old_config.SWExtraThreadsHeight)
{

View File

@ -406,6 +406,7 @@ void GSClut::Read32(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
u32 CBW;
GSVector2i offset;
GSVector2i size;
float scale;
if (!TEX0.CSM)
{
CBW = 0; // don't care
@ -422,7 +423,7 @@ void GSClut::Read32(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
size.y = 1;
}
GSTexture* src = g_gs_renderer->LookupPaletteSource(TEX0.CBP, TEX0.CPSM, CBW, offset, size);
GSTexture* src = g_gs_renderer->LookupPaletteSource(TEX0.CBP, TEX0.CPSM, CBW, offset, &scale, size);
if (src)
{
GSTexture* dst = is_4bit ? m_gpu_clut4 : m_gpu_clut8;
@ -438,7 +439,7 @@ void GSClut::Read32(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{
GL_PUSH("Update GPU CLUT [CBP=%04X, CPSM=%s, CBW=%u, CSA=%u, Offset=(%d,%d)]",
TEX0.CBP, psm_str(TEX0.CPSM), CBW, TEX0.CSA, offset.x, offset.y);
g_gs_device->UpdateCLUTTexture(src, offset.x, offset.y, dst, dOffset, dst_size);
g_gs_device->UpdateCLUTTexture(src, scale, offset.x, offset.y, dst, dOffset, dst_size);
m_current_gpu_clut = dst;
}
}

View File

@ -28,6 +28,7 @@
#define PAGE_SIZE 8192u
#define BLOCK_SIZE 256u
#define COLUMN_SIZE 64u
#define BLOCKS_PER_PAGE (PAGE_SIZE / BLOCK_SIZE)
#define MAX_PAGES (VM_SIZE / PAGE_SIZE)
#define MAX_BLOCKS (VM_SIZE / BLOCK_SIZE)

View File

@ -100,6 +100,7 @@ GSState::GSState()
m_mipmap = GSConfig.Mipmap;
s_n = 0;
s_transfer_n = 0;
memset(&m_v, 0, sizeof(m_v));
memset(&m_vertex, 0, sizeof(m_vertex));
@ -1780,7 +1781,7 @@ void GSState::Write(const u8* mem, int len)
m_draw_transfers.push_back(new_transfer);
}
GL_CACHE("Write! ... => 0x%x W:%d F:%s (DIR %d%d), dPos(%d %d) size(%d %d)",
GL_CACHE("Write! %u ... => 0x%x W:%d F:%s (DIR %d%d), dPos(%d %d) size(%d %d)", s_transfer_n,
blit.DBP, blit.DBW, psm_str(blit.DPSM),
m_env.TRXPOS.DIRX, m_env.TRXPOS.DIRY,
m_env.TRXPOS.DSAX, m_env.TRXPOS.DSAY, w, h);
@ -3123,8 +3124,8 @@ __forceinline void GSState::VertexKick(u32 skip)
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
case GS_SPRITE:
// FIXME: GREG I don't understand the purpose of the m_nativeres check
// It impacts badly the number of draw call in the HW renderer.
// Discard degenerate triangles. For native resolution, we can ignore the subpixel bits,
// because at the boundaries, they're irrelevant.
test |= m_nativeres ? pmin.eq16(pmax).zwzwl() : pmin.eq16(pmax);
break;
default:

View File

@ -1547,6 +1547,11 @@ public:
return GSVector4i(_mm_castps_si128(_mm_loadh_pi(_mm_castsi128_ps(v.m), (__m64*)p)));
}
// Convenience overload: forwards the address of the GSVector2i to the
// pointer-taking loadh() overload, so callers can pass a 2-component vector directly.
__forceinline static GSVector4i loadh(const GSVector2i& v)
{
return loadh(&v);
}
__forceinline static GSVector4i load(const void* pl, const void* ph)
{
return loadh(ph, loadl(pl));

View File

@ -126,7 +126,7 @@ GSTexture* GSDevice::FetchSurface(GSTexture::Type type, int width, int height, i
if (t->GetType() == type && t->GetFormat() == format && t->GetSize() == size && t->GetMipmapLevels() == levels)
{
if (!prefer_new_texture || t->last_frame_used != m_frame)
if (!prefer_new_texture || t->GetLastFrameUsed() != m_frame)
{
m_pool_memory_usage -= t->GetMemUsage();
m_pool.erase(i);
@ -157,8 +157,6 @@ GSTexture* GSDevice::FetchSurface(GSTexture::Type type, int width, int height, i
}
}
t->SetScale(GSVector2(1, 1)); // Things seem to assume that all textures come out of here with scale 1...
switch (type)
{
case GSTexture::Type::RenderTarget:
@ -194,7 +192,7 @@ void GSDevice::Recycle(GSTexture* t)
if (!t)
return;
t->last_frame_used = m_frame;
t->SetLastFrameUsed(m_frame);
m_pool.push_front(t);
m_pool_memory_usage += t->GetMemUsage();
@ -214,7 +212,7 @@ void GSDevice::AgePool()
{
m_frame++;
while (m_pool.size() > 40 && m_frame - m_pool.back()->last_frame_used > 10)
while (m_pool.size() > 40 && m_frame - m_pool.back()->GetLastFrameUsed() > 10)
{
m_pool_memory_usage -= m_pool.back()->GetMemUsage();
delete m_pool.back();

View File

@ -184,7 +184,8 @@ public:
GSVector4 BGColor;
u32 EMODA;
u32 EMODC;
u32 pad[2];
u32 DOFFSET;
float ScaleFactor;
};
class InterlaceConstantBuffer
@ -568,6 +569,8 @@ struct alignas(16) GSHWDrawConfig
GSVector4 DitherMatrix[4];
GSVector4 ScaleFactor;
__fi PSConstantBuffer()
{
memset(this, 0, sizeof(*this));
@ -845,7 +848,10 @@ public:
static void SortMultiStretchRects(MultiStretchRect* rects, u32 num_rects);
/// Updates a GPU CLUT texture from a source texture.
virtual void UpdateCLUTTexture(GSTexture* sTex, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize) {}
virtual void UpdateCLUTTexture(GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize) {}
/// Converts a colour format to an indexed format texture.
virtual void ConvertToIndexedTexture(GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, u32 SBW, u32 SPSM, GSTexture* dTex, u32 DBW, u32 DPSM) {}
virtual void RenderHW(GSHWDrawConfig& config) {}

View File

@ -79,6 +79,7 @@ bool GSRenderer::Merge(int field)
{
GSVector2i fs(0, 0);
GSTexture* tex[3] = { nullptr, nullptr, nullptr };
float tex_scale[3] = { 0.0f, 0.0f, 0.0f };
int y_offset[3] = { 0, 0, 0 };
const bool feedback_merge = m_regs->EXTWRITE.WRITE == 1;
@ -101,18 +102,19 @@ bool GSRenderer::Merge(int field)
// Only need to check the right/bottom on software renderer, hardware always gets the full texture then cuts a bit out later.
if (PCRTCDisplays.FrameRectMatch() && !PCRTCDisplays.FrameWrap() && !feedback_merge)
{
tex[0] = GetOutput(-1, y_offset[0]);
tex[0] = GetOutput(-1, tex_scale[0], y_offset[0]);
tex[1] = tex[0]; // saves one texture fetch
y_offset[1] = y_offset[0];
tex_scale[1] = tex_scale[0];
}
else
{
if (PCRTCDisplays.PCRTCDisplays[0].enabled)
tex[0] = GetOutput(0, y_offset[0]);
tex[0] = GetOutput(0, tex_scale[0], y_offset[0]);
if (PCRTCDisplays.PCRTCDisplays[1].enabled)
tex[1] = GetOutput(1, y_offset[1]);
tex[1] = GetOutput(1, tex_scale[1], y_offset[1]);
if (feedback_merge)
tex[2] = GetFeedbackOutput();
tex[2] = GetFeedbackOutput(tex_scale[2]);
}
if (!tex[0] && !tex[1])
@ -152,7 +154,7 @@ bool GSRenderer::Merge(int field)
if (!curCircuit.enabled || !tex[i])
continue;
GSVector4 scale = GSVector4(tex[i]->GetScale()).xyxy();
GSVector4 scale = GSVector4(tex_scale[i]);
// dst is the final destination rect with offset on the screen.
dst[i] = scale * GSVector4(curCircuit.displayRect);
@ -184,7 +186,7 @@ bool GSRenderer::Merge(int field)
if (feedback_merge && tex[2])
{
GSVector4 scale = GSVector4(tex[2]->GetScale()).xyxy();
GSVector4 scale = GSVector4(tex_scale[2]);
GSVector4i feedback_rect;
feedback_rect.left = m_regs->EXTBUF.WDX;
@ -216,7 +218,7 @@ bool GSRenderer::Merge(int field)
if (isReallyInterlaced() && GSConfig.InterlaceMode != GSInterlaceMode::Off)
{
const float offset = is_bob ? (tex[1] ? tex[1]->GetScale().y : tex[0]->GetScale().y) : 0.0f;
const float offset = is_bob ? (tex[1] ? tex_scale[1] : tex_scale[0]) : 0.0f;
g_gs_device->Interlace(fs, field ^ field2, mode, offset);
}
@ -828,7 +830,7 @@ void GSRenderer::EndCapture()
GSCapture::EndCapture();
}
GSTexture* GSRenderer::LookupPaletteSource(u32 CBP, u32 CPSM, u32 CBW, GSVector2i& offset, const GSVector2i& size)
GSTexture* GSRenderer::LookupPaletteSource(u32 CBP, u32 CPSM, u32 CBW, GSVector2i& offset, float* scale, const GSVector2i& size)
{
return nullptr;
}

View File

@ -36,8 +36,8 @@ protected:
bool m_texture_shuffle = false;
bool m_copy_16bit_to_target_shuffle = false;
virtual GSTexture* GetOutput(int i, int& y_offset) = 0;
virtual GSTexture* GetFeedbackOutput() { return nullptr; }
virtual GSTexture* GetOutput(int i, float& scale, int& y_offset) = 0;
virtual GSTexture* GetFeedbackOutput(float& scale) { return nullptr; }
public:
GSRenderer();
@ -50,10 +50,10 @@ public:
virtual void VSync(u32 field, bool registers_written);
virtual bool CanUpscale() { return false; }
virtual float GetUpscaleMultiplier() { return 1.0f; }
virtual GSVector2 GetTextureScaleFactor() { return { 1.0f, 1.0f }; }
virtual float GetTextureScaleFactor() { return 1.0f; }
GSVector2i GetInternalResolution();
virtual GSTexture* LookupPaletteSource(u32 CBP, u32 CPSM, u32 CBW, GSVector2i& offset, const GSVector2i& size);
virtual GSTexture* LookupPaletteSource(u32 CBP, u32 CPSM, u32 CBW, GSVector2i& offset, float* scale, const GSVector2i& size);
bool SaveSnapshotToMemory(u32 window_width, u32 window_height, bool apply_aspect, bool crop_borders,
u32* width, u32* height, std::vector<u32>* pixels);

View File

@ -21,18 +21,7 @@
#include "common/StringUtil.h"
#include <bitset>
GSTexture::GSTexture()
: m_scale(1, 1)
, m_size(0, 0)
, m_mipmap_levels(0)
, m_type(Type::Invalid)
, m_format(Format::Invalid)
, m_state(State::Dirty)
, m_needs_mipmaps_generated(true)
, last_frame_used(0)
, OffsetHack_modxy(0.0f)
{
}
GSTexture::GSTexture() = default;
bool GSTexture::Save(const std::string& fn)
{
@ -67,15 +56,13 @@ bool GSTexture::Save(const std::string& fn)
void GSTexture::Swap(GSTexture* tex)
{
std::swap(m_scale, tex->m_scale);
std::swap(m_size, tex->m_size);
std::swap(m_mipmap_levels, tex->m_mipmap_levels);
std::swap(m_type, tex->m_type);
std::swap(m_format, tex->m_format);
std::swap(m_state, tex->m_state);
std::swap(m_needs_mipmaps_generated, tex->m_needs_mipmaps_generated);
std::swap(last_frame_used, tex->last_frame_used);
std::swap(OffsetHack_modxy, tex->OffsetHack_modxy);
std::swap(m_last_frame_used, tex->m_last_frame_used);
}
u32 GSTexture::GetCompressedBytesPerBlock() const

View File

@ -58,14 +58,25 @@ public:
Invalidated
};
// Pending clear value for a texture, 16-byte aligned.
// This is a union: `depth` occupies the same storage as `color[0]`, so only the
// member that was written last holds a meaningful value.
union alignas(16) ClearValue
{
float color[4]; // clear colour, four float components
float depth; // clear depth
};
protected:
GSVector2 m_scale;
GSVector2i m_size;
int m_mipmap_levels;
Type m_type;
Format m_format;
State m_state;
bool m_needs_mipmaps_generated;
GSVector2i m_size{};
int m_mipmap_levels = 0;
Type m_type = Type::Invalid;
Format m_format = Format::Invalid;
State m_state = State::Dirty;
// frame number (arbitrary base) the texture was recycled on
// different purpose than texture cache ages, do not attempt to merge
u32 m_last_frame_used = 0;
bool m_needs_mipmaps_generated = true;
ClearValue m_clear_value = {};
public:
GSTexture();
@ -82,19 +93,17 @@ public:
virtual void Swap(GSTexture* tex);
virtual u32 GetID() { return 0; }
GSVector2 GetScale() const { return m_scale; }
void SetScale(const GSVector2& scale) { m_scale = scale; }
__fi int GetWidth() const { return m_size.x; }
__fi int GetHeight() const { return m_size.y; }
__fi const GSVector2i& GetSize() const { return m_size; }
__fi GSVector4i GetRect() const { return GSVector4i::loadh(m_size); }
int GetWidth() const { return m_size.x; }
int GetHeight() const { return m_size.y; }
GSVector2i GetSize() const { return m_size; }
GSVector4i GetRect() const { return GSVector4i(m_size).zwxy(); }
int GetMipmapLevels() const { return m_mipmap_levels; }
bool IsMipmap() const { return m_mipmap_levels > 1; }
__fi int GetMipmapLevels() const { return m_mipmap_levels; }
__fi bool IsMipmap() const { return m_mipmap_levels > 1; }
Type GetType() const { return m_type; }
Format GetFormat() const { return m_format; }
bool IsCompressedFormat() const { return IsCompressedFormat(m_format); }
__fi Type GetType() const { return m_type; }
__fi Format GetFormat() const { return m_format; }
__fi bool IsCompressedFormat() const { return IsCompressedFormat(m_format); }
static u32 GetCompressedBytesPerBlock(Format format);
static u32 GetCompressedBlockSize(Format format);
@ -106,31 +115,42 @@ public:
u32 CalcUploadRowLengthFromPitch(u32 pitch) const;
u32 CalcUploadSize(u32 height, u32 pitch) const;
bool IsRenderTargetOrDepthStencil() const
__fi bool IsRenderTargetOrDepthStencil() const
{
return (m_type >= Type::RenderTarget && m_type <= Type::DepthStencil);
}
bool IsRenderTarget() const
__fi bool IsRenderTarget() const
{
return (m_type == Type::RenderTarget);
}
bool IsDepthStencil() const
__fi bool IsDepthStencil() const
{
return (m_type == Type::DepthStencil);
}
State GetState() const { return m_state; }
void SetState(State state) { m_state = state; }
__fi State GetState() const { return m_state; }
__fi void SetState(State state) { m_state = state; }
__fi u32 GetLastFrameUsed() const { return m_last_frame_used; }
void SetLastFrameUsed(u32 frame) { m_last_frame_used = frame; }
__fi GSVector4 GetClearColor() const { return GSVector4::load<false>(m_clear_value.color); }
__fi float GetClearDepth() const { return m_clear_value.depth; }
// Remembers `color` as this texture's clear colour and marks the texture as being
// in the Cleared state. Only the stored value and state are updated here.
__fi void SetClearColor(const GSVector4& color)
{
	GSVector4::store<false>(m_clear_value.color, color);
	m_state = State::Cleared;
}
// Remembers `depth` as this texture's clear depth and marks the texture as being
// in the Cleared state. The depth value shares storage with the clear colour
// (m_clear_value is a union), so any previously stored colour is overwritten.
__fi void SetClearDepth(float depth)
{
	m_clear_value.depth = depth;
	m_state = State::Cleared;
}
void GenerateMipmapsIfNeeded();
void ClearMipmapGenerationFlag() { m_needs_mipmaps_generated = false; }
// frame number (arbitrary base) the texture was recycled on
// different purpose than texture cache ages, do not attempt to merge
unsigned last_frame_used;
float OffsetHack_modxy;
// Typical size of a RGBA texture
u32 GetMemUsage() const { return m_size.x * m_size.y * (m_format == Format::UNorm8 ? 1 : 4); }

View File

@ -148,14 +148,9 @@ bool GSDevice11::Create()
return false;
}
ShaderMacro sm_convert(m_shader_cache.GetFeatureLevel());
sm_convert.AddMacro("PS_SCALE_FACTOR", StringUtil::ToChars(GSConfig.UpscaleMultiplier));
D3D_SHADER_MACRO* sm_convert_ptr = sm_convert.GetPtr();
for (size_t i = 0; i < std::size(m_convert.ps); i++)
{
m_convert.ps[i] = m_shader_cache.GetPixelShader(m_dev.get(), *convert_hlsl, sm_convert_ptr, shaderName(static_cast<ShaderConvert>(i)));
m_convert.ps[i] = m_shader_cache.GetPixelShader(m_dev.get(), *convert_hlsl, sm_model.GetPtr(), shaderName(static_cast<ShaderConvert>(i)));
if (!m_convert.ps[i])
return false;
}
@ -771,17 +766,16 @@ void GSDevice11::PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture*
PSSetShaderResources(nullptr, nullptr);
}
void GSDevice11::UpdateCLUTTexture(GSTexture* sTex, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize)
void GSDevice11::UpdateCLUTTexture(GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize)
{
// match merge cb
struct Uniforms
{
float scaleX, scaleY;
float pad1[2];
float scale;
float pad1[3];
u32 offsetX, offsetY, dOffset;
u32 pad2;
};
const Uniforms cb = {sTex->GetScale().x, sTex->GetScale().y, {0.0f, 0.0f}, offsetX, offsetY, dOffset};
const Uniforms cb = {sScale, {}, offsetX, offsetY, dOffset};
m_ctx->UpdateSubresource(m_merge.cb.get(), 0, nullptr, &cb, 0, 0);
const GSVector4 dRect(0, 0, dSize, 1);
@ -789,6 +783,23 @@ void GSDevice11::UpdateCLUTTexture(GSTexture* sTex, u32 offsetX, u32 offsetY, GS
StretchRect(sTex, GSVector4::zero(), dTex, dRect, m_convert.ps[static_cast<int>(shader)].get(), m_merge.cb.get(), nullptr, false);
}
/// Converts a colour texture into an indexed (8-bit packed) texture by drawing
/// the whole of dTex with the RGBA_TO_8I convert shader.
///
/// sTex   - source colour texture.
/// sScale - scale factor of sTex; the shader multiplies source coordinates by it.
/// SBW    - source buffer width, used by the shader to compensate for page pitch.
/// dTex   - destination texture; its full width/height is rendered.
/// DBW    - destination buffer width, used by the shader alongside SBW.
/// offsetX, offsetY, SPSM and DPSM are accepted for interface compatibility but
/// are not used by this implementation.
void GSDevice11::ConvertToIndexedTexture(GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, u32 SBW, u32 SPSM, GSTexture* dTex, u32 DBW, u32 DPSM)
{
	// Must match the merge constant buffer as the convert shader reads it:
	// the scale factor is taken from BGColor.x (a float4, hence the three floats
	// of padding), SBW from EMODA and DBW from EMODC. Without pad1 the values
	// land inside BGColor, the aggregate initializer below assigns them to the
	// wrong members, and the shader divides by garbage SBW/DBW.
	struct Uniforms
	{
		float scale;
		float pad1[3];
		u32 SBW, DBW;
		// Tail padding up to 32 bytes (float4 + four 32-bit fields).
		// NOTE(review): assumes m_merge.cb is sized like MergeConstantBuffer;
		// UpdateSubresource copies the whole constant buffer from this struct.
		u32 pad2[2];
	};

	const Uniforms cb = {sScale, {}, SBW, DBW};
	m_ctx->UpdateSubresource(m_merge.cb.get(), 0, nullptr, &cb, 0, 0);

	const GSVector4 dRect(0, 0, dTex->GetWidth(), dTex->GetHeight());
	const ShaderConvert shader = ShaderConvert::RGBA_TO_8I;
	StretchRect(sTex, GSVector4::zero(), dTex, dRect, m_convert.ps[static_cast<int>(shader)].get(), m_merge.cb.get(), nullptr, false);
}
void GSDevice11::DrawMultiStretchRects(const MultiStretchRect* rects, u32 num_rects, GSTexture* dTex, ShaderConvert shader)
{
IASetInputLayout(m_convert.il.get());
@ -873,6 +884,7 @@ void GSDevice11::DoMultiStretchRects(const MultiStretchRect* rects, u32 num_rect
DrawIndexedPrimitive();
}
void GSDevice11::DoMerge(GSTexture* sTex[3], GSVector4* sRect, GSTexture* dTex, GSVector4* dRect, const GSRegPMODE& PMODE, const GSRegEXTBUF& EXTBUF, const GSVector4& c, const bool linear)
{
const GSVector4 full_r(0.0f, 0.0f, 1.0f, 1.0f);

View File

@ -277,7 +277,8 @@ public:
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, bool green, bool blue, bool alpha) override;
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ID3D11PixelShader* ps, ID3D11Buffer* ps_cb, ID3D11BlendState* bs, bool linear = true);
void PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, PresentShader shader, float shaderTime, bool linear) override;
void UpdateCLUTTexture(GSTexture* sTex, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize) override;
void UpdateCLUTTexture(GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize) override;
void ConvertToIndexedTexture(GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, u32 SBW, u32 SPSM, GSTexture* dTex, u32 DBW, u32 DPSM) override;
void DrawMultiStretchRects(const MultiStretchRect* rects, u32 num_rects, GSTexture* dTex, ShaderConvert shader) override;
void DoMultiStretchRects(const MultiStretchRect* rects, u32 num_rects, const GSVector2& ds);

View File

@ -138,7 +138,6 @@ void GSDevice11::SetupPS(const PSSelector& sel, const GSHWDrawConfig::PSConstant
{
ShaderMacro sm(m_shader_cache.GetFeatureLevel());
sm.AddMacro("PS_SCALE_FACTOR", StringUtil::ToChars(GSConfig.UpscaleMultiplier));
sm.AddMacro("PS_FST", sel.fst);
sm.AddMacro("PS_WMS", sel.wms);
sm.AddMacro("PS_WMT", sel.wmt);

View File

@ -460,16 +460,18 @@ void GSDevice12::PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture*
m_present[static_cast<int>(shader)].get(), linear);
}
void GSDevice12::UpdateCLUTTexture(GSTexture* sTex, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize)
void GSDevice12::UpdateCLUTTexture(GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize)
{
// match merge cb
struct Uniforms
{
float scaleX, scaleY;
float pad1[2];
float scale;
float pad1[3];
u32 offsetX, offsetY, dOffset;
u32 pad2;
};
const Uniforms cb = {sTex->GetScale().x, sTex->GetScale().y, {0.0f, 0.0f}, offsetX, offsetY, dOffset};
const Uniforms cb = {sScale, {}, offsetX, offsetY, dOffset};
SetUtilityRootSignature();
SetUtilityPushConstants(&cb, sizeof(cb));
const GSVector4 dRect(0, 0, dSize, 1);
@ -478,6 +480,26 @@ void GSDevice12::UpdateCLUTTexture(GSTexture* sTex, u32 offsetX, u32 offsetY, GS
m_convert[static_cast<int>(shader)].get(), false);
}
/// Renders the whole of dTex from sTex with the RGBA_TO_8I convert pipeline,
/// producing an indexed texture from a colour one.
/// sScale, SBW and DBW are handed to the shader as push constants; offsetX,
/// offsetY, SPSM and DPSM are not used by this implementation.
void GSDevice12::ConvertToIndexedTexture(GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, u32 SBW, u32 SPSM, GSTexture* dTex, u32 DBW, u32 DPSM)
{
	// Laid out to match the merge constant buffer: scale sits in the first
	// float of a 4-float slot, followed by the two buffer widths.
	struct PushConstants
	{
		float scale;
		float pad1[3];
		u32 SBW, DBW, pad2;
	};

	PushConstants constants = {};
	constants.scale = sScale;
	constants.SBW = SBW;
	constants.DBW = DBW;

	SetUtilityRootSignature();
	SetUtilityPushConstants(&constants, sizeof(constants));

	// Cover the full destination surface.
	const GSVector4 dst_rect(0, 0, dTex->GetWidth(), dTex->GetHeight());
	GSTexture12* const src = static_cast<GSTexture12*>(sTex);
	GSTexture12* const dst = static_cast<GSTexture12*>(dTex);
	DoStretchRect(src, GSVector4::zero(), dst, dst_rect,
		m_convert[static_cast<int>(ShaderConvert::RGBA_TO_8I)].get(), false);
}
void GSDevice12::DrawMultiStretchRects(
const MultiStretchRect* rects, u32 num_rects, GSTexture* dTex, ShaderConvert shader)
{
@ -1117,7 +1139,6 @@ GSDevice12::ComPtr<ID3DBlob> GSDevice12::GetUtilityVertexShader(const std::strin
// Requests the pixel shader for `source` / `entry_point` from m_shader_cache and returns the blob.
// The macro set passed along is built from the shader cache's feature level, plus a PS_SCALE_FACTOR
// define taken from GSConfig.UpscaleMultiplier.
// NOTE(review): the enclosing diff hunk shrinks by one line, so the PS_SCALE_FACTOR line may be the
// one being removed -- confirm against the real file.
GSDevice12::ComPtr<ID3DBlob> GSDevice12::GetUtilityPixelShader(const std::string& source, const char* entry_point)
{
ShaderMacro sm_model(m_shader_cache.GetFeatureLevel());
sm_model.AddMacro("PS_SCALE_FACTOR", StringUtil::ToChars(GSConfig.UpscaleMultiplier));
return m_shader_cache.GetPixelShader(source, sm_model.GetPtr(), entry_point);
}
@ -1594,7 +1615,6 @@ const ID3DBlob* GSDevice12::GetTFXPixelShader(const GSHWDrawConfig::PSSelector&
return it->second.get();
ShaderMacro sm(m_shader_cache.GetFeatureLevel());
sm.AddMacro("PS_SCALE_FACTOR", StringUtil::ToChars(GSConfig.UpscaleMultiplier));
sm.AddMacro("PS_FST", sel.fst);
sm.AddMacro("PS_WMS", sel.wms);
sm.AddMacro("PS_WMT", sel.wmt);

View File

@ -253,7 +253,9 @@ public:
bool green, bool blue, bool alpha) override;
void PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect,
PresentShader shader, float shaderTime, bool linear) override;
void UpdateCLUTTexture(GSTexture* sTex, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize) override;
void UpdateCLUTTexture(GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize) override;
void ConvertToIndexedTexture(GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, u32 SBW, u32 SPSM, GSTexture* dTex, u32 DBW, u32 DPSM) override;
void DrawMultiStretchRects(const MultiStretchRect* rects, u32 num_rects, GSTexture* dTex, ShaderConvert shader) override;
void DoMultiStretchRects(const MultiStretchRect* rects, u32 num_rects, GSTexture12* dTex, ShaderConvert shader);

View File

@ -22,13 +22,6 @@
class GSTexture12 final : public GSTexture
{
public:
union alignas(16) ClearValue
{
float color[4];
float depth;
};
public:
GSTexture12(Type type, Format format, D3D12::Texture texture);
~GSTexture12() override;
@ -42,8 +35,6 @@ public:
__fi D3D12_RESOURCE_STATES GetResourceState() const { return m_texture.GetState(); }
__fi DXGI_FORMAT GetNativeFormat() const { return m_texture.GetFormat(); }
__fi ID3D12Resource* GetResource() const { return m_texture.GetResource(); }
__fi GSVector4 GetClearColor() const { return GSVector4::load<true>(m_clear_value.color); }
__fi float GetClearDepth() const { return m_clear_value.depth; }
void* GetNativeHandle() const override;
@ -57,17 +48,6 @@ public:
void CommitClear();
void CommitClear(ID3D12GraphicsCommandList* cmdlist);
__fi void SetClearColor(const GSVector4& color)
{
m_state = State::Cleared;
GSVector4::store<true>(m_clear_value.color, color);
}
__fi void SetClearDepth(float depth)
{
m_state = State::Cleared;
m_clear_value.depth = depth;
}
// Call when the texture is bound to the pipeline, or read from in a copy.
__fi void SetUsedThisCommandBuffer()
{
@ -85,8 +65,6 @@ private:
// When this matches the current fence counter, the texture was used this command buffer.
u64 m_use_fence_counter = 0;
ClearValue m_clear_value = {};
GSVector4i m_map_area = GSVector4i::zero();
u32 m_map_level = UINT32_MAX;
};

View File

@ -32,24 +32,6 @@ static CRCHackLevel s_crc_hack_level = CRCHackLevel::Full;
// Partial level, broken on all renderers.
////////////////////////////////////////////////////////////////////////////////
/// Skip-count hack: when no skip is pending, asks for one draw to be skipped if the draw is textured,
/// samples from TBP0 0x01400 or 0x012c0, and both the frame and the texture are PSM_PSMCT16.
/// Always returns true; `r` is not used.
bool GSHwHack::GSC_BigMuthaTruckers(GSRendererHW& r, const GSFrameInfo& fi, int& skip)
{
	// A skip is already in progress, leave the counter alone.
	if (skip != 0)
		return true;

	// Mid-texture pointer is a cache miss; luckily a half-screen TS effect gets replaced with a
	// full-screen one in EmulateTextureShuffleAndFbmask (see #2934). That works for the time being
	// but isn't ideal, so the extra texture shuffle draw that is no longer needed gets skipped.
	const bool is_extra_shuffle_draw = fi.TME && (fi.TBP0 == 0x01400 || fi.TBP0 == 0x012c0) &&
		fi.FPSM == fi.TPSM && fi.TPSM == PSM_PSMCT16;
	if (is_extra_shuffle_draw)
		skip = 1;

	return true;
}
bool GSHwHack::GSC_DeathByDegreesTekkenNinaWilliams(GSRendererHW& r, const GSFrameInfo& fi, int& skip)
{
// Note: Game also has issues with texture shuffle not supported on strange clamp mode.
@ -1005,7 +987,7 @@ bool GSHwHack::OI_RozenMaidenGebetGarden(GSRendererHW& r, GSTexture* rt, GSTextu
TEX0.TBW = RCONTEXT->FRAME.FBW;
TEX0.PSM = RCONTEXT->FRAME.PSM;
if (GSTextureCache::Target* tmp_rt = r.m_tc->LookupTarget(TEX0, r.GetTargetSize(), GSTextureCache::RenderTarget, true))
if (GSTextureCache::Target* tmp_rt = r.m_tc->LookupTarget(TEX0, r.GetTargetSize(), r.GetTextureScaleFactor(), GSTextureCache::RenderTarget, true))
{
GL_INS("OI_RozenMaidenGebetGarden FB clear");
g_gs_device->ClearRenderTarget(tmp_rt->m_texture, 0);
@ -1023,7 +1005,7 @@ bool GSHwHack::OI_RozenMaidenGebetGarden(GSRendererHW& r, GSTexture* rt, GSTextu
TEX0.TBW = RCONTEXT->FRAME.FBW;
TEX0.PSM = RCONTEXT->ZBUF.PSM;
if (GSTextureCache::Target* tmp_ds = r.m_tc->LookupTarget(TEX0, r.GetTargetSize(), GSTextureCache::DepthStencil, true))
if (GSTextureCache::Target* tmp_ds = r.m_tc->LookupTarget(TEX0, r.GetTargetSize(), r.GetTextureScaleFactor(), GSTextureCache::DepthStencil, true))
{
GL_INS("OI_RozenMaidenGebetGarden ZB clear");
g_gs_device->ClearDepth(tmp_ds->m_texture);
@ -1056,10 +1038,23 @@ bool GSHwHack::OI_SonicUnleashed(GSRendererHW& r, GSTexture* rt, GSTexture* ds,
GL_INS("OI_SonicUnleashed replace draw by a copy");
GSTextureCache::Target* src = r.m_tc->LookupTarget(Texture, GSVector2i(1, 1), GSTextureCache::RenderTarget, true);
GSTextureCache::Target* src = r.m_tc->LookupTarget(Texture, GSVector2i(1, 1), r.GetTextureScaleFactor(), GSTextureCache::RenderTarget, true);
const GSVector2i rt_size(rt->GetSize());
const GSVector2i src_size(src->m_texture->GetSize());
GSVector2i rt_size(rt->GetSize());
// This is awful, but so is the CRC hack... it's a texture shuffle split horizontally instead of vertically.
if (rt_size.x < src_size.x || rt_size.y < src_size.y)
{
GSTextureCache::Target* rt_again = r.m_tc->LookupTarget(Frame, src_size, src->m_scale, GSTextureCache::RenderTarget, true);
if (rt_again->m_unscaled_size.x < src->m_unscaled_size.x || rt_again->m_unscaled_size.y < src->m_unscaled_size.y)
{
rt_again->ResizeTexture(std::max(rt_again->m_unscaled_size.x, src->m_unscaled_size.x),
std::max(rt_again->m_unscaled_size.y, src->m_unscaled_size.y));
rt = rt_again->m_texture;
}
}
const GSVector2i copy_size(std::min(rt_size.x, src_size.x), std::min(rt_size.y, src_size.y));
const GSVector4 sRect(0.0f, 0.0f, static_cast<float>(copy_size.x) / static_cast<float>(src_size.x), static_cast<float>(copy_size.y) / static_cast<float>(src_size.y));
@ -1147,7 +1142,7 @@ bool GSHwHack::GSC_Battlefield2(GSRendererHW& r, const GSFrameInfo& fi, int& ski
GIFRegTEX0 TEX0 = {};
TEX0.TBP0 = fi.FBP;
TEX0.TBW = 8;
GSTextureCache::Target* dst = r.m_tc->LookupTarget(TEX0, GSRendererHW::GetInstance()->GetTargetSize(), GSTextureCache::DepthStencil, true);
GSTextureCache::Target* dst = r.m_tc->LookupTarget(TEX0, r.GetTargetSize(), r.GetTextureScaleFactor(), GSTextureCache::DepthStencil, true);
if (dst)
{
g_gs_device->ClearDepth(dst->m_texture);
@ -1218,7 +1213,6 @@ const GSHwHack::Entry<GSRendererHW::GSC_Ptr> GSHwHack::s_get_skip_count_function
CRC_F(GSC_Tekken5, CRCHackLevel::Partial),
// Texture shuffle
CRC_F(GSC_BigMuthaTruckers, CRCHackLevel::Partial),
CRC_F(GSC_DeathByDegreesTekkenNinaWilliams, CRCHackLevel::Partial), // + Upscaling issues
// Upscaling hacks
@ -1285,6 +1279,7 @@ void GSRendererHW::UpdateCRCHacks()
const CRCHackLevel real_level = (GSConfig.CRCHack == CRCHackLevel::Automatic) ?
GSUtil::GetRecommendedCRCHackLevel(GSConfig.Renderer) : GSConfig.CRCHack;
m_nativeres = (GSConfig.UpscaleMultiplier == 1.0f);
s_nativeres = m_nativeres;
s_crc_hack_level = real_level;

View File

@ -18,7 +18,6 @@
class GSHwHack
{
public:
static bool GSC_BigMuthaTruckers(GSRendererHW& r, const GSFrameInfo& fi, int& skip);
static bool GSC_DeathByDegreesTekkenNinaWilliams(GSRendererHW& r, const GSFrameInfo& fi, int& skip);
static bool GSC_GiTS(GSRendererHW& r, const GSFrameInfo& fi, int& skip);
static bool GSC_Manhunt2(GSRendererHW& r, const GSFrameInfo& fi, int& skip);

View File

@ -68,9 +68,9 @@ void GSRendererHW::ReadbackTextureCache()
m_tc->ReadbackAll();
}
// Thin forwarder: hands the palette lookup to the texture cache (m_tc) and returns its result unchanged.
// NOTE(review): two signatures and two return statements are visible here, one pair with and one pair
// without the `scale` argument. This looks like before/after text of the same function; confirm which
// pair is live in the real file.
GSTexture* GSRendererHW::LookupPaletteSource(u32 CBP, u32 CPSM, u32 CBW, GSVector2i& offset, const GSVector2i& size)
GSTexture* GSRendererHW::LookupPaletteSource(u32 CBP, u32 CPSM, u32 CBW, GSVector2i& offset, float* scale, const GSVector2i& size)
{
return m_tc->LookupPaletteSource(CBP, CPSM, CBW, offset, size);
return m_tc->LookupPaletteSource(CBP, CPSM, CBW, offset, scale, size);
}
bool GSRendererHW::UpdateTexIsFB(GSTextureCache::Target* dst, const GIFRegTEX0& TEX0)
@ -200,14 +200,12 @@ void GSRendererHW::VSync(u32 field, bool registers_written)
m_skip_offset = 0;
}
GSTexture* GSRendererHW::GetOutput(int i, int& y_offset)
GSTexture* GSRendererHW::GetOutput(int i, float& scale, int& y_offset)
{
int index = i >= 0 ? i : 1;
GSPCRTCRegs::PCRTCDisplay& curFramebuffer = PCRTCDisplays.PCRTCDisplays[index];
GSVector2i framebufferSize = PCRTCDisplays.GetFramebufferSize(i);
const int fb_width = framebufferSize.x;
const int fb_height = framebufferSize.y;
const GSVector2i framebufferSize(PCRTCDisplays.GetFramebufferSize(i));
PCRTCDisplays.RemoveFramebufferOffset(i);
// TRACE(_T("[%d] GetOutput %d %05x (%d)\n"), (int)m_perfmon.GetFrame(), i, (int)TEX0.TBP0, (int)TEX0.PSM);
@ -219,11 +217,10 @@ GSTexture* GSRendererHW::GetOutput(int i, int& y_offset)
TEX0.TBW = curFramebuffer.FBW;
TEX0.PSM = curFramebuffer.PSM;
const GSVector2i scaled_size(static_cast<int>(static_cast<float>(fb_width) * GSConfig.UpscaleMultiplier),
static_cast<int>(static_cast<float>(fb_height) * GSConfig.UpscaleMultiplier));
if (GSTextureCache::Target* rt = m_tc->LookupDisplayTarget(TEX0, scaled_size, fb_width, fb_height))
if (GSTextureCache::Target* rt = m_tc->LookupDisplayTarget(TEX0, framebufferSize, GetTextureScaleFactor()))
{
t = rt->m_texture;
scale = rt->m_scale;
const int delta = TEX0.TBP0 - rt->m_TEX0.TBP0;
if (delta > 0 && curFramebuffer.FBW != 0)
@ -248,7 +245,7 @@ GSTexture* GSRendererHW::GetOutput(int i, int& y_offset)
return t;
}
GSTexture* GSRendererHW::GetFeedbackOutput()
GSTexture* GSRendererHW::GetFeedbackOutput(float& scale)
{
const int index = m_regs->EXTBUF.FBIN & 1;
const GSVector2i fb_size(PCRTCDisplays.GetFramebufferSize(index));
@ -258,13 +255,12 @@ GSTexture* GSRendererHW::GetFeedbackOutput()
TEX0.TBW = m_regs->EXTBUF.EXBW;
TEX0.PSM = PCRTCDisplays.PCRTCDisplays[index].PSM;
const GSVector2i scaled_size(static_cast<int>(static_cast<float>(fb_size.x) * GSConfig.UpscaleMultiplier),
static_cast<int>(static_cast<float>(fb_size.y) * GSConfig.UpscaleMultiplier));
GSTextureCache::Target* rt = m_tc->LookupDisplayTarget(TEX0, scaled_size, fb_size.x, fb_size.y);
GSTextureCache::Target* rt = m_tc->LookupDisplayTarget(TEX0, fb_size, GetTextureScaleFactor());
if (!rt)
return nullptr;
GSTexture* t = rt->m_texture;
scale = rt->m_scale;
#ifdef ENABLE_OGL_DEBUG
if (GSConfig.DumpGSData && GSConfig.SaveFrame && s_n >= GSConfig.SaveN)
@ -435,6 +431,45 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba)
tex_pos &= 0xFF;
read_ba = (tex_pos > 112 && tex_pos < 144);
if (m_split_texture_shuffle_pages > 0)
{
// Input vertices might be bad, so rewrite them.
// We can't use the draw rect exactly here, because if the target was actually larger
// for some reason... unhandled clears, maybe, it won't have been halved correctly.
// So, halve it ourselves.
const GSVector4i dr = GetSplitTextureShuffleDrawRect();
const GSVector4i r = dr.blend32<9>(dr.sra32(1));
GL_CACHE("ConvertSpriteTextureShuffle: Rewrite from %d,%d => %d,%d to %d,%d => %d,%d",
static_cast<int>(m_vt.m_min.p.x), static_cast<int>(m_vt.m_min.p.y), static_cast<int>(m_vt.m_min.p.z),
static_cast<int>(m_vt.m_min.p.w), r.x, r.y, r.z, r.w);
const GSVector4i fpr = r.sll32(4);
v[0].XYZ.X = static_cast<u16>(m_context->XYOFFSET.OFX + fpr.x);
v[0].XYZ.Y = static_cast<u16>(m_context->XYOFFSET.OFY + fpr.y);
v[1].XYZ.X = static_cast<u16>(m_context->XYOFFSET.OFX + fpr.z);
v[1].XYZ.Y = static_cast<u16>(m_context->XYOFFSET.OFY + fpr.w);
if (PRIM->FST)
{
v[0].U = fpr.x;
v[0].V = fpr.y;
v[1].U = fpr.z;
v[1].V = fpr.w;
}
else
{
const float th = static_cast<float>(1 << m_context->TEX0.TH);
const GSVector4 st = GSVector4(r) / GSVector4(GSVector2(tw, th)).xyxy();
GSVector4::storel(&v[0].ST.S, st);
GSVector4::storeh(&v[1].ST.S, st);
}
m_vertex.head = m_vertex.tail = m_vertex.next = 2;
m_index.tail = 2;
return;
}
bool half_bottom = false;
switch (GSConfig.UserHacks_HalfBottomOverride)
{
@ -587,7 +622,7 @@ GSVector4 GSRendererHW::RealignTargetTextureCoordinate(const GSTextureCache::Sou
return GSVector4(0.0f);
const GSVertex* v = &m_vertex.buff[0];
const GSVector2& scale = tex->m_texture->GetScale();
const float scale = tex->GetScale();
const bool linear = m_vt.IsRealLinear();
const int t_position = v[0].U;
GSVector4 half_offset(0.0f);
@ -619,13 +654,13 @@ GSVector4 GSRendererHW::RealignTargetTextureCoordinate(const GSTextureCache::Sou
{
if (!linear && t_position == 8)
{
half_offset.x = 8 - 8 / scale.x;
half_offset.y = 8 - 8 / scale.y;
half_offset.x = 8 - 8 / scale;
half_offset.y = 8 - 8 / scale;
}
else if (linear && t_position == 16)
{
half_offset.x = 16 - 16 / scale.x;
half_offset.y = 16 - 16 / scale.y;
half_offset.x = 16 - 16 / scale;
half_offset.y = 16 - 16 / scale;
}
else if (m_vt.m_min.p.x == -0.5f)
{
@ -635,7 +670,7 @@ GSVector4 GSRendererHW::RealignTargetTextureCoordinate(const GSTextureCache::Sou
}
GL_INS("offset detected %f,%f t_pos %d (linear %d, scale %f)",
half_offset.x, half_offset.y, t_position, linear, scale.x);
half_offset.x, half_offset.y, t_position, linear, scale);
}
else if (m_vt.m_eq.q)
{
@ -648,18 +683,17 @@ GSVector4 GSRendererHW::RealignTargetTextureCoordinate(const GSTextureCache::Sou
half_offset.y = 0.5f * q / th;
GL_INS("ST offset detected %f,%f (linear %d, scale %f)",
half_offset.x, half_offset.y, linear, scale.x);
half_offset.x, half_offset.y, linear, scale);
}
return half_offset;
}
// Builds the draw's bounding box from the vertex trace: (min.x, min.y, max.x, max.y) widened by one
// unit on every side, multiplied by the render target scale, converted to integers and clipped to
// (0, 0, rtsize.x, rtsize.y).
// NOTE(review): two signatures, the vector `scale` local and two return statements are all visible
// here; this looks like before/after text of the same function (vector scale vs. scalar rtscale).
// Confirm which variant is live in the real file.
GSVector4i GSRendererHW::ComputeBoundingBox(const GSVector2& rtscale, const GSVector2i& rtsize)
GSVector4i GSRendererHW::ComputeBoundingBox(const GSVector2i& rtsize, float rtscale)
{
const GSVector4 scale = GSVector4(rtscale.x, rtscale.y);
const GSVector4 offset = GSVector4(-1.0f, 1.0f); // Round value
// offset.xxyy() is (-1, -1, +1, +1): pushes the min corner out by one and the max corner out by one.
const GSVector4 box = m_vt.m_min.p.xyxy(m_vt.m_max.p) + offset.xxyy();
return GSVector4i(box * scale.xyxy()).rintersect(GSVector4i(0, 0, rtsize.x, rtsize.y));
return GSVector4i(box * GSVector4(rtscale)).rintersect(GSVector4i(0, 0, rtsize.x, rtsize.y));
}
void GSRendererHW::MergeSprite(GSTextureCache::Source* tex)
@ -718,60 +752,162 @@ void GSRendererHW::MergeSprite(GSTextureCache::Source* tex)
}
}
// Returns the texture scale factor, which is derived solely from GetUpscaleMultiplier().
// NOTE(review): both a GSVector2-returning variant (same value in x and y) and a float-returning
// variant are visible here; this looks like before/after text of the same function. Confirm which
// variant is live in the real file.
GSVector2 GSRendererHW::GetTextureScaleFactor()
float GSRendererHW::GetTextureScaleFactor()
{
const float f_upscale = GetUpscaleMultiplier();
return GSVector2(f_upscale, f_upscale);
return GetUpscaleMultiplier();
}
// Works out the size of the render target needed by the current draw.
//  - Width comes from FRAME.FBW * 64, limited by the scissor's right edge (scissor.in.z).
//  - Height starts as the smaller of the scissor bottom (scissor.in.w) and the draw rect bottom (m_r.w),
//    is adjusted/aligned by the steps below, and is finally passed through m_tc->GetTargetHeight().
// NOTE(review): this span carries two signatures, duplicate declarations of `min_height` and `width`,
// an `if` header without its own body, and two return statements. It looks like before/after text of
// the same function interleaved; confirm which lines are live in the real file before relying on it.
GSVector2i GSRendererHW::GetTargetSize(GSVector2i* unscaled_size)
GSVector2i GSRendererHW::GetTargetSize(const GSTextureCache::Source* tex)
{
// Don't blindly expand out to the scissor size if we're not drawing to it.
// e.g. Burnout 3, God of War II, etc.
u32 min_height = std::min<u32>(m_context->scissor.in.w, m_r.w);
u32 min_height = std::min<u32>(static_cast<u32>(m_context->scissor.in.w), m_r.w);
// Another thing these games like to do, is draw a 512x896 shuffle, which would result in us
// expanding the target out to 896 height, but the extra area would all be black, with the
// draw effectively changing nothing for the new area. So, instead, lets try to detect these
// draws by double-checking we're not stretching the texture (gradient of <1).
if (PRIM->TME && m_vt.m_primclass == GS_SPRITE_CLASS && m_src && (m_src->m_target || m_src->m_from_target))
// If the draw is less than a page high, FBW=0 is the same as FBW=1.
const GSLocalMemory::psm_t& frame_psm = GSLocalMemory::m_psm[m_context->FRAME.PSM];
u32 width = std::min(std::max<u32>(m_context->FRAME.FBW, 1) * 64u, static_cast<u32>(m_context->scissor.in.z));
if (m_context->FRAME.FBW == 0 && m_r.w > frame_psm.pgs.y)
{
const float diff = std::abs((m_vt.m_max.p.y - m_vt.m_min.p.y) - (m_vt.m_max.t.y - m_vt.m_min.t.y));
if (diff <= 1.0f)
{
// Clamp to the texture size. We're working in unscaled coordinates here, so undo the upscaling.
min_height = std::min(min_height, static_cast<u32>(static_cast<float>(m_src->m_texture->GetHeight()) / m_src->m_texture->GetScale().y));
}
GL_INS("FBW=0 when drawing more than 1 page in height (PSM %s, PGS %dx%d).", psm_str(m_context->FRAME.PSM),
frame_psm.pgs.x, frame_psm.pgs.y);
}
u32 width = std::min(m_context->FRAME.FBW * 64u, static_cast<u32>(m_context->scissor.in.z));
// If it's a channel shuffle, it'll likely be just a single page, so assume full screen.
if (m_channel_shuffle)
{
const int page_x = GSLocalMemory::m_psm[m_context->FRAME.PSM].pgs.x - 1;
const int page_y = GSLocalMemory::m_psm[m_context->FRAME.PSM].pgs.y - 1;
const int page_x = frame_psm.pgs.x - 1;
const int page_y = frame_psm.pgs.y - 1;
// Round up the page as channel shuffles are generally done in pages at a time
width = (std::max(static_cast<u32>(PCRTCDisplays.GetResolution().x), width) + page_x) & ~page_x;
min_height = (std::max(static_cast<u32>(PCRTCDisplays.GetResolution().y), min_height) + page_y) & ~page_y;
}
// Align to even lines, reduces the chance of tiny resizes.
min_height = Common::AlignUpPow2(min_height, 2);
// Align to page size. Since FRAME/Z has to always start on a page boundary, in theory no two should overlap.
min_height = Common::AlignUpPow2(min_height, frame_psm.pgs.y);
// Early detection of texture shuffles. These double the input height because they're interpreting 64x32 C32 pages as 64x64 C16.
// Why? Well, we don't want to be doubling the heights of targets, but also we don't want to align C32 targets to 64 instead of 32.
// Yumeria's text breaks, and GOW goes to 512x448 instead of 512x416 if we don't.
const bool possible_texture_shuffle =
(tex && m_vt.m_primclass == GS_SPRITE_CLASS && frame_psm.bpp == 16 &&
GSLocalMemory::m_psm[m_context->TEX0.PSM].bpp == 16 &&
(tex->m_32_bits_fmt ||
(m_context->TEX0.TBP0 != m_context->FRAME.Block() && IsOpaque() && !(m_context->TEX1.MMIN & 1) &&
m_context->FRAME.FBMSK && m_tc->Has32BitTarget(m_context->FRAME.Block()))));
if (possible_texture_shuffle)
{
GL_CACHE("Halving height due to texture shuffle, %dx%d -> %dx%d", width, min_height, width, min_height / 2);
min_height /= 2;
}
// The texture cache has the final say on the height for this FRAME block/width/format.
u32 height = m_tc->GetTargetHeight(m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, min_height);
if (unscaled_size)
{
unscaled_size->x = static_cast<int>(width);
unscaled_size->y = static_cast<int>(height);
}
GL_INS("Target size for %x %u %u: %ux%u", m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, width, height);
return GSVector2i(static_cast<int>(static_cast<float>(width) * GSConfig.UpscaleMultiplier),
static_cast<int>(static_cast<float>(height) * GSConfig.UpscaleMultiplier));
return GSVector2i(width, height);
}
/// Checks whether the current draw is one part of a texture shuffle that has been split across
/// several draws, by comparing the current context against the next draw's context.
///
/// Returns true when the next draw lines up as a continuation (same state, TBP0 advanced by the
/// pages covered here, FBP either advanced likewise or left unchanged). In that case the running
/// page counters are advanced and, for the first part, the starting FBP/TBP0 are recorded.
/// Returns false without touching that state in every other case.
bool GSRendererHW::IsSplitTextureShuffle()
{
	// For this to work, we're peeking into the next draw, therefore we need dirty registers.
	if (m_dirty_gs_regs == 0)
		return false;

	// Make sure nothing unexpected has changed.
	// Twinsanity seems to screw with ZBUF here despite it being irrelevant.
	// TEX0 is compared with its low 14 bits ignored and FRAME with its low 9 bits ignored
	// (presumably the TBP0 and FBP fields, which are expected to move between the parts).
	const GSDrawingContext& next_ctx = m_backup_env.CTXT[m_backed_up_ctx];
	if (((m_context->stack.TEX0.U64 ^ next_ctx.TEX0.U64) & (~0x3FFF)) != 0 ||
		m_context->stack.TEX1.U64 != next_ctx.TEX1.U64 ||
		m_context->stack.CLAMP.U64 != next_ctx.CLAMP.U64 ||
		m_context->stack.TEST.U64 != next_ctx.TEST.U64 ||
		((m_context->stack.FRAME.U64 ^ next_ctx.FRAME.U64) & (~0x1FF)) != 0 ||
		m_context->stack.ZBUF.ZMSK != next_ctx.ZBUF.ZMSK)
	{
		return false;
	}

	// Check that both the position and texture coordinates are page aligned, so we can work in pages instead of coordinates.
	// For texture shuffles, the U will be offset by 8.
	const GSLocalMemory::psm_t& frame_psm = GSLocalMemory::m_psm[m_context->FRAME.PSM];
	const GSVector4i pos_rc = GSVector4i(m_vt.m_min.p.upld(m_vt.m_max.p));
	const GSVector4i tex_rc = GSVector4i(m_vt.m_min.t.upld(m_vt.m_max.t));

	// Width/height should match.
	if (pos_rc.width() != tex_rc.width() || pos_rc.height() != tex_rc.height())
		return false;

	// X might be offset by up to -8/+8, but either the position or UV should be aligned.
	// Take the smaller top-left and the larger bottom-right of the two rectangles.
	GSVector4i aligned_rc = pos_rc.min_i32(tex_rc).blend32<12>(pos_rc.max_i32(tex_rc));

	// Check page alignment.
	if (aligned_rc.x != 0 || (aligned_rc.z & (frame_psm.pgs.x - 1)) != 0 ||
		aligned_rc.y != 0 || (aligned_rc.w & (frame_psm.pgs.y - 1)) != 0)
	{
		return false;
	}

	// Matrix Path of Neo draws 512x512 instead of 512x448, then scissors to 512x448.
	aligned_rc = aligned_rc.rintersect(GSVector4i(m_context->scissor.in));

	// We should have the same number of pages in both the position and UV.
	const u32 pages_high = static_cast<u32>(aligned_rc.height()) / frame_psm.pgs.y;
	const u32 num_pages = m_context->FRAME.FBW * pages_high;

	// If this is a split texture shuffle, the next draw's FRAME/TEX0 should line up.
	// Re-add the offset we subtracted in Draw() to get the original FBP/TBP0.. this won't handle wrapping. Oh well.
	const u32 expected_next_FBP = (m_context->FRAME.FBP + m_split_texture_shuffle_pages) + num_pages;
	const u32 expected_next_TBP0 = (m_context->TEX0.TBP0 + (m_split_texture_shuffle_pages + num_pages) * BLOCKS_PER_PAGE);
	GL_CACHE("IsSplitTextureShuffle: Draw covers %ux%u pages, next FRAME %x TEX %x",
		static_cast<u32>(aligned_rc.width()) / frame_psm.pgs.x, pages_high, expected_next_FBP * BLOCKS_PER_PAGE,
		expected_next_TBP0);
	if (next_ctx.TEX0.TBP0 != expected_next_TBP0)
	{
		GL_CACHE("IsSplitTextureShuffle: Mismatch on TBP0, expecting %x, got %x", expected_next_TBP0, next_ctx.TEX0.TBP0);
		return false;
	}

	// Some games don't offset the FBP.
	if (next_ctx.FRAME.FBP != expected_next_FBP && next_ctx.FRAME.FBP != m_context->FRAME.FBP)
	{
		GL_CACHE("IsSplitTextureShuffle: Mismatch on FBP, expecting %x, got %x", expected_next_FBP * BLOCKS_PER_PAGE,
			next_ctx.FRAME.FBP * BLOCKS_PER_PAGE);
		return false;
	}

	// Great, everything lines up, so skip 'em.
	GL_CACHE("IsSplitTextureShuffle: Match, buffering and skipping draw.");

	// First part of the shuffle: remember where it started so the final draw can be rebased onto it.
	if (m_split_texture_shuffle_pages == 0)
	{
		m_split_texture_shuffle_start_FBP = m_context->FRAME.FBP;
		m_split_texture_shuffle_start_TBP = m_context->TEX0.TBP0;
	}

	m_split_texture_shuffle_pages += num_pages;
	m_split_texture_shuffle_pages_high += pages_high;
	return true;
}
/// Returns the page-aligned rectangle that the combined (split) texture shuffle covers, starting
/// from the current draw's scissored bounds.
GSVector4i GSRendererHW::GetSplitTextureShuffleDrawRect() const
{
	const GSLocalMemory::psm_t& fb_format = GSLocalMemory::m_psm[m_context->FRAME.PSM];

	// Current draw bounds, limited to the scissor.
	const GSVector4i scissor_rect(m_context->scissor.in);
	GSVector4i rect = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(scissor_rect);

	// Games such as Crash Twinsanity move both FBP and TBP0, so this draw only spans part of the
	// real shuffle and the bottom edge has to be rebuilt from the pages seen so far. Other games
	// keep FBP fixed, move only TBP0 and shift the draw rectangle into the second half of the
	// framebuffer, in which case the bounds are already right.
	const bool fbp_was_moved = (m_context->stack.FRAME.FBP != m_split_texture_shuffle_start_FBP);
	if (fbp_was_moved)
	{
		const auto page_height = fb_format.pgs.y;
		const int pages_in_this_draw = (rect.height() + page_height - 1) / page_height;
		rect.w = (m_split_texture_shuffle_pages_high + pages_in_this_draw) * page_height;
	}

	// The +/- 8 offset still has to be absorbed: zero the top-left corner and align outwards to pages.
	return rect.insert64<0>(0).ralign<Align_Outside>(fb_format.pgs);
}
void GSRendererHW::ExpandTarget(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r)
@ -1478,6 +1614,31 @@ void GSRendererHW::Draw()
// The rectangle of the draw
m_r = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(GSVector4i(context->scissor.in));
const bool is_split_texture_shuffle = (m_split_texture_shuffle_pages > 0);
if (is_split_texture_shuffle)
{
// Adjust the draw rectangle to the new page range, so we get the correct fb height.
const GSVector4i new_r = GetSplitTextureShuffleDrawRect();
GL_CACHE(
"Split texture shuffle: FBP %x -> %x, TBP0 %x -> %x, draw %d,%d => %d,%d -> %d,%d => %d,%d",
m_context->FRAME.Block(), m_split_texture_shuffle_start_FBP * BLOCKS_PER_PAGE,
m_context->TEX0.TBP0, m_split_texture_shuffle_start_TBP,
m_r.x, m_r.y, m_r.z, m_r.w,
new_r.x, new_r.y, new_r.z, new_r.w);
m_r = new_r;
// Adjust the scissor too, if it's in two parts, this will be wrong.
m_context->scissor.in = GSVector4(new_r);
// Fudge FRAME and TEX0 to point to the start of the shuffle.
m_context->TEX0.TBP0 = m_split_texture_shuffle_start_TBP;
m_context->FRAME.FBP = m_split_texture_shuffle_start_FBP;
m_context->offset.fb = GSOffset(GSLocalMemory::m_psm[m_context->FRAME.PSM].info, m_context->FRAME.Block(),
m_context->FRAME.FBW, m_context->FRAME.PSM);
m_context->offset.tex = GSOffset(GSLocalMemory::m_psm[m_context->TEX0.PSM].info, m_context->TEX0.TBP0,
m_context->TEX0.TBW, m_context->TEX0.PSM);
}
if (!GSConfig.UserHacks_DisableSafeFeatures)
{
if (IsConstantDirectWriteMemClear(true))
@ -1652,24 +1813,25 @@ void GSRendererHW::Draw()
m_tc->LookupSource(TEX0, env.TEXA, MIP_CLAMP, tmm.coverage, (GSConfig.HWMipmap >= HWMipmapLevel::Basic || GSConfig.TriFilter == TriFiltering::Forced) ? &hash_lod_range : nullptr);
}
GSVector2i unscaled_target_size;
const GSVector2i t_size = GetTargetSize(&unscaled_target_size);
const GSVector2i t_size = GetTargetSize(m_src);
// Ensure draw rect is clamped to framebuffer size. Necessary for updating valid area.
m_r = m_r.rintersect(GSVector4i(0, 0, unscaled_target_size.x, unscaled_target_size.y));
TEX0.TBP0 = context->FRAME.Block();
TEX0.TBW = context->FRAME.FBW;
TEX0.PSM = context->FRAME.PSM;
m_r = m_r.rintersect(GSVector4i::loadh(t_size));
GSTextureCache::Target* rt = nullptr;
GIFRegTEX0 FRAME_TEX0;
if (!no_rt)
{
FRAME_TEX0.U64 = 0;
FRAME_TEX0.TBP0 = context->FRAME.Block();
FRAME_TEX0.TBW = context->FRAME.FBW;
FRAME_TEX0.PSM = context->FRAME.PSM;
// Normally we would use 1024 here to match the clear above, but The Godfather does a 1023x1023 draw instead
// (very close to 1024x1024, but apparently the GS rounds down..). So, catch that here, we don't want to
// create that target, because the clear isn't black, it'll hang around and never get invalidated.
const bool is_square = (unscaled_target_size.y == unscaled_target_size.x) && m_r.w >= 1023 && m_vertex.next == 2;
rt = m_tc->LookupTarget(TEX0, t_size, GSTextureCache::RenderTarget, true, fm, false, unscaled_target_size.x, unscaled_target_size.y, force_preload, IsConstantDirectWriteMemClear(false) && is_square);
const bool is_square = (t_size.y == t_size.x) && m_r.w >= 1023 && m_vertex.next == 2;
rt = m_tc->LookupTarget(FRAME_TEX0, t_size, GetTextureScaleFactor(), GSTextureCache::RenderTarget, true, fm, false, force_preload, IsConstantDirectWriteMemClear(false) && is_square);
// Draw skipped because it was a clear and there was no target.
if (!rt)
@ -1679,14 +1841,15 @@ void GSRendererHW::Draw()
}
}
TEX0.TBP0 = context->ZBUF.Block();
TEX0.TBW = context->FRAME.FBW;
TEX0.PSM = context->ZBUF.PSM;
GSTextureCache::Target* ds = nullptr;
GIFRegTEX0 ZBUF_TEX0;
if (!no_ds)
{
ds = m_tc->LookupTarget(TEX0, t_size, GSTextureCache::DepthStencil, context->DepthWrite(), 0, false, unscaled_target_size.x, unscaled_target_size.y, force_preload);
ZBUF_TEX0.TBP0 = context->ZBUF.Block();
ZBUF_TEX0.TBW = context->FRAME.FBW;
ZBUF_TEX0.PSM = context->ZBUF.PSM;
ds = m_tc->LookupTarget(ZBUF_TEX0, t_size, GetTextureScaleFactor(), GSTextureCache::DepthStencil, context->DepthWrite(), 0, false, force_preload);
}
if (process_texture)
@ -1731,6 +1894,16 @@ void GSRendererHW::Draw()
GL_INS("WARNING: Possible misdetection of effect, texture shuffle is %s", m_texture_shuffle ? "Enabled" : "Disabled");
}
if (m_texture_shuffle && IsSplitTextureShuffle())
{
// If TEX0 == FBP, we're going to have a source left in the TC.
// That source will get used in the actual draw unsafely, so kick it out.
if (m_context->FRAME.Block() == m_context->TEX0.TBP0)
m_tc->InvalidateVideoMem(context->offset.fb, m_r, false, false);
return;
}
// Texture shuffle is not yet supported with strange clamp mode
ASSERT(!m_texture_shuffle || (context->CLAMP.WMS < 3 && context->CLAMP.WMT < 3));
@ -1770,7 +1943,7 @@ void GSRendererHW::Draw()
// If m_src is from a target that isn't the same size as the texture, texture sample edge modes won't work quite the same way
// If the game actually tries to access stuff outside of the rendered target, it was going to get garbage anyways so whatever
// But the game could issue reads that wrap to valid areas, so move wrapping to the shader if wrapping is used
const GSVector4i unscaled_size = GSVector4i(GSVector4(m_src->m_texture->GetSize()) / GSVector4(m_src->m_texture->GetScale()));
const GSVector2i unscaled_size = m_src->GetUnscaledSize();
if (!is_shuffle && m_context->CLAMP.WMS == CLAMP_REPEAT && (tmm.uses_boundary & TextureMinMaxResult::USES_BOUNDARY_U) && unscaled_size.x != tw)
{
// Our shader-emulated region repeat doesn't upscale :(
@ -1843,33 +2016,60 @@ void GSRendererHW::Draw()
rt->m_32_bits_fmt = m_texture_shuffle || (GSLocalMemory::m_psm[context->FRAME.PSM].bpp != 16);
}
// Deferred update of TEX0. We don't want to change it when we're doing a shuffle/clear, because it
// may increase the buffer width, or change PSM, which breaks P8 conversion amongst other things.
const bool is_mem_clear = IsConstantDirectWriteMemClear(false);
const bool can_update_size = !is_mem_clear && !m_texture_shuffle && !m_channel_shuffle;
if (!m_texture_shuffle && !m_channel_shuffle)
{
if (rt)
{
// Nicktoons Unite tries to change the width from 640 to 512 and breaks FMVs.
// Haunting ground has some messed textures if you don't modify the rest.
// Champions of Norrath expands the width from 512 to 1024, picture cut in half if you don't.
// The safest option is to probably let it expand but not retract.
if (!rt->m_is_frame || rt->m_TEX0.TBW < FRAME_TEX0.TBW)
{
rt->m_TEX0 = FRAME_TEX0;
}
else
{
const u32 width = rt->m_TEX0.TBW;
rt->m_TEX0 = FRAME_TEX0;
rt->m_TEX0.TBW = std::max(width, FRAME_TEX0.TBW);
}
}
if (ds)
ds->m_TEX0 = ZBUF_TEX0;
}
if (rt)
rt->Update(true);
if (ds)
ds->Update(true);
const GSVector2i resolution = PCRTCDisplays.GetResolution();
GSTextureCache::Target* old_rt = nullptr;
GSTextureCache::Target* old_ds = nullptr;
{
// We still need to make sure the dimensions of the targets match.
const GSVector2 up_s(GetTextureScaleFactor());
const int new_w = std::max(t_size.x, std::max(rt ? rt->m_texture->GetWidth() : 0, ds ? ds->m_texture->GetWidth() : 0));
const int new_h = std::max(t_size.y, std::max(rt ? rt->m_texture->GetHeight() : 0, ds ? ds->m_texture->GetHeight() : 0));
const float up_s = GetTextureScaleFactor();
const int new_w = std::max(t_size.x, std::max(rt ? rt->m_unscaled_size.x : 0, ds ? ds->m_unscaled_size.x : 0));
const int new_h = std::max(t_size.y, std::max(rt ? rt->m_unscaled_size.y : 0, ds ? ds->m_unscaled_size.y : 0));
if (rt)
{
const u32 old_end_block = rt->m_end_block;
const bool new_rect = rt->m_valid.rempty();
const bool new_height = new_h > rt->m_texture->GetHeight();
const bool new_height = new_h > rt->GetUnscaledHeight();
const int old_height = rt->m_texture->GetHeight();
pxAssert(rt->m_texture->GetScale() == up_s);
rt->ResizeTexture(new_w, new_h, up_s);
pxAssert(rt->GetScale() == up_s);
rt->ResizeTexture(new_w, new_h);
if (!m_texture_shuffle && !m_channel_shuffle)
{
const GSVector2i tex_size = rt->m_texture->GetSize();
const GSVector4i new_valid = GSVector4i(0, 0, tex_size.x / up_s.x, tex_size.y / up_s.y);
rt->ResizeValidity(new_valid);
}
rt->ResizeValidity(rt->GetUnscaledRect());
// Limit to 2x the vertical height of the resolution (for double buffering)
rt->UpdateValidity(m_r, can_update_size || m_r.w <= (resolution.y * 2));
@ -1898,18 +2098,15 @@ void GSRendererHW::Draw()
{
const u32 old_end_block = ds->m_end_block;
const bool new_rect = ds->m_valid.rempty();
const bool new_height = new_h > ds->m_texture->GetHeight();
const bool new_height = new_h > ds->GetUnscaledHeight();
const int old_height = ds->m_texture->GetHeight();
pxAssert(ds->m_texture->GetScale() == up_s);
ds->ResizeTexture(new_w, new_h, up_s);
pxAssert(ds->GetScale() == up_s);
ds->ResizeTexture(new_w, new_h);
if (!m_texture_shuffle && !m_channel_shuffle)
{
const GSVector2i tex_size = ds->m_texture->GetSize();
const GSVector4i new_valid = GSVector4i(0, 0, tex_size.x / up_s.x, tex_size.y / up_s.y);
ds->ResizeValidity(new_valid);
}
ds->ResizeValidity(ds->GetUnscaledRect());
// Limit to 2x the vertical height of the resolution (for double buffering)
ds->UpdateValidity(m_r, can_update_size || m_r.w <= (resolution.y * 2));
@ -2048,7 +2245,7 @@ void GSRendererHW::Draw()
//
DrawPrims(rt ? rt->m_texture : nullptr, ds ? ds->m_texture : nullptr, m_src);
DrawPrims(rt, ds, m_src);
//
@ -2091,6 +2288,17 @@ void GSRendererHW::Draw()
m_tc->InvalidateVideoMemType(GSTextureCache::RenderTarget, context->ZBUF.Block());
}
// Restore modified offsets.
if (is_split_texture_shuffle)
{
m_context->FRAME.FBP = m_context->stack.FRAME.FBP;
m_context->TEX0.TBP0 = m_context->stack.TEX0.TBP0;
m_context->offset.fb = GSOffset(GSLocalMemory::m_psm[m_context->FRAME.PSM].info, m_context->FRAME.Block(),
m_context->FRAME.FBW, m_context->FRAME.PSM);
m_context->offset.tex = GSOffset(GSLocalMemory::m_psm[m_context->TEX0.PSM].info, m_context->TEX0.TBP0,
m_context->TEX0.TBW, m_context->TEX0.PSM);
}
//
if (GSConfig.DumpGSData)
@ -2176,7 +2384,7 @@ bool GSRendererHW::VerifyIndices()
return true;
}
void GSRendererHW::SetupIA(const float& sx, const float& sy)
void GSRendererHW::SetupIA(float target_scale, float sx, float sy)
{
GL_PUSH("IA");
@ -2185,7 +2393,8 @@ void GSRendererHW::SetupIA(const float& sx, const float& sy)
for (u32 i = 0; i < m_vertex.next; i++)
m_vertex.buff[i].UV &= 0x3FEF3FEF;
}
const bool unscale_pt_ln = !GSConfig.UserHacks_DisableSafeFeatures && (GetUpscaleMultiplier() != 1.0f);
const bool unscale_pt_ln = !GSConfig.UserHacks_DisableSafeFeatures && (target_scale != 1.0f);
const GSDevice::FeatureSupport features = g_gs_device->Features();
ASSERT(VerifyIndices());
@ -2201,6 +2410,7 @@ void GSRendererHW::SetupIA(const float& sx, const float& sy)
if (features.point_expand)
{
m_conf.vs.point_size = true;
m_conf.cb_vs.point_size = GSVector2(target_scale);
}
else if (features.geometry_shader)
{
@ -2216,6 +2426,11 @@ void GSRendererHW::SetupIA(const float& sx, const float& sy)
ExpandIndices<GSHWDrawConfig::VSExpand::Point>();
}
}
else
{
// Vulkan/GL still need to set point size.
m_conf.cb_vs.point_size = target_scale;
}
break;
case GS_LINE_CLASS:
@ -2458,6 +2673,12 @@ void GSRendererHW::EmulateTextureShuffleAndFbmask()
{
m_conf.ps.fbmask = 0;
}
// Once we draw the shuffle, no more buffering.
m_split_texture_shuffle_pages = 0;
m_split_texture_shuffle_pages_high = 0;
m_split_texture_shuffle_start_FBP = 0;
m_split_texture_shuffle_start_TBP = 0;
}
else
{
@ -3458,7 +3679,8 @@ void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Source* tex)
m_conf.ps.ltf = bilinear && shader_emulated_sampler;
m_conf.ps.point_sampler = g_gs_device->Features().broken_point_sampler && (!bilinear || shader_emulated_sampler);
const GSVector2 scale = tex->m_texture->GetScale();
const float scale = tex->GetScale();
const GSVector2i unscaled_size = tex->GetUnscaledSize();
const int w = tex->m_texture->GetWidth();
const int h = tex->m_texture->GetHeight();
@ -3467,9 +3689,12 @@ void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Source* tex)
const int miptw = 1 << tex->m_TEX0.TW;
const int mipth = 1 << tex->m_TEX0.TH;
const GSVector4 WH(static_cast<float>(tw), static_cast<float>(th), miptw * scale.x, mipth * scale.y);
const GSVector4 WH(static_cast<float>(tw), static_cast<float>(th), miptw * scale, mipth * scale);
const GSVector4 st_scale = WH.zwzw() / GSVector4(w, h).xyxy();
m_conf.cb_ps.STScale = GSVector2(st_scale.x, st_scale.y);
// Reduction factor when source is a target and smaller/larger than TW/TH.
m_conf.cb_ps.STScale = GSVector2(static_cast<float>(miptw) / static_cast<float>(unscaled_size.x),
static_cast<float>(mipth) / static_cast<float>(unscaled_size.y));
if (tex->m_region.HasX())
{
@ -3647,16 +3872,14 @@ void GSRendererHW::ResetStates()
memset(&m_conf, 0, reinterpret_cast<const char*>(&m_conf.cb_vs) - reinterpret_cast<const char*>(&m_conf));
}
void GSRendererHW::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Target* ds, GSTextureCache::Source* tex)
{
#ifdef ENABLE_OGL_DEBUG
const GSVector4i area_out = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(GSVector4i(m_context->scissor.in));
const GSVector4i area_in = GSVector4i(m_vt.m_min.t.xyxy(m_vt.m_max.t));
GL_PUSH("GL Draw from %d (area %d,%d => %d,%d) in %d (Depth %d) (area %d,%d => %d,%d)",
tex && tex->m_texture ? tex->m_texture->GetID() : -1,
GL_PUSH("GL Draw from (area %d,%d => %d,%d) in (area %d,%d => %d,%d)",
area_in.x, area_in.y, area_in.z, area_in.w,
rt ? rt->GetID() : -1, ds ? ds->GetID() : -1,
area_out.x, area_out.y, area_out.z, area_out.w);
#endif
@ -3669,12 +3892,13 @@ void GSRendererHW::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
const bool ate_second_pass = m_context->TEST.DoSecondPass();
ResetStates();
m_conf.cb_vs.texture_offset = GSVector2(0, 0);
m_conf.ps.scanmsk = m_env.SCANMSK.MSK;
m_conf.rt = rt;
m_conf.ds = ds;
ASSERT(g_gs_device != nullptr);
const float scale_factor = rt ? rt->GetScale() : ds->GetScale();
m_conf.cb_vs.texture_offset = {};
m_conf.cb_ps.ScaleFactor = GSVector4(scale_factor * (1.0f / 16.0f), 1.0f / scale_factor, 0.0f, 0.0f);
m_conf.ps.scanmsk = m_env.SCANMSK.MSK;
m_conf.rt = rt ? rt->m_texture : nullptr;
m_conf.ds = ds ? ds->m_texture : nullptr;
// Z setup has to come before channel shuffle
EmulateZbuffer();
@ -3833,8 +4057,7 @@ void GSRendererHW::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
if (m_conf.destination_alpha >= GSHWDrawConfig::DestinationAlphaMode::Stencil &&
m_conf.destination_alpha <= GSHWDrawConfig::DestinationAlphaMode::StencilOne && !m_conf.ds)
{
temp_ds = g_gs_device->CreateDepthStencil(rt->GetWidth(), rt->GetHeight(), GSTexture::Format::DepthStencil, false);
temp_ds->SetScale(m_conf.rt->GetScale());
temp_ds = g_gs_device->CreateDepthStencil(m_conf.rt->GetWidth(), m_conf.rt->GetHeight(), GSTexture::Format::DepthStencil, false);
m_conf.ds = temp_ds;
}
@ -3844,10 +4067,10 @@ void GSRendererHW::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
m_conf.vs.fst = PRIM->FST;
// FIXME D3D11 and GL support half pixel center. Code could be easier!!!
const GSVector2i rtsize(m_conf.ds ? m_conf.ds->GetSize() : m_conf.rt->GetSize());
const GSVector2 rtscale(m_conf.ds ? m_conf.ds->GetScale() : m_conf.rt->GetScale());
const float sx = 2.0f * rtscale.x / (rtsize.x << 4);
const float sy = 2.0f * rtscale.y / (rtsize.y << 4);
const GSVector2i rtsize = m_conf.ds ? m_conf.ds->GetSize() : m_conf.rt->GetSize();
const float rtscale = (ds ? ds->GetScale() : rt->GetScale());
const float sx = 2.0f * rtscale / (rtsize.x << 4);
const float sy = 2.0f * rtscale / (rtsize.y << 4);
const float ox = static_cast<float>(static_cast<int>(m_context->XYOFFSET.OFX));
const float oy = static_cast<float>(static_cast<int>(m_context->XYOFFSET.OFY));
float ox2 = -1.0f / rtsize.x;
@ -3991,7 +4214,7 @@ void GSRendererHW::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
// Extract the depth as palette index
m_conf.ps.depth_fmt = 1;
m_conf.ps.channel = ChannelFetch_BLUE;
m_conf.tex = ds;
m_conf.tex = ds->m_texture;
// We need the palette to convert the depth to the correct alpha value.
if (!tex->m_palette)
@ -4026,12 +4249,12 @@ void GSRendererHW::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
// rs
const GSVector4 hacked_scissor(m_channel_shuffle ? GSVector4(0, 0, 1024, 1024) : m_context->scissor.in);
const GSVector4i scissor(GSVector4i(GSVector4(rtscale).xyxy() * hacked_scissor).rintersect(GSVector4i(rtsize).zwxy()));
const GSVector4i scissor(GSVector4i(GSVector4(rtscale) * hacked_scissor).rintersect(GSVector4i(rtsize).zwxy()));
m_conf.drawarea = m_channel_shuffle ? scissor : scissor.rintersect(ComputeBoundingBox(rtscale, rtsize));
m_conf.drawarea = m_channel_shuffle ? scissor : scissor.rintersect(ComputeBoundingBox(rtsize, rtscale));
m_conf.scissor = (DATE && !DATE_BARRIER) ? m_conf.drawarea : scissor;
SetupIA(sx, sy);
SetupIA(rtscale, sx, sy);
m_conf.alpha_second_pass.enable = ate_second_pass;
@ -4439,26 +4662,6 @@ void GSRendererHW::OI_DoubleHalfClear(GSTextureCache::Target*& rt, GSTextureCach
GL_INS("OI_DoubleHalfClear:%s: base %x half %x. w_pages %d h_pages %d fbw %d. Color %x",
clear_depth ? "depth" : "target", base << 5, half << 5, w_pages, h_pages, m_context->FRAME.FBW, color);
// Handle the case where the game stacks FBP and ZBP immediately after one another.
// We incorrectly compute the height here, because both the scissor and draw rectangle will only be half
// the height of what's effectively being cleared. Spider-Man 2's shadows are a good test case here: it
// draws the shadow map to a 128x128 texture, but relies on a 1 pixel border around the edge to "cut off"
// the shadows. We cap it to a 256 height, because having a >=512 height framebuffer is very rare, and it
// stops us doubling actual framebuffers unintentionally (very common).
GSTextureCache::Target* t = clear_depth ? ds : rt;
const u32 unscaled_height = static_cast<u32>(static_cast<float>(t->m_texture->GetHeight()) / t->m_texture->GetScale().y);
if (unscaled_height == m_context->scissor.in.w && unscaled_height <= 256)
{
t->ResizeTexture(t->m_texture->GetWidth(), t->m_texture->GetHeight() * 2, t->m_texture->GetScale());
if (clear_depth)
rt = nullptr;
else
ds = nullptr;
// Feed it back into the height cache.
m_tc->GetTargetHeight(t->m_TEX0.TBP0, t->m_TEX0.TBW, t->m_TEX0.PSM, unscaled_height * 2);
}
if (clear_depth)
{
// Only pure clears are supported for depth

View File

@ -84,10 +84,10 @@ private:
template <bool linear>
void RoundSpriteOffset();
void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex);
void DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Target* ds, GSTextureCache::Source* tex);
void ResetStates();
void SetupIA(const float& sx, const float& sy);
void SetupIA(float target_scale, float sx, float sy);
void EmulateTextureShuffleAndFbmask();
void EmulateChannelShuffle(const GSTextureCache::Source* tex);
void EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER, bool& blending_alpha_pass);
@ -97,6 +97,9 @@ private:
void SetTCOffset();
bool IsSplitTextureShuffle();
GSVector4i GetSplitTextureShuffleDrawRect() const;
GSTextureCache* m_tc;
GSVector4i m_r = {};
GSTextureCache::Source* m_src = nullptr;
@ -108,6 +111,11 @@ private:
int m_skip = 0;
int m_skip_offset = 0;
u32 m_split_texture_shuffle_pages = 0;
u32 m_split_texture_shuffle_pages_high = 0;
u32 m_split_texture_shuffle_start_FBP = 0;
u32 m_split_texture_shuffle_start_TBP = 0;
u32 m_last_channel_shuffle_fbmsk = 0;
bool m_channel_shuffle = false;
@ -148,17 +156,17 @@ public:
template <GSHWDrawConfig::VSExpand Expand> void ExpandIndices();
void ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba);
GSVector4 RealignTargetTextureCoordinate(const GSTextureCache::Source* tex);
GSVector4i ComputeBoundingBox(const GSVector2& rtscale, const GSVector2i& rtsize);
GSVector4i ComputeBoundingBox(const GSVector2i& rtsize, float rtscale);
void MergeSprite(GSTextureCache::Source* tex);
GSVector2 GetTextureScaleFactor() override;
GSVector2i GetTargetSize(GSVector2i* unscaled_size = nullptr);
float GetTextureScaleFactor() override;
GSVector2i GetTargetSize(const GSTextureCache::Source* tex = nullptr);
void Reset(bool hardware_reset) override;
void UpdateSettings(const Pcsx2Config::GSOptions& old_config) override;
void VSync(u32 field, bool registers_written) override;
GSTexture* GetOutput(int i, int& y_offset) override;
GSTexture* GetFeedbackOutput() override;
GSTexture* GetOutput(int i, float& scale, int& y_offset) override;
GSTexture* GetFeedbackOutput(float& scale) override;
void ExpandTarget(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r) override;
void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool eewrite = false) override;
void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false) override;
@ -167,7 +175,7 @@ public:
void PurgeTextureCache() override;
void ReadbackTextureCache() override;
GSTexture* LookupPaletteSource(u32 CBP, u32 CPSM, u32 CBW, GSVector2i& offset, const GSVector2i& size) override;
GSTexture* LookupPaletteSource(u32 CBP, u32 CPSM, u32 CBW, GSVector2i& offset, float* scale, const GSVector2i& size) override;
// Called by the texture cache to know if current texture is useful
bool UpdateTexIsFB(GSTextureCache::Target* src, const GIFRegTEX0& TEX0);

View File

@ -182,6 +182,8 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0
// Create a shared texture source
src = new Source(TEX0, TEXA);
src->m_texture = dst->m_texture;
src->m_scale = dst->m_scale;
src->m_unscaled_size = dst->m_unscaled_size;
src->m_shared_texture = true;
src->m_target = true; // So renderer can check if a conversion is required
src->m_from_target = &dst->m_texture; // avoid complex condition on the renderer
@ -225,7 +227,7 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0
}
ASSERT(src->m_texture);
ASSERT(src->m_texture->GetScale() == (dst ? dst->m_texture->GetScale() : GSVector2(1, 1)));
ASSERT(src->m_scale == (dst ? dst->m_scale : 1.0f));
return src;
}
@ -538,14 +540,9 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
AddDirtyRectTarget(dst, dirty_rect, dst->m_TEX0.PSM, dst->m_TEX0.TBW, rgba);
}
}
const GSVector2 up_s(dst->m_texture->GetScale());
const int new_w = std::max(static_cast<int>(dst->m_valid.z * up_s.x), dst->m_texture->GetWidth());
const int new_h = std::max(static_cast<int>(dst->m_valid.w * up_s.y), dst->m_texture->GetHeight());
if (new_w > dst->m_texture->GetWidth() || new_h > dst->m_texture->GetHeight())
{
dst->ResizeTexture(new_w, new_h, up_s);
}
if (dst->m_valid.z > dst->m_unscaled_size.x || dst->m_valid.w > dst->m_unscaled_size.y)
dst->ResizeTexture(dst->m_valid.z, dst->m_valid.w);
}
// Pure depth texture format will be fetched by LookupDepthSource.
// However guess what, some games (GoW) read the depth as a standard
@ -584,7 +581,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
}
if (tex_merge_rt)
src = CreateMergedSource(TEX0, TEXA, region, dst->m_texture->GetScale());
src = CreateMergedSource(TEX0, TEXA, region, dst->m_scale);
}
if (!src)
@ -641,28 +638,27 @@ GSTextureCache::Target* GSTextureCache::FindTargetOverlap(u32 bp, u32 end_block,
return nullptr;
}
GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, const GSVector2i& size, int type, bool used, u32 fbmask, const bool is_frame, const int real_w, const int real_h, bool preload, bool is_clear)
GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, const GSVector2i& size, float scale, int type, bool used, u32 fbmask, const bool is_frame, bool preload, bool is_clear)
{
const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[TEX0.PSM];
const GSVector2& new_s = static_cast<GSRendererHW*>(g_gs_renderer.get())->GetTextureScaleFactor();
const u32 bp = TEX0.TBP0;
GSVector2 res_size{0, 0};
GSVector2i new_size{0, 0};
GSVector2i new_scaled_size{0, 0};
const GSVector4 sRect(0, 0, 1, 1);
GSVector4 dRect{};
bool clear = true;
const auto& calcRescale = [size, new_s, &res_size, &new_size, &clear, &dRect](const GSTexture* tex)
const auto& calcRescale = [&size, &scale, &new_size, &new_scaled_size, &clear, &dRect](const Target* tgt)
{
// TODO Possible optimization: rescale only the validity rectangle of the old target texture into the new one.
const GSVector2& old_s = tex->GetScale();
const GSVector2 ratio = new_s / old_s;
const int old_w = tex->GetWidth();
const int old_h = tex->GetHeight();
res_size = GSVector2(old_w, old_h) * ratio;
new_size.x = std::max(static_cast<int>(std::ceil(res_size.x)), size.x);
new_size.y = std::max(static_cast<int>(std::ceil(res_size.y)), size.y);
clear = new_size.x > res_size.x || new_size.y > res_size.y;
dRect = GSVector4(0.0f, 0.0f, res_size.x, res_size.y);
clear = (size.x > tgt->m_unscaled_size.x || size.y > tgt->m_unscaled_size.y);
new_size.x = std::max(size.x, tgt->m_unscaled_size.x);
new_size.y = std::max(size.y, tgt->m_unscaled_size.y);
new_scaled_size.x = static_cast<int>(std::ceil(static_cast<float>(new_size.x) * scale));
new_scaled_size.y = static_cast<int>(std::ceil(static_cast<float>(new_size.y) * scale));
dRect = (GSVector4(GSVector4i::loadh(tgt->m_unscaled_size)) * GSVector4(scale)).ceil();
GL_INS("TC Rescale: %dx%d: %dx%d @ %f -> %dx%d @ %f", tgt->m_unscaled_size.x, tgt->m_unscaled_size.y,
tgt->m_texture->GetWidth(), tgt->m_texture->GetHeight(), tgt->m_scale, new_scaled_size.x, new_scaled_size.y,
scale);
};
Target* dst = nullptr;
@ -682,21 +678,6 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, con
dst = t;
dst->m_32_bits_fmt |= (psm_s.bpp != 16);
// Nicktoons Unite tries to change the width from 640 to 512 and breaks FMVs.
// Haunting ground has some messed textures if you don't modify the rest.
// Champions of Norrath expands the width from 512 to 1024, picture cut in half if you don't.
// The safest option is to probably let it expand but not retract.
if (!dst->m_is_frame || dst->m_TEX0.TBW < TEX0.TBW)
{
dst->m_TEX0 = TEX0;
}
else
{
u32 width = dst->m_TEX0.TBW;
dst->m_TEX0 = TEX0;
dst->m_TEX0.TBW = width;
}
break;
}
}
@ -721,8 +702,8 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, con
dst = t;
GL_CACHE("TC: Lookup Frame %dx%d, perfect hit: %d (0x%x -> 0x%x %s)", size.x, size.y, dst->m_texture->GetID(), bp, t->m_end_block, psm_str(TEX0.PSM));
if (real_h > 0 || real_w > 0)
ScaleTargetForDisplay(dst, TEX0, real_w, real_h);
if (size.x > 0 || size.y > 0)
ScaleTargetForDisplay(dst, TEX0, size.x, size.y);
break;
}
@ -731,11 +712,11 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, con
// 2nd try! Try to find a frame which the requested range (bp -> bp + size) is inside of (or equal to).
if (!dst)
{
const u32 needed_end = GSLocalMemory::m_psm[TEX0.PSM].info.bn(real_w - 1, real_h - 1, bp, TEX0.TBW);
const u32 needed_end = GSLocalMemory::m_psm[TEX0.PSM].info.bn(size.x - 1, size.y - 1, bp, TEX0.TBW);
for (auto t : list)
{
// Make sure the target is inside the texture
if (t->m_TEX0.TBP0 <= bp && bp <= t->m_end_block && t->Inside(bp, TEX0.TBW, TEX0.PSM, GSVector4i(0, 0, real_w, real_h)))
if (t->m_TEX0.TBP0 <= bp && bp <= t->m_end_block && t->Inside(bp, TEX0.TBW, TEX0.PSM, GSVector4i::loadh(size)))
{
// If we already have an old one, make sure the "new" one matches at least on one end (double buffer?).
if (old_found && (t->m_age > 4 || (t->m_TEX0.TBP0 != bp && needed_end != t->m_end_block)))
@ -744,8 +725,8 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, con
dst = t;
GL_CACHE("TC: Lookup Frame %dx%d, inclusive hit: %d (0x%x, took 0x%x -> 0x%x %s)", size.x, size.y, t->m_texture->GetID(), bp, t->m_TEX0.TBP0, t->m_end_block, psm_str(TEX0.PSM));
if (real_h > 0 || real_w > 0)
ScaleTargetForDisplay(dst, TEX0, real_w, real_h);
if (size.x > 0 || size.y > 0)
ScaleTargetForDisplay(dst, TEX0, size.x, size.y);
break;
}
@ -780,21 +761,24 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, con
if (dst)
{
GL_CACHE("TC: Lookup %s(%s) %dx%d, hit: %d (0x%x, %s)", is_frame ? "Frame" : "Target", to_string(type), size.x, size.y, dst->m_texture->GetID(), bp, psm_str(TEX0.PSM));
GL_CACHE("TC: Lookup %s(%s) %dx%d (0x%x, BW:%u, %s) hit (0x%x, BW:%d, %s)", is_frame ? "Frame" : "Target",
to_string(type), size.x, size.y, bp, TEX0.TBW, psm_str(TEX0.PSM), dst->m_TEX0.TBP0, dst->m_TEX0.TBW, psm_str(dst->m_TEX0.PSM));
dst->Update(!is_frame || old_found == dst);
// Update is done by caller after TEX0 update for non-frame.
if (is_frame)
dst->Update(old_found == dst);
const GSVector2& old_s = dst->m_texture->GetScale();
if (new_s != old_s)
if (dst->m_scale != scale)
{
calcRescale(dst->m_texture);
GSTexture* tex = type == RenderTarget ? g_gs_device->CreateRenderTarget(new_size.x, new_size.y, GSTexture::Format::Color, clear) :
g_gs_device->CreateDepthStencil(new_size.x, new_size.y, GSTexture::Format::DepthStencil, clear);
calcRescale(dst);
GSTexture* tex = type == RenderTarget ? g_gs_device->CreateRenderTarget(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::Color, clear) :
g_gs_device->CreateDepthStencil(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::DepthStencil, clear);
g_gs_device->StretchRect(dst->m_texture, sRect, tex, dRect, (type == RenderTarget) ? ShaderConvert::COPY : ShaderConvert::DEPTH_COPY, false);
m_target_memory_usage = (m_target_memory_usage - dst->m_texture->GetMemUsage()) + tex->GetMemUsage();
g_gs_device->Recycle(dst->m_texture);
tex->SetScale(new_s);
dst->m_texture = tex;
dst->m_scale = scale;
dst->m_unscaled_size = new_size;
}
if (!is_frame)
@ -827,21 +811,22 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, con
if (dst_match)
{
dst_match->Update(true);
calcRescale(dst_match->m_texture);
dst = CreateTarget(TEX0, new_size.x, new_size.y, type, clear);
calcRescale(dst_match);
dst = CreateTarget(TEX0, new_size.x, new_size.y, scale, type, clear);
dst->m_32_bits_fmt = dst_match->m_32_bits_fmt;
dst->OffsetHack_modxy = dst_match->OffsetHack_modxy;
ShaderConvert shader;
// m_32_bits_fmt gets set on a shuffle or if the format isn't 16bit.
// In this case it needs to make sure it isn't part of a shuffle, where it needs to be interpreted as 32bits.
const bool fmt_16_bits = (psm_s.bpp == 16 && GSLocalMemory::m_psm[dst_match->m_TEX0.PSM].bpp == 16 && !dst->m_32_bits_fmt);
if (type == DepthStencil)
{
GL_CACHE("TC: Lookup Target(Depth) %dx%d, hit Color (0x%x, %s was %s)", new_size.x, new_size.y, bp, psm_str(TEX0.PSM), psm_str(dst_match->m_TEX0.PSM));
GL_CACHE("TC: Lookup Target(Depth) %dx%d, hit Color (0x%x, TBW %d, %s was %s)", new_size.x, new_size.y, bp, TEX0.TBW, psm_str(TEX0.PSM), psm_str(dst_match->m_TEX0.PSM));
shader = (fmt_16_bits) ? ShaderConvert::RGB5A1_TO_FLOAT16 : (ShaderConvert)(static_cast<int>(ShaderConvert::RGBA8_TO_FLOAT32) + psm_s.fmt);
}
else
{
GL_CACHE("TC: Lookup Target(Color) %dx%d, hit Depth (0x%x, %s was %s)", new_size.x, new_size.y, bp, psm_str(TEX0.PSM), psm_str(dst_match->m_TEX0.PSM));
GL_CACHE("TC: Lookup Target(Color) %dx%d, hit Depth (0x%x, TBW %d, %s was %s)", new_size.x, new_size.y, bp, TEX0.TBW, psm_str(TEX0.PSM), psm_str(dst_match->m_TEX0.PSM));
shader = (fmt_16_bits) ? ShaderConvert::FLOAT16_TO_RGB5A1 : ShaderConvert::FLOAT32_TO_RGBA8;
}
g_gs_device->StretchRect(dst_match->m_texture, sRect, dst->m_texture, dRect, shader, false);
@ -857,9 +842,9 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, con
return nullptr;
}
GL_CACHE("TC: Lookup %s(%s) %dx%d, miss (0x%x, %s)", is_frame ? "Frame" : "Target", to_string(type), size.x, size.y, bp, psm_str(TEX0.PSM));
GL_CACHE("TC: Lookup %s(%s) %dx%d, miss (0x%x, TBW %d, %s)", is_frame ? "Frame" : "Target", to_string(type), size.x, size.y, bp, TEX0.TBW, psm_str(TEX0.PSM));
dst = CreateTarget(TEX0, size.x, size.y, type, true);
dst = CreateTarget(TEX0, size.x, size.y, scale, type, true);
// In theory new textures contain invalidated data. Still in theory a new target
// must contain the content of the GS memory.
// In practice, TC will wrongly invalidate some RT. For example due to write on the alpha
@ -876,7 +861,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, con
if (TEX0.TBW > 0 && supported_fmt)
{
const bool forced_preload = GSRendererHW::GetInstance()->m_force_preload > 0;
const GSVector4i newrect = GSVector4i(0, 0, real_w, real_h);
const GSVector4i newrect = GSVector4i::loadh(size);
const u32 rect_end = GSLocalMemory::m_psm[TEX0.PSM].info.bn(newrect.z - 1, newrect.w - 1, TEX0.TBP0, TEX0.TBW);
RGBAMask rgba;
rgba._u32 = GSUtil::GetChannelMask(TEX0.PSM);
@ -924,7 +909,6 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, con
eerect = eerect.rintersect(newrect);
dst->UpdateValidity(newrect);
AddDirtyRectTarget(dst, eerect, TEX0.PSM, TEX0.TBW, rgba);
dst->Update(true);
}
}
else
@ -932,7 +916,6 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, con
GL_INS("Preloading the RT DATA");
dst->UpdateValidity(newrect);
AddDirtyRectTarget(dst, newrect, TEX0.PSM, TEX0.TBW, rgba);
dst->Update(true);
}
}
dst->m_is_frame = is_frame;
@ -946,14 +929,13 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, con
dst->readbacks_since_draw = 0;
assert(dst && dst->m_texture && dst->m_texture->GetScale() == new_s);
assert(dst && dst->m_dirty.empty());
assert(dst && dst->m_texture && dst->m_scale == scale);
return dst;
}
GSTextureCache::Target* GSTextureCache::LookupDisplayTarget(const GIFRegTEX0& TEX0, const GSVector2i& size, const int real_w, const int real_h)
GSTextureCache::Target* GSTextureCache::LookupDisplayTarget(const GIFRegTEX0& TEX0, const GSVector2i& size, float scale)
{
return LookupTarget(TEX0, size, RenderTarget, true, 0, true, real_w, real_h);
return LookupTarget(TEX0, size, scale, RenderTarget, true, 0, true);
}
void GSTextureCache::ScaleTargetForDisplay(Target* t, const GIFRegTEX0& dispfb, int real_w, int real_h)
@ -982,32 +964,34 @@ void GSTextureCache::ScaleTargetForDisplay(Target* t, const GIFRegTEX0& dispfb,
// Take that into consideration to find the extent of the target which will be sampled.
GSTexture* old_texture = t->m_texture;
const int old_width = static_cast<int>(static_cast<float>(old_texture->GetWidth()) / old_texture->GetScale().x);
const int old_height = static_cast<int>(static_cast<float>(old_texture->GetHeight()) / old_texture->GetScale().y);
const float scale = t->m_scale;
const int old_width = t->m_unscaled_size.x;
const int old_height = t->m_unscaled_size.y;
const int needed_height = std::min(real_h + y_offset, GSRendererHW::MAX_FRAMEBUFFER_HEIGHT);
const int scaled_needed_height = std::max(static_cast<int>(static_cast<float>(needed_height) * old_texture->GetScale().y), old_texture->GetHeight());
const int needed_width = std::min(real_w, static_cast<int>(dispfb.TBW * 64));
const int scaled_needed_width = std::max(static_cast<int>(static_cast<float>(needed_width) * old_texture->GetScale().x), old_texture->GetWidth());
if (scaled_needed_height <= old_texture->GetHeight() && scaled_needed_width <= old_texture->GetWidth())
if (needed_height <= t->m_unscaled_size.y && needed_width <= t->m_unscaled_size.x)
return;
// We're expanding, so create a new texture.
GSTexture* new_texture = g_gs_device->CreateRenderTarget(scaled_needed_width, scaled_needed_height, GSTexture::Format::Color, false);
const int new_height = std::max(t->m_unscaled_size.y, needed_height);
const int new_width = std::max(t->m_unscaled_size.x, needed_width);
const int scaled_new_height = static_cast<int>(std::ceil(static_cast<float>(new_height) * scale));
const int scaled_new_width = static_cast<int>(std::ceil(static_cast<float>(new_width) * scale));
GSTexture* new_texture = g_gs_device->CreateRenderTarget(scaled_new_width, scaled_new_height, GSTexture::Format::Color, false);
if (!new_texture)
{
// Memory allocation failure, do our best to hobble along.
return;
}
// Keep the scale of the original texture.
new_texture->SetScale(old_texture->GetScale());
GL_CACHE("Expanding target for display output, target height %d @ 0x%X, display %d @ 0x%X offset %d needed %d",
t->m_texture->GetHeight(), t->m_TEX0.TBP0, real_h, dispfb.TBP0, y_offset, needed_height);
t->m_unscaled_size.y, t->m_TEX0.TBP0, real_h, dispfb.TBP0, y_offset, needed_height);
// Fill the new texture with the old data, and discard the old texture.
g_gs_device->StretchRect(old_texture, new_texture, GSVector4(old_texture->GetSize()).zwxy(), ShaderConvert::COPY, false);
m_target_memory_usage = (m_target_memory_usage - old_texture->GetMemUsage()) + new_texture->GetMemUsage();
g_gs_device->Recycle(old_texture);
t->m_texture = new_texture;
t->m_unscaled_size = GSVector2i(new_width, new_height);
RGBAMask rgba;
rgba._u32 = GSUtil::GetChannelMask(t->m_TEX0.PSM);
@ -1022,7 +1006,7 @@ void GSTextureCache::ScaleTargetForDisplay(Target* t, const GIFRegTEX0& dispfb,
}
else
{
const GSVector4i newrect = GSVector4i((old_height < scaled_needed_height) ? 0 : old_width,
const GSVector4i newrect = GSVector4i((old_height < new_height) ? 0 : old_width,
(old_width < preload_width) ? 0 : old_height,
preload_width, needed_height);
AddDirtyRectTarget(t, newrect, t->m_TEX0.PSM, t->m_TEX0.TBW, rgba);
@ -1086,14 +1070,12 @@ void GSTextureCache::ExpandTarget(const GIFRegBITBLTBUF& BITBLTBUF, const GSVect
{
// Round up to the nearest even height, like the draw target allocator.
const s32 aligned_height = Common::AlignUpPow2(r.w, 2);
const GSVector2i rect_scaled = GSVector2i(r.z * g_gs_renderer->GetUpscaleMultiplier(), aligned_height * g_gs_renderer->GetUpscaleMultiplier());
const int upsc_width = std::max(rect_scaled.x, dst->m_texture->GetWidth());
const int upsc_height = std::max(rect_scaled.y, dst->m_texture->GetHeight());
if (dst->m_texture->GetWidth() < upsc_width || dst->m_texture->GetHeight() < upsc_height)
if (r.z > dst->m_unscaled_size.x || aligned_height > dst->m_unscaled_size.y)
{
// We don't recycle here, because most of the time when this happens it's strange-sized textures
// which are being expanded one-line-at-a-time.
if (dst->ResizeTexture(upsc_width, upsc_height, false))
if (dst->ResizeTexture(std::max(r.z, dst->m_unscaled_size.x),
std::max(aligned_height, dst->m_unscaled_size.y), false))
{
AddDirtyRectTarget(dst, r, TEX0.PSM, TEX0.TBW, rgba);
GetTargetHeight(TEX0.TBP0, TEX0.TBW, TEX0.PSM, aligned_height);
@ -1104,9 +1086,7 @@ void GSTextureCache::ExpandTarget(const GIFRegBITBLTBUF& BITBLTBUF, const GSVect
}
else
{
const GSVector4i clamped_r(
r.rintersect(GSVector4i(0, 0, static_cast<int>(dst->m_texture->GetWidth() / dst->m_texture->GetScale().x),
static_cast<int>(dst->m_texture->GetHeight() / dst->m_texture->GetScale().y))));
const GSVector4i clamped_r(r.rintersect(dst->GetUnscaledRect()));
AddDirtyRectTarget(dst, clamped_r, TEX0.PSM, TEX0.TBW, rgba);
dst->UpdateValidity(clamped_r);
dst->UpdateValidBits(GSLocalMemory::m_psm[TEX0.PSM].fmsk);
@ -1815,28 +1795,27 @@ bool GSTextureCache::Move(u32 SBP, u32 SBW, u32 SPSM, int sx, int sy, u32 DBP, u
new_TEX0.PSM = DPSM;
const int real_height = GetTargetHeight(DBP, DBW, DPSM, h);
const GSVector2 scale(src->m_texture->GetScale());
dst = LookupTarget(new_TEX0, GSVector2i(static_cast<int>(Common::AlignUpPow2(w, 64) * scale.x),
static_cast<int>(real_height * scale.y)), src->m_type, true);
dst = LookupTarget(new_TEX0, GSVector2i(static_cast<int>(Common::AlignUpPow2(w, 64)),
static_cast<int>(real_height)), src->m_scale, src->m_type, true);
if (dst)
{
dst->m_texture->SetScale(scale);
dst->UpdateValidity(GSVector4i(dx, dy, dx + w, dy + h));
dst->m_valid_bits = src->m_valid_bits;
dst->OffsetHack_modxy = src->OffsetHack_modxy;
}
}
if (!src || !dst || src->m_texture->GetScale() != dst->m_texture->GetScale())
if (!src || !dst || src->m_scale != dst->m_scale)
return false;
// Scale coordinates.
const GSVector2 scale(src->m_texture->GetScale());
const int scaled_sx = static_cast<int>(sx * scale.x);
const int scaled_sy = static_cast<int>(sy * scale.y);
const int scaled_dx = static_cast<int>(dx * scale.x);
const int scaled_dy = static_cast<int>(dy * scale.y);
const int scaled_w = static_cast<int>(w * scale.x);
const int scaled_h = static_cast<int>(h * scale.y);
const float scale = src->m_scale;
const int scaled_sx = static_cast<int>(sx * scale);
const int scaled_sy = static_cast<int>(sy * scale);
const int scaled_dx = static_cast<int>(dx * scale);
const int scaled_dy = static_cast<int>(dy * scale);
const int scaled_w = static_cast<int>(w * scale);
const int scaled_h = static_cast<int>(h * scale);
// The source isn't in our texture, otherwise it could falsely expand the texture causing a misdetection later, which then renders black.
if ((scaled_sx + scaled_w) > src->m_texture->GetWidth() || (scaled_sy + scaled_h) > src->m_texture->GetHeight())
@ -1845,7 +1824,10 @@ bool GSTextureCache::Move(u32 SBP, u32 SBW, u32 SPSM, int sx, int sy, u32 DBP, u
// We don't want to copy "old" data that the game has overwritten with writes,
// so flush any overlapping dirty area.
src->UpdateIfDirtyIntersects(GSVector4i(sx, sy, sx + w, sy + h));
dst->UpdateIfDirtyIntersects(GSVector4i(dx, dy, dx + w, dy + h));
// The main point of HW moves is so GPU data can get used as sources. If we don't flush all writes,
// we're not going to be able to use it as a source.
dst->Update(true);
// Expand the target when we used a more conservative size.
const int required_dh = scaled_dy + scaled_h;
@ -1860,11 +1842,10 @@ bool GSTextureCache::Move(u32 SBP, u32 SBW, u32 SPSM, int sx, int sy, u32 DBP, u
// We don't recycle the old texture here, because the height cache will track the new size,
// so the old size won't get created again.
const int scaled_new_height = static_cast<int>(static_cast<float>(new_height) * scale.y);
GL_INS("Resize %dx%d target to %dx%d for move", dst->m_texture->GetWidth(), dst->m_texture->GetHeight(), dst->m_texture->GetHeight(), scaled_new_height);
GL_INS("Resize %dx%d target to %dx%d for move", dst->m_unscaled_size.x, dst->m_unscaled_size.y, dst->m_unscaled_size.x, new_height);
GetTargetHeight(DBP, DBW, DPSM, new_height);
if (!dst->ResizeTexture(dst->m_texture->GetWidth(), scaled_new_height, false))
if (!dst->ResizeTexture(dst->m_unscaled_size.x, new_height, false))
{
// Resize failed, probably ran out of VRAM, better luck next time. Fall back to CPU.
// We injected the new height into the cache, so hopefully won't happen again.
@ -1967,7 +1948,7 @@ bool GSTextureCache::ShuffleMove(u32 BP, u32 BW, u32 PSM, int sx, int sy, int dx
config.indices_per_prim = 3;
config.drawlist = nullptr;
config.scissor = GSVector4i(0, 0, tgt->m_texture->GetWidth(), tgt->m_texture->GetHeight());
config.drawarea = GSVector4i(GSVector4(bbox) * GSVector4(tgt->m_texture->GetScale()).xxyy());
config.drawarea = GSVector4i(GSVector4(bbox) * GSVector4(tgt->m_scale));
config.topology = GSHWDrawConfig::Topology::Triangle;
config.blend = GSHWDrawConfig::BlendState();
config.depth = GSHWDrawConfig::DepthStencilSelector::NoDepth();
@ -1997,12 +1978,12 @@ bool GSTextureCache::ShuffleMove(u32 BP, u32 BW, u32 PSM, int sx, int sy, int dx
config.ps.tfx = TFX_DECAL;
const GSVector2i rtsize(tgt->m_texture->GetSize());
const GSVector2 rtscale(tgt->m_texture->GetScale());
config.cb_ps.WH = GSVector4(static_cast<float>(rtsize.x) / rtscale.x, static_cast<float>(rtsize.y) / rtscale.y,
const float rtscale = tgt->m_scale;
config.cb_ps.WH = GSVector4(static_cast<float>(rtsize.x) / rtscale, static_cast<float>(rtsize.y) / rtscale,
static_cast<float>(rtsize.x), static_cast<float>(rtsize.y));
config.cb_ps.STScale = rtscale;
config.cb_ps.STScale = GSVector2(1.0f);
config.cb_vs.vertex_scale = GSVector2(2.0f * rtscale.x / (rtsize.x << 4), 2.0f * rtscale.y / (rtsize.y << 4));
config.cb_vs.vertex_scale = GSVector2(2.0f * rtscale / (rtsize.x << 4), 2.0f * rtscale / (rtsize.y << 4));
config.cb_vs.vertex_offset = GSVector2(-1.0f / rtsize.x + 1.0f, -1.0f / rtsize.y + 1.0f);
config.cb_vs.texture_scale = GSVector2((1.0f / 16.0f) / config.cb_ps.WH.x, (1.0f / 16.0f) / config.cb_ps.WH.y);
@ -2067,6 +2048,35 @@ u32 GSTextureCache::GetTargetHeight(u32 fbp, u32 fbw, u32 psm, u32 min_height)
return min_height;
}
/// Reports whether a target flagged as 32-bit (m_32_bits_fmt) has its base block pointer at bp.
/// Render targets are searched first, then depth-stencil targets. A matching target is moved to
/// the front of its list, since the caller is expected to look it up again shortly.
bool GSTextureCache::Has32BitTarget(u32 bp)
{
	// Scans a single target list; on a hit, promotes the entry to the front and reports success.
	const auto promote_match = [bp](auto& targets) {
		for (auto it = targets.begin(); it != targets.end(); ++it)
		{
			const Target* const candidate = *it;
			if (candidate->m_32_bits_fmt && candidate->m_TEX0.TBP0 == bp)
			{
				targets.MoveFront(it.Index());
				return true;
			}
		}

		return false;
	};

	// Colour targets take priority; depth is only consulted when no colour target matched.
	return promote_match(m_dst[RenderTarget]) || promote_match(m_dst[DepthStencil]);
}
// Hack: remove Targets that are strictly included in the current rt. Typically used for FMV
// For example, game is rendered at 0x800->0x1000, fmv will be uploaded to 0x0->0x2800
// FIXME In theory, we ought to report the data from the sub rt to the main rt. But let's
@ -2226,11 +2236,11 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
if (dst && (x_offset != 0 || y_offset != 0))
{
const GSVector2 scale(dst->m_texture->GetScale());
const int x = static_cast<int>(scale.x * x_offset);
const int y = static_cast<int>(scale.y * y_offset);
const int w = static_cast<int>(scale.x * tw);
const int h = static_cast<int>(scale.y * th);
const float scale = dst->m_scale;
const int x = static_cast<int>(scale * x_offset);
const int y = static_cast<int>(scale * y_offset);
const int w = static_cast<int>(std::ceil(scale * tw));
const int h = static_cast<int>(std::ceil(scale * th));
// if we have a source larger than the target (from tex-in-rt), we need to clear it, otherwise we'll read junk
const bool outside_target = ((x + w) > dst->m_texture->GetWidth() || (y + h) > dst->m_texture->GetHeight());
@ -2247,11 +2257,12 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
// Keep a trace of origin of the texture
src->m_texture = dTex;
src->m_scale = scale;
src->m_unscaled_size = GSVector2i(tw, th);
src->m_end_block = dst->m_end_block;
src->m_target = true;
src->m_from_target = &dst->m_texture;
src->m_from_target_TEX0 = dst->m_TEX0;
src->m_texture->SetScale(scale);
src->m_end_block = dst->m_end_block;
if (psm.pal > 0)
{
@ -2273,12 +2284,13 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
// Keep a trace of origin of the texture
src->m_texture = dst->m_texture;
src->m_scale = dst->m_scale;
src->m_unscaled_size = dst->m_unscaled_size;
src->m_target = true;
src->m_shared_texture = true;
src->m_from_target = &dst->m_texture;
src->m_from_target_TEX0 = dst->m_TEX0;
src->m_end_block = dst->m_end_block;
src->m_texture->SetScale(dst->m_texture->GetScale());
src->m_32_bits_fmt = dst->m_32_bits_fmt;
// Even if we sample the framebuffer directly we might need the palette
@ -2316,6 +2328,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
// Keep a trace of origin of the texture
src->m_target = true;
src->m_unscaled_size = GSVector2i(std::min(dst->m_unscaled_size.x, tw), std::min(dst->m_unscaled_size.y, th));
src->m_from_target = &dst->m_texture;
src->m_from_target_TEX0 = dst->m_TEX0;
src->m_valid_rect = dst->m_valid;
@ -2323,17 +2336,20 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
dst->Update(true);
// do not round here!!! if edge becomes a black pixel and addressing mode is clamp => everything outside the clamped area turns into black (kh2 shadows)
// Rounding up should never exceed the texture size (since it itself should be rounded up), but just in case.
GSVector2i new_size(
std::min(static_cast<int>(std::ceil(static_cast<float>(src->m_unscaled_size.x) * dst->m_scale)),
dst->m_texture->GetWidth()),
std::min(static_cast<int>(std::ceil(static_cast<float>(src->m_unscaled_size.y) * dst->m_scale)),
dst->m_texture->GetHeight()));
const GSVector2i dstsize = dst->m_texture->GetSize();
int w = std::min(dstsize.x, static_cast<int>(dst->m_texture->GetScale().x * tw));
int h = std::min(dstsize.y, static_cast<int>(dst->m_texture->GetScale().y * th));
if (is_8bits)
{
// Unscale 8 bits textures, quality won't be nice but format is really awful
w = tw;
h = th;
src->m_unscaled_size.x = tw;
src->m_unscaled_size.y = th;
new_size.x = tw;
new_size.y = th;
}
// pitch conversion
@ -2396,9 +2412,10 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
}
// width/height conversion
const GSVector2 scale = is_8bits ? GSVector2(1, 1) : dst->m_texture->GetScale();
const float scale = is_8bits ? 1.0f : dst->m_scale;
src->m_scale = scale;
GSVector4i sRect(0, 0, w, h);
GSVector4i sRect = GSVector4i::loadh(new_size);
int destX = 0;
int destY = 0;
@ -2407,17 +2424,18 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
// You typically hit this code in snow engine games. Dstsize is the size of the Dx/GL RT
// which is set to some arbitrary number. h/w are based on the input texture
// so the only reliable way to find the real size of the target is to use the TBW value.
int half_width = static_cast<int>(dst->m_TEX0.TBW * (64 / 2) * dst->m_texture->GetScale().x);
if (half_width < dstsize.x)
const int half_width = static_cast<int>(dst->m_TEX0.TBW * (64 / 2));
if (half_width < dst->m_unscaled_size.x)
{
int copy_width = std::min(half_width, dstsize.x - half_width);
sRect.x = half_width;
sRect.z = half_width + copy_width;
w = copy_width;
const int copy_width = std::min(half_width, dst->m_unscaled_size.x - half_width);
sRect.x = static_cast<int>(static_cast<float>(half_width) * dst->m_scale);
sRect.z = std::min(static_cast<int>(static_cast<float>(half_width + copy_width) * dst->m_scale), dst->m_texture->GetWidth());
new_size.x = sRect.width();
src->m_unscaled_size.x = copy_width;
}
else
{
DevCon.Error("Invalid half-right copy with width %d from %dx%d texture", half_width * 2, w, h);
DevCon.Error("Invalid half-right copy with width %d from %dx%d texture", half_width * 2, dst->m_unscaled_size.x, dst->m_unscaled_size.y);
}
}
else if (src_range && dst->m_TEX0.TBW == TEX0.TBW && !is_8bits)
@ -2455,12 +2473,14 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
// It's the same as doing the copy first, except we save GPU time.
if (!half_right && // not the size change from above
use_texture && // not reinterpreting the RT
w == dst->m_texture->GetWidth() && h == dst->m_texture->GetHeight() && // same dimensions
new_size == dst->m_texture->GetSize() && // same dimensions
!m_temporary_source // not the shuffle case above
)
{
// sample the target directly
src->m_texture = dst->m_texture;
src->m_scale = dst->m_scale;
src->m_unscaled_size = dst->m_unscaled_size;
src->m_shared_texture = true;
src->m_target = true; // So renderer can check if a conversion is required
src->m_from_target = &dst->m_texture; // avoid complex condition on the renderer
@ -2478,10 +2498,10 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
// 'src' is the new texture cache entry (hence the output)
GSTexture* sTex = dst->m_texture;
GSTexture* dTex = use_texture ?
g_gs_device->CreateTexture(w, h, 1, GSTexture::Format::Color, true) :
g_gs_device->CreateRenderTarget(w, h, GSTexture::Format::Color, false);
dTex->SetScale(scale);
g_gs_device->CreateTexture(new_size.x, new_size.y, 1, GSTexture::Format::Color, true) :
g_gs_device->CreateRenderTarget(new_size.x, new_size.y, GSTexture::Format::Color, source_rect_empty || destX != 0 || destY != 0);
m_source_memory_usage += dTex->GetMemUsage();
src->m_texture = dTex;
if (use_texture)
{
@ -2489,14 +2509,19 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
}
else if (!source_rect_empty)
{
GSVector4 sRectF(sRect);
sRectF.z /= sTex->GetWidth();
sRectF.w /= sTex->GetHeight();
g_gs_device->StretchRect(sTex, sRectF, dTex, GSVector4(destX, destY, w, h), shader, false);
if (is_8bits)
{
g_gs_device->ConvertToIndexedTexture(sTex, dst->m_scale, x_offset, y_offset,
std::max<u32>(dst->m_TEX0.TBW, 1u) * 64, dst->m_TEX0.PSM, dTex,
std::max<u32>(TEX0.TBW, 1u) * 64, TEX0.PSM);
}
else
{
const GSVector4 sRectF = GSVector4(sRect) / GSVector4(1, 1, sTex->GetWidth(), sTex->GetHeight());
g_gs_device->StretchRect(
sTex, sRectF, dTex, GSVector4(destX, destY, new_size.x, new_size.y), shader, false);
}
}
src->m_texture = dTex;
}
// GH: by default (m_paltex == 0) GS converts texture to the 32 bit format
@ -2523,7 +2548,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
}
}
dst->m_texture->OffsetHack_modxy = modxy;
dst->OffsetHack_modxy = modxy;
}
else
{
@ -2532,9 +2557,11 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
const u32* clut = (psm.pal > 0) ? static_cast<const u32*>(g_gs_renderer->m_mem.m_clut) : nullptr;
// adjust texture size to fit
src->m_region = region;
tw = region.HasX() ? region.GetWidth() : tw;
th = region.HasY() ? region.GetHeight() : th;
src->m_region = region;
src->m_unscaled_size = GSVector2i(tw, th);
src->m_scale = 1.0f;
// try the hash cache
if ((src->m_from_hash_cache = LookupHashCache(TEX0, TEXA, paltex, clut, lod, region)) != nullptr)
@ -2568,7 +2595,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
ASSERT(src->m_texture);
ASSERT(src->m_target == (dst != nullptr));
ASSERT(src->m_from_target == (dst ? &dst->m_texture : nullptr));
ASSERT(src->m_texture->GetScale() == ((!dst || TEX0.PSM == PSM_PSMT8) ? GSVector2(1, 1) : dst->m_texture->GetScale()));
ASSERT(src->m_scale == ((!dst || TEX0.PSM == PSM_PSMT8) ? 1.0f : dst->m_scale));
src->SetPages();
@ -2577,14 +2604,14 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
return src;
}
GSTextureCache::Source* GSTextureCache::CreateMergedSource(GIFRegTEX0 TEX0, GIFRegTEXA TEXA, SourceRegion region, const GSVector2& scale)
GSTextureCache::Source* GSTextureCache::CreateMergedSource(GIFRegTEX0 TEX0, GIFRegTEXA TEXA, SourceRegion region, float scale)
{
// We *should* be able to use the TBW here as an indicator of size... except Destroy All Humans 2 sets
// TBW to 10, and samples from 64 through 703... which means it'd be grabbing the next row at the end.
const int tex_width = std::max<int>(64 * TEX0.TBW, region.GetMaxX());
const int tex_height = region.HasY() ? region.GetHeight() : (1 << TEX0.TH);
const int scaled_width = static_cast<int>(static_cast<float>(tex_width) * scale.x);
const int scaled_height = static_cast<int>(static_cast<float>(tex_height) * scale.y);
const int scaled_width = static_cast<int>(static_cast<float>(tex_width) * scale);
const int scaled_height = static_cast<int>(static_cast<float>(tex_height) * scale);
// Compute new end block based on size.
const u32 end_block = GSLocalMemory::m_psm[TEX0.PSM].info.bn(tex_width - 1, tex_height - 1, TEX0.TBP0, TEX0.TBW);
@ -2649,7 +2676,7 @@ GSTextureCache::Source* GSTextureCache::CreateMergedSource(GIFRegTEX0 TEX0, GIFR
}
// Upload texture -> render target.
const bool linear = (scale.x != 1.0f);
const bool linear = (scale != 1.0f);
copy_queue[copy_count++] = {GSVector4(rect) / GSVector4(lmtex->GetSize()).xyxy(),
GSVector4(rect) * GSVector4(scale).xyxy(), lmtex, linear, 0xf};
};
@ -2730,7 +2757,7 @@ GSTextureCache::Source* GSTextureCache::CreateMergedSource(GIFRegTEX0 TEX0, GIFR
}
// We might not even have a full page valid..
const bool linear = (scale != t->m_texture->GetScale());
const bool linear = (scale != t->m_scale);
const int src_x_end = so.b2a_offset.z;
const int src_y_end = so.b2a_offset.w;
int src_y = so.b2a_offset.y;
@ -2773,7 +2800,7 @@ GSTextureCache::Source* GSTextureCache::CreateMergedSource(GIFRegTEX0 TEX0, GIFR
GL_INS(" Copy from %d,%d -> %d,%d (%dx%d)", src_x, src_y, dst_x, dst_y, copy_width, copy_height);
copy_queue[copy_count++] = {
(GSVector4(src_x, src_y, src_x + copy_width, src_y + copy_height) *
GSVector4(t->m_texture->GetScale()).xyxy()) /
GSVector4(t->m_scale).xyxy()) /
GSVector4(t->m_texture->GetSize()).xyxy(),
GSVector4(dst_x, dst_y, dst_x + copy_width, dst_y + copy_height) *
GSVector4(scale).xyxy(),
@ -2837,7 +2864,6 @@ GSTextureCache::Source* GSTextureCache::CreateMergedSource(GIFRegTEX0 TEX0, GIFR
// Allocate our render target for drawing everything to.
GSTexture* dtex = g_gs_device->CreateRenderTarget(scaled_width, scaled_height, GSTexture::Format::Color, true);
dtex->SetScale(scale);
m_source_memory_usage += dtex->GetMemUsage();
// Sort rect list by the texture, we want to batch as many as possible together.
@ -2850,6 +2876,8 @@ GSTextureCache::Source* GSTextureCache::CreateMergedSource(GIFRegTEX0 TEX0, GIFR
Source* src = new Source(TEX0, TEXA);
src->m_texture = dtex;
src->m_scale = scale;
src->m_unscaled_size = GSVector2i(tex_width, tex_height);
src->m_end_block = end_block;
src->m_target = true;
@ -3000,34 +3028,38 @@ GSTextureCache::HashCacheEntry* GSTextureCache::LookupHashCache(const GIFRegTEX0
return &m_hash_cache.emplace(key, entry).first->second;
}
GSTextureCache::Target* GSTextureCache::CreateTarget(const GIFRegTEX0& TEX0, int w, int h, int type, const bool clear)
GSTextureCache::Target* GSTextureCache::CreateTarget(const GIFRegTEX0& TEX0, int w, int h, float scale, int type, const bool clear)
{
ASSERT(type == RenderTarget || type == DepthStencil);
Target* t = new Target(TEX0, !GSConfig.UserHacks_DisableDepthSupport, type);
const int scaled_w = static_cast<int>(std::ceil(static_cast<float>(w) * scale));
const int scaled_h = static_cast<int>(std::ceil(static_cast<float>(h) * scale));
// FIXME: initial data should be unswizzled from local mem in Update() if dirty
// TODO: This leaks if memory allocation fails. Use a unique_ptr so it gets freed, but these
// exceptions really need to get lost.
std::unique_ptr<Target> t = std::make_unique<Target>(TEX0, !GSConfig.UserHacks_DisableDepthSupport, type);
t->m_unscaled_size = GSVector2i(w, h);
t->m_scale = scale;
if (type == RenderTarget)
{
t->m_texture = g_gs_device->CreateRenderTarget(w, h, GSTexture::Format::Color, clear);
t->m_texture = g_gs_device->CreateRenderTarget(scaled_w, scaled_h, GSTexture::Format::Color, clear);
t->m_used = true; // FIXME
}
else if (type == DepthStencil)
{
t->m_texture = g_gs_device->CreateDepthStencil(w, h, GSTexture::Format::DepthStencil, clear);
t->m_texture = g_gs_device->CreateDepthStencil(scaled_w, scaled_h, GSTexture::Format::DepthStencil, clear);
}
t->m_texture->SetScale(static_cast<GSRendererHW*>(g_gs_renderer.get())->GetTextureScaleFactor());
m_target_memory_usage += t->m_texture->GetMemUsage();
m_dst[type].push_front(t);
m_dst[type].push_front(t.get());
return t;
return t.release();
}
GSTexture* GSTextureCache::LookupPaletteSource(u32 CBP, u32 CPSM, u32 CBW, GSVector2i& offset, const GSVector2i& size)
GSTexture* GSTextureCache::LookupPaletteSource(u32 CBP, u32 CPSM, u32 CBW, GSVector2i& offset, float* scale, const GSVector2i& size)
{
for (auto t : m_dst[RenderTarget])
{
@ -3086,6 +3118,7 @@ GSTexture* GSTextureCache::LookupPaletteSource(u32 CBP, u32 CPSM, u32 CBW, GSVec
}
offset = this_offset;
*scale = t->m_scale;
return t->m_texture;
}
@ -3151,9 +3184,9 @@ void GSTextureCache::Read(Target* t, const GSVector4i& r)
GL_PERF("TC: Read Back Target: %d (0x%x)[fmt: 0x%x]. Size %dx%d", t->m_texture->GetID(), TEX0.TBP0, TEX0.PSM, r.width(), r.height());
const GSVector4 src(GSVector4(r) * GSVector4(t->m_texture->GetScale()).xyxy() / GSVector4(t->m_texture->GetSize()).xyxy());
const GSVector4 src(GSVector4(r) * GSVector4(t->m_scale) / GSVector4(t->m_texture->GetSize()).xyxy());
const GSVector4i drc(0, 0, r.width(), r.height());
const bool direct_read = (t->m_texture->GetScale() == GSVector2(1, 1) && ps_shader == ShaderConvert::COPY);
const bool direct_read = (t->m_scale == 1.0f && ps_shader == ShaderConvert::COPY);
if (!PrepareDownloadTexture(drc.z, drc.w, fmt, dltex))
return;
@ -3234,16 +3267,7 @@ void GSTextureCache::Read(Source* t, const GSVector4i& r)
// GSTextureCache::Surface
GSTextureCache::Surface::Surface()
: m_texture(NULL)
, m_from_hash_cache(NULL)
, m_age(0)
, m_32_bits_fmt(false)
, m_shared_texture(false)
, m_end_block(0)
{
m_TEX0.TBP0 = GSTextureCache::MAX_BP;
}
GSTextureCache::Surface::Surface() = default;
GSTextureCache::Surface::~Surface() = default;
@ -3278,21 +3302,9 @@ bool GSTextureCache::Surface::Overlaps(u32 bp, u32 bw, u32 psm, const GSVector4i
// GSTextureCache::Source
GSTextureCache::Source::Source(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
: m_palette_obj(nullptr)
, m_palette(nullptr)
, m_valid_rect(0, 0)
, m_lod(0, 0)
, m_target(false)
, m_repeating(false)
, m_p2t(NULL)
, m_from_target(NULL)
, m_from_target_TEX0(TEX0)
{
m_TEX0 = TEX0;
m_TEXA = TEXA;
memset(m_layer_TEX0, 0, sizeof(m_layer_TEX0));
memset(m_layer_hash, 0, sizeof(m_layer_hash));
}
GSTextureCache::Source::~Source()
@ -3599,7 +3611,7 @@ void GSTextureCache::Target::Update(bool reset_age)
// Alternate
// 1/ uses multiple vertex rectangle
if (m_dirty.size() <= 0)
if (m_dirty.empty())
return;
// No handling please
@ -3611,9 +3623,7 @@ void GSTextureCache::Target::Update(bool reset_age)
return;
}
const GSVector2i unscaled_size(static_cast<int>(m_texture->GetWidth() / m_texture->GetScale().x),
static_cast<int>(m_texture->GetHeight() / m_texture->GetScale().y));
const GSVector4i total_rect(m_dirty.GetTotalRect(m_TEX0, unscaled_size));
const GSVector4i total_rect = m_dirty.GetTotalRect(m_TEX0, m_unscaled_size);
if (total_rect.rempty())
{
GL_INS("ERROR: Nothing to update?");
@ -3637,7 +3647,7 @@ void GSTextureCache::Target::Update(bool reset_age)
// Bilinear filtering this is probably not a good thing, at least in native, but upscaling Nearest can be gross and messy.
// It's needed for depth, though.. filtering depth doesn't make much sense, but SMT3 needs it..
const bool upscaled = (m_texture->GetScale().x > 1.0f);
const bool upscaled = (m_scale != 1.0f);
const bool linear = (m_type == RenderTarget && upscaled);
GSDevice::MultiStretchRect* drects = static_cast<GSDevice::MultiStretchRect*>(
@ -3668,7 +3678,7 @@ void GSTextureCache::Target::Update(bool reset_age)
GSDevice::MultiStretchRect& drect = drects[ndrects++];
drect.src = t;
drect.src_rect = GSVector4(r - t_offset) / t_sizef;
drect.dst_rect = GSVector4(r) * GSVector4(m_texture->GetScale()).xyxy();
drect.dst_rect = GSVector4(r) * GSVector4(m_scale);
drect.linear = linear;
// Copy the new GS memory content into the destination texture.
if (m_type == RenderTarget)
@ -3786,22 +3796,19 @@ void GSTextureCache::Target::UpdateValidBits(u32 bits_written)
m_valid_bits |= bits_written;
}
bool GSTextureCache::Target::ResizeTexture(int new_width, int new_height, bool recycle_old)
bool GSTextureCache::Target::ResizeTexture(int new_unscaled_width, int new_unscaled_height, bool recycle_old)
{
return ResizeTexture(new_width, new_height, m_texture->GetScale(), recycle_old);
}
bool GSTextureCache::Target::ResizeTexture(int new_width, int new_height, GSVector2 new_scale, bool recycle_old)
{
const int width = m_texture->GetWidth();
const int height = m_texture->GetHeight();
if (width == new_width && height == new_height)
if (m_unscaled_size.x == new_unscaled_width && m_unscaled_size.y == new_unscaled_height)
return true;
// These exceptions *really* need to get lost. This gets called outside of draws, which just crashes
// when it tries to propogate the exception back.
const int width = m_texture->GetWidth();
const int height = m_texture->GetHeight();
// Round the scaled size UP, matching how CreateTarget() sizes a fresh target. Applying ceil() to the
// integer dimension before multiplying is a no-op, and the cast would then truncate fractional scales,
// leaving the resized texture up to one pixel smaller than a newly created one of the same unscaled size.
const int new_width = static_cast<int>(std::ceil(static_cast<float>(new_unscaled_width) * m_scale));
const int new_height = static_cast<int>(std::ceil(static_cast<float>(new_unscaled_height) * m_scale));
const bool clear = (new_width > width || new_height > height);
// These exceptions *really* need to get lost. This gets called outside of draws, which just crashes
// when it tries to propagate the exception back.
GSTexture* tex = nullptr;
try
{
@ -3819,8 +3826,6 @@ bool GSTextureCache::Target::ResizeTexture(int new_width, int new_height, GSVect
return false;
}
tex->SetScale(new_scale);
const GSVector4i rc(0, 0, std::min(width, new_width), std::min(height, new_height));
if (tex->IsDepthStencil())
{
@ -3843,6 +3848,8 @@ bool GSTextureCache::Target::ResizeTexture(int new_width, int new_height, GSVect
delete m_texture;
m_texture = tex;
m_unscaled_size.x = new_unscaled_width;
m_unscaled_size.y = new_unscaled_height;
return true;
}

View File

@ -118,19 +118,26 @@ public:
class Surface : public GSAlignedClass<32>
{
public:
GSTexture* m_texture;
HashCacheEntry* m_from_hash_cache;
GIFRegTEX0 m_TEX0;
GIFRegTEXA m_TEXA;
int m_age;
bool m_32_bits_fmt; // Allow to detect the casting of 32 bits as 16 bits texture
bool m_shared_texture;
u32 m_end_block; // Hint of the surface area.
protected:
Surface();
~Surface();
public:
Surface();
virtual ~Surface();
GSTexture* m_texture = nullptr;
GIFRegTEX0 m_TEX0 = {};
GIFRegTEXA m_TEXA = {};
GSVector2i m_unscaled_size = {};
float m_scale = 0.0f;
int m_age = 0;
u32 m_end_block = MAX_BP; // Hint of the surface area.
bool m_32_bits_fmt = false; // Allow to detect the casting of 32 bits as 16 bits texture
bool m_shared_texture = false;
__fi int GetUnscaledWidth() const { return m_unscaled_size.x; }
__fi int GetUnscaledHeight() const { return m_unscaled_size.y; }
__fi const GSVector2i& GetUnscaledSize() const { return m_unscaled_size; }
__fi GSVector4i GetUnscaledRect() const { return GSVector4i::loadh(m_unscaled_size); }
__fi float GetScale() const { return m_scale; }
/// Returns true if the target wraps around the end of GS memory.
bool Wraps() const { return (m_end_block < m_TEX0.TBP0); }
@ -202,25 +209,27 @@ public:
void Flush(u32 count, int layer);
public:
HashCacheEntry* m_from_hash_cache = nullptr;
std::shared_ptr<Palette> m_palette_obj;
std::unique_ptr<u32[]> m_valid;// each u32 bits map to the 32 blocks of that page
GSTexture* m_palette;
GSVector4i m_valid_rect;
GSVector2i m_lod;
GSTexture* m_palette = nullptr;
GSVector4i m_valid_rect = {};
GSVector2i m_lod = {};
SourceRegion m_region = {};
u8 m_valid_hashes = 0;
u8 m_complete_layers = 0;
bool m_target;
bool m_repeating;
std::vector<GSVector2i>* m_p2t;
bool m_target = false;
bool m_repeating = false;
std::vector<GSVector2i>* m_p2t = nullptr;
// Keep a trace of the target origin. There is no guarantee that pointer will
// still be valid on future. However it ought to be good when the source is created
// so it can be used to access un-converted data for the current draw call.
GSTexture** m_from_target;
GIFRegTEX0 m_from_target_TEX0; // TEX0 of the target texture, if any, else equal to texture TEX0
GIFRegTEX0 m_layer_TEX0[7]; // Detect already loaded value
HashType m_layer_hash[7];
GSTexture** m_from_target = nullptr;
GIFRegTEX0 m_from_target_TEX0 = {}; // TEX0 of the target texture, if any, else equal to texture TEX0
GIFRegTEX0 m_layer_TEX0[7] = {}; // Detect already loaded value
HashType m_layer_hash[7] = {};
// Keep a GSTextureCache::SourceMap::m_map iterator to allow fast erase
// Deliberately not initialized to save cycles.
std::array<u16, MAX_PAGES> m_erase_it;
GSOffset::PageLooper m_pages;
@ -246,6 +255,7 @@ public:
bool m_dirty_alpha = true;
bool m_is_frame = false;
bool m_used = false;
float OffsetHack_modxy = 0.0f;
GSDirtyRectList m_dirty;
GSVector4i m_valid{};
GSVector4i m_drawn_since_read{};
@ -265,8 +275,8 @@ public:
/// Updates the target, if the dirty area intersects with the specified rectangle.
void UpdateIfDirtyIntersects(const GSVector4i& rc);
bool ResizeTexture(int new_width, int new_height, bool recycle_old = true);
bool ResizeTexture(int new_width, int new_height, GSVector2 new_scale, bool recycle_old = true);
/// Resizes target texture, DOES NOT RESCALE.
bool ResizeTexture(int new_unscaled_width, int new_unscaled_height, bool recycle_old = true);
};
class PaletteMap
@ -377,7 +387,7 @@ protected:
std::unique_ptr<GSDownloadTexture> m_uint32_download_texture;
Source* CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* t, bool half_right, int x_offset, int y_offset, const GSVector2i* lod, const GSVector4i* src_range, GSTexture* gpu_clut, SourceRegion region);
Target* CreateTarget(const GIFRegTEX0& TEX0, int w, int h, int type, const bool clear);
Target* CreateTarget(const GIFRegTEX0& TEX0, int w, int h, float scale, int type, const bool clear);
/// Expands a target when the block pointer for a display framebuffer is within another target, but the read offset
/// plus the height is larger than the current size of the target.
@ -394,7 +404,7 @@ protected:
// TODO: virtual void Write(Source* s, const GSVector4i& r) = 0;
// TODO: virtual void Write(Target* t, const GSVector4i& r) = 0;
Source* CreateMergedSource(GIFRegTEX0 TEX0, GIFRegTEXA TEXA, SourceRegion region, const GSVector2& scale);
Source* CreateMergedSource(GIFRegTEX0 TEX0, GIFRegTEXA TEXA, SourceRegion region, float scale);
public:
GSTextureCache();
@ -412,20 +422,21 @@ public:
void ReadbackAll();
void AddDirtyRectTarget(Target* target, GSVector4i rect, u32 psm, u32 bw, RGBAMask rgba);
GSTexture* LookupPaletteSource(u32 CBP, u32 CPSM, u32 CBW, GSVector2i& offset, const GSVector2i& size);
GSTexture* LookupPaletteSource(u32 CBP, u32 CPSM, u32 CBW, GSVector2i& offset, float* scale, const GSVector2i& size);
Source* LookupSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP, const GSVector4i& r, const GSVector2i* lod);
Source* LookupDepthSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP, const GSVector4i& r, bool palette = false);
Target* FindTargetOverlap(u32 bp, u32 end_block, int type, int psm);
Target* LookupTarget(const GIFRegTEX0& TEX0, const GSVector2i& size, int type, bool used, u32 fbmask = 0, const bool is_frame = false, const int real_w = 0, const int real_h = 0, bool preload = GSConfig.PreloadFrameWithGSData, bool is_clear = false);
Target* LookupDisplayTarget(const GIFRegTEX0& TEX0, const GSVector2i& size, const int real_w, const int real_h);
Target* LookupTarget(const GIFRegTEX0& TEX0, const GSVector2i& size, float scale, int type, bool used, u32 fbmask = 0, const bool is_frame = false, bool preload = GSConfig.PreloadFrameWithGSData, bool is_clear = false);
Target* LookupDisplayTarget(const GIFRegTEX0& TEX0, const GSVector2i& size, float scale);
/// Looks up a target in the cache, and only returns it if the BP/BW/PSM match exactly.
Target* GetExactTarget(u32 BP, u32 BW, u32 PSM) const;
Target* GetTargetWithSharedBits(u32 BP, u32 PSM) const;
u32 GetTargetHeight(u32 fbp, u32 fbw, u32 psm, u32 min_height);
bool Has32BitTarget(u32 bp);
void ExpandTarget(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r);
void InvalidateVideoMemType(int type, u32 bp);

View File

@ -76,17 +76,6 @@ namespace
__fi bool HasPalette() const { return (GSLocalMemory::m_psm[TEX0_PSM].pal > 0); }
__fi bool HasRegion() const { return region.HasEither(); }
__fi GSVector2 ReplacementScale(const GSTextureReplacements::ReplacementTexture& rtex) const
{
return ReplacementScale(rtex.width, rtex.height);
}
__fi GSVector2 ReplacementScale(u32 rwidth, u32 rheight) const
{
return GSVector2(static_cast<float>(rwidth) / static_cast<float>(Width()),
static_cast<float>(rheight) / static_cast<float>(Height()));
}
__fi bool operator==(const TextureName& rhs) const
{
return std::tie(TEX0Hash, CLUTHash, region.bits, bits) ==
@ -467,7 +456,7 @@ GSTexture* GSTextureReplacements::LookupReplacementTexture(const GSTextureCache:
if (it != s_replacement_texture_cache.end())
{
// replacement is cached, can immediately upload to host GPU
return CreateReplacementTexture(it->second, name.ReplacementScale(it->second), mipmap);
return CreateReplacementTexture(it->second, mipmap);
}
}
@ -493,7 +482,7 @@ GSTexture* GSTextureReplacements::LookupReplacementTexture(const GSTextureCache:
const ReplacementTexture& rtex = s_replacement_texture_cache.emplace(name, std::move(replacement.value())).first->second;
// and upload to gpu
return CreateReplacementTexture(rtex, name.ReplacementScale(rtex), mipmap);
return CreateReplacementTexture(rtex, mipmap);
}
}
@ -571,7 +560,7 @@ void GSTextureReplacements::ClearReplacementTextures()
s_async_loaded_textures.clear();
}
GSTexture* GSTextureReplacements::CreateReplacementTexture(const ReplacementTexture& rtex, const GSVector2& scale, bool mipmap)
GSTexture* GSTextureReplacements::CreateReplacementTexture(const ReplacementTexture& rtex, bool mipmap)
{
// can't use generated mipmaps with compressed formats, because they can't be rendered to
// in the future I guess we could decompress the dds and generate them... but there's no reason that modders can't generate mips in dds
@ -607,7 +596,6 @@ GSTexture* GSTextureReplacements::CreateReplacementTexture(const ReplacementText
}
}
tex->SetScale(scale);
return tex;
}
@ -626,7 +614,7 @@ void GSTextureReplacements::ProcessAsyncLoadedTextures()
continue;
// upload and inject into TC
GSTexture* tex = CreateReplacementTexture(it->second, name.ReplacementScale(it->second), mipmap);
GSTexture* tex = CreateReplacementTexture(it->second, mipmap);
if (tex)
s_tc->InjectHashCacheTexture(HashCacheKeyFromTextureName(name), tex);
}

View File

@ -49,7 +49,7 @@ namespace GSTextureReplacements
bool HasAnyReplacementTextures();
bool HasReplacementTextureWithOtherPalette(const GSTextureCache::HashCacheKey& hash);
GSTexture* LookupReplacementTexture(const GSTextureCache::HashCacheKey& hash, bool mipmap, bool* pending);
GSTexture* CreateReplacementTexture(const ReplacementTexture& rtex, const GSVector2& scale, bool mipmap);
GSTexture* CreateReplacementTexture(const ReplacementTexture& rtex, bool mipmap);
void ProcessAsyncLoadedTextures();
void DumpTexture(const GSTextureCache::HashCacheKey& hash, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA,

View File

@ -378,7 +378,8 @@ public:
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ShaderConvert shader = ShaderConvert::COPY, bool linear = true) override;
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, bool green, bool blue, bool alpha) override;
void PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, PresentShader shader, float shaderTime, bool linear) override;
void UpdateCLUTTexture(GSTexture* sTex, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize) override;
void UpdateCLUTTexture(GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize) override;
void ConvertToIndexedTexture(GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, u32 SBW, u32 SPSM, GSTexture* dTex, u32 DBW, u32 DPSM) override;
void FlushClears(GSTexture* tex);

View File

@ -749,8 +749,6 @@ bool GSDeviceMTL::Create()
{
// Init metal stuff
m_fn_constants = MRCTransfer([MTLFunctionConstantValues new]);
vector_float2 upscale2 = vector2(GSConfig.UpscaleMultiplier, GSConfig.UpscaleMultiplier);
[m_fn_constants setConstantValue:&upscale2 type:MTLDataTypeFloat2 atIndex:GSMTLConstantIndex_SCALING_FACTOR];
setFnConstantB(m_fn_constants, m_dev.features.framebuffer_fetch, GSMTLConstantIndex_FRAMEBUFFER_FETCH);
m_draw_sync_fence = MRCTransfer([m_dev.dev newFence]);
@ -1262,9 +1260,9 @@ void GSDeviceMTL::PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture
}
}}
void GSDeviceMTL::UpdateCLUTTexture(GSTexture* sTex, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize)
void GSDeviceMTL::UpdateCLUTTexture(GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize)
{
GSMTLCLUTConvertPSUniform uniform = { ToSimd(sTex->GetScale()), {offsetX, offsetY}, dOffset };
GSMTLCLUTConvertPSUniform uniform = { sScale, {offsetX, offsetY}, dOffset };
const bool is_clut4 = dSize == 16;
const GSVector4i dRect(0, 0, dSize, 1);
@ -1274,6 +1272,19 @@ void GSDeviceMTL::UpdateCLUTTexture(GSTexture* sTex, u32 offsetX, u32 offsetY, G
RenderCopy(sTex, m_clut_pipeline[!is_clut4], dRect);
}
// Draws sTex into the whole of dTex using the RGBA_TO_8I convert pipeline.
// sScale, SBW and DBW are passed to the shader through a GSMTLIndexedConvertPSUniform;
// offsetX, offsetY, SPSM and DPSM are accepted but not read by this implementation.
// Raises an NSException when no pipeline exists for the shader.
void GSDeviceMTL::ConvertToIndexedTexture(GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, u32 SBW, u32 SPSM, GSTexture* dTex, u32 DBW, u32 DPSM)
{ @autoreleasepool {
	const int pipeline_index = static_cast<int>(ShaderConvert::RGBA_TO_8I);
	id<MTLRenderPipelineState> convert_pipeline = m_convert_pipeline[pipeline_index];
	if (!convert_pipeline)
		[NSException raise:@"StretchRect Missing Pipeline" format:@"No pipeline for %d", pipeline_index];

	// Field order follows the struct declaration: scale, sbw, dbw.
	GSMTLIndexedConvertPSUniform ps_uniform = { sScale, SBW, DBW };

	// Destination rectangle covers the full destination texture.
	const GSVector4 full_dst(0, 0, dTex->GetWidth(), dTex->GetHeight());
	DoStretchRect(sTex, GSVector4::zero(), dTex, full_dst, convert_pipeline, false, LoadAction::DontCareIfFull, &ps_uniform, sizeof(ps_uniform));
}}
void GSDeviceMTL::FlushClears(GSTexture* tex)
{
if (tex)
@ -1607,6 +1618,7 @@ static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, ChannelShuffle) == of
// Compile-time layout checks: each listed GSHWDrawConfig::PSConstantBuffer field must sit at
// the same byte offset as its GSMTLMainPSUniform counterpart, otherwise the build fails.
static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, TCOffsetHack) == offsetof(GSMTLMainPSUniform, tc_offset));
static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, STScale) == offsetof(GSMTLMainPSUniform, st_scale));
static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, DitherMatrix) == offsetof(GSMTLMainPSUniform, dither_matrix));
// ScaleFactor pairs with the scale_factor member of GSMTLMainPSUniform.
static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, ScaleFactor) == offsetof(GSMTLMainPSUniform, scale_factor));
void GSDeviceMTL::SetupDestinationAlpha(GSTexture* rt, GSTexture* ds, const GSVector4i& r, bool datm)
{

View File

@ -19,8 +19,6 @@
using namespace metal;
constant float2 SCALING_FACTOR [[function_constant(GSMTLConstantIndex_SCALING_FACTOR)]];
struct ConvertShaderData
{
float4 p [[position]];

View File

@ -66,11 +66,18 @@ struct GSMTLCASPSUniform
struct GSMTLCLUTConvertPSUniform
{
vector_float2 scale;
float scale;
vector_uint2 offset;
uint doffset;
};
// Uniform block read by the ps_convert_rgba_8i fragment shader.
// GSDeviceMTL::ConvertToIndexedTexture fills it positionally as { sScale, SBW, DBW },
// so the member order here must not change.
struct GSMTLIndexedConvertPSUniform
{
// Multiplier the shader applies to the computed source coordinate before reading the texture.
float scale;
// Source buffer width value; the shader divides it by 64 when deriving the block offset.
uint sbw;
// Destination buffer width value; the shader divides it by 128 when deriving the block number.
uint dbw;
};
struct GSMTLMainVertex
{
vector_float2 st;
@ -127,6 +134,8 @@ struct GSMTLMainPSUniform
vector_float2 tc_offset;
vector_float2 st_scale;
matrix_float4x4 dither_matrix;
vector_float4 scale_factor;
};
enum GSMTLAttributes
@ -151,7 +160,6 @@ enum class GSMTLExpandType : unsigned char
enum GSMTLFnConstants
{
GSMTLConstantIndex_CAS_SHARPEN_ONLY,
GSMTLConstantIndex_SCALING_FACTOR,
GSMTLConstantIndex_FRAMEBUFFER_FETCH,
GSMTLConstantIndex_FST,
GSMTLConstantIndex_IIP,

View File

@ -247,7 +247,7 @@ fragment DepthOut ps_convert_rgb5a1_float16_biln(ConvertShaderData data [[stage_
}
fragment float4 ps_convert_rgba_8i(ConvertShaderData data [[stage_in]], DirectReadTextureIn<float> res,
constant GSMTLConvertPSUniform& uniform [[buffer(GSMTLBufferIndexUniforms)]])
constant GSMTLIndexedConvertPSUniform& uniform [[buffer(GSMTLBufferIndexUniforms)]])
{
// Convert a RGBA texture into a 8 bits packed texture
// Input column: 8x2 RGBA pixels
@ -265,16 +265,22 @@ fragment float4 ps_convert_rgba_8i(ConvertShaderData data [[stage_in]], DirectRe
uint2 subblock = pos & uint2(7, 1);
uint2 coord = block | subblock;
// Compensate for potentially differing page pitch.
uint2 block_xy = coord / uint2(64, 32);
uint block_num = (block_xy.y * (uniform.dbw / 128)) + block_xy.x;
uint2 block_offset = uint2((block_num % (uniform.sbw / 64)) * 64, (block_num / (uniform.sbw / 64)) * 32);
coord = (coord % uint2(64, 32)) + block_offset;
// Apply offset to cols 1 and 2
uint is_col23 = pos.y & 4;
uint is_col13 = pos.y & 2;
uint is_col12 = is_col23 ^ (is_col13 << 1);
coord.x ^= is_col12; // If cols 1 or 2, flip bit 3 of x
if (any(floor(SCALING_FACTOR) != SCALING_FACTOR))
coord = uint2(float2(coord) * SCALING_FACTOR);
if (any(floor(uniform.scale) != uniform.scale))
coord = uint2(float2(coord) * uniform.scale);
else
coord = mul24(coord, uint2(SCALING_FACTOR));
coord = mul24(coord, uint2(uniform.scale));
float4 pixel = res.tex.read(coord);
float2 sel0 = (pos.y & 2) == 0 ? pixel.rb : pixel.ga;

View File

@ -188,7 +188,7 @@ static MainVSOut vs_main_run(thread const MainVSIn& v, constant GSMTLMainVSUnifo
out.t.z = v.f.x; // pack fog with texture
if (VS_POINT_SIZE)
out.point_size = SCALING_FACTOR.x;
out.point_size = cb.point_size.x;
return out;
}
@ -522,7 +522,7 @@ struct PSMain
float4 sample_depth(float2 st)
{
float2 uv_f = float2(clamp_wrap_uv_depth(ushort2(st))) * (SCALING_FACTOR * float2(1.f / 16.f));
float2 uv_f = float2(clamp_wrap_uv_depth(ushort2(st))) * float2(cb.scale_factor.x);
ushort2 uv = ushort2(uv_f);
float4 t = float4(0);
@ -794,7 +794,7 @@ struct PSMain
if (PS_DITHER == 2)
fpos = ushort2(in.p.xy);
else
fpos = ushort2(in.p.xy / SCALING_FACTOR);
fpos = ushort2(in.p.xy * float2(cb.scale_factor.y));
float value = cb.dither_matrix[fpos.y & 3][fpos.x & 3];;
if (PS_ROUND_INV)
C.rgb -= value;

View File

@ -22,7 +22,7 @@ void GSRendererNull::Draw()
{
}
GSTexture* GSRendererNull::GetOutput(int i, int& y_offset)
GSTexture* GSRendererNull::GetOutput(int i, float& scale, int& y_offset)
{
return nullptr;
}

View File

@ -24,5 +24,5 @@ public:
protected:
void Draw() override;
GSTexture* GetOutput(int i, int& y_offset) override;
GSTexture* GetOutput(int i, float& scale, int& y_offset) override;
};

View File

@ -286,15 +286,18 @@ bool GSDeviceOGL::Create()
for (size_t i = 0; i < std::size(m_convert.ps); i++)
{
const char* name = shaderName(static_cast<ShaderConvert>(i));
const std::string macro_sel = (static_cast<ShaderConvert>(i) == ShaderConvert::RGBA_TO_8I) ?
fmt::format("#define PS_SCALE_FACTOR {:.8f}f\n", GSConfig.UpscaleMultiplier) :
std::string();
const std::string ps(GetShaderSource(name, GL_FRAGMENT_SHADER, m_shader_common_header, *convert_glsl, macro_sel));
const std::string ps(GetShaderSource(name, GL_FRAGMENT_SHADER, m_shader_common_header, *convert_glsl, std::string()));
if (!m_shader_cache.GetProgram(&m_convert.ps[i], m_convert.vs, {}, ps))
return false;
m_convert.ps[i].SetFormattedName("Convert pipe %s", name);
if (static_cast<ShaderConvert>(i) == ShaderConvert::YUV)
if (static_cast<ShaderConvert>(i) == ShaderConvert::RGBA_TO_8I)
{
m_convert.ps[i].RegisterUniform("SBW");
m_convert.ps[i].RegisterUniform("DBW");
m_convert.ps[i].RegisterUniform("ScaleFactor");
}
else if (static_cast<ShaderConvert>(i) == ShaderConvert::YUV)
{
m_convert.ps[i].RegisterUniform("EMOD");
}
@ -971,8 +974,6 @@ std::string GSDeviceOGL::GetVSSource(VSSelector sel)
std::string macro = fmt::format("#define VS_INT_FST {}\n", static_cast<u32>(sel.int_fst))
+ fmt::format("#define VS_IIP {}\n", static_cast<u32>(sel.iip))
+ fmt::format("#define VS_POINT_SIZE {}\n", static_cast<u32>(sel.point_size));
if (sel.point_size)
macro += fmt::format("#define VS_POINT_SIZE_VALUE {:.8f}f\n", GSConfig.UpscaleMultiplier);
std::string src = GenGlslHeader("vs_main", GL_VERTEX_SHADER, macro);
src += m_shader_common_header;
@ -1044,7 +1045,6 @@ std::string GSDeviceOGL::GetPSSource(const PSSelector& sel)
+ fmt::format("#define PS_FIXED_ONE_A {}\n", sel.fixed_one_a)
+ fmt::format("#define PS_PABE {}\n", sel.pabe)
+ fmt::format("#define PS_SCANMSK {}\n", sel.scanmsk)
+ fmt::format("#define PS_SCALE_FACTOR {:.8f}f\n", GSConfig.UpscaleMultiplier)
+ fmt::format("#define PS_NO_COLOR {}\n", sel.no_color)
+ fmt::format("#define PS_NO_COLOR1 {}\n", sel.no_color1)
+ fmt::format("#define PS_NO_ABLEND {}\n", sel.no_ablend)
@ -1235,13 +1235,13 @@ void GSDeviceOGL::PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture
DrawStretchRect(flip_sr, dRect, ds);
}
void GSDeviceOGL::UpdateCLUTTexture(GSTexture* sTex, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize)
void GSDeviceOGL::UpdateCLUTTexture(GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize)
{
const ShaderConvert shader = (dSize == 16) ? ShaderConvert::CLUT_4 : ShaderConvert::CLUT_8;
GL::Program& prog = m_convert.ps[static_cast<int>(shader)];
prog.Bind();
prog.Uniform3ui(0, offsetX, offsetY, dOffset);
prog.Uniform2f(1, sTex->GetScale().x, sTex->GetScale().y);
prog.Uniform1f(1, sScale);
OMSetDepthStencilState(m_convert.dss);
OMSetBlendState(false);
@ -1255,6 +1255,27 @@ void GSDeviceOGL::UpdateCLUTTexture(GSTexture* sTex, u32 offsetX, u32 offsetY, G
DrawStretchRect(GSVector4::zero(), dRect, dTex->GetSize());
}
// Renders sTex into the whole of dTex with the RGBA_TO_8I convert program.
// SBW, DBW and sScale are uploaded as program uniforms; offsetX, offsetY, SPSM and DPSM
// are accepted but not read by this implementation.
void GSDeviceOGL::ConvertToIndexedTexture(GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, u32 SBW, u32 SPSM, GSTexture* dTex, u32 DBW, u32 DPSM)
{
	GL::Program& convert_prog = m_convert.ps[static_cast<int>(ShaderConvert::RGBA_TO_8I)];

	// NOTE(review): slots 0/1/2 presumably map to the "SBW", "DBW" and "ScaleFactor"
	// uniforms in the order they are registered for this program - confirm against Create().
	convert_prog.Bind();
	convert_prog.Uniform1ui(0, SBW);
	convert_prog.Uniform1ui(1, DBW);
	convert_prog.Uniform1f(2, sScale);

	// Output-merger state: convert depth-stencil state, blending off, default colour mask, colour target only.
	OMSetDepthStencilState(m_convert.dss);
	OMSetBlendState(false);
	OMSetColorMaskState();
	OMSetRenderTargets(dTex, nullptr);

	// Source texture in slot 0, sampled with the convert "pt" sampler.
	PSSetShaderResource(0, sTex);
	PSSetSamplerState(m_convert.pt);

	// Destination rectangle covers the full destination texture.
	const GSVector4 full_target(0, 0, dTex->GetWidth(), dTex->GetHeight());
	DrawStretchRect(GSVector4::zero(), full_target, dTex->GetSize());
}
void GSDeviceOGL::DrawStretchRect(const GSVector4& sRect, const GSVector4& dRect, const GSVector2i& ds)
{
// Original code from DX

View File

@ -337,7 +337,9 @@ public:
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, bool green, bool blue, bool alpha) final;
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, const GL::Program& ps, bool alpha_blend, OMColorMaskSelector cms, bool linear = true);
void PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, PresentShader shader, float shaderTime, bool linear) final;
void UpdateCLUTTexture(GSTexture* sTex, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize) final;
void UpdateCLUTTexture(GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize) final;
void ConvertToIndexedTexture(GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, u32 SBW, u32 SPSM, GSTexture* dTex, u32 DBW, u32 DPSM) final;
void DrawMultiStretchRects(const MultiStretchRect* rects, u32 num_rects, GSTexture* dTex, ShaderConvert shader) final;
void DoMultiStretchRects(const MultiStretchRect* rects, u32 num_rects, const GSVector2& ds);

View File

@ -107,7 +107,7 @@ void GSRendererSW::VSync(u32 field, bool registers_written)
// if((m_perfmon.GetFrame() & 255) == 0) m_rl->PrintStats();
}
GSTexture* GSRendererSW::GetOutput(int i, int& y_offset)
GSTexture* GSRendererSW::GetOutput(int i, float& scale, int& y_offset)
{
Sync(1);
@ -194,10 +194,11 @@ GSTexture* GSRendererSW::GetOutput(int i, int& y_offset)
}
}
scale = 1.0f;
return m_texture[index];
}
GSTexture* GSRendererSW::GetFeedbackOutput()
GSTexture* GSRendererSW::GetFeedbackOutput(float& scale)
{
int dummy;
@ -205,7 +206,7 @@ GSTexture* GSRendererSW::GetFeedbackOutput()
for (int i = 0; i < 2; i++)
{
if (m_regs->EXTBUF.EXBP == m_regs->DISP[i].DISPFB.Block())
return GetOutput(i, dummy);
return GetOutput(i, scale, dummy);
}
return nullptr;

View File

@ -72,8 +72,8 @@ protected:
void Reset(bool hardware_reset) override;
void VSync(u32 field, bool registers_written) override;
GSTexture* GetOutput(int i, int& y_offset) override;
GSTexture* GetFeedbackOutput() override;
GSTexture* GetOutput(int i, float& scale, int& y_offset) override;
GSTexture* GetFeedbackOutput(float& scale) override;
void Draw() override;
void Queue(GSRingHeap::SharedPtr<GSRasterizerData>& item);

View File

@ -792,15 +792,17 @@ void GSDeviceVK::BlitRect(GSTexture* sTex, const GSVector4i& sRect, u32 sLevel,
&ib, linear ? VK_FILTER_LINEAR : VK_FILTER_NEAREST);
}
void GSDeviceVK::UpdateCLUTTexture(GSTexture* sTex, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize)
void GSDeviceVK::UpdateCLUTTexture(GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize)
{
// Super annoying, but apparently NVIDIA doesn't like floats/ints packed together in the same vec4?
struct Uniforms
{
float scaleX, scaleY;
u32 offsetX, offsetY, dOffset;
u32 offsetX, offsetY, dOffset, pad1;
float scale;
float pad2[3];
};
const Uniforms uniforms = {sTex->GetScale().x, sTex->GetScale().y, offsetX, offsetY, dOffset};
const Uniforms uniforms = {offsetX, offsetY, dOffset, 0, sScale, {}};
SetUtilityPushConstants(&uniforms, sizeof(uniforms));
const GSVector4 dRect(0, 0, dSize, 1);
@ -809,6 +811,26 @@ void GSDeviceVK::UpdateCLUTTexture(GSTexture* sTex, u32 offsetX, u32 offsetY, GS
m_convert[static_cast<int>(shader)], false);
}
// Renders sTex into the whole of dTex with the RGBA_TO_8I convert pipeline.
// SBW, DBW and sScale are sent as utility push constants; offsetX, offsetY, SPSM and DPSM
// are accepted but not read by this implementation.
void GSDeviceVK::ConvertToIndexedTexture(GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, u32 SBW, u32 SPSM, GSTexture* dTex, u32 DBW, u32 DPSM)
{
	// Two 16-byte groups: the integers (plus padding) first, then the float (plus padding).
	struct PushConstants
	{
		u32 sbw;
		u32 dbw;
		u32 int_padding[2];
		float scale_factor;
		float float_padding[3];
	};

	// Value-initialise so both padding arrays are zero, then fill the live fields.
	PushConstants constants = {};
	constants.sbw = SBW;
	constants.dbw = DBW;
	constants.scale_factor = sScale;
	SetUtilityPushConstants(&constants, sizeof(constants));

	GSTextureVK* const source = static_cast<GSTextureVK*>(sTex);
	GSTextureVK* const target = static_cast<GSTextureVK*>(dTex);

	// Destination rectangle covers the full destination texture.
	const GSVector4 full_target(0, 0, dTex->GetWidth(), dTex->GetHeight());
	DoStretchRect(source, GSVector4::zero(), target, full_target,
		m_convert[static_cast<int>(ShaderConvert::RGBA_TO_8I)], false);
}
void GSDeviceVK::DoMerge(GSTexture* sTex[3], GSVector4* sRect, GSTexture* dTex, GSVector4* dRect,
const GSRegPMODE& PMODE, const GSRegEXTBUF& EXTBUF, const GSVector4& c, const bool linear)
{
@ -1150,7 +1172,6 @@ VkShaderModule GSDeviceVK::GetUtilityVertexShader(const std::string& source, con
std::stringstream ss;
AddShaderHeader(ss);
AddShaderStageMacro(ss, true, false, false);
AddMacro(ss, "PS_SCALE_FACTOR", StringUtil::ToChars(GSConfig.UpscaleMultiplier).c_str());
if (replace_main)
ss << "#define " << replace_main << " main\n";
ss << source;
@ -1163,7 +1184,6 @@ VkShaderModule GSDeviceVK::GetUtilityFragmentShader(const std::string& source, c
std::stringstream ss;
AddShaderHeader(ss);
AddShaderStageMacro(ss, false, false, true);
AddMacro(ss, "PS_SCALE_FACTOR", StringUtil::ToChars(GSConfig.UpscaleMultiplier).c_str());
if (replace_main)
ss << "#define " << replace_main << " main\n";
ss << source;
@ -2009,8 +2029,6 @@ VkShaderModule GSDeviceVK::GetTFXVertexShader(GSHWDrawConfig::VSSelector sel)
AddMacro(ss, "VS_FST", sel.fst);
AddMacro(ss, "VS_IIP", sel.iip);
AddMacro(ss, "VS_POINT_SIZE", sel.point_size);
if (sel.point_size)
AddMacro(ss, "VS_POINT_SIZE_VALUE", StringUtil::ToChars(GSConfig.UpscaleMultiplier).c_str());
ss << m_tfx_source;
VkShaderModule mod = g_vulkan_shader_cache->GetVertexShader(ss.str());
@ -2098,7 +2116,6 @@ VkShaderModule GSDeviceVK::GetTFXFragmentShader(const GSHWDrawConfig::PSSelector
AddMacro(ss, "PS_ZCLAMP", sel.zclamp);
AddMacro(ss, "PS_PABE", sel.pabe);
AddMacro(ss, "PS_SCANMSK", sel.scanmsk);
AddMacro(ss, "PS_SCALE_FACTOR", StringUtil::ToChars(GSConfig.UpscaleMultiplier).c_str());
AddMacro(ss, "PS_TEX_IS_FB", sel.tex_is_fb);
AddMacro(ss, "PS_NO_COLOR", sel.no_color);
AddMacro(ss, "PS_NO_COLOR1", sel.no_color1);

View File

@ -244,7 +244,8 @@ public:
void BlitRect(GSTexture* sTex, const GSVector4i& sRect, u32 sLevel, GSTexture* dTex, const GSVector4i& dRect,
u32 dLevel, bool linear);
void UpdateCLUTTexture(GSTexture* sTex, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize) override;
void UpdateCLUTTexture(GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize) override;
void ConvertToIndexedTexture(GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, u32 SBW, u32 SPSM, GSTexture* dTex, u32 DBW, u32 DPSM) override;
void SetupDATE(GSTexture* rt, GSTexture* ds, bool datm, const GSVector4i& bbox);
GSTextureVK* SetupPrimitiveTrackingDATE(GSHWDrawConfig& config);

View File

@ -22,13 +22,6 @@
class GSTextureVK final : public GSTexture
{
public:
// Storage for a clear value: four float colour components or one float depth,
// overlapping in the same 16-byte-aligned memory.
union alignas(16) ClearValue
{
// Colour clear value, four floats.
float color[4];
// Depth clear value; aliases the first element of `color`.
float depth;
};
public:
GSTextureVK(Type type, Format format, Vulkan::Texture texture);
~GSTextureVK() override;
@ -40,8 +33,6 @@ public:
// Thin accessors that forward to the wrapped m_texture object.
// Returns m_texture.GetImage().
__fi VkImage GetImage() const { return m_texture.GetImage(); }
// Returns m_texture.GetView().
__fi VkImageView GetView() const { return m_texture.GetView(); }
// Returns m_texture.GetLayout().
__fi VkImageLayout GetLayout() const { return m_texture.GetLayout(); }
// Loads the four floats stored in m_clear_value.color into a GSVector4.
__fi GSVector4 GetClearColor() const { return GSVector4::load<true>(m_clear_value.color); }
// Returns the depth member of m_clear_value (shares storage with the colour array).
__fi float GetClearDepth() const { return m_clear_value.depth; }
void* GetNativeHandle() const override;
@ -60,17 +51,6 @@ public:
VkFramebuffer GetLinkedFramebuffer(GSTextureVK* depth_texture, bool feedback_loop);
// Stores `color` into m_clear_value.color and sets m_state to State::Cleared.
__fi void SetClearColor(const GSVector4& color)
{
	GSVector4::store<true>(m_clear_value.color, color);
	m_state = State::Cleared;
}
// Stores `depth` into m_clear_value.depth and sets m_state to State::Cleared.
__fi void SetClearDepth(float depth)
{
	m_clear_value.depth = depth;
	m_state = State::Cleared;
}
// Call when the texture is bound to the pipeline, or read from in a copy.
__fi void SetUsedThisCommandBuffer()
{
@ -88,8 +68,6 @@ private:
// When this matches the current fence counter, the texture was used this command buffer.
u64 m_use_fence_counter = 0;
ClearValue m_clear_value = {};
GSVector4i m_map_area = GSVector4i::zero();
u32 m_map_level = UINT32_MAX;

View File

@ -15,4 +15,4 @@
/// Version number for GS and other shaders. Increment whenever any of the contents of the
/// shaders change, to invalidate the cache.
static constexpr u32 SHADER_CACHE_VERSION = 20;
static constexpr u32 SHADER_CACHE_VERSION = 21;