GPU/HW: Make copy/write shaders resolution independent
This commit is contained in:
parent
cee1f1772f
commit
74ec7a37da
|
@ -1460,9 +1460,9 @@ bool GPU_HW::CompilePipelines(Error* error)
|
||||||
|
|
||||||
// VRAM copy
|
// VRAM copy
|
||||||
{
|
{
|
||||||
std::unique_ptr<GPUShader> fs = g_gpu_device->CreateShader(
|
std::unique_ptr<GPUShader> fs =
|
||||||
GPUShaderStage::Fragment, shadergen.GetLanguage(),
|
g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(),
|
||||||
shadergen.GenerateVRAMCopyFragmentShader(m_resolution_scale, m_write_mask_as_depth), error);
|
shadergen.GenerateVRAMCopyFragmentShader(m_write_mask_as_depth), error);
|
||||||
if (!fs)
|
if (!fs)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
@ -1491,8 +1491,7 @@ bool GPU_HW::CompilePipelines(Error* error)
|
||||||
const bool use_ssbo = features.texture_buffers_emulated_with_ssbo;
|
const bool use_ssbo = features.texture_buffers_emulated_with_ssbo;
|
||||||
std::unique_ptr<GPUShader> fs = g_gpu_device->CreateShader(
|
std::unique_ptr<GPUShader> fs = g_gpu_device->CreateShader(
|
||||||
GPUShaderStage::Fragment, shadergen.GetLanguage(),
|
GPUShaderStage::Fragment, shadergen.GetLanguage(),
|
||||||
shadergen.GenerateVRAMWriteFragmentShader(m_resolution_scale, use_buffer, use_ssbo, m_write_mask_as_depth),
|
shadergen.GenerateVRAMWriteFragmentShader(use_buffer, use_ssbo, m_write_mask_as_depth), error);
|
||||||
error);
|
|
||||||
if (!fs)
|
if (!fs)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
@ -3376,19 +3375,31 @@ void GPU_HW::UpdateVRAMOnGPU(u32 x, u32 y, u32 width, u32 height, const void* da
|
||||||
|
|
||||||
struct VRAMWriteUBOData
|
struct VRAMWriteUBOData
|
||||||
{
|
{
|
||||||
u32 u_dst_x;
|
float u_dst_x;
|
||||||
u32 u_dst_y;
|
float u_dst_y;
|
||||||
u32 u_end_x;
|
float u_end_x;
|
||||||
u32 u_end_y;
|
float u_end_y;
|
||||||
u32 u_width;
|
float u_width;
|
||||||
u32 u_height;
|
float u_height;
|
||||||
|
float u_vram_width;
|
||||||
|
float u_vram_height;
|
||||||
|
float u_resolution_scale;
|
||||||
u32 u_buffer_base_offset;
|
u32 u_buffer_base_offset;
|
||||||
u32 u_mask_or_bits;
|
u32 u_mask_or_bits;
|
||||||
float u_depth_value;
|
float u_depth_value;
|
||||||
};
|
};
|
||||||
const VRAMWriteUBOData uniforms = {
|
const VRAMWriteUBOData uniforms = {static_cast<float>(x % VRAM_WIDTH),
|
||||||
(x % VRAM_WIDTH), (y % VRAM_HEIGHT), ((x + width) % VRAM_WIDTH), ((y + height) % VRAM_HEIGHT), width,
|
static_cast<float>(y % VRAM_HEIGHT),
|
||||||
height, map_index, (set_mask) ? 0x8000u : 0x00, GetCurrentNormalizedVertexDepth()};
|
static_cast<float>((x + width) % VRAM_WIDTH),
|
||||||
|
static_cast<float>((y + height) % VRAM_HEIGHT),
|
||||||
|
static_cast<float>(width),
|
||||||
|
static_cast<float>(height),
|
||||||
|
static_cast<float>(m_vram_texture->GetWidth()),
|
||||||
|
static_cast<float>(m_vram_texture->GetHeight()),
|
||||||
|
static_cast<float>(m_resolution_scale),
|
||||||
|
map_index,
|
||||||
|
(set_mask) ? 0x8000u : 0x00,
|
||||||
|
GetCurrentNormalizedVertexDepth()};
|
||||||
|
|
||||||
// the viewport should already be set to the full vram, so just adjust the scissor
|
// the viewport should already be set to the full vram, so just adjust the scissor
|
||||||
const GSVector4i scaled_bounds = bounds.mul32l(GSVector4i(m_resolution_scale));
|
const GSVector4i scaled_bounds = bounds.mul32l(GSVector4i(m_resolution_scale));
|
||||||
|
@ -3458,25 +3469,27 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32
|
||||||
|
|
||||||
struct VRAMCopyUBOData
|
struct VRAMCopyUBOData
|
||||||
{
|
{
|
||||||
u32 u_src_x;
|
float u_src_x;
|
||||||
u32 u_src_y;
|
float u_src_y;
|
||||||
u32 u_dst_x;
|
float u_dst_x;
|
||||||
u32 u_dst_y;
|
float u_dst_y;
|
||||||
u32 u_end_x;
|
float u_end_x;
|
||||||
u32 u_end_y;
|
float u_end_y;
|
||||||
u32 u_width;
|
float u_vram_width;
|
||||||
u32 u_height;
|
float u_vram_height;
|
||||||
|
float u_resolution_scale;
|
||||||
u32 u_set_mask_bit;
|
u32 u_set_mask_bit;
|
||||||
float u_depth_value;
|
float u_depth_value;
|
||||||
};
|
};
|
||||||
const VRAMCopyUBOData uniforms = {(src_x % VRAM_WIDTH) * m_resolution_scale,
|
const VRAMCopyUBOData uniforms = {static_cast<float>((src_x % VRAM_WIDTH) * m_resolution_scale),
|
||||||
(src_y % VRAM_HEIGHT) * m_resolution_scale,
|
static_cast<float>((src_y % VRAM_HEIGHT) * m_resolution_scale),
|
||||||
(dst_x % VRAM_WIDTH) * m_resolution_scale,
|
static_cast<float>((dst_x % VRAM_WIDTH) * m_resolution_scale),
|
||||||
(dst_y % VRAM_HEIGHT) * m_resolution_scale,
|
static_cast<float>((dst_y % VRAM_HEIGHT) * m_resolution_scale),
|
||||||
((dst_x + width) % VRAM_WIDTH) * m_resolution_scale,
|
static_cast<float>(((dst_x + width) % VRAM_WIDTH) * m_resolution_scale),
|
||||||
((dst_y + height) % VRAM_HEIGHT) * m_resolution_scale,
|
static_cast<float>(((dst_y + height) % VRAM_HEIGHT) * m_resolution_scale),
|
||||||
width * m_resolution_scale,
|
static_cast<float>(m_vram_texture->GetWidth()),
|
||||||
height * m_resolution_scale,
|
static_cast<float>(m_vram_texture->GetHeight()),
|
||||||
|
static_cast<float>(m_resolution_scale),
|
||||||
m_GPUSTAT.set_mask_while_drawing ? 1u : 0u,
|
m_GPUSTAT.set_mask_while_drawing ? 1u : 0u,
|
||||||
GetCurrentNormalizedVertexDepth()};
|
GetCurrentNormalizedVertexDepth()};
|
||||||
|
|
||||||
|
@ -3923,8 +3936,16 @@ void GPU_HW::UpdateDisplay()
|
||||||
reinterpret_start_x + scaled_display_width, scaled_vram_offset_y + read_height, scaled_display_width,
|
reinterpret_start_x + scaled_display_width, scaled_vram_offset_y + read_height, scaled_display_width,
|
||||||
read_height);
|
read_height);
|
||||||
|
|
||||||
const u32 uniforms[4] = {reinterpret_start_x, scaled_vram_offset_y, skip_x, line_skip};
|
struct ExtractUniforms
|
||||||
g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms));
|
{
|
||||||
|
u32 vram_offset_x;
|
||||||
|
u32 vram_offset_y;
|
||||||
|
float skip_x;
|
||||||
|
float line_skip;
|
||||||
|
};
|
||||||
|
const ExtractUniforms uniforms = {reinterpret_start_x, scaled_vram_offset_y, static_cast<float>(skip_x),
|
||||||
|
static_cast<float>(line_skip)};
|
||||||
|
g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms));
|
||||||
|
|
||||||
g_gpu_device->SetViewportAndScissor(0, 0, scaled_display_width, read_height);
|
g_gpu_device->SetViewportAndScissor(0, 0, scaled_display_width, read_height);
|
||||||
g_gpu_device->Draw(3, 0);
|
g_gpu_device->Draw(3, 0);
|
||||||
|
|
|
@ -1194,7 +1194,7 @@ std::string GPU_HW_ShaderGen::GenerateVRAMExtractFragmentShader(u32 resolution_s
|
||||||
ss << "CONSTANT uint2 VRAM_SIZE = uint2(" << VRAM_WIDTH << ", " << VRAM_HEIGHT << ") * RESOLUTION_SCALE;\n";
|
ss << "CONSTANT uint2 VRAM_SIZE = uint2(" << VRAM_WIDTH << ", " << VRAM_HEIGHT << ") * RESOLUTION_SCALE;\n";
|
||||||
ss << "CONSTANT uint MULTISAMPLES = " << multisamples << "u;\n";
|
ss << "CONSTANT uint MULTISAMPLES = " << multisamples << "u;\n";
|
||||||
|
|
||||||
DeclareUniformBuffer(ss, {"uint2 u_vram_offset", "uint u_skip_x", "uint u_line_skip"}, true);
|
DeclareUniformBuffer(ss, {"uint2 u_vram_offset", "float u_skip_x", "float u_line_skip"}, true);
|
||||||
DeclareTexture(ss, "samp0", 0, msaa);
|
DeclareTexture(ss, "samp0", 0, msaa);
|
||||||
if (depth_buffer)
|
if (depth_buffer)
|
||||||
DeclareTexture(ss, "samp1", 1, msaa);
|
DeclareTexture(ss, "samp1", 1, msaa);
|
||||||
|
@ -1251,7 +1251,7 @@ float3 SampleVRAM24(uint2 icoords)
|
||||||
DeclareFragmentEntryPoint(ss, 0, 1, {}, true, depth_buffer ? 2 : 1);
|
DeclareFragmentEntryPoint(ss, 0, 1, {}, true, depth_buffer ? 2 : 1);
|
||||||
ss << R"(
|
ss << R"(
|
||||||
{
|
{
|
||||||
uint2 icoords = uint2(uint(v_pos.x) + u_skip_x, uint(v_pos.y) << u_line_skip);
|
uint2 icoords = uint2(v_pos.x + u_skip_x, v_pos.y * u_line_skip);
|
||||||
int2 wrapped_coords = int2((icoords + u_vram_offset) % VRAM_SIZE);
|
int2 wrapped_coords = int2((icoords + u_vram_offset) % VRAM_SIZE);
|
||||||
|
|
||||||
#if COLOR_24BIT
|
#if COLOR_24BIT
|
||||||
|
@ -1422,7 +1422,7 @@ uint SampleVRAM(uint2 coords)
|
||||||
return ss.str();
|
return ss.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader(u32 resolution_scale, bool use_buffer, bool use_ssbo,
|
std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader(bool use_buffer, bool use_ssbo,
|
||||||
bool write_mask_as_depth) const
|
bool write_mask_as_depth) const
|
||||||
{
|
{
|
||||||
std::stringstream ss;
|
std::stringstream ss;
|
||||||
|
@ -1432,12 +1432,10 @@ std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader(u32 resolution_sca
|
||||||
DefineMacro(ss, "WRITE_MASK_AS_DEPTH", write_mask_as_depth);
|
DefineMacro(ss, "WRITE_MASK_AS_DEPTH", write_mask_as_depth);
|
||||||
DefineMacro(ss, "USE_BUFFER", use_buffer);
|
DefineMacro(ss, "USE_BUFFER", use_buffer);
|
||||||
|
|
||||||
ss << "CONSTANT uint RESOLUTION_SCALE = " << resolution_scale << "u;\n";
|
|
||||||
ss << "CONSTANT uint2 VRAM_SIZE = uint2(" << VRAM_WIDTH << ", " << VRAM_HEIGHT << ");\n";
|
|
||||||
|
|
||||||
DeclareUniformBuffer(ss,
|
DeclareUniformBuffer(ss,
|
||||||
{"uint2 u_base_coords", "uint2 u_end_coords", "uint2 u_size", "uint u_buffer_base_offset",
|
{"float2 u_base_coords", "float2 u_end_coords", "float2 u_size", "float2 u_vram_size",
|
||||||
"uint u_mask_or_bits", "float u_depth_value"},
|
"float u_resolution_scale", "uint u_buffer_base_offset", "uint u_mask_or_bits",
|
||||||
|
"float u_depth_value"},
|
||||||
true);
|
true);
|
||||||
|
|
||||||
if (!use_buffer)
|
if (!use_buffer)
|
||||||
|
@ -1469,7 +1467,7 @@ std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader(u32 resolution_sca
|
||||||
DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1, false, write_mask_as_depth);
|
DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1, false, write_mask_as_depth);
|
||||||
ss << R"(
|
ss << R"(
|
||||||
{
|
{
|
||||||
uint2 coords = uint2(v_pos.xy) / uint2(RESOLUTION_SCALE, RESOLUTION_SCALE);
|
float2 coords = floor(v_pos.xy / u_resolution_scale);
|
||||||
|
|
||||||
// make sure it's not oversized and out of range
|
// make sure it's not oversized and out of range
|
||||||
if ((coords.x < u_base_coords.x && coords.x >= u_end_coords.x) ||
|
if ((coords.x < u_base_coords.x && coords.x >= u_end_coords.x) ||
|
||||||
|
@ -1479,14 +1477,14 @@ std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader(u32 resolution_sca
|
||||||
}
|
}
|
||||||
|
|
||||||
// find offset from the start of the row/column
|
// find offset from the start of the row/column
|
||||||
uint2 offset;
|
float2 offset;
|
||||||
offset.x = (coords.x < u_base_coords.x) ? (VRAM_SIZE.x - u_base_coords.x + coords.x) : (coords.x - u_base_coords.x);
|
offset.x = (coords.x < u_base_coords.x) ? (u_vram_size.x - u_base_coords.x + coords.x) : (coords.x - u_base_coords.x);
|
||||||
offset.y = (coords.y < u_base_coords.y) ? (VRAM_SIZE.y - u_base_coords.y + coords.y) : (coords.y - u_base_coords.y);
|
offset.y = (coords.y < u_base_coords.y) ? (u_vram_size.y - u_base_coords.y + coords.y) : (coords.y - u_base_coords.y);
|
||||||
|
|
||||||
#if !USE_BUFFER
|
#if !USE_BUFFER
|
||||||
uint value = LOAD_TEXTURE(samp0, int2(offset), 0).x;
|
uint value = LOAD_TEXTURE(samp0, int2(offset), 0).x;
|
||||||
#else
|
#else
|
||||||
uint buffer_offset = u_buffer_base_offset + (offset.y * u_size.x) + offset.x;
|
uint buffer_offset = u_buffer_base_offset + uint((offset.y * u_size.x) + offset.x);
|
||||||
uint value = GET_VALUE(buffer_offset) | u_mask_or_bits;
|
uint value = GET_VALUE(buffer_offset) | u_mask_or_bits;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -1499,7 +1497,7 @@ std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader(u32 resolution_sca
|
||||||
return ss.str();
|
return ss.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string GPU_HW_ShaderGen::GenerateVRAMCopyFragmentShader(u32 resolution_scale, bool write_mask_as_depth) const
|
std::string GPU_HW_ShaderGen::GenerateVRAMCopyFragmentShader(bool write_mask_as_depth) const
|
||||||
{
|
{
|
||||||
// TODO: This won't currently work because we can't bind the texture to both the shader and framebuffer.
|
// TODO: This won't currently work because we can't bind the texture to both the shader and framebuffer.
|
||||||
const bool msaa = false;
|
const bool msaa = false;
|
||||||
|
@ -1509,19 +1507,16 @@ std::string GPU_HW_ShaderGen::GenerateVRAMCopyFragmentShader(u32 resolution_scal
|
||||||
DefineMacro(ss, "WRITE_MASK_AS_DEPTH", write_mask_as_depth);
|
DefineMacro(ss, "WRITE_MASK_AS_DEPTH", write_mask_as_depth);
|
||||||
DefineMacro(ss, "MSAA_COPY", msaa);
|
DefineMacro(ss, "MSAA_COPY", msaa);
|
||||||
|
|
||||||
ss << "CONSTANT uint RESOLUTION_SCALE = " << resolution_scale << "u;\n";
|
|
||||||
ss << "CONSTANT uint2 VRAM_SIZE = uint2(" << VRAM_WIDTH << ", " << VRAM_HEIGHT << ") * RESOLUTION_SCALE;\n";
|
|
||||||
|
|
||||||
DeclareUniformBuffer(ss,
|
DeclareUniformBuffer(ss,
|
||||||
{"uint2 u_src_coords", "uint2 u_dst_coords", "uint2 u_end_coords", "uint2 u_size",
|
{"float2 u_src_coords", "float2 u_dst_coords", "float2 u_end_coords", "float2 u_vram_size",
|
||||||
"bool u_set_mask_bit", "float u_depth_value"},
|
"float u_resolution_scale", "bool u_set_mask_bit", "float u_depth_value"},
|
||||||
true);
|
true);
|
||||||
|
|
||||||
DeclareTexture(ss, "samp0", 0, msaa);
|
DeclareTexture(ss, "samp0", 0, msaa);
|
||||||
DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1, false, write_mask_as_depth, false, false, msaa);
|
DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1, false, write_mask_as_depth, false, false, msaa);
|
||||||
ss << R"(
|
ss << R"(
|
||||||
{
|
{
|
||||||
uint2 dst_coords = uint2(v_pos.xy);
|
float2 dst_coords = floor(v_pos.xy);
|
||||||
|
|
||||||
// make sure it's not oversized and out of range
|
// make sure it's not oversized and out of range
|
||||||
if ((dst_coords.x < u_dst_coords.x && dst_coords.x >= u_end_coords.x) ||
|
if ((dst_coords.x < u_dst_coords.x && dst_coords.x >= u_end_coords.x) ||
|
||||||
|
@ -1531,12 +1526,13 @@ std::string GPU_HW_ShaderGen::GenerateVRAMCopyFragmentShader(u32 resolution_scal
|
||||||
}
|
}
|
||||||
|
|
||||||
// find offset from the start of the row/column
|
// find offset from the start of the row/column
|
||||||
uint2 offset;
|
float2 offset;
|
||||||
offset.x = (dst_coords.x < u_dst_coords.x) ? (VRAM_SIZE.x - u_dst_coords.x + dst_coords.x) : (dst_coords.x - u_dst_coords.x);
|
offset.x = (dst_coords.x < u_dst_coords.x) ? (u_vram_size.x - u_dst_coords.x + dst_coords.x) : (dst_coords.x - u_dst_coords.x);
|
||||||
offset.y = (dst_coords.y < u_dst_coords.y) ? (VRAM_SIZE.y - u_dst_coords.y + dst_coords.y) : (dst_coords.y - u_dst_coords.y);
|
offset.y = (dst_coords.y < u_dst_coords.y) ? (u_vram_size.y - u_dst_coords.y + dst_coords.y) : (dst_coords.y - u_dst_coords.y);
|
||||||
|
|
||||||
// find the source coordinates to copy from
|
// find the source coordinates to copy from
|
||||||
uint2 src_coords = (u_src_coords + offset) % VRAM_SIZE;
|
float2 offset_coords = u_src_coords + offset;
|
||||||
|
float2 src_coords = offset_coords - (floor(offset_coords / u_vram_size) * u_vram_size);
|
||||||
|
|
||||||
// sample and apply mask bit
|
// sample and apply mask bit
|
||||||
#if MSAA_COPY
|
#if MSAA_COPY
|
||||||
|
|
|
@ -26,9 +26,8 @@ public:
|
||||||
std::string GenerateWireframeGeometryShader() const;
|
std::string GenerateWireframeGeometryShader() const;
|
||||||
std::string GenerateWireframeFragmentShader() const;
|
std::string GenerateWireframeFragmentShader() const;
|
||||||
std::string GenerateVRAMReadFragmentShader(u32 resolution_scale, u32 multisamples) const;
|
std::string GenerateVRAMReadFragmentShader(u32 resolution_scale, u32 multisamples) const;
|
||||||
std::string GenerateVRAMWriteFragmentShader(u32 resolution_scale, bool use_buffer, bool use_ssbo,
|
std::string GenerateVRAMWriteFragmentShader(bool use_buffer, bool use_ssbo, bool write_mask_as_depth) const;
|
||||||
bool write_mask_as_depth) const;
|
std::string GenerateVRAMCopyFragmentShader(bool write_mask_as_depth) const;
|
||||||
std::string GenerateVRAMCopyFragmentShader(u32 resolution_scale, bool write_mask_as_depth) const;
|
|
||||||
std::string GenerateVRAMFillFragmentShader(bool wrapped, bool interlaced, bool write_mask_as_depth) const;
|
std::string GenerateVRAMFillFragmentShader(bool wrapped, bool interlaced, bool write_mask_as_depth) const;
|
||||||
std::string GenerateVRAMUpdateDepthFragmentShader(u32 multisamples) const;
|
std::string GenerateVRAMUpdateDepthFragmentShader(u32 multisamples) const;
|
||||||
std::string GenerateVRAMExtractFragmentShader(u32 resolution_scale, u32 multisamples, bool color_24bit,
|
std::string GenerateVRAMExtractFragmentShader(u32 resolution_scale, u32 multisamples, bool color_24bit,
|
||||||
|
|
Loading…
Reference in New Issue