Merge pull request #5849 from beholdnec/tex-format-bookkeeping

Video: Clearly separate Texture and EFB Copy formats
This commit is contained in:
Anthony 2017-08-04 22:34:34 -07:00 committed by GitHub
commit 737e045bd8
27 changed files with 1275 additions and 1319 deletions

View File

@ -87,9 +87,9 @@ void PSTextureEncoder::Shutdown()
SAFE_RELEASE(m_out);
}
void PSTextureEncoder::Encode(u8* dst, const EFBCopyFormat& format, u32 native_width,
void PSTextureEncoder::Encode(u8* dst, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
bool is_depth_copy, const EFBRectangle& src_rect, bool scale_by_half)
const EFBRectangle& src_rect, bool scale_by_half)
{
if (!m_ready) // Make sure we initialized OK
return;
@ -100,7 +100,7 @@ void PSTextureEncoder::Encode(u8* dst, const EFBCopyFormat& format, u32 native_w
// FIXME: Instead of resolving EFB, it would be better to pick out a
// single sample from each pixel. The game may break if it isn't
// expecting the blurred edges around multisampled shapes.
ID3D11ShaderResourceView* pEFB = is_depth_copy ?
ID3D11ShaderResourceView* pEFB = params.depth ?
FramebufferManager::GetResolvedEFBDepthTexture()->GetSRV() :
FramebufferManager::GetResolvedEFBColorTexture()->GetSRV();
@ -119,12 +119,12 @@ void PSTextureEncoder::Encode(u8* dst, const EFBCopyFormat& format, u32 native_w
D3D::context->OMSetRenderTargets(1, &m_outRTV, nullptr);
EFBEncodeParams params;
params.SrcLeft = src_rect.left;
params.SrcTop = src_rect.top;
params.DestWidth = native_width;
params.ScaleFactor = scale_by_half ? 2 : 1;
D3D::context->UpdateSubresource(m_encodeParams, 0, nullptr, &params, 0, 0);
EFBEncodeParams encode_params;
encode_params.SrcLeft = src_rect.left;
encode_params.SrcTop = src_rect.top;
encode_params.DestWidth = native_width;
encode_params.ScaleFactor = scale_by_half ? 2 : 1;
D3D::context->UpdateSubresource(m_encodeParams, 0, nullptr, &encode_params, 0, 0);
D3D::stateman->SetPixelConstants(m_encodeParams);
// We also use linear filtering for both box filtering and downsampling higher resolutions to 1x
@ -137,7 +137,7 @@ void PSTextureEncoder::Encode(u8* dst, const EFBCopyFormat& format, u32 native_w
D3D::SetPointCopySampler();
D3D::drawShadedTexQuad(pEFB, targetRect.AsRECT(), g_renderer->GetTargetWidth(),
g_renderer->GetTargetHeight(), GetEncodingPixelShader(format),
g_renderer->GetTargetHeight(), GetEncodingPixelShader(params),
VertexShaderCache::GetSimpleVertexShader(),
VertexShaderCache::GetSimpleInputLayout());
@ -168,18 +168,18 @@ void PSTextureEncoder::Encode(u8* dst, const EFBCopyFormat& format, u32 native_w
FramebufferManager::GetEFBDepthTexture()->GetDSV());
}
ID3D11PixelShader* PSTextureEncoder::GetEncodingPixelShader(const EFBCopyFormat& format)
ID3D11PixelShader* PSTextureEncoder::GetEncodingPixelShader(const EFBCopyParams& params)
{
auto iter = m_encoding_shaders.find(format);
auto iter = m_encoding_shaders.find(params);
if (iter != m_encoding_shaders.end())
return iter->second;
D3DBlob* bytecode = nullptr;
const char* shader = TextureConversionShader::GenerateEncodingShader(format, APIType::D3D);
const char* shader = TextureConversionShader::GenerateEncodingShader(params, APIType::D3D);
if (!D3D::CompilePixelShader(shader, &bytecode))
{
PanicAlert("Failed to compile texture encoding shader.");
m_encoding_shaders[format] = nullptr;
m_encoding_shaders[params] = nullptr;
return nullptr;
}
@ -188,7 +188,7 @@ ID3D11PixelShader* PSTextureEncoder::GetEncodingPixelShader(const EFBCopyFormat&
D3D::device->CreatePixelShader(bytecode->Data(), bytecode->Size(), nullptr, &newShader);
CHECK(SUCCEEDED(hr), "create efb encoder pixel shader");
m_encoding_shaders.emplace(format, newShader);
m_encoding_shaders.emplace(params, newShader);
return newShader;
}
}

View File

@ -32,12 +32,12 @@ public:
void Init();
void Shutdown();
void Encode(u8* dst, const EFBCopyFormat& format, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, bool is_depth_copy, const EFBRectangle& src_rect,
void Encode(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
bool scale_by_half);
private:
ID3D11PixelShader* GetEncodingPixelShader(const EFBCopyFormat& format);
ID3D11PixelShader* GetEncodingPixelShader(const EFBCopyParams& params);
bool m_ready;
@ -45,6 +45,6 @@ private:
ID3D11RenderTargetView* m_outRTV;
ID3D11Texture2D* m_outStage;
ID3D11Buffer* m_encodeParams;
std::map<EFBCopyFormat, ID3D11PixelShader*> m_encoding_shaders;
std::map<EFBCopyParams, ID3D11PixelShader*> m_encoding_shaders;
};
}

View File

@ -38,12 +38,12 @@ std::unique_ptr<AbstractTexture> TextureCache::CreateTexture(const TextureConfig
return std::make_unique<DXTexture>(config);
}
void TextureCache::CopyEFB(u8* dst, const EFBCopyFormat& format, u32 native_width,
void TextureCache::CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
bool is_depth_copy, const EFBRectangle& src_rect, bool scale_by_half)
const EFBRectangle& src_rect, bool scale_by_half)
{
g_encoder->Encode(dst, format, native_width, bytes_per_row, num_blocks_y, memory_stride,
is_depth_copy, src_rect, scale_by_half);
g_encoder->Encode(dst, params, native_width, bytes_per_row, num_blocks_y, memory_stride, src_rect,
scale_by_half);
}
const char palette_shader[] =
@ -126,8 +126,8 @@ void main(
}
)HLSL";
void TextureCache::ConvertTexture(TCacheEntry* destination, TCacheEntry* source, void* palette,
TlutFormat format)
void TextureCache::ConvertTexture(TCacheEntry* destination, TCacheEntry* source,
const void* palette, TLUTFormat format)
{
DXTexture* source_texture = static_cast<DXTexture*>(source->texture.get());
DXTexture* destination_texture = static_cast<DXTexture*>(destination->texture.get());
@ -144,7 +144,7 @@ void TextureCache::ConvertTexture(TCacheEntry* destination, TCacheEntry* source,
D3D::stateman->SetTexture(1, palette_buf_srv);
// TODO: Add support for C14X2 format. (Different multiplier, more palette entries.)
float params[4] = {(source->format & 0xf) == GX_TF_I4 ? 15.f : 255.f};
float params[4] = {source->format == TextureFormat::I4 ? 15.f : 255.f};
D3D::context->UpdateSubresource(palette_uniform, 0, nullptr, &params, 0, 0);
D3D::stateman->SetPixelConstants(palette_uniform);
@ -163,8 +163,9 @@ void TextureCache::ConvertTexture(TCacheEntry* destination, TCacheEntry* source,
// Create texture copy
D3D::drawShadedTexQuad(
source_texture->GetRawTexIdentifier()->GetSRV(), &sourcerect, source->GetWidth(),
source->GetHeight(), palette_pixel_shader[format], VertexShaderCache::GetSimpleVertexShader(),
VertexShaderCache::GetSimpleInputLayout(), GeometryShaderCache::GetCopyGeometryShader());
source->GetHeight(), palette_pixel_shader[static_cast<int>(format)],
VertexShaderCache::GetSimpleVertexShader(), VertexShaderCache::GetSimpleInputLayout(),
GeometryShaderCache::GetCopyGeometryShader());
D3D::context->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV(),
FramebufferManager::GetEFBDepthTexture()->GetDSV());
@ -190,9 +191,9 @@ TextureCache::TextureCache()
palette_buf = nullptr;
palette_buf_srv = nullptr;
palette_uniform = nullptr;
palette_pixel_shader[GX_TL_IA8] = GetConvertShader("IA8");
palette_pixel_shader[GX_TL_RGB565] = GetConvertShader("RGB565");
palette_pixel_shader[GX_TL_RGB5A3] = GetConvertShader("RGB5A3");
palette_pixel_shader[static_cast<int>(TLUTFormat::IA8)] = GetConvertShader("IA8");
palette_pixel_shader[static_cast<int>(TLUTFormat::RGB565)] = GetConvertShader("RGB565");
palette_pixel_shader[static_cast<int>(TLUTFormat::RGB5A3)] = GetConvertShader("RGB5A3");
auto lutBd = CD3D11_BUFFER_DESC(sizeof(u16) * 256, D3D11_BIND_SHADER_RESOURCE);
HRESULT hr = D3D::device->CreateBuffer(&lutBd, nullptr, &palette_buf);
CHECK(SUCCEEDED(hr), "create palette decoder lut buffer");

View File

@ -28,12 +28,12 @@ private:
return 0;
};
void ConvertTexture(TCacheEntry* destination, TCacheEntry* source, void* palette,
TlutFormat format) override;
void ConvertTexture(TCacheEntry* destination, TCacheEntry* source, const void* palette,
TLUTFormat format) override;
void CopyEFB(u8* dst, const EFBCopyFormat& format, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, bool is_depth_copy,
const EFBRectangle& src_rect, bool scale_by_half) override;
void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
bool scale_by_half) override;
void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect,
bool scale_by_half, unsigned int cbuf_id, const float* colmat) override;

View File

@ -20,14 +20,14 @@ public:
~TextureCache() {}
bool CompileShaders() override { return true; }
void DeleteShaders() override {}
void ConvertTexture(TCacheEntry* entry, TCacheEntry* unconverted, void* palette,
TlutFormat format) override
void ConvertTexture(TCacheEntry* entry, TCacheEntry* unconverted, const void* palette,
TLUTFormat format) override
{
}
void CopyEFB(u8* dst, const EFBCopyFormat& format, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, bool is_depth_copy,
const EFBRectangle& src_rect, bool scale_by_half) override
void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
bool scale_by_half) override
{
}

View File

@ -9,6 +9,7 @@
#include <memory>
#include <vector>
#include "Common/Assert.h"
#include "Common/GL/GLInterfaceBase.h"
#include "Common/MsgHandler.h"
#include "Common/StringUtil.h"
@ -32,12 +33,18 @@ namespace OGL
{
static u32 s_ColorCbufid;
static u32 s_DepthCbufid;
static SHADER s_palette_pixel_shader[3];
struct PaletteShader
{
SHADER shader;
GLuint buffer_offset_uniform;
GLuint multiplier_uniform;
GLuint copy_position_uniform;
};
static PaletteShader s_palette_shader[3];
static std::unique_ptr<StreamBuffer> s_palette_stream_buffer;
static GLuint s_palette_resolv_texture;
static GLuint s_palette_buffer_offset_uniform[3];
static GLuint s_palette_multiplier_uniform[3];
static GLuint s_palette_copy_position_uniform[3];
static GLuint s_palette_resolv_texture = 0;
struct TextureDecodingProgramInfo
{
@ -64,12 +71,12 @@ std::unique_ptr<AbstractTexture> TextureCache::CreateTexture(const TextureConfig
return std::make_unique<OGLTexture>(config);
}
void TextureCache::CopyEFB(u8* dst, const EFBCopyFormat& format, u32 native_width,
void TextureCache::CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
bool is_depth_copy, const EFBRectangle& src_rect, bool scale_by_half)
const EFBRectangle& src_rect, bool scale_by_half)
{
TextureConverter::EncodeToRamFromTexture(dst, format, native_width, bytes_per_row, num_blocks_y,
memory_stride, is_depth_copy, src_rect, scale_by_half);
TextureConverter::EncodeToRamFromTexture(dst, params, native_width, bytes_per_row, num_blocks_y,
memory_stride, src_rect, scale_by_half);
}
TextureCache::TextureCache()
@ -126,6 +133,23 @@ GLuint TextureCache::GetColorCopyPositionUniform() const
return m_colorCopyPositionUniform;
}
// Compiles the palette conversion program for |tlutfmt| into the s_palette_shader entry
// indexed by the format's integer value, then looks up the uniform locations on the
// resulting program. Returns false, without touching the uniform fields, when
// ProgramShaderCache::CompileShader() reports failure.
static bool CompilePaletteShader(TLUTFormat tlutfmt, const std::string& vcode,
                                 const std::string& pcode, const std::string& gcode)
{
  _assert_(IsValidTLUTFormat(tlutfmt));

  PaletteShader& slot = s_palette_shader[static_cast<int>(tlutfmt)];
  const bool compiled = ProgramShaderCache::CompileShader(slot.shader, vcode, pcode, gcode);
  if (compiled)
  {
    // The program id is only read after a successful compile.
    const auto program_id = slot.shader.glprogid;
    slot.buffer_offset_uniform = glGetUniformLocation(program_id, "texture_buffer_offset");
    slot.multiplier_uniform = glGetUniformLocation(program_id, "multiplier");
    slot.copy_position_uniform = glGetUniformLocation(program_id, "copy_position");
  }
  return compiled;
}
bool TextureCache::CompileShaders()
{
constexpr const char* color_copy_program = "SAMPLER_BINDING(9) uniform sampler2DArray samp9;\n"
@ -315,44 +339,17 @@ bool TextureCache::CompileShaders()
if (g_ActiveConfig.backend_info.bSupportsPaletteConversion)
{
if (!ProgramShaderCache::CompileShader(
s_palette_pixel_shader[GX_TL_IA8], StringFromFormat(vertex_program, prefix, prefix),
"#define DECODE DecodePixel_IA8" + palette_shader, geo_program))
{
if (!CompilePaletteShader(TLUTFormat::IA8, StringFromFormat(vertex_program, prefix, prefix),
"#define DECODE DecodePixel_IA8" + palette_shader, geo_program))
return false;
}
s_palette_buffer_offset_uniform[GX_TL_IA8] =
glGetUniformLocation(s_palette_pixel_shader[GX_TL_IA8].glprogid, "texture_buffer_offset");
s_palette_multiplier_uniform[GX_TL_IA8] =
glGetUniformLocation(s_palette_pixel_shader[GX_TL_IA8].glprogid, "multiplier");
s_palette_copy_position_uniform[GX_TL_IA8] =
glGetUniformLocation(s_palette_pixel_shader[GX_TL_IA8].glprogid, "copy_position");
if (!ProgramShaderCache::CompileShader(
s_palette_pixel_shader[GX_TL_RGB565], StringFromFormat(vertex_program, prefix, prefix),
"#define DECODE DecodePixel_RGB565" + palette_shader, geo_program))
{
if (!CompilePaletteShader(TLUTFormat::RGB565, StringFromFormat(vertex_program, prefix, prefix),
"#define DECODE DecodePixel_RGB565" + palette_shader, geo_program))
return false;
}
s_palette_buffer_offset_uniform[GX_TL_RGB565] = glGetUniformLocation(
s_palette_pixel_shader[GX_TL_RGB565].glprogid, "texture_buffer_offset");
s_palette_multiplier_uniform[GX_TL_RGB565] =
glGetUniformLocation(s_palette_pixel_shader[GX_TL_RGB565].glprogid, "multiplier");
s_palette_copy_position_uniform[GX_TL_RGB565] =
glGetUniformLocation(s_palette_pixel_shader[GX_TL_RGB565].glprogid, "copy_position");
if (!ProgramShaderCache::CompileShader(
s_palette_pixel_shader[GX_TL_RGB5A3], StringFromFormat(vertex_program, prefix, prefix),
"#define DECODE DecodePixel_RGB5A3" + palette_shader, geo_program))
{
if (!CompilePaletteShader(TLUTFormat::RGB5A3, StringFromFormat(vertex_program, prefix, prefix),
"#define DECODE DecodePixel_RGB5A3" + palette_shader, geo_program))
return false;
}
s_palette_buffer_offset_uniform[GX_TL_RGB5A3] = glGetUniformLocation(
s_palette_pixel_shader[GX_TL_RGB5A3].glprogid, "texture_buffer_offset");
s_palette_multiplier_uniform[GX_TL_RGB5A3] =
glGetUniformLocation(s_palette_pixel_shader[GX_TL_RGB5A3].glprogid, "multiplier");
s_palette_copy_position_uniform[GX_TL_RGB5A3] =
glGetUniformLocation(s_palette_pixel_shader[GX_TL_RGB5A3].glprogid, "copy_position");
}
return true;
@ -364,16 +361,19 @@ void TextureCache::DeleteShaders()
m_depthMatrixProgram.Destroy();
if (g_ActiveConfig.backend_info.bSupportsPaletteConversion)
for (auto& shader : s_palette_pixel_shader)
shader.Destroy();
for (auto& shader : s_palette_shader)
shader.shader.Destroy();
}
void TextureCache::ConvertTexture(TCacheEntry* destination, TCacheEntry* source, void* palette,
TlutFormat format)
void TextureCache::ConvertTexture(TCacheEntry* destination, TCacheEntry* source,
const void* palette, TLUTFormat tlutfmt)
{
if (!g_ActiveConfig.backend_info.bSupportsPaletteConversion)
return;
_assert_(IsValidTLUTFormat(tlutfmt));
const PaletteShader& palette_shader = s_palette_shader[static_cast<int>(tlutfmt)];
g_renderer->ResetAPIState();
OGLTexture* source_texture = static_cast<OGLTexture*>(source->texture.get());
@ -385,16 +385,17 @@ void TextureCache::ConvertTexture(TCacheEntry* destination, TCacheEntry* source,
FramebufferManager::SetFramebuffer(destination_texture->GetFramebuffer());
glViewport(0, 0, destination->GetWidth(), destination->GetHeight());
s_palette_pixel_shader[format].Bind();
palette_shader.shader.Bind();
// C14 textures are currently unsupported
int size = (source->format & 0xf) == GX_TF_I4 ? 32 : 512;
int size = source->format == TextureFormat::I4 ? 32 : 512;
auto buffer = s_palette_stream_buffer->Map(size);
memcpy(buffer.first, palette, size);
s_palette_stream_buffer->Unmap(size);
glUniform1i(s_palette_buffer_offset_uniform[format], buffer.second / 2);
glUniform1f(s_palette_multiplier_uniform[format], (source->format & 0xf) == 0 ? 15.0f : 255.0f);
glUniform4f(s_palette_copy_position_uniform[format], 0.0f, 0.0f,
glUniform1i(palette_shader.buffer_offset_uniform, buffer.second / 2);
glUniform1f(palette_shader.multiplier_uniform,
source->format == TextureFormat::I4 ? 15.0f : 255.0f);
glUniform4f(palette_shader.copy_position_uniform, 0.0f, 0.0f,
static_cast<float>(source->GetWidth()), static_cast<float>(source->GetHeight()));
glActiveTexture(GL_TEXTURE10);
@ -441,7 +442,7 @@ void DestroyTextureDecodingResources()
s_texture_decoding_program_info.clear();
}
bool TextureCache::SupportsGPUTextureDecode(TextureFormat format, TlutFormat palette_format)
bool TextureCache::SupportsGPUTextureDecode(TextureFormat format, TLUTFormat palette_format)
{
auto key = std::make_pair(static_cast<u32>(format), static_cast<u32>(palette_format));
auto iter = s_texture_decoding_program_info.find(key);
@ -483,7 +484,7 @@ bool TextureCache::SupportsGPUTextureDecode(TextureFormat format, TlutFormat pal
void TextureCache::DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, const u8* data,
size_t data_size, TextureFormat format, u32 width, u32 height,
u32 aligned_width, u32 aligned_height, u32 row_stride,
const u8* palette, TlutFormat palette_format)
const u8* palette, TLUTFormat palette_format)
{
auto key = std::make_pair(static_cast<u32>(format), static_cast<u32>(palette_format));
auto iter = s_texture_decoding_program_info.find(key);

View File

@ -26,23 +26,23 @@ public:
static TextureCache* GetInstance();
bool SupportsGPUTextureDecode(TextureFormat format, TlutFormat palette_format) override;
bool SupportsGPUTextureDecode(TextureFormat format, TLUTFormat palette_format) override;
void DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, const u8* data, size_t data_size,
TextureFormat format, u32 width, u32 height, u32 aligned_width,
u32 aligned_height, u32 row_stride, const u8* palette,
TlutFormat palette_format) override;
TLUTFormat palette_format) override;
const SHADER& GetColorCopyProgram() const;
GLuint GetColorCopyPositionUniform() const;
private:
std::unique_ptr<AbstractTexture> CreateTexture(const TextureConfig& config) override;
void ConvertTexture(TCacheEntry* destination, TCacheEntry* source, void* palette,
TlutFormat format) override;
void ConvertTexture(TCacheEntry* destination, TCacheEntry* source, const void* palette,
TLUTFormat format) override;
void CopyEFB(u8* dst, const EFBCopyFormat& format, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, bool is_depth_copy,
const EFBRectangle& src_rect, bool scale_by_half) override;
void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
bool scale_by_half) override;
void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect,
bool scale_by_half, unsigned int cbuf_id, const float* colmat) override;

View File

@ -51,7 +51,7 @@ struct EncodingProgram
SHADER program;
GLint copy_position_uniform;
};
static std::map<EFBCopyFormat, EncodingProgram> s_encoding_programs;
static std::map<EFBCopyParams, EncodingProgram> s_encoding_programs;
static GLuint s_PBO = 0; // for readback with different strides
@ -136,13 +136,13 @@ static void CreatePrograms()
ProgramShaderCache::CompileShader(s_yuyvToRgbProgram, VProgramYuyvToRgb, FProgramYuyvToRgb);
}
static EncodingProgram& GetOrCreateEncodingShader(const EFBCopyFormat& format)
static EncodingProgram& GetOrCreateEncodingShader(const EFBCopyParams& params)
{
auto iter = s_encoding_programs.find(format);
auto iter = s_encoding_programs.find(params);
if (iter != s_encoding_programs.end())
return iter->second;
const char* shader = TextureConversionShader::GenerateEncodingShader(format, APIType::OpenGL);
const char* shader = TextureConversionShader::GenerateEncodingShader(params, APIType::OpenGL);
#if defined(_DEBUG) || defined(DEBUGFAST)
if (g_ActiveConfig.iLog & CONF_SAVESHADERS && shader)
@ -166,7 +166,7 @@ static EncodingProgram& GetOrCreateEncodingShader(const EFBCopyFormat& format)
PanicAlert("Failed to compile texture encoding shader.");
program.copy_position_uniform = glGetUniformLocation(program.program.glprogid, "position");
return s_encoding_programs.emplace(format, program).first->second;
return s_encoding_programs.emplace(params, program).first->second;
}
void Init()
@ -271,24 +271,24 @@ static void EncodeToRamUsingShader(GLuint srcTexture, u8* destAddr, u32 dst_line
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
}
void EncodeToRamFromTexture(u8* dest_ptr, const EFBCopyFormat& format, u32 native_width,
void EncodeToRamFromTexture(u8* dest_ptr, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
bool is_depth_copy, const EFBRectangle& src_rect, bool scale_by_half)
const EFBRectangle& src_rect, bool scale_by_half)
{
g_renderer->ResetAPIState();
EncodingProgram& texconv_shader = GetOrCreateEncodingShader(format);
EncodingProgram& texconv_shader = GetOrCreateEncodingShader(params);
texconv_shader.program.Bind();
glUniform4i(texconv_shader.copy_position_uniform, src_rect.left, src_rect.top, native_width,
scale_by_half ? 2 : 1);
const GLuint read_texture = is_depth_copy ?
const GLuint read_texture = params.depth ?
FramebufferManager::ResolveAndGetDepthTarget(src_rect) :
FramebufferManager::ResolveAndGetRenderTarget(src_rect);
EncodeToRamUsingShader(read_texture, dest_ptr, bytes_per_row, num_blocks_y, memory_stride,
scale_by_half && !is_depth_copy);
scale_by_half && !params.depth);
FramebufferManager::SetFramebuffer(0);
g_renderer->RestoreAPIState();

View File

@ -7,9 +7,10 @@
#include "Common/CommonTypes.h"
#include "Common/GL/GLUtil.h"
#include "VideoCommon/TextureDecoder.h"
#include "VideoCommon/VideoCommon.h"
struct EFBCopyParams;
namespace OGL
{
// Converts textures between formats using shaders
@ -25,9 +26,9 @@ void EncodeToRamYUYV(GLuint srcTexture, const TargetRectangle& sourceRc, u8* des
void DecodeToTexture(u32 xfbAddr, int srcWidth, int srcHeight, GLuint destTexture);
// Encodes into dest_ptr. Declared void, so the size of the encoded data is not returned.
void EncodeToRamFromTexture(u8* dest_ptr, const EFBCopyFormat& format, u32 native_width,
void EncodeToRamFromTexture(u8* dest_ptr, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
bool is_depth_copy, const EFBRectangle& src_rect, bool scale_by_half);
const EFBRectangle& src_rect, bool scale_by_half);
}
} // namespace OGL

View File

@ -50,13 +50,13 @@ class TextureCache : public TextureCacheBase
public:
bool CompileShaders() override { return true; }
void DeleteShaders() override {}
void ConvertTexture(TCacheEntry* entry, TCacheEntry* unconverted, void* palette,
TlutFormat format) override
void ConvertTexture(TCacheEntry* entry, TCacheEntry* unconverted, const void* palette,
TLUTFormat format) override
{
}
void CopyEFB(u8* dst, const EFBCopyFormat& format, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, bool is_depth_copy,
const EFBRectangle& src_rect, bool scale_by_half) override
void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
bool scale_by_half) override
{
EfbCopy::CopyEfb();
}

File diff suppressed because it is too large Load Diff

View File

@ -111,13 +111,14 @@ void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8* sample)
TexMode0& tm0 = texUnit.texMode0[subTexmap];
TexImage0& ti0 = texUnit.texImage0[subTexmap];
TexTLUT& texTlut = texUnit.texTlut[subTexmap];
TlutFormat tlutfmt = (TlutFormat)texTlut.tlut_format;
TextureFormat texfmt = static_cast<TextureFormat>(ti0.format);
TLUTFormat tlutfmt = static_cast<TLUTFormat>(texTlut.tlut_format);
u8 *imageSrc, *imageSrcOdd = nullptr;
if (texUnit.texImage1[subTexmap].image_type)
{
imageSrc = &texMem[texUnit.texImage1[subTexmap].tmem_even * TMEM_LINE_SIZE];
if (ti0.format == GX_TF_RGBA8)
if (texfmt == TextureFormat::RGBA8)
imageSrcOdd = &texMem[texUnit.texImage2[subTexmap].tmem_odd * TMEM_LINE_SIZE];
}
else
@ -139,9 +140,9 @@ void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8* sample)
int mipWidth = imageWidth + 1;
int mipHeight = imageHeight + 1;
int fmtWidth = TexDecoder_GetBlockWidthInTexels(ti0.format);
int fmtHeight = TexDecoder_GetBlockHeightInTexels(ti0.format);
int fmtDepth = TexDecoder_GetTexelSizeInNibbles(ti0.format);
int fmtWidth = TexDecoder_GetBlockWidthInTexels(texfmt);
int fmtHeight = TexDecoder_GetBlockHeightInTexels(texfmt);
int fmtDepth = TexDecoder_GetTexelSizeInNibbles(texfmt);
imageWidth >>= mip;
imageHeight >>= mip;
@ -186,21 +187,21 @@ void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8* sample)
WrapCoord(&imageSPlus1, tm0.wrap_s, imageWidth);
WrapCoord(&imageTPlus1, tm0.wrap_t, imageHeight);
if (!(ti0.format == GX_TF_RGBA8 && texUnit.texImage1[subTexmap].image_type))
if (!(texfmt == TextureFormat::RGBA8 && texUnit.texImage1[subTexmap].image_type))
{
TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageT, imageWidth, ti0.format, tlut,
TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageT, imageWidth, texfmt, tlut,
tlutfmt);
SetTexel(sampledTex, texel, (128 - fractS) * (128 - fractT));
TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageT, imageWidth, ti0.format,
tlut, tlutfmt);
TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageT, imageWidth, texfmt, tlut,
tlutfmt);
AddTexel(sampledTex, texel, (fractS) * (128 - fractT));
TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageTPlus1, imageWidth, ti0.format,
tlut, tlutfmt);
TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageTPlus1, imageWidth, texfmt, tlut,
tlutfmt);
AddTexel(sampledTex, texel, (128 - fractS) * (fractT));
TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageTPlus1, imageWidth, ti0.format,
TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageTPlus1, imageWidth, texfmt,
tlut, tlutfmt);
AddTexel(sampledTex, texel, (fractS) * (fractT));
}
@ -238,9 +239,8 @@ void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8* sample)
WrapCoord(&imageS, tm0.wrap_s, imageWidth);
WrapCoord(&imageT, tm0.wrap_t, imageHeight);
if (!(ti0.format == GX_TF_RGBA8 && texUnit.texImage1[subTexmap].image_type))
TexDecoder_DecodeTexel(sample, imageSrc, imageS, imageT, imageWidth, ti0.format, tlut,
tlutfmt);
if (!(texfmt == TextureFormat::RGBA8 && texUnit.texImage1[subTexmap].image_type))
TexDecoder_DecodeTexel(sample, imageSrc, imageS, imageT, imageWidth, texfmt, tlut, tlutfmt);
else
TexDecoder_DecodeTexelRGBA8FromTmem(sample, imageSrc, imageSrcOdd, imageS, imageT,
imageWidth);

View File

@ -95,8 +95,8 @@ bool TextureCache::Initialize()
return true;
}
void TextureCache::ConvertTexture(TCacheEntry* destination, TCacheEntry* source, void* palette,
TlutFormat format)
void TextureCache::ConvertTexture(TCacheEntry* destination, TCacheEntry* source,
const void* palette, TLUTFormat format)
{
m_texture_converter->ConvertTexture(destination, source, m_render_pass, palette, format);
@ -111,9 +111,9 @@ void TextureCache::ConvertTexture(TCacheEntry* destination, TCacheEntry* source,
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
}
void TextureCache::CopyEFB(u8* dst, const EFBCopyFormat& format, u32 native_width,
void TextureCache::CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
bool is_depth_copy, const EFBRectangle& src_rect, bool scale_by_half)
const EFBRectangle& src_rect, bool scale_by_half)
{
// Flush EFB pokes first, as they're expected to be included.
FramebufferManager::GetInstance()->FlushEFBPokes();
@ -128,7 +128,7 @@ void TextureCache::CopyEFB(u8* dst, const EFBCopyFormat& format, u32 native_widt
region = Util::ClampRect2D(region, FramebufferManager::GetInstance()->GetEFBWidth(),
FramebufferManager::GetInstance()->GetEFBHeight());
Texture2D* src_texture;
if (is_depth_copy)
if (params.depth)
src_texture = FramebufferManager::GetInstance()->ResolveEFBDepthTexture(region);
else
src_texture = FramebufferManager::GetInstance()->ResolveEFBColorTexture(region);
@ -144,15 +144,15 @@ void TextureCache::CopyEFB(u8* dst, const EFBCopyFormat& format, u32 native_widt
src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(),
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
m_texture_converter->EncodeTextureToMemory(src_texture->GetView(), dst, format, native_width,
bytes_per_row, num_blocks_y, memory_stride,
is_depth_copy, src_rect, scale_by_half);
m_texture_converter->EncodeTextureToMemory(src_texture->GetView(), dst, params, native_width,
bytes_per_row, num_blocks_y, memory_stride, src_rect,
scale_by_half);
// Transition back to original state
src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), original_layout);
}
bool TextureCache::SupportsGPUTextureDecode(TextureFormat format, TlutFormat palette_format)
bool TextureCache::SupportsGPUTextureDecode(TextureFormat format, TLUTFormat palette_format)
{
return m_texture_converter->SupportsTextureDecoding(format, palette_format);
}
@ -160,7 +160,7 @@ bool TextureCache::SupportsGPUTextureDecode(TextureFormat format, TlutFormat pal
void TextureCache::DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, const u8* data,
size_t data_size, TextureFormat format, u32 width, u32 height,
u32 aligned_width, u32 aligned_height, u32 row_stride,
const u8* palette, TlutFormat palette_format)
const u8* palette, TLUTFormat palette_format)
{
// Group compute shader dispatches together in the init command buffer. That way we don't have to
// pay a penalty for switching from graphics->compute, or end/restart our render pass.

View File

@ -33,19 +33,19 @@ public:
std::unique_ptr<AbstractTexture> CreateTexture(const TextureConfig& config) override;
void ConvertTexture(TCacheEntry* destination, TCacheEntry* source, void* palette,
TlutFormat format) override;
void ConvertTexture(TCacheEntry* destination, TCacheEntry* source, const void* palette,
TLUTFormat format) override;
void CopyEFB(u8* dst, const EFBCopyFormat& format, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, bool is_depth_copy,
const EFBRectangle& src_rect, bool scale_by_half) override;
void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
bool scale_by_half) override;
bool SupportsGPUTextureDecode(TextureFormat format, TlutFormat palette_format) override;
bool SupportsGPUTextureDecode(TextureFormat format, TLUTFormat palette_format) override;
void DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, const u8* data, size_t data_size,
TextureFormat format, u32 width, u32 height, u32 aligned_width,
u32 aligned_height, u32 row_stride, const u8* palette,
TlutFormat palette_format) override;
TLUTFormat palette_format) override;
VkShaderModule GetCopyShader() const;
VkRenderPass GetTextureCopyRenderPass() const;

View File

@ -166,7 +166,7 @@ TextureConverter::GetCommandBufferForTextureConversion(const TextureCache::TCach
void TextureConverter::ConvertTexture(TextureCacheBase::TCacheEntry* dst_entry,
TextureCacheBase::TCacheEntry* src_entry,
VkRenderPass render_pass, const void* palette,
TlutFormat palette_format)
TLUTFormat palette_format)
{
struct PSUniformBlock
{
@ -182,7 +182,7 @@ void TextureConverter::ConvertTexture(TextureCacheBase::TCacheEntry* dst_entry,
_assert_(destination_texture->GetConfig().rendertarget);
// We want to align to 2 bytes (R16) or the device's texel buffer alignment, whichever is greater.
size_t palette_size = (src_entry->format & 0xF) == GX_TF_I4 ? 32 : 512;
size_t palette_size = src_entry->format == TextureFormat::I4 ? 32 : 512;
if (!ReserveTexelBufferStorage(palette_size, sizeof(u16)))
return;
@ -201,13 +201,13 @@ void TextureConverter::ConvertTexture(TextureCacheBase::TCacheEntry* dst_entry,
UtilityShaderDraw draw(command_buffer,
g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_TEXTURE_CONVERSION),
render_pass, g_shader_cache->GetScreenQuadVertexShader(), VK_NULL_HANDLE,
m_palette_conversion_shaders[palette_format]);
m_palette_conversion_shaders[static_cast<int>(palette_format)]);
VkRect2D region = {{0, 0}, {dst_entry->GetWidth(), dst_entry->GetHeight()}};
draw.BeginRenderPass(destination_texture->GetFramebuffer(), region);
PSUniformBlock uniforms = {};
uniforms.multiplier = (src_entry->format & 0xF) == GX_TF_I4 ? 15.0f : 255.0f;
uniforms.multiplier = src_entry->format == TextureFormat::I4 ? 15.0f : 255.0f;
uniforms.texel_buffer_offset = static_cast<int>(palette_offset / sizeof(u16));
draw.SetPushConstants(&uniforms, sizeof(uniforms));
draw.SetPSSampler(0, source_texture->GetRawTexIdentifier()->GetView(),
@ -219,16 +219,15 @@ void TextureConverter::ConvertTexture(TextureCacheBase::TCacheEntry* dst_entry,
}
void TextureConverter::EncodeTextureToMemory(VkImageView src_texture, u8* dest_ptr,
const EFBCopyFormat& format, u32 native_width,
const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
bool is_depth_copy, const EFBRectangle& src_rect,
bool scale_by_half)
const EFBRectangle& src_rect, bool scale_by_half)
{
VkShaderModule shader = GetEncodingShader(format);
VkShaderModule shader = GetEncodingShader(params);
if (shader == VK_NULL_HANDLE)
{
ERROR_LOG(VIDEO, "Missing encoding fragment shader for format %u->%u", format.efb_format,
static_cast<u32>(format.copy_format));
ERROR_LOG(VIDEO, "Missing encoding fragment shader for format %u->%u",
static_cast<unsigned>(params.efb_format), static_cast<unsigned>(params.copy_format));
return;
}
@ -251,7 +250,7 @@ void TextureConverter::EncodeTextureToMemory(VkImageView src_texture, u8* dest_p
// We also use linear filtering for both box filtering and downsampling higher resolutions to 1x
// TODO: This only produces perfect downsampling for 1.5x and 2x IR; other resolutions will
// need more complex down filtering to average all pixels and produce the correct result.
bool linear_filter = (scale_by_half && !is_depth_copy) || g_ActiveConfig.iEFBScale != SCALE_1X;
bool linear_filter = (scale_by_half && !params.depth) || g_ActiveConfig.iEFBScale != SCALE_1X;
draw.SetPSSampler(0, src_texture, linear_filter ? g_object_cache->GetLinearSampler() :
g_object_cache->GetPointSampler());
@ -387,7 +386,7 @@ void TextureConverter::DecodeYUYVTextureFromMemory(VKTexture* dst_texture, const
draw.EndRenderPass();
}
bool TextureConverter::SupportsTextureDecoding(TextureFormat format, TlutFormat palette_format)
bool TextureConverter::SupportsTextureDecoding(TextureFormat format, TLUTFormat palette_format)
{
auto key = std::make_pair(format, palette_format);
auto iter = m_decoding_pipelines.find(key);
@ -424,7 +423,7 @@ void TextureConverter::DecodeTexture(VkCommandBuffer command_buffer,
TextureCache::TCacheEntry* entry, u32 dst_level,
const u8* data, size_t data_size, TextureFormat format,
u32 width, u32 height, u32 aligned_width, u32 aligned_height,
u32 row_stride, const u8* palette, TlutFormat palette_format)
u32 row_stride, const u8* palette, TLUTFormat palette_format)
{
VKTexture* destination_texture = static_cast<VKTexture*>(entry->texture.get());
auto key = std::make_pair(format, palette_format);
@ -667,21 +666,21 @@ bool TextureConverter::CompilePaletteConversionShaders()
std::string palette_rgb5a3_program = StringFromFormat(
"%s\n%s", "#define DECODE DecodePixel_RGB5A3", PALETTE_CONVERSION_FRAGMENT_SHADER_SOURCE);
m_palette_conversion_shaders[GX_TL_IA8] =
m_palette_conversion_shaders[static_cast<int>(TLUTFormat::IA8)] =
Util::CompileAndCreateFragmentShader(palette_ia8_program);
m_palette_conversion_shaders[GX_TL_RGB565] =
m_palette_conversion_shaders[static_cast<int>(TLUTFormat::RGB565)] =
Util::CompileAndCreateFragmentShader(palette_rgb565_program);
m_palette_conversion_shaders[GX_TL_RGB5A3] =
m_palette_conversion_shaders[static_cast<int>(TLUTFormat::RGB5A3)] =
Util::CompileAndCreateFragmentShader(palette_rgb5a3_program);
return m_palette_conversion_shaders[GX_TL_IA8] != VK_NULL_HANDLE &&
m_palette_conversion_shaders[GX_TL_RGB565] != VK_NULL_HANDLE &&
m_palette_conversion_shaders[GX_TL_RGB5A3] != VK_NULL_HANDLE;
return m_palette_conversion_shaders[static_cast<int>(TLUTFormat::IA8)] != VK_NULL_HANDLE &&
m_palette_conversion_shaders[static_cast<int>(TLUTFormat::RGB565)] != VK_NULL_HANDLE &&
m_palette_conversion_shaders[static_cast<int>(TLUTFormat::RGB5A3)] != VK_NULL_HANDLE;
}
VkShaderModule TextureConverter::CompileEncodingShader(const EFBCopyFormat& format)
VkShaderModule TextureConverter::CompileEncodingShader(const EFBCopyParams& params)
{
const char* shader = TextureConversionShader::GenerateEncodingShader(format, APIType::Vulkan);
const char* shader = TextureConversionShader::GenerateEncodingShader(params, APIType::Vulkan);
VkShaderModule module = Util::CompileAndCreateFragmentShader(shader);
if (module == VK_NULL_HANDLE)
PanicAlert("Failed to compile texture encoding shader.");
@ -689,14 +688,14 @@ VkShaderModule TextureConverter::CompileEncodingShader(const EFBCopyFormat& form
return module;
}
VkShaderModule TextureConverter::GetEncodingShader(const EFBCopyFormat& format)
VkShaderModule TextureConverter::GetEncodingShader(const EFBCopyParams& params)
{
auto iter = m_encoding_shaders.find(format);
auto iter = m_encoding_shaders.find(params);
if (iter != m_encoding_shaders.end())
return iter->second;
VkShaderModule shader = CompileEncodingShader(format);
m_encoding_shaders.emplace(format, shader);
VkShaderModule shader = CompileEncodingShader(params);
m_encoding_shaders.emplace(params, shader);
return shader;
}

View File

@ -33,14 +33,13 @@ public:
// Applies palette to dst_entry, using indices from src_entry.
void ConvertTexture(TextureCacheBase::TCacheEntry* dst_entry,
TextureCache::TCacheEntry* src_entry, VkRenderPass render_pass,
const void* palette, TlutFormat palette_format);
const void* palette, TLUTFormat palette_format);
// Uses an encoding shader to copy src_texture to dest_ptr.
// NOTE: Executes the current command buffer.
void EncodeTextureToMemory(VkImageView src_texture, u8* dest_ptr, const EFBCopyFormat& format,
void EncodeTextureToMemory(VkImageView src_texture, u8* dest_ptr, const EFBCopyParams& params,
u32 native_width, u32 bytes_per_row, u32 num_blocks_y,
u32 memory_stride, bool is_depth_copy, const EFBRectangle& src_rect,
bool scale_by_half);
u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half);
// Encodes texture to guest memory in XFB (YUYV) format.
void EncodeTextureToMemoryYUYV(void* dst_ptr, u32 dst_width, u32 dst_stride, u32 dst_height,
@ -50,11 +49,11 @@ public:
void DecodeYUYVTextureFromMemory(VKTexture* dst_texture, const void* src_ptr, u32 src_width,
u32 src_stride, u32 src_height);
bool SupportsTextureDecoding(TextureFormat format, TlutFormat palette_format);
bool SupportsTextureDecoding(TextureFormat format, TLUTFormat palette_format);
void DecodeTexture(VkCommandBuffer command_buffer, TextureCache::TCacheEntry* entry,
u32 dst_level, const u8* data, size_t data_size, TextureFormat format,
u32 width, u32 height, u32 aligned_width, u32 aligned_height, u32 row_stride,
const u8* palette, TlutFormat palette_format);
const u8* palette, TLUTFormat palette_format);
private:
static const u32 ENCODING_TEXTURE_WIDTH = EFB_WIDTH * 4;
@ -71,8 +70,8 @@ private:
bool CompilePaletteConversionShaders();
VkShaderModule CompileEncodingShader(const EFBCopyFormat& format);
VkShaderModule GetEncodingShader(const EFBCopyFormat& format);
VkShaderModule CompileEncodingShader(const EFBCopyParams& params);
VkShaderModule GetEncodingShader(const EFBCopyParams& params);
bool CreateEncodingRenderPass();
bool CreateEncodingTexture();
@ -105,7 +104,7 @@ private:
std::array<VkShaderModule, NUM_PALETTE_CONVERSION_SHADERS> m_palette_conversion_shaders = {};
// Texture encoding - RGBA8->GX format in memory
std::map<EFBCopyFormat, VkShaderModule> m_encoding_shaders;
std::map<EFBCopyParams, VkShaderModule> m_encoding_shaders;
VkRenderPass m_encoding_render_pass = VK_NULL_HANDLE;
std::unique_ptr<Texture2D> m_encoding_render_texture;
VkFramebuffer m_encoding_render_framebuffer = VK_NULL_HANDLE;
@ -118,7 +117,7 @@ private:
VkShaderModule compute_shader;
bool valid;
};
std::map<std::pair<TextureFormat, TlutFormat>, TextureDecodingPipeline> m_decoding_pipelines;
std::map<std::pair<TextureFormat, TLUTFormat>, TextureDecodingPipeline> m_decoding_pipelines;
std::unique_ptr<Texture2D> m_decoding_texture;
// XFB encoding/decoding shaders

View File

@ -9,6 +9,8 @@
#include "Common/BitField.h"
#include "Common/CommonTypes.h"
enum class EFBCopyFormat;
#pragma pack(4)
enum
@ -958,7 +960,10 @@ union UPE_Copy
BitField<16, 1, u32>
auto_conv; // if 0 automatic color conversion by texture format and pixel type
u32 tp_realFormat() const { return target_pixel_format / 2 + (target_pixel_format & 1) * 8; }
EFBCopyFormat tp_realFormat() const
{
return static_cast<EFBCopyFormat>(target_pixel_format / 2 + (target_pixel_format & 1) * 8);
}
};
union BPU_PreloadTileInfo

View File

@ -215,7 +215,7 @@ void HiresTexture::Prefetch()
}
std::string HiresTexture::GenBaseName(const u8* texture, size_t texture_size, const u8* tlut,
size_t tlut_size, u32 width, u32 height, int format,
size_t tlut_size, u32 width, u32 height, TextureFormat format,
bool has_mipmaps, bool dump)
{
std::string name = "";
@ -385,7 +385,8 @@ u32 HiresTexture::CalculateMipCount(u32 width, u32 height)
std::shared_ptr<HiresTexture> HiresTexture::Search(const u8* texture, size_t texture_size,
const u8* tlut, size_t tlut_size, u32 width,
u32 height, int format, bool has_mipmaps)
u32 height, TextureFormat format,
bool has_mipmaps)
{
std::string base_filename =
GenBaseName(texture, texture_size, tlut, tlut_size, width, height, format, has_mipmaps);

View File

@ -11,6 +11,8 @@
#include "Common/CommonTypes.h"
#include "VideoCommon/TextureConfig.h"
enum class TextureFormat;
class HiresTexture
{
public:
@ -22,10 +24,10 @@ public:
static std::shared_ptr<HiresTexture> Search(const u8* texture, size_t texture_size,
const u8* tlut, size_t tlut_size, u32 width,
u32 height, int format, bool has_mipmaps);
u32 height, TextureFormat format, bool has_mipmaps);
static std::string GenBaseName(const u8* texture, size_t texture_size, const u8* tlut,
size_t tlut_size, u32 width, u32 height, int format,
size_t tlut_size, u32 width, u32 height, TextureFormat format,
bool has_mipmaps, bool dump = false);
static u32 CalculateMipCount(u32 width, u32 height);

View File

@ -226,8 +226,8 @@ void TextureCacheBase::SetBackupConfig(const VideoConfig& config)
backup_config.gpu_texture_decoding = config.bEnableGPUTextureDecoding;
}
TextureCacheBase::TCacheEntry* TextureCacheBase::ApplyPaletteToEntry(TCacheEntry* entry,
u8* palette, u32 tlutfmt)
TextureCacheBase::TCacheEntry*
TextureCacheBase::ApplyPaletteToEntry(TCacheEntry* entry, u8* palette, TLUTFormat tlutfmt)
{
TextureConfig new_config = entry->texture->GetConfig();
new_config.levels = 1;
@ -243,7 +243,7 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::ApplyPaletteToEntry(TCacheEntry
decoded_entry->frameCount = FRAMECOUNT_INVALID;
decoded_entry->is_efb_copy = false;
ConvertTexture(decoded_entry, entry, palette, static_cast<TlutFormat>(tlutfmt));
ConvertTexture(decoded_entry, entry, palette, tlutfmt);
textures_by_address.emplace(entry->addr, decoded_entry);
return decoded_entry;
@ -290,7 +290,8 @@ void TextureCacheBase::ScaleTextureCacheEntryTo(TextureCacheBase::TCacheEntry* e
}
TextureCacheBase::TCacheEntry*
TextureCacheBase::DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* palette, u32 tlutfmt)
TextureCacheBase::DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* palette,
TLUTFormat tlutfmt)
{
// If the flag may_have_overlapping_textures is cleared, there are no overlapping EFB copies,
// which aren't applied already. It is set for new textures, and for the affected range
@ -299,19 +300,17 @@ TextureCacheBase::DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* pale
return entry_to_update;
entry_to_update->may_have_overlapping_textures = false;
const bool isPaletteTexture =
(entry_to_update->format == GX_TF_C4 || entry_to_update->format == GX_TF_C8 ||
entry_to_update->format == GX_TF_C14X2 || entry_to_update->format >= 0x10000);
const bool isPaletteTexture = IsColorIndexed(entry_to_update->format.texfmt);
// EFB copies are excluded from these updates, until there's an example where a game would
// benefit from updating. This would require more work to be done.
if (entry_to_update->IsEfbCopy())
return entry_to_update;
u32 block_width = TexDecoder_GetBlockWidthInTexels(entry_to_update->format & 0xf);
u32 block_height = TexDecoder_GetBlockHeightInTexels(entry_to_update->format & 0xf);
u32 block_width = TexDecoder_GetBlockWidthInTexels(entry_to_update->format.texfmt);
u32 block_height = TexDecoder_GetBlockHeightInTexels(entry_to_update->format.texfmt);
u32 block_size = block_width * block_height *
TexDecoder_GetTexelSizeInNibbles(entry_to_update->format & 0xf) / 2;
TexDecoder_GetTexelSizeInNibbles(entry_to_update->format.texfmt) / 2;
u32 numBlocksX = (entry_to_update->native_width + block_width - 1) / block_width;
@ -490,9 +489,9 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
const u32 address = (tex.texImage3[id].image_base /* & 0x1FFFFF*/) << 5;
u32 width = tex.texImage0[id].width + 1;
u32 height = tex.texImage0[id].height + 1;
const int texformat = tex.texImage0[id].format;
const TextureFormat texformat = static_cast<TextureFormat>(tex.texImage0[id].format);
const u32 tlutaddr = tex.texTlut[id].tmem_offset << 9;
const u32 tlutfmt = tex.texTlut[id].tlut_format;
const TLUTFormat tlutfmt = static_cast<TLUTFormat>(tex.texTlut[id].tlut_format);
const bool use_mipmaps = SamplerCommon::AreBpTexMode0MipmapsEnabled(tex.texMode0[id]);
u32 tex_levels = use_mipmaps ? ((tex.texMode1[id].max_lod + 0xf) / 0x10 + 1) : 1;
const bool from_tmem = tex.texImage1[id].image_type != 0;
@ -511,18 +510,14 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
u64 base_hash = TEXHASH_INVALID;
u64 full_hash = TEXHASH_INVALID;
u32 full_format = texformat;
TextureAndTLUTFormat full_format(texformat, tlutfmt);
const bool isPaletteTexture =
(texformat == GX_TF_C4 || texformat == GX_TF_C8 || texformat == GX_TF_C14X2);
const bool isPaletteTexture = IsColorIndexed(texformat);
// Reject invalid tlut format.
if (isPaletteTexture && tlutfmt > GX_TL_RGB5A3)
if (isPaletteTexture && !IsValidTLUTFormat(tlutfmt))
return nullptr;
if (isPaletteTexture)
full_format = texformat | (tlutfmt << 16);
const u32 texture_size =
TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, texformat);
u32 bytes_per_block = (bsw * bsh * TexDecoder_GetTexelSizeInNibbles(texformat)) / 2;
@ -766,11 +761,9 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
// banks, and if we're doing a copy we may as well just do the whole thing on the CPU, since
// there's no conversion between formats. In the future this could be extended with a separate
// shader, however.
bool decode_on_gpu =
!hires_tex && g_ActiveConfig.UseGPUTextureDecoding() &&
g_texture_cache->SupportsGPUTextureDecode(static_cast<TextureFormat>(texformat),
static_cast<TlutFormat>(tlutfmt)) &&
!(from_tmem && texformat == GX_TF_RGBA8);
bool decode_on_gpu = !hires_tex && g_ActiveConfig.UseGPUTextureDecoding() &&
g_texture_cache->SupportsGPUTextureDecode(texformat, tlutfmt) &&
!(from_tmem && texformat == TextureFormat::RGBA8);
// create the entry/texture
TextureConfig config;
@ -796,18 +789,16 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
if (!hires_tex && decode_on_gpu)
{
u32 row_stride = bytes_per_block * (expandedWidth / bsw);
g_texture_cache->DecodeTextureOnGPU(
entry, 0, src_data, texture_size, static_cast<TextureFormat>(texformat), width, height,
expandedWidth, expandedHeight, row_stride, tlut, static_cast<TlutFormat>(tlutfmt));
g_texture_cache->DecodeTextureOnGPU(entry, 0, src_data, texture_size, texformat, width, height,
expandedWidth, expandedHeight, row_stride, tlut, tlutfmt);
}
else if (!hires_tex)
{
size_t decoded_texture_size = expandedWidth * sizeof(u32) * expandedHeight;
CheckTempSize(decoded_texture_size);
if (!(texformat == GX_TF_RGBA8 && from_tmem))
if (!(texformat == TextureFormat::RGBA8 && from_tmem))
{
TexDecoder_Decode(temp, src_data, expandedWidth, expandedHeight, texformat, tlut,
(TlutFormat)tlutfmt);
TexDecoder_Decode(temp, src_data, expandedWidth, expandedHeight, texformat, tlut, tlutfmt);
}
else
{
@ -878,17 +869,16 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
if (decode_on_gpu)
{
u32 row_stride = bytes_per_block * (expanded_mip_width / bsw);
g_texture_cache->DecodeTextureOnGPU(entry, level, mip_src_data, mip_size,
static_cast<TextureFormat>(texformat), mip_width,
mip_height, expanded_mip_width, expanded_mip_height,
row_stride, tlut, static_cast<TlutFormat>(tlutfmt));
g_texture_cache->DecodeTextureOnGPU(entry, level, mip_src_data, mip_size, texformat,
mip_width, mip_height, expanded_mip_width,
expanded_mip_height, row_stride, tlut, tlutfmt);
}
else
{
// No need to call CheckTempSize here, as mips will always be smaller than the base level.
size_t decoded_mip_size = expanded_mip_width * sizeof(u32) * expanded_mip_height;
TexDecoder_Decode(temp, mip_src_data, expanded_mip_width, expanded_mip_height, texformat,
tlut, (TlutFormat)tlutfmt);
tlut, tlutfmt);
entry->texture->Load(level, mip_width, mip_height, expanded_mip_width, temp,
decoded_mip_size);
}
@ -908,9 +898,10 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
return ReturnEntry(stage, entry);
}
void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat, u32 dstStride,
bool is_depth_copy, const EFBRectangle& srcRect,
bool isIntensity, bool scaleByHalf)
void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstFormat,
u32 dstStride, bool is_depth_copy,
const EFBRectangle& srcRect, bool isIntensity,
bool scaleByHalf)
{
// Emulation methods:
//
@ -980,81 +971,73 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFo
ColorMask[0] = ColorMask[1] = ColorMask[2] = ColorMask[3] = 255.0f;
ColorMask[4] = ColorMask[5] = ColorMask[6] = ColorMask[7] = 1.0f / 255.0f;
unsigned int cbufid = UINT_MAX;
u32 srcFormat = bpmem.zcontrol.pixel_format;
PEControl::PixelFormat srcFormat = bpmem.zcontrol.pixel_format;
bool efbHasAlpha = srcFormat == PEControl::RGBA6_Z24;
if (is_depth_copy)
{
switch (dstFormat)
{
case 0: // Z4
case EFBCopyFormat::R4: // Z4
colmat[3] = colmat[7] = colmat[11] = colmat[15] = 1.0f;
cbufid = 0;
dstFormat |= _GX_TF_CTF;
break;
case 8: // Z8H
dstFormat |= _GX_TF_CTF;
case 1: // Z8
case EFBCopyFormat::R8_0x1: // Z8
case EFBCopyFormat::R8: // Z8H
colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1.0f;
cbufid = 1;
break;
case 3: // Z16
case EFBCopyFormat::RA8: // Z16
colmat[1] = colmat[5] = colmat[9] = colmat[12] = 1.0f;
cbufid = 2;
break;
case 11: // Z16 (reverse order)
case EFBCopyFormat::RG8: // Z16 (reverse order)
colmat[0] = colmat[4] = colmat[8] = colmat[13] = 1.0f;
cbufid = 3;
dstFormat |= _GX_TF_CTF;
break;
case 6: // Z24X8
case EFBCopyFormat::RGBA8: // Z24X8
colmat[0] = colmat[5] = colmat[10] = 1.0f;
cbufid = 4;
break;
case 9: // Z8M
case EFBCopyFormat::G8: // Z8M
colmat[1] = colmat[5] = colmat[9] = colmat[13] = 1.0f;
cbufid = 5;
dstFormat |= _GX_TF_CTF;
break;
case 10: // Z8L
case EFBCopyFormat::B8: // Z8L
colmat[2] = colmat[6] = colmat[10] = colmat[14] = 1.0f;
cbufid = 6;
dstFormat |= _GX_TF_CTF;
break;
case 12: // Z16L - copy lower 16 depth bits
case EFBCopyFormat::GB8: // Z16L - copy lower 16 depth bits
// expected to be used as an IA8 texture (upper 8 bits stored as intensity, lower 8 bits
// stored as alpha)
// Used e.g. in Zelda: Skyward Sword
colmat[1] = colmat[5] = colmat[9] = colmat[14] = 1.0f;
cbufid = 7;
dstFormat |= _GX_TF_CTF;
break;
default:
ERROR_LOG(VIDEO, "Unknown copy zbuf format: 0x%x", dstFormat);
ERROR_LOG(VIDEO, "Unknown copy zbuf format: 0x%X", static_cast<int>(dstFormat));
colmat[2] = colmat[5] = colmat[8] = 1.0f;
cbufid = 8;
break;
}
dstFormat |= _GX_TF_ZTF;
}
else if (isIntensity)
{
fConstAdd[0] = fConstAdd[1] = fConstAdd[2] = 16.0f / 255.0f;
switch (dstFormat)
{
case 0: // I4
case 1: // I8
case 2: // IA4
case 3: // IA8
case 8: // I8
case EFBCopyFormat::R4: // I4
case EFBCopyFormat::R8_0x1: // I8
case EFBCopyFormat::R8: // I8
case EFBCopyFormat::RA4: // IA4
case EFBCopyFormat::RA8: // IA8
// TODO - verify these coefficients
colmat[0] = 0.257f;
colmat[1] = 0.504f;
@ -1066,13 +1049,14 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFo
colmat[9] = 0.504f;
colmat[10] = 0.098f;
if (dstFormat < 2 || dstFormat == 8)
if (dstFormat == EFBCopyFormat::R4 || dstFormat == EFBCopyFormat::R8_0x1 ||
dstFormat == EFBCopyFormat::R8)
{
colmat[12] = 0.257f;
colmat[13] = 0.504f;
colmat[14] = 0.098f;
fConstAdd[3] = 16.0f / 255.0f;
if (dstFormat == 0)
if (dstFormat == EFBCopyFormat::R4)
{
ColorMask[0] = ColorMask[1] = ColorMask[2] = 255.0f / 16.0f;
ColorMask[4] = ColorMask[5] = ColorMask[6] = 1.0f / 15.0f;
@ -1086,7 +1070,7 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFo
else // alpha
{
colmat[15] = 1;
if (dstFormat == 2)
if (dstFormat == EFBCopyFormat::RA4)
{
ColorMask[0] = ColorMask[1] = ColorMask[2] = ColorMask[3] = 255.0f / 16.0f;
ColorMask[4] = ColorMask[5] = ColorMask[6] = ColorMask[7] = 1.0f / 15.0f;
@ -1100,7 +1084,7 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFo
break;
default:
ERROR_LOG(VIDEO, "Unknown copy intensity format: 0x%x", dstFormat);
ERROR_LOG(VIDEO, "Unknown copy intensity format: 0x%X", static_cast<int>(dstFormat));
colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f;
cbufid = 13;
break;
@ -1110,21 +1094,19 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFo
{
switch (dstFormat)
{
case 0: // R4
case EFBCopyFormat::R4: // R4
colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1;
ColorMask[0] = 255.0f / 16.0f;
ColorMask[4] = 1.0f / 15.0f;
cbufid = 14;
dstFormat |= _GX_TF_CTF;
break;
case 1: // R8
case 8: // R8
case EFBCopyFormat::R8_0x1: // R8
case EFBCopyFormat::R8: // R8
colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1;
cbufid = 15;
dstFormat = GX_CTF_R8;
break;
case 2: // RA4
case EFBCopyFormat::RA4: // RA4
colmat[0] = colmat[4] = colmat[8] = colmat[15] = 1.0f;
ColorMask[0] = ColorMask[3] = 255.0f / 16.0f;
ColorMask[4] = ColorMask[7] = 1.0f / 15.0f;
@ -1136,9 +1118,8 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFo
fConstAdd[3] = 1.0f;
cbufid = 17;
}
dstFormat |= _GX_TF_CTF;
break;
case 3: // RA8
case EFBCopyFormat::RA8: // RA8
colmat[0] = colmat[4] = colmat[8] = colmat[15] = 1.0f;
cbufid = 18;
@ -1148,10 +1129,9 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFo
fConstAdd[3] = 1.0f;
cbufid = 19;
}
dstFormat |= _GX_TF_CTF;
break;
case 7: // A8
case EFBCopyFormat::A8: // A8
colmat[3] = colmat[7] = colmat[11] = colmat[15] = 1.0f;
cbufid = 20;
@ -1164,33 +1144,28 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFo
fConstAdd[3] = 1.0f;
cbufid = 21;
}
dstFormat |= _GX_TF_CTF;
break;
case 9: // G8
case EFBCopyFormat::G8: // G8
colmat[1] = colmat[5] = colmat[9] = colmat[13] = 1.0f;
cbufid = 22;
dstFormat |= _GX_TF_CTF;
break;
case 10: // B8
case EFBCopyFormat::B8: // B8
colmat[2] = colmat[6] = colmat[10] = colmat[14] = 1.0f;
cbufid = 23;
dstFormat |= _GX_TF_CTF;
break;
case 11: // RG8
case EFBCopyFormat::RG8: // RG8
colmat[0] = colmat[4] = colmat[8] = colmat[13] = 1.0f;
cbufid = 24;
dstFormat |= _GX_TF_CTF;
break;
case 12: // GB8
case EFBCopyFormat::GB8: // GB8
colmat[1] = colmat[5] = colmat[9] = colmat[14] = 1.0f;
cbufid = 25;
dstFormat |= _GX_TF_CTF;
break;
case 4: // RGB565
case EFBCopyFormat::RGB565: // RGB565
colmat[0] = colmat[5] = colmat[10] = 1.0f;
ColorMask[0] = ColorMask[2] = 255.0f / 8.0f;
ColorMask[4] = ColorMask[6] = 1.0f / 31.0f;
@ -1200,7 +1175,7 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFo
cbufid = 26;
break;
case 5: // RGB5A3
case EFBCopyFormat::RGB5A3: // RGB5A3
colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f;
ColorMask[0] = ColorMask[1] = ColorMask[2] = 255.0f / 8.0f;
ColorMask[4] = ColorMask[5] = ColorMask[6] = 1.0f / 31.0f;
@ -1215,7 +1190,7 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFo
cbufid = 28;
}
break;
case 6: // RGBA8
case EFBCopyFormat::RGBA8: // RGBA8
colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f;
cbufid = 29;
@ -1228,7 +1203,7 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFo
break;
default:
ERROR_LOG(VIDEO, "Unknown copy color format: 0x%x", dstFormat);
ERROR_LOG(VIDEO, "Unknown copy color format: 0x%X", static_cast<int>(dstFormat));
colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f;
cbufid = 31;
break;
@ -1267,7 +1242,7 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFo
}
// Get the base (in memory) format of this efb copy.
int baseFormat = TexDecoder_GetEfbCopyBaseFormat(dstFormat);
TextureFormat baseFormat = TexDecoder_GetEFBCopyBaseFormat(dstFormat);
u32 blockH = TexDecoder_GetBlockHeightInTexels(baseFormat);
const u32 blockW = TexDecoder_GetBlockWidthInTexels(baseFormat);
@ -1280,7 +1255,7 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFo
const u32 num_blocks_x = actualWidth / blockW;
// RGBA takes two cache lines per block; all others take one
const u32 bytes_per_block = baseFormat == GX_TF_RGBA8 ? 64 : 32;
const u32 bytes_per_block = baseFormat == TextureFormat::RGBA8 ? 64 : 32;
const u32 bytes_per_row = num_blocks_x * bytes_per_block;
const u32 covered_range = num_blocks_y * dstStride;
@ -1290,9 +1265,8 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFo
if (copy_to_ram)
{
EFBCopyFormat format(srcFormat, static_cast<TextureFormat>(dstFormat));
CopyEFB(dst, format, tex_w, bytes_per_row, num_blocks_y, dstStride, is_depth_copy, srcRect,
scaleByHalf);
EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity);
CopyEFB(dst, format, tex_w, bytes_per_row, num_blocks_y, dstStride, srcRect, scaleByHalf);
}
else
{
@ -1510,7 +1484,7 @@ TextureCacheBase::InvalidateTexture(TexAddrCache::iterator iter)
u32 TextureCacheBase::TCacheEntry::BytesPerRow() const
{
const u32 blockW = TexDecoder_GetBlockWidthInTexels(format);
const u32 blockW = TexDecoder_GetBlockWidthInTexels(format.texfmt);
// Round up source width to multiple of block size
const u32 actualWidth = Common::AlignUp(native_width, blockW);
@ -1518,14 +1492,14 @@ u32 TextureCacheBase::TCacheEntry::BytesPerRow() const
const u32 numBlocksX = actualWidth / blockW;
// RGBA takes two cache lines per block; all others take one
const u32 bytes_per_block = format == GX_TF_RGBA8 ? 64 : 32;
const u32 bytes_per_block = format == TextureFormat::RGBA8 ? 64 : 32;
return numBlocksX * bytes_per_block;
}
u32 TextureCacheBase::TCacheEntry::NumBlocksY() const
{
u32 blockH = TexDecoder_GetBlockHeightInTexels(format);
u32 blockH = TexDecoder_GetBlockHeightInTexels(format.texfmt);
// Round up source height to multiple of block size
u32 actualHeight = Common::AlignUp(native_height, blockH);

View File

@ -21,6 +21,47 @@
struct VideoConfig;
struct TextureAndTLUTFormat
{
TextureAndTLUTFormat(TextureFormat texfmt_ = TextureFormat::I4,
TLUTFormat tlutfmt_ = TLUTFormat::IA8)
: texfmt(texfmt_), tlutfmt(tlutfmt_)
{
}
bool operator==(const TextureAndTLUTFormat& other) const
{
if (IsColorIndexed(texfmt))
return texfmt == other.texfmt && tlutfmt == other.tlutfmt;
return texfmt == other.texfmt;
}
bool operator!=(const TextureAndTLUTFormat& other) const { return !operator==(other); }
TextureFormat texfmt;
TLUTFormat tlutfmt;
};
struct EFBCopyParams
{
EFBCopyParams(PEControl::PixelFormat efb_format_, EFBCopyFormat copy_format_, bool depth_,
bool yuv_)
: efb_format(efb_format_), copy_format(copy_format_), depth(depth_), yuv(yuv_)
{
}
bool operator<(const EFBCopyParams& rhs) const
{
return std::tie(efb_format, copy_format, depth, yuv) <
std::tie(rhs.efb_format, rhs.copy_format, rhs.depth, rhs.yuv);
}
PEControl::PixelFormat efb_format;
EFBCopyFormat copy_format;
bool depth;
bool yuv;
};
class TextureCacheBase
{
private:
@ -34,8 +75,8 @@ public:
u32 addr;
u32 size_in_bytes;
u64 base_hash;
u64 hash; // for paletted textures, hash = base_hash ^ palette_hash
u32 format; // bits 0-3 will contain the in-memory format.
u64 hash; // for paletted textures, hash = base_hash ^ palette_hash
TextureAndTLUTFormat format;
u32 memory_stride;
bool is_efb_copy;
bool is_custom_tex;
@ -62,7 +103,7 @@ public:
~TCacheEntry();
void SetGeneralParameters(u32 _addr, u32 _size, u32 _format)
void SetGeneralParameters(u32 _addr, u32 _size, TextureAndTLUTFormat _format)
{
addr = _addr;
size_in_bytes = _size;
@ -119,9 +160,9 @@ public:
void Invalidate();
virtual void CopyEFB(u8* dst, const EFBCopyFormat& format, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, bool is_depth_copy,
const EFBRectangle& src_rect, bool scale_by_half) = 0;
virtual void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
bool scale_by_half) = 0;
virtual bool CompileShaders() = 0;
virtual void DeleteShaders() = 0;
@ -130,15 +171,15 @@ public:
static void InvalidateAllBindPoints() { valid_bind_points.reset(); }
static bool IsValidBindPoint(u32 i) { return valid_bind_points.test(i); }
void BindTextures();
void CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat, u32 dstStride,
void CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstFormat, u32 dstStride,
bool is_depth_copy, const EFBRectangle& srcRect, bool isIntensity,
bool scaleByHalf);
virtual void ConvertTexture(TCacheEntry* entry, TCacheEntry* unconverted, void* palette,
TlutFormat format) = 0;
virtual void ConvertTexture(TCacheEntry* entry, TCacheEntry* unconverted, const void* palette,
TLUTFormat format) = 0;
// Returns true if the texture data and palette formats are supported by the GPU decoder.
virtual bool SupportsGPUTextureDecode(TextureFormat format, TlutFormat palette_format)
virtual bool SupportsGPUTextureDecode(TextureFormat format, TLUTFormat palette_format)
{
return false;
}
@ -150,7 +191,7 @@ public:
virtual void DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, const u8* data,
size_t data_size, TextureFormat format, u32 width, u32 height,
u32 aligned_width, u32 aligned_height, u32 row_stride,
const u8* palette, TlutFormat palette_format)
const u8* palette, TLUTFormat palette_format)
{
}
@ -177,10 +218,11 @@ private:
void SetBackupConfig(const VideoConfig& config);
TCacheEntry* ApplyPaletteToEntry(TCacheEntry* entry, u8* palette, u32 tlutfmt);
TCacheEntry* ApplyPaletteToEntry(TCacheEntry* entry, u8* palette, TLUTFormat tlutfmt);
void ScaleTextureCacheEntryTo(TCacheEntry* entry, u32 new_width, u32 new_height);
TCacheEntry* DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* palette, u32 tlutfmt);
TCacheEntry* DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* palette,
TLUTFormat tlutfmt);
void DumpTexture(TCacheEntry* entry, std::string basename, unsigned int level);
void CheckTempSize(size_t required_size);

View File

@ -13,6 +13,7 @@
#include "Common/MathUtil.h"
#include "Common/MsgHandler.h"
#include "VideoCommon/RenderBase.h"
#include "VideoCommon/TextureCacheBase.h"
#include "VideoCommon/TextureConversionShader.h"
#include "VideoCommon/VideoCommon.h"
@ -23,64 +24,40 @@ static bool IntensityConstantAdded = false;
namespace TextureConversionShader
{
// Maps an EFB copy format to its encoded sample count.
//
// Post-change mapping (the EFBCopyFormat:: labels):
//   R4                               -> 8
//   RA4, A8, R8_0x1, R8, G8, B8      -> 4
//   RA8, RGB565, RGB5A3, RG8, GB8    -> 2
//   RGBA8                            -> 1
// Any other value raises a PanicAlert and falls back to 1.
//
// WriteSwizzler stores the result in a local called `samples`; presumably it is the number
// of source texels packed into one output pixel — TODO confirm against WriteSwizzler's body.
//
// NOTE(review): this listing interleaves the pre-change switch (u32 format, GX_TF_* /
// GX_CTF_* labels) with the post-change switch (EFBCopyFormat labels). Two labels that
// happen to sit next to each other are NOT necessarily equivalent: for example GX_TF_I8
// appears directly above EFBCopyFormat::RA4 only because of how the diff lined up. Read the
// GX_* labels as one switch and the EFBCopyFormat:: labels as another.
u16 GetEncodedSampleCount(u32 format)
u16 GetEncodedSampleCount(EFBCopyFormat format)
{
switch (format)
{
case GX_TF_I4:
case EFBCopyFormat::R4:
return 8;
case GX_TF_I8:
case EFBCopyFormat::RA4:
return 4;
case GX_TF_IA4:
return 4;
case GX_TF_IA8:
case EFBCopyFormat::RA8:
return 2;
case GX_TF_RGB565:
case EFBCopyFormat::RGB565:
return 2;
case GX_TF_RGB5A3:
case EFBCopyFormat::RGB5A3:
return 2;
case GX_TF_RGBA8:
case EFBCopyFormat::RGBA8:
return 1;
case GX_CTF_R4:
return 8;
case GX_CTF_RA4:
case EFBCopyFormat::A8:
case EFBCopyFormat::R8_0x1:
case EFBCopyFormat::R8:
case EFBCopyFormat::G8:
case EFBCopyFormat::B8:
return 4;
case GX_CTF_RA8:
return 2;
case GX_CTF_A8:
return 4;
case GX_CTF_R8:
return 4;
case GX_CTF_G8:
return 4;
case GX_CTF_B8:
return 4;
case GX_CTF_RG8:
return 2;
case GX_CTF_GB8:
return 2;
case GX_TF_Z8:
return 4;
case GX_TF_Z16:
return 2;
case GX_TF_Z24X8:
return 1;
case GX_CTF_Z4:
return 8;
case GX_CTF_Z8M:
return 4;
case GX_CTF_Z8L:
return 4;
case GX_CTF_Z16L:
case EFBCopyFormat::RG8:
case EFBCopyFormat::GB8:
return 2;
default:
// Unknown format: report it, then return 1 so the caller still gets a value.
PanicAlert("Invalid EFB Copy Format (0x%X)! (GetEncodedSampleCount)", static_cast<int>(format));
return 1;
}
}
// block dimensions : widthStride, heightStride
// texture dims : width, height, x offset, y offset
static void WriteSwizzler(char*& p, u32 format, APIType ApiType)
static void WriteSwizzler(char*& p, EFBCopyFormat format, APIType ApiType)
{
// left, top, of source rectangle within source texture
// width of the destination rectangle, scale_factor (1 or 2)
@ -108,8 +85,8 @@ static void WriteSwizzler(char*& p, u32 format, APIType ApiType)
WRITE(p, " return float4(val) / float4(31.0, 63.0, 31.0, 1.0);\n");
WRITE(p, "}\n");
int blkW = TexDecoder_GetBlockWidthInTexels(format);
int blkH = TexDecoder_GetBlockHeightInTexels(format);
int blkW = TexDecoder_GetEFBCopyBlockWidthInTexels(format);
int blkH = TexDecoder_GetEFBCopyBlockHeightInTexels(format);
int samples = GetEncodedSampleCount(format);
if (ApiType == APIType::OpenGL)
@ -180,13 +157,13 @@ static void WriteSwizzler(char*& p, u32 format, APIType ApiType)
}
static void WriteSampleColor(char*& p, const char* colorComp, const char* dest, int xoffset,
APIType ApiType, const EFBCopyFormat& format, bool depth)
APIType ApiType, const EFBCopyParams& params)
{
WRITE(p, " %s = ", dest);
if (!depth)
if (!params.depth)
{
switch (format.efb_format)
switch (params.efb_format)
{
case PEControl::RGB8_Z24:
WRITE(p, "RGBA8ToRGB8(");
@ -246,21 +223,21 @@ static void WriteEncoderEnd(char*& p)
IntensityConstantAdded = false;
}
static void WriteI8Encoder(char*& p, APIType ApiType, const EFBCopyFormat& format)
static void WriteI8Encoder(char*& p, APIType ApiType, const EFBCopyParams& params)
{
WriteSwizzler(p, GX_TF_I8, ApiType);
WriteSwizzler(p, EFBCopyFormat::R8, ApiType);
WRITE(p, " float3 texSample;\n");
WriteSampleColor(p, "rgb", "texSample", 0, ApiType, format, false);
WriteSampleColor(p, "rgb", "texSample", 0, ApiType, params);
WriteColorToIntensity(p, "texSample", "ocol0.b");
WriteSampleColor(p, "rgb", "texSample", 1, ApiType, format, false);
WriteSampleColor(p, "rgb", "texSample", 1, ApiType, params);
WriteColorToIntensity(p, "texSample", "ocol0.g");
WriteSampleColor(p, "rgb", "texSample", 2, ApiType, format, false);
WriteSampleColor(p, "rgb", "texSample", 2, ApiType, params);
WriteColorToIntensity(p, "texSample", "ocol0.r");
WriteSampleColor(p, "rgb", "texSample", 3, ApiType, format, false);
WriteSampleColor(p, "rgb", "texSample", 3, ApiType, params);
WriteColorToIntensity(p, "texSample", "ocol0.a");
WRITE(p, " ocol0.rgba += IntensityConst.aaaa;\n"); // see WriteColorToIntensity
@ -268,35 +245,35 @@ static void WriteI8Encoder(char*& p, APIType ApiType, const EFBCopyFormat& forma
WriteEncoderEnd(p);
}
static void WriteI4Encoder(char*& p, APIType ApiType, const EFBCopyFormat& format)
static void WriteI4Encoder(char*& p, APIType ApiType, const EFBCopyParams& params)
{
WriteSwizzler(p, GX_TF_I4, ApiType);
WriteSwizzler(p, EFBCopyFormat::R4, ApiType);
WRITE(p, " float3 texSample;\n");
WRITE(p, " float4 color0;\n");
WRITE(p, " float4 color1;\n");
WriteSampleColor(p, "rgb", "texSample", 0, ApiType, format, false);
WriteSampleColor(p, "rgb", "texSample", 0, ApiType, params);
WriteColorToIntensity(p, "texSample", "color0.b");
WriteSampleColor(p, "rgb", "texSample", 1, ApiType, format, false);
WriteSampleColor(p, "rgb", "texSample", 1, ApiType, params);
WriteColorToIntensity(p, "texSample", "color1.b");
WriteSampleColor(p, "rgb", "texSample", 2, ApiType, format, false);
WriteSampleColor(p, "rgb", "texSample", 2, ApiType, params);
WriteColorToIntensity(p, "texSample", "color0.g");
WriteSampleColor(p, "rgb", "texSample", 3, ApiType, format, false);
WriteSampleColor(p, "rgb", "texSample", 3, ApiType, params);
WriteColorToIntensity(p, "texSample", "color1.g");
WriteSampleColor(p, "rgb", "texSample", 4, ApiType, format, false);
WriteSampleColor(p, "rgb", "texSample", 4, ApiType, params);
WriteColorToIntensity(p, "texSample", "color0.r");
WriteSampleColor(p, "rgb", "texSample", 5, ApiType, format, false);
WriteSampleColor(p, "rgb", "texSample", 5, ApiType, params);
WriteColorToIntensity(p, "texSample", "color1.r");
WriteSampleColor(p, "rgb", "texSample", 6, ApiType, format, false);
WriteSampleColor(p, "rgb", "texSample", 6, ApiType, params);
WriteColorToIntensity(p, "texSample", "color0.a");
WriteSampleColor(p, "rgb", "texSample", 7, ApiType, format, false);
WriteSampleColor(p, "rgb", "texSample", 7, ApiType, params);
WriteColorToIntensity(p, "texSample", "color1.a");
WRITE(p, " color0.rgba += IntensityConst.aaaa;\n");
@ -309,16 +286,16 @@ static void WriteI4Encoder(char*& p, APIType ApiType, const EFBCopyFormat& forma
WriteEncoderEnd(p);
}
static void WriteIA8Encoder(char*& p, APIType ApiType, const EFBCopyFormat& format)
static void WriteIA8Encoder(char*& p, APIType ApiType, const EFBCopyParams& params)
{
WriteSwizzler(p, GX_TF_IA8, ApiType);
WriteSwizzler(p, EFBCopyFormat::RA8, ApiType);
WRITE(p, " float4 texSample;\n");
WriteSampleColor(p, "rgba", "texSample", 0, ApiType, format, false);
WriteSampleColor(p, "rgba", "texSample", 0, ApiType, params);
WRITE(p, " ocol0.b = texSample.a;\n");
WriteColorToIntensity(p, "texSample", "ocol0.g");
WriteSampleColor(p, "rgba", "texSample", 1, ApiType, format, false);
WriteSampleColor(p, "rgba", "texSample", 1, ApiType, params);
WRITE(p, " ocol0.r = texSample.a;\n");
WriteColorToIntensity(p, "texSample", "ocol0.a");
@ -327,26 +304,26 @@ static void WriteIA8Encoder(char*& p, APIType ApiType, const EFBCopyFormat& form
WriteEncoderEnd(p);
}
static void WriteIA4Encoder(char*& p, APIType ApiType, const EFBCopyFormat& format)
static void WriteIA4Encoder(char*& p, APIType ApiType, const EFBCopyParams& params)
{
WriteSwizzler(p, GX_TF_IA4, ApiType);
WriteSwizzler(p, EFBCopyFormat::RA4, ApiType);
WRITE(p, " float4 texSample;\n");
WRITE(p, " float4 color0;\n");
WRITE(p, " float4 color1;\n");
WriteSampleColor(p, "rgba", "texSample", 0, ApiType, format, false);
WriteSampleColor(p, "rgba", "texSample", 0, ApiType, params);
WRITE(p, " color0.b = texSample.a;\n");
WriteColorToIntensity(p, "texSample", "color1.b");
WriteSampleColor(p, "rgba", "texSample", 1, ApiType, format, false);
WriteSampleColor(p, "rgba", "texSample", 1, ApiType, params);
WRITE(p, " color0.g = texSample.a;\n");
WriteColorToIntensity(p, "texSample", "color1.g");
WriteSampleColor(p, "rgba", "texSample", 2, ApiType, format, false);
WriteSampleColor(p, "rgba", "texSample", 2, ApiType, params);
WRITE(p, " color0.r = texSample.a;\n");
WriteColorToIntensity(p, "texSample", "color1.r");
WriteSampleColor(p, "rgba", "texSample", 3, ApiType, format, false);
WriteSampleColor(p, "rgba", "texSample", 3, ApiType, params);
WRITE(p, " color0.a = texSample.a;\n");
WriteColorToIntensity(p, "texSample", "color1.a");
@ -359,14 +336,14 @@ static void WriteIA4Encoder(char*& p, APIType ApiType, const EFBCopyFormat& form
WriteEncoderEnd(p);
}
static void WriteRGB565Encoder(char*& p, APIType ApiType, const EFBCopyFormat& format)
static void WriteRGB565Encoder(char*& p, APIType ApiType, const EFBCopyParams& params)
{
WriteSwizzler(p, GX_TF_RGB565, ApiType);
WriteSwizzler(p, EFBCopyFormat::RGB565, ApiType);
WRITE(p, " float3 texSample0;\n");
WRITE(p, " float3 texSample1;\n");
WriteSampleColor(p, "rgb", "texSample0", 0, ApiType, format, false);
WriteSampleColor(p, "rgb", "texSample1", 1, ApiType, format, false);
WriteSampleColor(p, "rgb", "texSample0", 0, ApiType, params);
WriteSampleColor(p, "rgb", "texSample1", 1, ApiType, params);
WRITE(p, " float2 texRs = float2(texSample0.r, texSample1.r);\n");
WRITE(p, " float2 texGs = float2(texSample0.g, texSample1.g);\n");
WRITE(p, " float2 texBs = float2(texSample0.b, texSample1.b);\n");
@ -384,16 +361,16 @@ static void WriteRGB565Encoder(char*& p, APIType ApiType, const EFBCopyFormat& f
WriteEncoderEnd(p);
}
static void WriteRGB5A3Encoder(char*& p, APIType ApiType, const EFBCopyFormat& format)
static void WriteRGB5A3Encoder(char*& p, APIType ApiType, const EFBCopyParams& params)
{
WriteSwizzler(p, GX_TF_RGB5A3, ApiType);
WriteSwizzler(p, EFBCopyFormat::RGB5A3, ApiType);
WRITE(p, " float4 texSample;\n");
WRITE(p, " float color0;\n");
WRITE(p, " float gUpper;\n");
WRITE(p, " float gLower;\n");
WriteSampleColor(p, "rgba", "texSample", 0, ApiType, format, false);
WriteSampleColor(p, "rgba", "texSample", 0, ApiType, params);
// 0.8784 = 224 / 255 which is the maximum alpha value that can be represented in 3 bits
WRITE(p, "if(texSample.a > 0.878f) {\n");
@ -419,7 +396,7 @@ static void WriteRGB5A3Encoder(char*& p, APIType ApiType, const EFBCopyFormat& f
WRITE(p, "}\n");
WriteSampleColor(p, "rgba", "texSample", 1, ApiType, format, false);
WriteSampleColor(p, "rgba", "texSample", 1, ApiType, params);
WRITE(p, "if(texSample.a > 0.878f) {\n");
@ -448,21 +425,21 @@ static void WriteRGB5A3Encoder(char*& p, APIType ApiType, const EFBCopyFormat& f
WriteEncoderEnd(p);
}
static void WriteRGBA8Encoder(char*& p, APIType ApiType, const EFBCopyFormat& format)
static void WriteRGBA8Encoder(char*& p, APIType ApiType, const EFBCopyParams& params)
{
WriteSwizzler(p, GX_TF_RGBA8, ApiType);
WriteSwizzler(p, EFBCopyFormat::RGBA8, ApiType);
WRITE(p, " float4 texSample;\n");
WRITE(p, " float4 color0;\n");
WRITE(p, " float4 color1;\n");
WriteSampleColor(p, "rgba", "texSample", 0, ApiType, format, false);
WriteSampleColor(p, "rgba", "texSample", 0, ApiType, params);
WRITE(p, " color0.b = texSample.a;\n");
WRITE(p, " color0.g = texSample.r;\n");
WRITE(p, " color1.b = texSample.g;\n");
WRITE(p, " color1.g = texSample.b;\n");
WriteSampleColor(p, "rgba", "texSample", 1, ApiType, format, false);
WriteSampleColor(p, "rgba", "texSample", 1, ApiType, params);
WRITE(p, " color0.r = texSample.a;\n");
WRITE(p, " color0.a = texSample.r;\n");
WRITE(p, " color1.r = texSample.g;\n");
@ -473,21 +450,20 @@ static void WriteRGBA8Encoder(char*& p, APIType ApiType, const EFBCopyFormat& fo
WriteEncoderEnd(p);
}
static void WriteC4Encoder(char*& p, const char* comp, APIType ApiType, const EFBCopyFormat& format,
bool depth)
static void WriteC4Encoder(char*& p, const char* comp, APIType ApiType, const EFBCopyParams& params)
{
WriteSwizzler(p, GX_CTF_R4, ApiType);
WriteSwizzler(p, EFBCopyFormat::R4, ApiType);
WRITE(p, " float4 color0;\n");
WRITE(p, " float4 color1;\n");
WriteSampleColor(p, comp, "color0.b", 0, ApiType, format, depth);
WriteSampleColor(p, comp, "color1.b", 1, ApiType, format, depth);
WriteSampleColor(p, comp, "color0.g", 2, ApiType, format, depth);
WriteSampleColor(p, comp, "color1.g", 3, ApiType, format, depth);
WriteSampleColor(p, comp, "color0.r", 4, ApiType, format, depth);
WriteSampleColor(p, comp, "color1.r", 5, ApiType, format, depth);
WriteSampleColor(p, comp, "color0.a", 6, ApiType, format, depth);
WriteSampleColor(p, comp, "color1.a", 7, ApiType, format, depth);
WriteSampleColor(p, comp, "color0.b", 0, ApiType, params);
WriteSampleColor(p, comp, "color1.b", 1, ApiType, params);
WriteSampleColor(p, comp, "color0.g", 2, ApiType, params);
WriteSampleColor(p, comp, "color1.g", 3, ApiType, params);
WriteSampleColor(p, comp, "color0.r", 4, ApiType, params);
WriteSampleColor(p, comp, "color1.r", 5, ApiType, params);
WriteSampleColor(p, comp, "color0.a", 6, ApiType, params);
WriteSampleColor(p, comp, "color1.a", 7, ApiType, params);
WriteToBitDepth(p, 4, "color0", "color0");
WriteToBitDepth(p, 4, "color1", "color1");
@ -496,40 +472,39 @@ static void WriteC4Encoder(char*& p, const char* comp, APIType ApiType, const EF
WriteEncoderEnd(p);
}
// Appends to `p` the encoder shader body that copies a single colour component.
//
// `comp` names the component to sample; GenerateEncodingShader passes "a", "r", "g" or "b".
// The swizzler prologue is always written for the R8 format, whichever component is chosen.
// Four samples are taken at x offsets 0..3 and stored into ocol0.b, ocol0.g, ocol0.r and
// ocol0.a, in that order, before the common epilogue is written.
//
// NOTE(review): pre-change and post-change lines are interleaved below. The pre-change
// version took (format, depth) and forwarded `depth` to WriteSampleColor; the post-change
// version takes a single EFBCopyParams, and WriteSampleColor reads params.depth itself.
static void WriteC8Encoder(char*& p, const char* comp, APIType ApiType, const EFBCopyFormat& format,
bool depth)
static void WriteC8Encoder(char*& p, const char* comp, APIType ApiType, const EFBCopyParams& params)
{
WriteSwizzler(p, GX_CTF_R8, ApiType);
WriteSwizzler(p, EFBCopyFormat::R8, ApiType);
WriteSampleColor(p, comp, "ocol0.b", 0, ApiType, format, depth);
WriteSampleColor(p, comp, "ocol0.g", 1, ApiType, format, depth);
WriteSampleColor(p, comp, "ocol0.r", 2, ApiType, format, depth);
WriteSampleColor(p, comp, "ocol0.a", 3, ApiType, format, depth);
WriteSampleColor(p, comp, "ocol0.b", 0, ApiType, params);
WriteSampleColor(p, comp, "ocol0.g", 1, ApiType, params);
WriteSampleColor(p, comp, "ocol0.r", 2, ApiType, params);
WriteSampleColor(p, comp, "ocol0.a", 3, ApiType, params);
WriteEncoderEnd(p);
}
static void WriteCC4Encoder(char*& p, const char* comp, APIType ApiType,
const EFBCopyFormat& format)
const EFBCopyParams& params)
{
WriteSwizzler(p, GX_CTF_RA4, ApiType);
WriteSwizzler(p, EFBCopyFormat::RA4, ApiType);
WRITE(p, " float2 texSample;\n");
WRITE(p, " float4 color0;\n");
WRITE(p, " float4 color1;\n");
WriteSampleColor(p, comp, "texSample", 0, ApiType, format, false);
WriteSampleColor(p, comp, "texSample", 0, ApiType, params);
WRITE(p, " color0.b = texSample.x;\n");
WRITE(p, " color1.b = texSample.y;\n");
WriteSampleColor(p, comp, "texSample", 1, ApiType, format, false);
WriteSampleColor(p, comp, "texSample", 1, ApiType, params);
WRITE(p, " color0.g = texSample.x;\n");
WRITE(p, " color1.g = texSample.y;\n");
WriteSampleColor(p, comp, "texSample", 2, ApiType, format, false);
WriteSampleColor(p, comp, "texSample", 2, ApiType, params);
WRITE(p, " color0.r = texSample.x;\n");
WRITE(p, " color1.r = texSample.y;\n");
WriteSampleColor(p, comp, "texSample", 3, ApiType, format, false);
WriteSampleColor(p, comp, "texSample", 3, ApiType, params);
WRITE(p, " color0.a = texSample.x;\n");
WRITE(p, " color1.a = texSample.y;\n");
@ -541,48 +516,48 @@ static void WriteCC4Encoder(char*& p, const char* comp, APIType ApiType,
}
// Appends to `p` the encoder shader body that copies a pair of colour components.
//
// `comp` names the two components to sample; GenerateEncodingShader passes "ar", "rg" or
// "gb". The swizzler prologue is always written for the RA8 format. Two samples are taken:
// the one at x offset 0 is stored into ocol0.bg and the one at x offset 1 into ocol0.ra.
//
// NOTE(review): pre-change and post-change lines are interleaved below. The pre-change
// calls hard-coded depth = false when sampling; the post-change calls hand the whole
// EFBCopyParams to WriteSampleColor, which checks params.depth itself.
static void WriteCC8Encoder(char*& p, const char* comp, APIType ApiType,
const EFBCopyFormat& format)
const EFBCopyParams& params)
{
WriteSwizzler(p, GX_CTF_RA8, ApiType);
WriteSwizzler(p, EFBCopyFormat::RA8, ApiType);
WriteSampleColor(p, comp, "ocol0.bg", 0, ApiType, format, false);
WriteSampleColor(p, comp, "ocol0.ra", 1, ApiType, format, false);
WriteSampleColor(p, comp, "ocol0.bg", 0, ApiType, params);
WriteSampleColor(p, comp, "ocol0.ra", 1, ApiType, params);
WriteEncoderEnd(p);
}
// Appends to `p` the encoder shader body that extracts one byte-sized slice of the depth
// value for four neighbouring samples.
//
// `multiplier` is pasted verbatim into the generated shader as the factor inside
// frac(depth * multiplier). GenerateEncodingShader passes "256.0" (labelled Z8M there) and
// "65536.0" (labelled Z8L there).
// For each x offset 0..3 the "r" component is sampled into the shader variable `depth`, and
// frac(depth * multiplier) is written to ocol0.b, ocol0.g, ocol0.r and ocol0.a respectively.
//
// NOTE(review): pre-change and post-change lines are interleaved below. The pre-change
// calls forced depth = true when sampling; the post-change calls depend on params.depth
// being set by the caller (GenerateEncodingShader only reaches this function when
// params.depth is true). The swizzler format moved from GX_CTF_Z8M to EFBCopyFormat::G8.
static void WriteZ8Encoder(char*& p, const char* multiplier, APIType ApiType,
const EFBCopyFormat& format)
const EFBCopyParams& params)
{
WriteSwizzler(p, GX_CTF_Z8M, ApiType);
WriteSwizzler(p, EFBCopyFormat::G8, ApiType);
WRITE(p, " float depth;\n");
WriteSampleColor(p, "r", "depth", 0, ApiType, format, true);
WriteSampleColor(p, "r", "depth", 0, ApiType, params);
WRITE(p, "ocol0.b = frac(depth * %s);\n", multiplier);
WriteSampleColor(p, "r", "depth", 1, ApiType, format, true);
WriteSampleColor(p, "r", "depth", 1, ApiType, params);
WRITE(p, "ocol0.g = frac(depth * %s);\n", multiplier);
WriteSampleColor(p, "r", "depth", 2, ApiType, format, true);
WriteSampleColor(p, "r", "depth", 2, ApiType, params);
WRITE(p, "ocol0.r = frac(depth * %s);\n", multiplier);
WriteSampleColor(p, "r", "depth", 3, ApiType, format, true);
WriteSampleColor(p, "r", "depth", 3, ApiType, params);
WRITE(p, "ocol0.a = frac(depth * %s);\n", multiplier);
WriteEncoderEnd(p);
}
static void WriteZ16Encoder(char*& p, APIType ApiType, const EFBCopyFormat& format)
static void WriteZ16Encoder(char*& p, APIType ApiType, const EFBCopyParams& params)
{
WriteSwizzler(p, GX_TF_Z16, ApiType);
WriteSwizzler(p, EFBCopyFormat::RA8, ApiType);
WRITE(p, " float depth;\n");
WRITE(p, " float3 expanded;\n");
// byte order is reversed
WriteSampleColor(p, "r", "depth", 0, ApiType, format, true);
WriteSampleColor(p, "r", "depth", 0, ApiType, params);
WRITE(p, " depth *= 16777216.0;\n");
WRITE(p, " expanded.r = floor(depth / (256.0 * 256.0));\n");
@ -592,7 +567,7 @@ static void WriteZ16Encoder(char*& p, APIType ApiType, const EFBCopyFormat& form
WRITE(p, " ocol0.b = expanded.g / 255.0;\n");
WRITE(p, " ocol0.g = expanded.r / 255.0;\n");
WriteSampleColor(p, "r", "depth", 1, ApiType, format, true);
WriteSampleColor(p, "r", "depth", 1, ApiType, params);
WRITE(p, " depth *= 16777216.0;\n");
WRITE(p, " expanded.r = floor(depth / (256.0 * 256.0));\n");
@ -605,16 +580,16 @@ static void WriteZ16Encoder(char*& p, APIType ApiType, const EFBCopyFormat& form
WriteEncoderEnd(p);
}
static void WriteZ16LEncoder(char*& p, APIType ApiType, const EFBCopyFormat& format)
static void WriteZ16LEncoder(char*& p, APIType ApiType, const EFBCopyParams& params)
{
WriteSwizzler(p, GX_CTF_Z16L, ApiType);
WriteSwizzler(p, EFBCopyFormat::GB8, ApiType);
WRITE(p, " float depth;\n");
WRITE(p, " float3 expanded;\n");
// byte order is reversed
WriteSampleColor(p, "r", "depth", 0, ApiType, format, true);
WriteSampleColor(p, "r", "depth", 0, ApiType, params);
WRITE(p, " depth *= 16777216.0;\n");
WRITE(p, " expanded.r = floor(depth / (256.0 * 256.0));\n");
@ -626,7 +601,7 @@ static void WriteZ16LEncoder(char*& p, APIType ApiType, const EFBCopyFormat& for
WRITE(p, " ocol0.b = expanded.b / 255.0;\n");
WRITE(p, " ocol0.g = expanded.g / 255.0;\n");
WriteSampleColor(p, "r", "depth", 1, ApiType, format, true);
WriteSampleColor(p, "r", "depth", 1, ApiType, params);
WRITE(p, " depth *= 16777216.0;\n");
WRITE(p, " expanded.r = floor(depth / (256.0 * 256.0));\n");
@ -641,17 +616,17 @@ static void WriteZ16LEncoder(char*& p, APIType ApiType, const EFBCopyFormat& for
WriteEncoderEnd(p);
}
static void WriteZ24Encoder(char*& p, APIType ApiType, const EFBCopyFormat& format)
static void WriteZ24Encoder(char*& p, APIType ApiType, const EFBCopyParams& params)
{
WriteSwizzler(p, GX_TF_Z24X8, ApiType);
WriteSwizzler(p, EFBCopyFormat::RGBA8, ApiType);
WRITE(p, " float depth0;\n");
WRITE(p, " float depth1;\n");
WRITE(p, " float3 expanded0;\n");
WRITE(p, " float3 expanded1;\n");
WriteSampleColor(p, "r", "depth0", 0, ApiType, format, true);
WriteSampleColor(p, "r", "depth1", 1, ApiType, format, true);
WriteSampleColor(p, "r", "depth0", 0, ApiType, params);
WriteSampleColor(p, "r", "depth1", 1, ApiType, params);
for (int i = 0; i < 2; i++)
{
@ -681,87 +656,81 @@ static void WriteZ24Encoder(char*& p, APIType ApiType, const EFBCopyFormat& form
WriteEncoderEnd(p);
}
const char* GenerateEncodingShader(const EFBCopyFormat& format, APIType api_type)
const char* GenerateEncodingShader(const EFBCopyParams& params, APIType api_type)
{
text[sizeof(text) - 1] = 0x7C; // canary
char* p = text;
switch (format.copy_format)
switch (params.copy_format)
{
case GX_TF_I4:
WriteI4Encoder(p, api_type, format);
case EFBCopyFormat::R4:
if (params.yuv)
WriteI4Encoder(p, api_type, params);
else
WriteC4Encoder(p, "r", api_type, params);
break;
case GX_TF_I8:
WriteI8Encoder(p, api_type, format);
case EFBCopyFormat::RA4:
if (params.yuv)
WriteIA4Encoder(p, api_type, params);
else
WriteCC4Encoder(p, "ar", api_type, params);
break;
case GX_TF_IA4:
WriteIA4Encoder(p, api_type, format);
case EFBCopyFormat::RA8:
if (params.yuv)
WriteIA8Encoder(p, api_type, params);
else
WriteCC8Encoder(p, "ar", api_type, params);
break;
case GX_TF_IA8:
WriteIA8Encoder(p, api_type, format);
case EFBCopyFormat::RGB565:
WriteRGB565Encoder(p, api_type, params);
break;
case GX_TF_RGB565:
WriteRGB565Encoder(p, api_type, format);
case EFBCopyFormat::RGB5A3:
WriteRGB5A3Encoder(p, api_type, params);
break;
case GX_TF_RGB5A3:
WriteRGB5A3Encoder(p, api_type, format);
case EFBCopyFormat::RGBA8:
if (params.depth)
WriteZ24Encoder(p, api_type, params);
else
WriteRGBA8Encoder(p, api_type, params);
break;
case GX_TF_RGBA8:
WriteRGBA8Encoder(p, api_type, format);
case EFBCopyFormat::A8:
WriteC8Encoder(p, "a", api_type, params);
break;
case GX_CTF_R4:
WriteC4Encoder(p, "r", api_type, format, false);
case EFBCopyFormat::R8_0x1:
case EFBCopyFormat::R8:
if (params.yuv)
WriteI8Encoder(p, api_type, params);
else
WriteC8Encoder(p, "r", api_type, params);
break;
case GX_CTF_RA4:
WriteCC4Encoder(p, "ar", api_type, format);
case EFBCopyFormat::G8:
if (params.depth)
WriteZ8Encoder(p, "256.0", api_type, params); // Z8M
else
WriteC8Encoder(p, "g", api_type, params);
break;
case GX_CTF_RA8:
WriteCC8Encoder(p, "ar", api_type, format);
case EFBCopyFormat::B8:
if (params.depth)
WriteZ8Encoder(p, "65536.0", api_type, params); // Z8L
else
WriteC8Encoder(p, "b", api_type, params);
break;
case GX_CTF_A8:
WriteC8Encoder(p, "a", api_type, format, false);
case EFBCopyFormat::RG8:
if (params.depth)
WriteZ16Encoder(p, api_type, params); // Z16H
else
WriteCC8Encoder(p, "rg", api_type, params);
break;
case GX_CTF_R8:
WriteC8Encoder(p, "r", api_type, format, false);
break;
case GX_CTF_G8:
WriteC8Encoder(p, "g", api_type, format, false);
break;
case GX_CTF_B8:
WriteC8Encoder(p, "b", api_type, format, false);
break;
case GX_CTF_RG8:
WriteCC8Encoder(p, "rg", api_type, format);
break;
case GX_CTF_GB8:
WriteCC8Encoder(p, "gb", api_type, format);
break;
case GX_CTF_Z8H:
case GX_TF_Z8:
WriteC8Encoder(p, "r", api_type, format, true);
break;
case GX_CTF_Z16R:
case GX_TF_Z16:
WriteZ16Encoder(p, api_type, format);
break;
case GX_TF_Z24X8:
WriteZ24Encoder(p, api_type, format);
break;
case GX_CTF_Z4:
WriteC4Encoder(p, "r", api_type, format, true);
break;
case GX_CTF_Z8M:
WriteZ8Encoder(p, "256.0", api_type, format);
break;
case GX_CTF_Z8L:
WriteZ8Encoder(p, "65536.0", api_type, format);
break;
case GX_CTF_Z16L:
WriteZ16LEncoder(p, api_type, format);
case EFBCopyFormat::GB8:
if (params.depth)
WriteZ16LEncoder(p, api_type, params); // Z16L
else
WriteCC8Encoder(p, "gb", api_type, params);
break;
default:
PanicAlert("Unknown texture copy format: 0x%x\n", static_cast<u32>(format.copy_format));
PanicAlert("Invalid EFB Copy Format (0x%X)! (GenerateEncodingShader)",
static_cast<int>(params.copy_format));
break;
}
@ -895,7 +864,7 @@ vec4 GetPaletteColorNormalized(uint index)
)";
static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
{GX_TF_I4,
{TextureFormat::I4,
{BUFFER_FORMAT_R8_UINT, 0, 8, 8, false,
R"(
layout(local_size_x = 8, local_size_y = 8) in;
@ -930,7 +899,7 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
}
)"}},
{GX_TF_IA4,
{TextureFormat::IA4,
{BUFFER_FORMAT_R8_UINT, 0, 8, 8, false,
R"(
layout(local_size_x = 8, local_size_y = 8) in;
@ -950,7 +919,7 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
}
)"}},
{GX_TF_I8,
{TextureFormat::I8,
{BUFFER_FORMAT_R8_UINT, 0, 8, 8, false,
R"(
layout(local_size_x = 8, local_size_y = 8) in;
@ -968,7 +937,7 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
}
)"}},
{GX_TF_IA8,
{TextureFormat::IA8,
{BUFFER_FORMAT_R16_UINT, 0, 8, 8, false,
R"(
layout(local_size_x = 8, local_size_y = 8) in;
@ -987,7 +956,7 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
}
)"}},
{GX_TF_RGB565,
{TextureFormat::RGB565,
{BUFFER_FORMAT_R16_UINT, 0, 8, 8, false,
R"(
layout(local_size_x = 8, local_size_y = 8) in;
@ -1011,7 +980,7 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
}
)"}},
{GX_TF_RGB5A3,
{TextureFormat::RGB5A3,
{BUFFER_FORMAT_R16_UINT, 0, 8, 8, false,
R"(
layout(local_size_x = 8, local_size_y = 8) in;
@ -1045,7 +1014,7 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
}
)"}},
{GX_TF_RGBA8,
{TextureFormat::RGBA8,
{BUFFER_FORMAT_R16_UINT, 0, 8, 8, false,
R"(
layout(local_size_x = 8, local_size_y = 8) in;
@ -1081,7 +1050,7 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
}
)"}},
{GX_TF_CMPR,
{TextureFormat::CMPR,
{BUFFER_FORMAT_R32G32_UINT, 0, 64, 1, true,
R"(
// In the compute version of this decoder, we flatten the blocks to a one-dimension array.
@ -1199,8 +1168,9 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
imageStore(output_image, ivec3(ivec2(uvec2(global_x, global_y)), 0), norm_color);
}
)"}},
{GX_TF_C4,
{BUFFER_FORMAT_R8_UINT, static_cast<u32>(TexDecoder_GetPaletteSize(GX_TF_C4)), 8, 8, false,
{TextureFormat::C4,
{BUFFER_FORMAT_R8_UINT, static_cast<u32>(TexDecoder_GetPaletteSize(TextureFormat::C4)), 8, 8,
false,
R"(
layout(local_size_x = 8, local_size_y = 8) in;
@ -1227,8 +1197,9 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
}
)"}},
{GX_TF_C8,
{BUFFER_FORMAT_R8_UINT, static_cast<u32>(TexDecoder_GetPaletteSize(GX_TF_C8)), 8, 8, false,
{TextureFormat::C8,
{BUFFER_FORMAT_R8_UINT, static_cast<u32>(TexDecoder_GetPaletteSize(TextureFormat::C8)), 8, 8,
false,
R"(
layout(local_size_x = 8, local_size_y = 8) in;
@ -1243,8 +1214,9 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
}
)"}},
{GX_TF_C14X2,
{BUFFER_FORMAT_R16_UINT, static_cast<u32>(TexDecoder_GetPaletteSize(GX_TF_C14X2)), 8, 8, false,
{TextureFormat::C14X2,
{BUFFER_FORMAT_R16_UINT, static_cast<u32>(TexDecoder_GetPaletteSize(TextureFormat::C14X2)), 8,
8, false,
R"(
layout(local_size_x = 8, local_size_y = 8) in;
@ -1287,7 +1259,7 @@ std::pair<u32, u32> GetDispatchCount(const DecodingShaderInfo* info, u32 width,
(height + (info->group_size_y - 1)) / info->group_size_y};
}
std::string GenerateDecodingShader(TextureFormat format, TlutFormat palette_format,
std::string GenerateDecodingShader(TextureFormat format, TLUTFormat palette_format,
APIType api_type)
{
const DecodingShaderInfo* info = GetDecodingShaderInfo(format);
@ -1297,13 +1269,13 @@ std::string GenerateDecodingShader(TextureFormat format, TlutFormat palette_form
std::stringstream ss;
switch (palette_format)
{
case GX_TL_IA8:
case TLUTFormat::IA8:
ss << "#define PALETTE_FORMAT_IA8 1\n";
break;
case GX_TL_RGB565:
case TLUTFormat::RGB565:
ss << "#define PALETTE_FORMAT_RGB565 1\n";
break;
case GX_TL_RGB5A3:
case TLUTFormat::RGB5A3:
ss << "#define PALETTE_FORMAT_RGB5A3 1\n";
break;
}

View File

@ -8,15 +8,18 @@
#include <utility>
#include "Common/CommonTypes.h"
#include "VideoCommon/TextureDecoder.h"
enum class APIType;
enum class TextureFormat;
enum class EFBCopyFormat;
enum class TLUTFormat;
struct EFBCopyParams;
namespace TextureConversionShader
{
u16 GetEncodedSampleCount(u32 format);
u16 GetEncodedSampleCount(EFBCopyFormat format);
const char* GenerateEncodingShader(const EFBCopyFormat& format, APIType ApiType);
const char* GenerateEncodingShader(const EFBCopyParams& params, APIType ApiType);
// View format of the input data to the texture decoding shader.
enum BufferFormat
@ -51,7 +54,7 @@ u32 GetBytesPerBufferElement(BufferFormat buffer_format);
std::pair<u32, u32> GetDispatchCount(const DecodingShaderInfo* info, u32 width, u32 height);
// Returns the GLSL string containing the texture decoding shader for the specified format.
std::string GenerateDecodingShader(TextureFormat format, TlutFormat palette_format,
std::string GenerateDecodingShader(TextureFormat format, TLUTFormat palette_format,
APIType api_type);
} // namespace TextureConversionShader

View File

@ -14,94 +14,101 @@ enum
};
alignas(16) extern u8 texMem[TMEM_SIZE];
// Texture formats.
// NOTE(review): this listing interleaves two versions of the enum.
//  - Pre-change: a plain `enum TextureFormat` whose GX_TF_* / GX_CTF_* enumerators also
//    carried the Dolphin-internal flag bits _GX_TF_ZTF (0x10) and _GX_TF_CTF (0x20) to tag
//    depth and copy-only formats.
//  - Post-change: `enum class TextureFormat` with only the eleven values I4 .. CMPR, with
//    no flag bits.
enum TextureFormat
enum class TextureFormat
{
// These are the texture formats that can be read by the texture mapper.
GX_TF_I4 = 0x0,
GX_TF_I8 = 0x1,
GX_TF_IA4 = 0x2,
GX_TF_IA8 = 0x3,
GX_TF_RGB565 = 0x4,
GX_TF_RGB5A3 = 0x5,
GX_TF_RGBA8 = 0x6,
GX_TF_C4 = 0x8,
GX_TF_C8 = 0x9,
GX_TF_C14X2 = 0xA,
GX_TF_CMPR = 0xE,
_GX_TF_ZTF = 0x10, // flag for Z texture formats (used internally by dolphin)
// Depth texture formats (which directly map to the equivalent colour format above.)
GX_TF_Z8 = 0x1 | _GX_TF_ZTF,
GX_TF_Z16 = 0x3 | _GX_TF_ZTF,
GX_TF_Z24X8 = 0x6 | _GX_TF_ZTF,
_GX_TF_CTF = 0x20, // flag for copy-texture-format only (used internally by dolphin)
// These are extra formats that can be used when copying from efb,
// they use one of texel formats from above, but pack different data into them.
GX_CTF_R4 = 0x0 | _GX_TF_CTF,
GX_CTF_RA4 = 0x2 | _GX_TF_CTF,
GX_CTF_RA8 = 0x3 | _GX_TF_CTF,
GX_CTF_YUVA8 = 0x6 | _GX_TF_CTF, // YUV 4:4:4 - Dolphin doesn't implement this format as no
// commercial games use it
GX_CTF_A8 = 0x7 | _GX_TF_CTF,
GX_CTF_R8 = 0x8 | _GX_TF_CTF,
GX_CTF_G8 = 0x9 | _GX_TF_CTF,
GX_CTF_B8 = 0xA | _GX_TF_CTF,
GX_CTF_RG8 = 0xB | _GX_TF_CTF,
GX_CTF_GB8 = 0xC | _GX_TF_CTF,
// extra depth texture formats that can be used for efb copies.
GX_CTF_Z4 = 0x0 | _GX_TF_ZTF | _GX_TF_CTF,
GX_CTF_Z8H = 0x8 | _GX_TF_ZTF | _GX_TF_CTF, // This produces an identical result to GX_TF_Z8
GX_CTF_Z8M = 0x9 | _GX_TF_ZTF | _GX_TF_CTF,
GX_CTF_Z8L = 0xA | _GX_TF_ZTF | _GX_TF_CTF,
GX_CTF_Z16R = 0xB | _GX_TF_ZTF | _GX_TF_CTF, // Reversed version of GX_TF_Z16
GX_CTF_Z16L = 0xC | _GX_TF_ZTF | _GX_TF_CTF,
// These values represent texture format in GX registers.
I4 = 0x0,
I8 = 0x1,
IA4 = 0x2,
IA8 = 0x3,
RGB565 = 0x4,
RGB5A3 = 0x5,
RGBA8 = 0x6,
C4 = 0x8,
C8 = 0x9,
C14X2 = 0xA,
CMPR = 0xE,
};
// NOTE(review): two unrelated declarations are interleaved here.
//  - Pre-change: `enum TlutFormat` with GX_TL_IA8 / GX_TL_RGB565 / GX_TL_RGB5A3 (0x0..0x2).
//  - Post-change: IsColorIndexed(), which returns true exactly when `format` is
//    TextureFormat::C4, TextureFormat::C8 or TextureFormat::C14X2 and false otherwise.
//    These are the same three formats for which the decoding-shader table asks
//    TexDecoder_GetPaletteSize for a palette size.
enum TlutFormat
static inline bool IsColorIndexed(TextureFormat format)
{
GX_TL_IA8 = 0x0,
GX_TL_RGB565 = 0x1,
GX_TL_RGB5A3 = 0x2,
return format == TextureFormat::C4 || format == TextureFormat::C8 ||
format == TextureFormat::C14X2;
}
// The EFB Copy pipeline looks like:
//
// 1. Read EFB -> 2. Select color/depth -> 3. Downscale (optional)
// -> 4. YUV conversion (optional) -> 5. Encode Tiles -> 6. Write RAM
//
// The "Encode Tiles" stage receives RGBA8 texels from previous stages and encodes them to various
// formats. EFBCopyFormat is the tile encoder mode. Note that the tile encoder does not care about
// color vs. depth or intensity formats - it only sees RGBA8 texels.
// Tile-encoder modes for EFB copies.
enum class EFBCopyFormat
{
  // Each enumerator carries the EFB copy format code used in the GX registers.
  // Many of these codes coincide with TextureFormat codes, but not all of them do.

  // Serves R4, I4 and Z4.
  R4 = 0x0,

  // Serves R8, I8 and (possibly) Z8H.
  // FIXME: Is the result of 0x1 (Z8) identical to that of 0x8 (Z8H)?
  //        Do games use 0x1, 0x8, or both?
  R8_0x1 = 0x1,

  // Serves RA4 and IA4.
  RA4 = 0x2,

  // Serves RA8 and IA8 (FIXME: Z16 as well?).
  // FIXME: Older revisions of this file called the value 0x3 "GX_TF_Z16". That name does not
  //        describe what one would expect from a depth-buffer copy in this format.
  //        For reference: on a depth-buffer copy, R should receive the top 8 bits of the Z
  //        value and A should be either 0xFF or 0 (please investigate).
  //        Please test on original hardware and make sure dolphin-emu implements this format
  //        correctly.
  RA8 = 0x3,

  RGB565 = 0x4,
  RGB5A3 = 0x5,

  // Serves RGBA8 and Z24.
  RGBA8 = 0x6,

  A8 = 0x7,

  // Serves R8, I8 and Z8H.
  R8 = 0x8,

  // Serves G8 and Z8M.
  G8 = 0x9,

  // Serves B8 and Z8L.
  B8 = 0xA,

  // Serves RG8 and Z16R (note: G and R are reversed).
  RG8 = 0xB,

  // Serves GB8 and Z16L.
  GB8 = 0xC,
};
// NOTE(review): two unrelated declarations are interleaved here.
//  - Pre-change: `struct EFBCopyFormat`, a pair of (efb_format, copy_format). Its operator<
//    compares efb_format first and copy_format second via std::tie.
//  - Post-change: `enum class TLUTFormat`, whose three enumerators are the comment line and
//    the IA8 / RGB565 / RGB5A3 lines near the end of this span.
struct EFBCopyFormat
enum class TLUTFormat
{
EFBCopyFormat(u32 efb_format_, TextureFormat copy_format_)
: efb_format(efb_format_), copy_format(copy_format_)
{
}
bool operator<(const EFBCopyFormat& rhs) const
{
return std::tie(efb_format, copy_format) < std::tie(rhs.efb_format, rhs.copy_format);
}
u32 efb_format;
TextureFormat copy_format;
// These values represent TLUT format in GX registers.
IA8 = 0x0,
RGB565 = 0x1,
RGB5A3 = 0x2,
};
int TexDecoder_GetTexelSizeInNibbles(int format);
int TexDecoder_GetTextureSizeInBytes(int width, int height, int format);
int TexDecoder_GetBlockWidthInTexels(u32 format);
int TexDecoder_GetBlockHeightInTexels(u32 format);
int TexDecoder_GetPaletteSize(int fmt);
int TexDecoder_GetEfbCopyBaseFormat(int format);
// Reports whether tlutfmt holds one of the three TLUT formats declared in TLUTFormat
// (IA8, RGB565 or RGB5A3). Any other underlying value yields false.
static inline bool IsValidTLUTFormat(TLUTFormat tlutfmt)
{
  switch (tlutfmt)
  {
  case TLUTFormat::IA8:
  case TLUTFormat::RGB565:
  case TLUTFormat::RGB5A3:
    return true;
  default:
    // Value does not name a known TLUT format (e.g. it came from an unchecked cast).
    return false;
  }
}
void TexDecoder_Decode(u8* dst, const u8* src, int width, int height, int texformat, const u8* tlut,
TlutFormat tlutfmt);
int TexDecoder_GetTexelSizeInNibbles(TextureFormat format);
int TexDecoder_GetTextureSizeInBytes(int width, int height, TextureFormat format);
int TexDecoder_GetBlockWidthInTexels(TextureFormat format);
int TexDecoder_GetBlockHeightInTexels(TextureFormat format);
int TexDecoder_GetEFBCopyBlockWidthInTexels(EFBCopyFormat format);
int TexDecoder_GetEFBCopyBlockHeightInTexels(EFBCopyFormat format);
int TexDecoder_GetPaletteSize(TextureFormat fmt);
TextureFormat TexDecoder_GetEFBCopyBaseFormat(EFBCopyFormat format);
void TexDecoder_Decode(u8* dst, const u8* src, int width, int height, TextureFormat texformat,
const u8* tlut, TLUTFormat tlutfmt);
void TexDecoder_DecodeRGBA8FromTmem(u8* dst, const u8* src_ar, const u8* src_gb, int width,
int height);
void TexDecoder_DecodeTexel(u8* dst, const u8* src, int s, int t, int imageWidth, int texformat,
const u8* tlut, TlutFormat tlutfmt);
void TexDecoder_DecodeTexel(u8* dst, const u8* src, int s, int t, int imageWidth,
TextureFormat texformat, const u8* tlut, TLUTFormat tlutfmt);
void TexDecoder_DecodeTexelRGBA8FromTmem(u8* dst, const u8* src_ar, const u8* src_gb, int s, int t,
int imageWidth);
void TexDecoder_SetTexFmtOverlayOptions(bool enable, bool center);
/* Internal method, implemented by TextureDecoder_Generic and TextureDecoder_x64. */
void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int texformat,
const u8* tlut, TlutFormat tlutfmt);
void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, TextureFormat texformat,
const u8* tlut, TLUTFormat tlutfmt);

View File

@ -21,227 +21,176 @@ static bool TexFmt_Overlay_Center = false;
// STATE_TO_SAVE
alignas(16) u8 texMem[TMEM_SIZE];
int TexDecoder_GetTexelSizeInNibbles(int format)
int TexDecoder_GetTexelSizeInNibbles(TextureFormat format)
{
switch (format & 0x3f)
switch (format)
{
case GX_TF_I4:
// 4-bit formats
case TextureFormat::I4:
case TextureFormat::C4:
return 1;
case GX_TF_I8:
// 8-bit formats
case TextureFormat::I8:
case TextureFormat::IA4:
case TextureFormat::C8:
return 2;
case GX_TF_IA4:
return 2;
case GX_TF_IA8:
// 16-bit formats
case TextureFormat::IA8:
case TextureFormat::RGB565:
case TextureFormat::RGB5A3:
case TextureFormat::C14X2:
return 4;
case GX_TF_RGB565:
return 4;
case GX_TF_RGB5A3:
return 4;
case GX_TF_RGBA8:
// 32-bit formats
case TextureFormat::RGBA8:
return 8;
case GX_TF_C4:
// Compressed format
case TextureFormat::CMPR:
return 1;
case GX_TF_C8:
return 2;
case GX_TF_C14X2:
return 4;
case GX_TF_CMPR:
return 1;
case GX_CTF_R4:
return 1;
case GX_CTF_RA4:
return 2;
case GX_CTF_RA8:
return 4;
case GX_CTF_A8:
return 2;
case GX_CTF_R8:
return 2;
case GX_CTF_G8:
return 2;
case GX_CTF_B8:
return 2;
case GX_CTF_RG8:
return 4;
case GX_CTF_GB8:
return 4;
case GX_TF_Z8:
return 2;
case GX_TF_Z16:
return 4;
case GX_TF_Z24X8:
return 8;
case GX_CTF_Z4:
return 1;
case GX_CTF_Z8H:
return 2;
case GX_CTF_Z8M:
return 2;
case GX_CTF_Z8L:
return 2;
case GX_CTF_Z16R:
return 4;
case GX_CTF_Z16L:
return 4;
default:
PanicAlert("Unsupported Texture Format (%08x)! (GetTexelSizeInNibbles)", format);
PanicAlert("Invalid Texture Format (0x%X)! (GetTexelSizeInNibbles)", static_cast<int>(format));
return 1;
}
}
int TexDecoder_GetTextureSizeInBytes(int width, int height, int format)
int TexDecoder_GetTextureSizeInBytes(int width, int height, TextureFormat format)
{
return (width * height * TexDecoder_GetTexelSizeInNibbles(format)) / 2;
}
int TexDecoder_GetBlockWidthInTexels(u32 format)
int TexDecoder_GetBlockWidthInTexels(TextureFormat format)
{
switch (format)
{
case GX_TF_I4:
// 4-bit formats
case TextureFormat::I4:
case TextureFormat::C4:
return 8;
case GX_TF_I8:
// 8-bit formats
case TextureFormat::I8:
case TextureFormat::IA4:
case TextureFormat::C8:
return 8;
case GX_TF_IA4:
return 8;
case GX_TF_IA8:
// 16-bit formats
case TextureFormat::IA8:
case TextureFormat::RGB565:
case TextureFormat::RGB5A3:
case TextureFormat::C14X2:
return 4;
case GX_TF_RGB565:
// 32-bit formats
case TextureFormat::RGBA8:
return 4;
case GX_TF_RGB5A3:
return 4;
case GX_TF_RGBA8:
return 4;
case GX_TF_C4:
// Compressed format
case TextureFormat::CMPR:
return 8;
case GX_TF_C8:
return 8;
case GX_TF_C14X2:
return 4;
case GX_TF_CMPR:
return 8;
case GX_CTF_R4:
return 8;
case GX_CTF_RA4:
return 8;
case GX_CTF_RA8:
return 4;
case GX_CTF_A8:
return 8;
case GX_CTF_R8:
return 8;
case GX_CTF_G8:
return 8;
case GX_CTF_B8:
return 8;
case GX_CTF_RG8:
return 4;
case GX_CTF_GB8:
return 4;
case GX_TF_Z8:
return 8;
case GX_TF_Z16:
return 4;
case GX_TF_Z24X8:
return 4;
case GX_CTF_Z4:
return 8;
case GX_CTF_Z8H:
return 8;
case GX_CTF_Z8M:
return 8;
case GX_CTF_Z8L:
return 8;
case GX_CTF_Z16R:
return 4;
case GX_CTF_Z16L:
return 4;
default:
PanicAlert("Unsupported Texture Format (%08x)! (GetBlockWidthInTexels)", format);
PanicAlert("Invalid Texture Format (0x%X)! (GetBlockWidthInTexels)", static_cast<int>(format));
return 8;
}
}
int TexDecoder_GetBlockHeightInTexels(u32 format)
int TexDecoder_GetBlockHeightInTexels(TextureFormat format)
{
switch (format)
{
case GX_TF_I4:
// 4-bit formats
case TextureFormat::I4:
case TextureFormat::C4:
return 8;
case GX_TF_I8:
// 8-bit formats
case TextureFormat::I8:
case TextureFormat::IA4:
case TextureFormat::C8:
return 4;
case GX_TF_IA4:
// 16-bit formats
case TextureFormat::IA8:
case TextureFormat::RGB565:
case TextureFormat::RGB5A3:
case TextureFormat::C14X2:
return 4;
case GX_TF_IA8:
// 32-bit formats
case TextureFormat::RGBA8:
return 4;
case GX_TF_RGB565:
return 4;
case GX_TF_RGB5A3:
return 4;
case GX_TF_RGBA8:
return 4;
case GX_TF_C4:
// Compressed format
case TextureFormat::CMPR:
return 8;
case GX_TF_C8:
default:
PanicAlert("Invalid Texture Format (0x%X)! (GetBlockHeightInTexels)", static_cast<int>(format));
return 4;
case GX_TF_C14X2:
return 4;
case GX_TF_CMPR:
}
}
int TexDecoder_GetEFBCopyBlockWidthInTexels(EFBCopyFormat format)
{
switch (format)
{
// 4-bit formats
case EFBCopyFormat::R4:
return 8;
case GX_CTF_R4:
// 8-bit formats
case EFBCopyFormat::A8:
case EFBCopyFormat::R8_0x1:
case EFBCopyFormat::R8:
case EFBCopyFormat::G8:
case EFBCopyFormat::B8:
return 8;
case GX_CTF_RA4:
// 16-bit formats
case EFBCopyFormat::RA8:
case EFBCopyFormat::RGB565:
case EFBCopyFormat::RGB5A3:
case EFBCopyFormat::RG8:
case EFBCopyFormat::GB8:
return 4;
case GX_CTF_RA8:
return 4;
case GX_CTF_A8:
return 4;
case GX_CTF_R8:
return 4;
case GX_CTF_G8:
return 4;
case GX_CTF_B8:
return 4;
case GX_CTF_RG8:
return 4;
case GX_CTF_GB8:
return 4;
case GX_TF_Z8:
return 4;
case GX_TF_Z16:
return 4;
case GX_TF_Z24X8:
return 4;
case GX_CTF_Z4:
return 8;
case GX_CTF_Z8H:
return 4;
case GX_CTF_Z8M:
return 4;
case GX_CTF_Z8L:
return 4;
case GX_CTF_Z16R:
return 4;
case GX_CTF_Z16L:
// 32-bit formats
case EFBCopyFormat::RGBA8:
return 4;
default:
PanicAlert("Unsupported Texture Format (%08x)! (GetBlockHeightInTexels)", format);
PanicAlert("Invalid EFB Copy Format (0x%X)! (GetEFBCopyBlockWidthInTexels)",
static_cast<int>(format));
return 8;
}
}
// Returns the height, in texels, of one encoded block (tile) for the given EFB copy format.
//
// format: tile encoder mode of the EFB copy.
// Returns 8 for the 4-bit format and 4 for all 8-, 16- and 32-bit formats.
// A value that is not a known EFBCopyFormat raises a PanicAlert and falls back to 4.
int TexDecoder_GetEFBCopyBlockHeightInTexels(EFBCopyFormat format)
{
  switch (format)
  {
  // 4-bit formats
  case EFBCopyFormat::R4:
    return 8;
  // 8-bit formats
  // RA4 (4 bits R + 4 bits A) is a valid enumerator whose base texture format is IA4; it must be
  // listed here so that it does not fall through to the "invalid format" alert below.
  case EFBCopyFormat::RA4:
  case EFBCopyFormat::A8:
  case EFBCopyFormat::R8_0x1:
  case EFBCopyFormat::R8:
  case EFBCopyFormat::G8:
  case EFBCopyFormat::B8:
    return 4;
  // 16-bit formats
  case EFBCopyFormat::RA8:
  case EFBCopyFormat::RGB565:
  case EFBCopyFormat::RGB5A3:
  case EFBCopyFormat::RG8:
  case EFBCopyFormat::GB8:
    return 4;
  // 32-bit formats
  case EFBCopyFormat::RGBA8:
    return 4;
  default:
    PanicAlert("Invalid EFB Copy Format (0x%X)! (GetEFBCopyBlockHeightInTexels)",
               static_cast<int>(format));
    return 4;
  }
}
// returns bytes
int TexDecoder_GetPaletteSize(int format)
int TexDecoder_GetPaletteSize(TextureFormat format)
{
switch (format)
{
case GX_TF_C4:
case TextureFormat::C4:
return 16 * 2;
case GX_TF_C8:
case TextureFormat::C8:
return 256 * 2;
case GX_TF_C14X2:
case TextureFormat::C14X2:
return 16384 * 2;
default:
return 0;
@ -251,51 +200,33 @@ int TexDecoder_GetPaletteSize(int format)
// Get the "in memory" texture format of an EFB copy's format.
// With the exception of c4/c8/c14 paletted texture formats (which are handled elsewhere)
// this is the format the game should be using when it is drawing an EFB copy back.
int TexDecoder_GetEfbCopyBaseFormat(int format)
TextureFormat TexDecoder_GetEFBCopyBaseFormat(EFBCopyFormat format)
{
switch (format)
{
case GX_TF_I4:
case GX_CTF_Z4:
case GX_CTF_R4:
return GX_TF_I4;
case GX_TF_I8:
case GX_CTF_A8:
case GX_CTF_R8:
case GX_CTF_G8:
case GX_CTF_B8:
case GX_TF_Z8:
case GX_CTF_Z8H:
case GX_CTF_Z8M:
case GX_CTF_Z8L:
return GX_TF_I8;
case GX_TF_IA4:
case GX_CTF_RA4:
return GX_TF_IA4;
case GX_TF_IA8:
case GX_TF_Z16:
case GX_CTF_RA8:
case GX_CTF_RG8:
case GX_CTF_GB8:
case GX_CTF_Z16R:
case GX_CTF_Z16L:
return GX_TF_IA8;
case GX_TF_RGB565:
return GX_TF_RGB565;
case GX_TF_RGB5A3:
return GX_TF_RGB5A3;
case GX_TF_RGBA8:
case GX_TF_Z24X8:
case GX_CTF_YUVA8:
return GX_TF_RGBA8;
// These formats can't be (directly) generated by EFB copies
case GX_TF_C4:
case GX_TF_C8:
case GX_TF_C14X2:
case GX_TF_CMPR:
case EFBCopyFormat::R4:
return TextureFormat::I4;
case EFBCopyFormat::A8:
case EFBCopyFormat::R8_0x1:
case EFBCopyFormat::R8:
case EFBCopyFormat::G8:
case EFBCopyFormat::B8:
return TextureFormat::I8;
case EFBCopyFormat::RA4:
return TextureFormat::IA4;
case EFBCopyFormat::RA8:
case EFBCopyFormat::RG8:
case EFBCopyFormat::GB8:
return TextureFormat::IA8;
case EFBCopyFormat::RGB565:
return TextureFormat::RGB565;
case EFBCopyFormat::RGB5A3:
return TextureFormat::RGB5A3;
case EFBCopyFormat::RGBA8:
return TextureFormat::RGBA8;
default:
PanicAlert("Unsupported Texture Format (%08x)! (GetEfbCopyBaseFormat)", format);
return format & 0xf;
PanicAlert("Invalid EFB Copy Format (0x%X)! (GetEFBCopyBaseFormat)", static_cast<int>(format));
return static_cast<TextureFormat>(format);
}
}
@ -320,7 +251,7 @@ static const char* texfmt[] = {
"CZ16L", "0x3D", "0x3E", "0x3F",
};
static void TexDecoder_DrawOverlay(u8* dst, int width, int height, int texformat)
static void TexDecoder_DrawOverlay(u8* dst, int width, int height, TextureFormat texformat)
{
int w = std::min(width, 40);
int h = std::min(height, 10);
@ -334,7 +265,7 @@ static void TexDecoder_DrawOverlay(u8* dst, int width, int height, int texformat
yoff = 0;
}
const char* fmt = texfmt[texformat & 15];
const char* fmt = texfmt[static_cast<int>(texformat) & 15];
while (*fmt)
{
int xcnt = 0;
@ -363,8 +294,8 @@ static void TexDecoder_DrawOverlay(u8* dst, int width, int height, int texformat
}
}
void TexDecoder_Decode(u8* dst, const u8* src, int width, int height, int texformat, const u8* tlut,
TlutFormat tlutfmt)
void TexDecoder_Decode(u8* dst, const u8* src, int width, int height, TextureFormat texformat,
const u8* tlut, TLUTFormat tlutfmt)
{
_TexDecoder_DecodeImpl((u32*)dst, src, width, height, texformat, tlut, tlutfmt);
@ -409,23 +340,23 @@ static inline u32 DecodePixel_RGB5A3(u16 val)
return r | (g << 8) | (b << 16) | (a << 24);
}
static inline u32 DecodePixel_Paletted(u16 pixel, TlutFormat tlutfmt)
static inline u32 DecodePixel_Paletted(u16 pixel, TLUTFormat tlutfmt)
{
switch (tlutfmt)
{
case GX_TL_IA8:
case TLUTFormat::IA8:
return DecodePixel_IA8(pixel);
case GX_TL_RGB565:
case TLUTFormat::RGB565:
return DecodePixel_RGB565(Common::swap16(pixel));
case GX_TL_RGB5A3:
case TLUTFormat::RGB5A3:
return DecodePixel_RGB5A3(Common::swap16(pixel));
default:
return 0;
}
}
void TexDecoder_DecodeTexel(u8* dst, const u8* src, int s, int t, int imageWidth, int texformat,
const u8* tlut_, TlutFormat tlutfmt)
void TexDecoder_DecodeTexel(u8* dst, const u8* src, int s, int t, int imageWidth,
TextureFormat texformat, const u8* tlut_, TLUTFormat tlutfmt)
{
/* General formula for computing texture offset
//
@ -440,7 +371,7 @@ void TexDecoder_DecodeTexel(u8* dst, const u8* src, int s, int t, int imageWidth
switch (texformat)
{
case GX_TF_C4:
case TextureFormat::C4:
{
u16 sBlk = s >> 3;
u16 tBlk = t >> 3;
@ -459,7 +390,7 @@ void TexDecoder_DecodeTexel(u8* dst, const u8* src, int s, int t, int imageWidth
*((u32*)dst) = DecodePixel_Paletted(tlut[val], tlutfmt);
}
break;
case GX_TF_I4:
case TextureFormat::I4:
{
u16 sBlk = s >> 3;
u16 tBlk = t >> 3;
@ -480,7 +411,7 @@ void TexDecoder_DecodeTexel(u8* dst, const u8* src, int s, int t, int imageWidth
dst[3] = val;
}
break;
case GX_TF_I8:
case TextureFormat::I8:
{
u16 sBlk = s >> 3;
u16 tBlk = t >> 2;
@ -497,7 +428,7 @@ void TexDecoder_DecodeTexel(u8* dst, const u8* src, int s, int t, int imageWidth
dst[3] = val;
}
break;
case GX_TF_C8:
case TextureFormat::C8:
{
u16 sBlk = s >> 3;
u16 tBlk = t >> 2;
@ -513,7 +444,7 @@ void TexDecoder_DecodeTexel(u8* dst, const u8* src, int s, int t, int imageWidth
*((u32*)dst) = DecodePixel_Paletted(tlut[val], tlutfmt);
}
break;
case GX_TF_IA4:
case TextureFormat::IA4:
{
u16 sBlk = s >> 3;
u16 tBlk = t >> 2;
@ -532,7 +463,7 @@ void TexDecoder_DecodeTexel(u8* dst, const u8* src, int s, int t, int imageWidth
dst[3] = a;
}
break;
case GX_TF_IA8:
case TextureFormat::IA8:
{
u16 sBlk = s >> 2;
u16 tBlk = t >> 2;
@ -548,7 +479,7 @@ void TexDecoder_DecodeTexel(u8* dst, const u8* src, int s, int t, int imageWidth
*((u32*)dst) = DecodePixel_IA8(*valAddr);
}
break;
case GX_TF_C14X2:
case TextureFormat::C14X2:
{
u16 sBlk = s >> 2;
u16 tBlk = t >> 2;
@ -567,7 +498,7 @@ void TexDecoder_DecodeTexel(u8* dst, const u8* src, int s, int t, int imageWidth
*((u32*)dst) = DecodePixel_Paletted(tlut[val], tlutfmt);
}
break;
case GX_TF_RGB565:
case TextureFormat::RGB565:
{
u16 sBlk = s >> 2;
u16 tBlk = t >> 2;
@ -583,7 +514,7 @@ void TexDecoder_DecodeTexel(u8* dst, const u8* src, int s, int t, int imageWidth
*((u32*)dst) = DecodePixel_RGB565(Common::swap16(*valAddr));
}
break;
case GX_TF_RGB5A3:
case TextureFormat::RGB5A3:
{
u16 sBlk = s >> 2;
u16 tBlk = t >> 2;
@ -599,7 +530,7 @@ void TexDecoder_DecodeTexel(u8* dst, const u8* src, int s, int t, int imageWidth
*((u32*)dst) = DecodePixel_RGB5A3(Common::swap16(*valAddr));
}
break;
case GX_TF_RGBA8:
case TextureFormat::RGBA8:
{
u16 sBlk = s >> 2;
u16 tBlk = t >> 2;
@ -618,7 +549,7 @@ void TexDecoder_DecodeTexel(u8* dst, const u8* src, int s, int t, int imageWidth
dst[2] = valAddr[33];
}
break;
case GX_TF_CMPR:
case TextureFormat::CMPR:
{
u16 sDxt = s >> 2;
u16 tDxt = t >> 2;

View File

@ -57,22 +57,22 @@ static inline u32 DecodePixel_RGB5A3(u16 val)
return r | (g << 8) | (b << 16) | (a << 24);
}
static inline u32 DecodePixel_Paletted(u16 pixel, TlutFormat tlutfmt)
static inline u32 DecodePixel_Paletted(u16 pixel, TLUTFormat tlutfmt)
{
switch (tlutfmt)
{
case GX_TL_IA8:
case TLUTFormat::IA8:
return DecodePixel_IA8(pixel);
case GX_TL_RGB565:
case TLUTFormat::RGB565:
return DecodePixel_RGB565(Common::swap16(pixel));
case GX_TL_RGB5A3:
case TLUTFormat::RGB5A3:
return DecodePixel_RGB5A3(Common::swap16(pixel));
default:
return 0;
}
}
static inline void DecodeBytes_C4(u32* dst, const u8* src, const u8* tlut_, TlutFormat tlutfmt)
static inline void DecodeBytes_C4(u32* dst, const u8* src, const u8* tlut_, TLUTFormat tlutfmt)
{
const u16* tlut = (u16*)tlut_;
for (int x = 0; x < 4; x++)
@ -83,7 +83,7 @@ static inline void DecodeBytes_C4(u32* dst, const u8* src, const u8* tlut_, Tlut
}
}
static inline void DecodeBytes_C8(u32* dst, const u8* src, const u8* tlut_, TlutFormat tlutfmt)
static inline void DecodeBytes_C8(u32* dst, const u8* src, const u8* tlut_, TLUTFormat tlutfmt)
{
const u16* tlut = (u16*)tlut_;
for (int x = 0; x < 8; x++)
@ -93,7 +93,7 @@ static inline void DecodeBytes_C8(u32* dst, const u8* src, const u8* tlut_, Tlut
}
}
static inline void DecodeBytes_C14X2(u32* dst, const u16* src, const u8* tlut_, TlutFormat tlutfmt)
static inline void DecodeBytes_C14X2(u32* dst, const u16* src, const u8* tlut_, TLUTFormat tlutfmt)
{
const u16* tlut = (u16*)tlut_;
for (int x = 0; x < 4; x++)
@ -195,21 +195,21 @@ static void DecodeDXTBlock(u32* dst, const DXTBlock* src, int pitch)
// TODO: complete SSE2 optimization of less often used texture formats.
// TODO: refactor algorithms using _mm_loadl_epi64 unaligned loads to prefer 128-bit aligned loads.
void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int texformat,
const u8* tlut, TlutFormat tlutfmt)
void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, TextureFormat texformat,
const u8* tlut, TLUTFormat tlutfmt)
{
const int Wsteps4 = (width + 3) / 4;
const int Wsteps8 = (width + 7) / 8;
switch (texformat)
{
case GX_TF_C4:
case TextureFormat::C4:
for (int y = 0; y < height; y += 8)
for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8, yStep++)
for (int iy = 0, xStep = 8 * yStep; iy < 8; iy++, xStep++)
DecodeBytes_C4(dst + (y + iy) * width + x, src + 4 * xStep, tlut, tlutfmt);
break;
case GX_TF_I4:
case TextureFormat::I4:
{
// Reference C implementation:
for (int y = 0; y < height; y += 8)
@ -225,7 +225,7 @@ void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int
}
}
break;
case GX_TF_I8: // speed critical
case TextureFormat::I8: // speed critical
{
// Reference C implementation
for (int y = 0; y < height; y += 4)
@ -255,13 +255,13 @@ void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int
}
}
break;
case GX_TF_C8:
case TextureFormat::C8:
for (int y = 0; y < height; y += 4)
for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++)
for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++)
DecodeBytes_C8((u32*)dst + (y + iy) * width + x, src + 8 * xStep, tlut, tlutfmt);
break;
case GX_TF_IA4:
case TextureFormat::IA4:
{
for (int y = 0; y < height; y += 4)
for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++)
@ -269,7 +269,7 @@ void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int
DecodeBytes_IA4(dst + (y + iy) * width + x, src + 8 * xStep);
}
break;
case GX_TF_IA8:
case TextureFormat::IA8:
{
// Reference C implementation:
for (int y = 0; y < height; y += 4)
@ -285,13 +285,13 @@ void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int
}
}
break;
case GX_TF_C14X2:
case TextureFormat::C14X2:
for (int y = 0; y < height; y += 4)
for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++)
for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++)
DecodeBytes_C14X2(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlut, tlutfmt);
break;
case GX_TF_RGB565:
case TextureFormat::RGB565:
{
// Reference C implementation.
for (int y = 0; y < height; y += 4)
@ -305,7 +305,7 @@ void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int
}
}
break;
case GX_TF_RGB5A3:
case TextureFormat::RGB5A3:
{
// Reference C implementation:
for (int y = 0; y < height; y += 4)
@ -314,7 +314,7 @@ void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int
DecodeBytes_RGB5A3(dst + (y + iy) * width + x, (u16*)src);
}
break;
case GX_TF_RGBA8: // speed critical
case TextureFormat::RGBA8: // speed critical
{
// Reference C implementation.
for (int y = 0; y < height; y += 4)
@ -327,7 +327,7 @@ void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int
}
}
break;
case GX_TF_CMPR: // speed critical
case TextureFormat::CMPR: // speed critical
// The metroid games use this format almost exclusively.
{
for (int y = 0; y < height; y += 8)

View File

@ -212,12 +212,13 @@ static void DecodeDXTBlock(u32* dst, const DXTBlock* src, int pitch)
// free to make the assumption that addresses are multiples of 16 in the aligned case.
// TODO: complete SSE2 optimization of less often used texture formats.
// TODO: refactor algorithms using _mm_loadl_epi64 unaligned loads to prefer 128-bit aligned loads.
static void TexDecoder_DecodeImpl_C4(u32* dst, const u8* src, int width, int height, int texformat,
const u8* tlut, TlutFormat tlutfmt, int Wsteps4, int Wsteps8)
static void TexDecoder_DecodeImpl_C4(u32* dst, const u8* src, int width, int height,
TextureFormat texformat, const u8* tlut, TLUTFormat tlutfmt,
int Wsteps4, int Wsteps8)
{
switch (tlutfmt)
{
case GX_TL_RGB5A3:
case TLUTFormat::RGB5A3:
{
for (int y = 0; y < height; y += 8)
for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8, yStep++)
@ -226,7 +227,7 @@ static void TexDecoder_DecodeImpl_C4(u32* dst, const u8* src, int width, int hei
}
break;
case GX_TL_IA8:
case TLUTFormat::IA8:
{
for (int y = 0; y < height; y += 8)
for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8, yStep++)
@ -235,7 +236,7 @@ static void TexDecoder_DecodeImpl_C4(u32* dst, const u8* src, int width, int hei
}
break;
case GX_TL_RGB565:
case TLUTFormat::RGB565:
{
for (int y = 0; y < height; y += 8)
for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8, yStep++)
@ -251,8 +252,8 @@ static void TexDecoder_DecodeImpl_C4(u32* dst, const u8* src, int width, int hei
FUNCTION_TARGET_SSSE3
static void TexDecoder_DecodeImpl_I4_SSSE3(u32* dst, const u8* src, int width, int height,
int texformat, const u8* tlut, TlutFormat tlutfmt,
int Wsteps4, int Wsteps8)
TextureFormat texformat, const u8* tlut,
TLUTFormat tlutfmt, int Wsteps4, int Wsteps8)
{
const __m128i kMask_x0f = _mm_set1_epi32(0x0f0f0f0fL);
const __m128i kMask_xf0 = _mm_set1_epi32(0xf0f0f0f0L);
@ -298,8 +299,9 @@ static void TexDecoder_DecodeImpl_I4_SSSE3(u32* dst, const u8* src, int width, i
}
}
static void TexDecoder_DecodeImpl_I4(u32* dst, const u8* src, int width, int height, int texformat,
const u8* tlut, TlutFormat tlutfmt, int Wsteps4, int Wsteps8)
static void TexDecoder_DecodeImpl_I4(u32* dst, const u8* src, int width, int height,
TextureFormat texformat, const u8* tlut, TLUTFormat tlutfmt,
int Wsteps4, int Wsteps8)
{
const __m128i kMask_x0f = _mm_set1_epi32(0x0f0f0f0fL);
const __m128i kMask_xf0 = _mm_set1_epi32(0xf0f0f0f0L);
@ -390,8 +392,8 @@ static void TexDecoder_DecodeImpl_I4(u32* dst, const u8* src, int width, int hei
FUNCTION_TARGET_SSSE3
static void TexDecoder_DecodeImpl_I8_SSSE3(u32* dst, const u8* src, int width, int height,
int texformat, const u8* tlut, TlutFormat tlutfmt,
int Wsteps4, int Wsteps8)
TextureFormat texformat, const u8* tlut,
TLUTFormat tlutfmt, int Wsteps4, int Wsteps8)
{
// xsacha optimized with SSSE3 intrinsics
// Produces a ~10% speed improvement over SSE2 implementation
@ -419,8 +421,9 @@ static void TexDecoder_DecodeImpl_I8_SSSE3(u32* dst, const u8* src, int width, i
}
}
static void TexDecoder_DecodeImpl_I8(u32* dst, const u8* src, int width, int height, int texformat,
const u8* tlut, TlutFormat tlutfmt, int Wsteps4, int Wsteps8)
static void TexDecoder_DecodeImpl_I8(u32* dst, const u8* src, int width, int height,
TextureFormat texformat, const u8* tlut, TLUTFormat tlutfmt,
int Wsteps4, int Wsteps8)
{
// JSD optimized with SSE2 intrinsics.
// Produces an ~86% speed improvement over reference C implementation.
@ -518,12 +521,13 @@ static void TexDecoder_DecodeImpl_I8(u32* dst, const u8* src, int width, int hei
}
}
static void TexDecoder_DecodeImpl_C8(u32* dst, const u8* src, int width, int height, int texformat,
const u8* tlut, TlutFormat tlutfmt, int Wsteps4, int Wsteps8)
static void TexDecoder_DecodeImpl_C8(u32* dst, const u8* src, int width, int height,
TextureFormat texformat, const u8* tlut, TLUTFormat tlutfmt,
int Wsteps4, int Wsteps8)
{
switch (tlutfmt)
{
case GX_TL_RGB5A3:
case TLUTFormat::RGB5A3:
{
for (int y = 0; y < height; y += 4)
for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++)
@ -532,7 +536,7 @@ static void TexDecoder_DecodeImpl_C8(u32* dst, const u8* src, int width, int hei
}
break;
case GX_TL_IA8:
case TLUTFormat::IA8:
{
for (int y = 0; y < height; y += 4)
for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++)
@ -541,7 +545,7 @@ static void TexDecoder_DecodeImpl_C8(u32* dst, const u8* src, int width, int hei
}
break;
case GX_TL_RGB565:
case TLUTFormat::RGB565:
{
for (int y = 0; y < height; y += 4)
for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++)
@ -555,8 +559,9 @@ static void TexDecoder_DecodeImpl_C8(u32* dst, const u8* src, int width, int hei
}
}
static void TexDecoder_DecodeImpl_IA4(u32* dst, const u8* src, int width, int height, int texformat,
const u8* tlut, TlutFormat tlutfmt, int Wsteps4, int Wsteps8)
static void TexDecoder_DecodeImpl_IA4(u32* dst, const u8* src, int width, int height,
TextureFormat texformat, const u8* tlut, TLUTFormat tlutfmt,
int Wsteps4, int Wsteps8)
{
for (int y = 0; y < height; y += 4)
{
@ -572,8 +577,8 @@ static void TexDecoder_DecodeImpl_IA4(u32* dst, const u8* src, int width, int he
FUNCTION_TARGET_SSSE3
static void TexDecoder_DecodeImpl_IA8_SSSE3(u32* dst, const u8* src, int width, int height,
int texformat, const u8* tlut, TlutFormat tlutfmt,
int Wsteps4, int Wsteps8)
TextureFormat texformat, const u8* tlut,
TLUTFormat tlutfmt, int Wsteps4, int Wsteps8)
{
// xsacha optimized with SSSE3 intrinsics.
// Produces an ~50% speed improvement over SSE2 implementation.
@ -595,8 +600,9 @@ static void TexDecoder_DecodeImpl_IA8_SSSE3(u32* dst, const u8* src, int width,
}
}
static void TexDecoder_DecodeImpl_IA8(u32* dst, const u8* src, int width, int height, int texformat,
const u8* tlut, TlutFormat tlutfmt, int Wsteps4, int Wsteps8)
static void TexDecoder_DecodeImpl_IA8(u32* dst, const u8* src, int width, int height,
TextureFormat texformat, const u8* tlut, TLUTFormat tlutfmt,
int Wsteps4, int Wsteps8)
{
// JSD optimized with SSE2 intrinsics.
// Produces an ~80% speed improvement over reference C implementation.
@ -656,12 +662,12 @@ static void TexDecoder_DecodeImpl_IA8(u32* dst, const u8* src, int width, int he
}
static void TexDecoder_DecodeImpl_C14X2(u32* dst, const u8* src, int width, int height,
int texformat, const u8* tlut, TlutFormat tlutfmt,
TextureFormat texformat, const u8* tlut, TLUTFormat tlutfmt,
int Wsteps4, int Wsteps8)
{
switch (tlutfmt)
{
case GX_TL_RGB5A3:
case TLUTFormat::RGB5A3:
{
for (int y = 0; y < height; y += 4)
for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++)
@ -670,7 +676,7 @@ static void TexDecoder_DecodeImpl_C14X2(u32* dst, const u8* src, int width, int
}
break;
case GX_TL_IA8:
case TLUTFormat::IA8:
{
for (int y = 0; y < height; y += 4)
for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++)
@ -679,7 +685,7 @@ static void TexDecoder_DecodeImpl_C14X2(u32* dst, const u8* src, int width, int
}
break;
case GX_TL_RGB565:
case TLUTFormat::RGB565:
{
for (int y = 0; y < height; y += 4)
for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++)
@ -694,8 +700,8 @@ static void TexDecoder_DecodeImpl_C14X2(u32* dst, const u8* src, int width, int
}
static void TexDecoder_DecodeImpl_RGB565(u32* dst, const u8* src, int width, int height,
int texformat, const u8* tlut, TlutFormat tlutfmt,
int Wsteps4, int Wsteps8)
TextureFormat texformat, const u8* tlut,
TLUTFormat tlutfmt, int Wsteps4, int Wsteps8)
{
// JSD optimized with SSE2 intrinsics.
// Produces an ~78% speed improvement over reference C implementation.
@ -766,8 +772,8 @@ static void TexDecoder_DecodeImpl_RGB565(u32* dst, const u8* src, int width, int
FUNCTION_TARGET_SSSE3
static void TexDecoder_DecodeImpl_RGB5A3_SSSE3(u32* dst, const u8* src, int width, int height,
int texformat, const u8* tlut, TlutFormat tlutfmt,
int Wsteps4, int Wsteps8)
TextureFormat texformat, const u8* tlut,
TLUTFormat tlutfmt, int Wsteps4, int Wsteps8)
{
const __m128i kMask_x1f = _mm_set1_epi32(0x0000001fL);
const __m128i kMask_x0f = _mm_set1_epi32(0x0000000fL);
@ -872,8 +878,8 @@ static void TexDecoder_DecodeImpl_RGB5A3_SSSE3(u32* dst, const u8* src, int widt
}
static void TexDecoder_DecodeImpl_RGB5A3(u32* dst, const u8* src, int width, int height,
int texformat, const u8* tlut, TlutFormat tlutfmt,
int Wsteps4, int Wsteps8)
TextureFormat texformat, const u8* tlut,
TLUTFormat tlutfmt, int Wsteps4, int Wsteps8)
{
const __m128i kMask_x1f = _mm_set1_epi32(0x0000001fL);
const __m128i kMask_x0f = _mm_set1_epi32(0x0000000fL);
@ -993,8 +999,8 @@ static void TexDecoder_DecodeImpl_RGB5A3(u32* dst, const u8* src, int width, int
FUNCTION_TARGET_SSSE3
static void TexDecoder_DecodeImpl_RGBA8_SSSE3(u32* dst, const u8* src, int width, int height,
int texformat, const u8* tlut, TlutFormat tlutfmt,
int Wsteps4, int Wsteps8)
TextureFormat texformat, const u8* tlut,
TLUTFormat tlutfmt, int Wsteps4, int Wsteps8)
{
// xsacha optimized with SSSE3 instrinsics
// Produces a ~30% speed improvement over SSE2 implementation
@ -1027,7 +1033,7 @@ static void TexDecoder_DecodeImpl_RGBA8_SSSE3(u32* dst, const u8* src, int width
}
static void TexDecoder_DecodeImpl_RGBA8(u32* dst, const u8* src, int width, int height,
int texformat, const u8* tlut, TlutFormat tlutfmt,
TextureFormat texformat, const u8* tlut, TLUTFormat tlutfmt,
int Wsteps4, int Wsteps8)
{
// JSD optimized with SSE2 intrinsics
@ -1148,7 +1154,7 @@ static void TexDecoder_DecodeImpl_RGBA8(u32* dst, const u8* src, int width, int
}
static void TexDecoder_DecodeImpl_CMPR(u32* dst, const u8* src, int width, int height,
int texformat, const u8* tlut, TlutFormat tlutfmt,
TextureFormat texformat, const u8* tlut, TLUTFormat tlutfmt,
int Wsteps4, int Wsteps8)
{
// The metroid games use this format almost exclusively.
@ -1403,19 +1409,19 @@ static void TexDecoder_DecodeImpl_CMPR(u32* dst, const u8* src, int width, int h
}
}
void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int texformat,
const u8* tlut, TlutFormat tlutfmt)
void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, TextureFormat texformat,
const u8* tlut, TLUTFormat tlutfmt)
{
int Wsteps4 = (width + 3) / 4;
int Wsteps8 = (width + 7) / 8;
switch (texformat)
{
case GX_TF_C4:
case TextureFormat::C4:
TexDecoder_DecodeImpl_C4(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4, Wsteps8);
break;
case GX_TF_I4:
case TextureFormat::I4:
if (cpu_info.bSSSE3)
TexDecoder_DecodeImpl_I4_SSSE3(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4,
Wsteps8);
@ -1423,7 +1429,7 @@ void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int
TexDecoder_DecodeImpl_I4(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4, Wsteps8);
break;
case GX_TF_I8:
case TextureFormat::I8:
if (cpu_info.bSSSE3)
TexDecoder_DecodeImpl_I8_SSSE3(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4,
Wsteps8);
@ -1431,15 +1437,15 @@ void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int
TexDecoder_DecodeImpl_I8(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4, Wsteps8);
break;
case GX_TF_C8:
case TextureFormat::C8:
TexDecoder_DecodeImpl_C8(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4, Wsteps8);
break;
case GX_TF_IA4:
case TextureFormat::IA4:
TexDecoder_DecodeImpl_IA4(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4, Wsteps8);
break;
case GX_TF_IA8:
case TextureFormat::IA8:
if (cpu_info.bSSSE3)
TexDecoder_DecodeImpl_IA8_SSSE3(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4,
Wsteps8);
@ -1448,17 +1454,17 @@ void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int
Wsteps8);
break;
case GX_TF_C14X2:
case TextureFormat::C14X2:
TexDecoder_DecodeImpl_C14X2(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4,
Wsteps8);
break;
case GX_TF_RGB565:
case TextureFormat::RGB565:
TexDecoder_DecodeImpl_RGB565(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4,
Wsteps8);
break;
case GX_TF_RGB5A3:
case TextureFormat::RGB5A3:
if (cpu_info.bSSSE3)
TexDecoder_DecodeImpl_RGB5A3_SSSE3(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4,
Wsteps8);
@ -1467,7 +1473,7 @@ void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int
Wsteps8);
break;
case GX_TF_RGBA8:
case TextureFormat::RGBA8:
if (cpu_info.bSSSE3)
TexDecoder_DecodeImpl_RGBA8_SSSE3(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4,
Wsteps8);
@ -1476,12 +1482,13 @@ void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int
Wsteps8);
break;
case GX_TF_CMPR:
case TextureFormat::CMPR:
TexDecoder_DecodeImpl_CMPR(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4, Wsteps8);
break;
default:
PanicAlert("Unhandled texture format %d", texformat);
PanicAlert("Invalid Texture Format (0x%X)! (_TexDecoder_DecodeImpl)",
static_cast<int>(texformat));
break;
}
}