Implement OGL sampler cache. Allows binding a texture multiple times with different parameters. Also possibly gives a very small speed improvement.

This commit is contained in:
Jordan Woyak 2013-02-19 18:22:38 -06:00
parent d93e57ff22
commit f2647a1216
8 changed files with 245 additions and 72 deletions

View File

@ -7,6 +7,7 @@ set(SRCS Src/FramebufferManager.cpp
Src/ProgramShaderCache.cpp
Src/RasterFont.cpp
Src/Render.cpp
Src/SamplerCache.cpp
Src/StreamBuffer.cpp
Src/TextureCache.cpp
Src/TextureConverter.cpp

View File

@ -205,6 +205,7 @@
<ClCompile Include="Src\ProgramShaderCache.cpp" />
<ClCompile Include="Src\RasterFont.cpp" />
<ClCompile Include="Src\Render.cpp" />
<ClCompile Include="Src\SamplerCache.cpp" />
<ClCompile Include="Src\StreamBuffer.cpp" />
<ClCompile Include="Src\stdafx.cpp">
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Create</PrecompiledHeader>
@ -228,6 +229,7 @@
<ClInclude Include="Src\ProgramShaderCache.h" />
<ClInclude Include="Src\RasterFont.h" />
<ClInclude Include="Src\Render.h" />
<ClInclude Include="Src\SamplerCache.h" />
<ClInclude Include="Src\StreamBuffer.h" />
<ClInclude Include="Src\stdafx.h" />
<ClInclude Include="Src\TextureCache.h" />

View File

@ -63,6 +63,7 @@
#include "FPSCounter.h"
#include "ConfigManager.h"
#include "VertexManager.h"
#include "SamplerCache.h"
#include "main.h" // Local
#ifdef _WIN32
@ -128,6 +129,7 @@ static const u32 EFB_CACHE_HEIGHT = (EFB_HEIGHT + EFB_CACHE_RECT_SIZE - 1) / EFB
static bool s_efbCacheValid[2][EFB_CACHE_WIDTH * EFB_CACHE_HEIGHT];
static std::vector<u32> s_efbCache[2][EFB_CACHE_WIDTH * EFB_CACHE_HEIGHT]; // 2 for PEEK_Z and PEEK_COLOR
static SamplerCache s_sampler_cache;
int GetNumMSAASamples(int MSAAMode)
{
@ -366,6 +368,8 @@ void Renderer::Shutdown()
delete s_pfont;
s_pfont = 0;
s_ShowEFBCopyRegions.Destroy();
s_sampler_cache.Clear();
}
void Renderer::Init()
@ -1464,7 +1468,11 @@ void Renderer::SetLineWidth()
// Forwards the texture mode registers for (texindex, stage) to the sampler cache.
// The cache is addressed with a flat unit index: four stages per bpmem.tex entry.
void Renderer::SetSamplerState(int stage, int texindex)
{
	auto const& tex_regs = bpmem.tex[texindex];
	const int sampler_unit = (texindex * 4) + stage;

	s_sampler_cache.SetSamplerState(sampler_unit, tex_regs.texMode0[stage], tex_regs.texMode1[stage]);
}
void Renderer::SetInterlacingMode()

View File

@ -0,0 +1,133 @@
// Copyright (C) 2003 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#include "SamplerCache.h"
namespace OGL
{
// Starts with an empty cache and a remembered anisotropy setting of zero;
// SetSamplerState compares that value against g_ActiveConfig on every call.
SamplerCache::SamplerCache()
	: m_last_max_anisotropy(0)
{
}
// Releases any sampler objects still held by the cache.
SamplerCache::~SamplerCache()
{
	this->Clear();
}
void SamplerCache::SetSamplerState(int stage, const TexMode0& tm0, const TexMode1& tm1)
{
// TODO: can this go somewhere else?
if (m_last_max_anisotropy != g_ActiveConfig.iMaxAnisotropy)
{
m_last_max_anisotropy = g_ActiveConfig.iMaxAnisotropy;
Clear();
}
Params params(tm0, tm1);
// take equivalent forced linear when bForceFiltering
if (g_ActiveConfig.bForceFiltering)
{
params.tm0.min_filter |= 0x4;
params.tm0.mag_filter |= 0x1;
}
// TODO: Should keep a circular buffer for each stage of recently used samplers.
auto const& active_sampler = m_active_samplers[stage];
if (active_sampler.first != params || !active_sampler.second.sampler_id)
{
// Active sampler does not match parameters (or is invalid), bind the proper one.
auto const new_sampler = GetEntry(params);
glBindSampler(stage, new_sampler.second.sampler_id);
m_active_samplers[stage] = new_sampler;
}
//active_it->second.last_frame_used = frameCount;
}
// Returns a copy of the cache entry for `params`, generating and configuring a
// new GL sampler object the first time a parameter combination is requested.
auto SamplerCache::GetEntry(const Params& params) -> std::pair<Params, Value>
{
	auto const cached = m_cache.find(params);
	if (cached != m_cache.end())
		return *cached;

	// Sampler not found in cache, create it.
	Value fresh;
	glGenSamplers(1, &fresh.sampler_id);
	SetParameters(fresh.sampler_id, params);

	// TODO: Maybe kill old samplers if the cache gets huge. It doesn't seem to get huge though.
	auto const inserted = m_cache.insert(std::make_pair(params, fresh));
	return *inserted.first;
}
// Writes the filtering, wrapping, LOD and anisotropy state described by
// `params` into the GL sampler object `sampler_id`.
void SamplerCache::SetParameters(GLuint sampler_id, const Params& params)
{
	// Indexed by tm0.min_filter (wrapped to the table size); entries 4..7 are
	// the linear counterparts of entries 0..3.
	static const GLint min_filters[8] =
	{
		GL_NEAREST,
		GL_NEAREST_MIPMAP_NEAREST,
		GL_NEAREST_MIPMAP_LINEAR,
		GL_NEAREST,
		GL_LINEAR,
		GL_LINEAR_MIPMAP_NEAREST,
		GL_LINEAR_MIPMAP_LINEAR,
		GL_LINEAR,
	};

	// Indexed directly by tm0.wrap_s / tm0.wrap_t.
	static const GLint wrap_settings[4] =
	{
		GL_CLAMP_TO_EDGE,
		GL_REPEAT,
		GL_MIRRORED_REPEAT,
		GL_REPEAT,
	};

	auto& mode0 = params.tm0;
	auto& mode1 = params.tm1;

	const GLint min_filter = min_filters[mode0.min_filter % ARRAYSIZE(min_filters)];
	const GLint mag_filter = mode0.mag_filter ? GL_LINEAR : GL_NEAREST;

	glSamplerParameteri(sampler_id, GL_TEXTURE_MIN_FILTER, min_filter);
	glSamplerParameteri(sampler_id, GL_TEXTURE_MAG_FILTER, mag_filter);

	glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_S, wrap_settings[mode0.wrap_s]);
	glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_T, wrap_settings[mode0.wrap_t]);

	// The register values are scaled down (bias by 32, LOD limits by 16)
	// before being handed to GL.
	glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, mode0.lod_bias / 32.f);
	glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, mode1.min_lod / 16.f);
	glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, mode1.max_lod / 16.f);

	// The config value is an exponent: the requested level is 2^iMaxAnisotropy.
	if (g_ActiveConfig.iMaxAnisotropy > 0)
	{
		const float max_anisotropy = static_cast<float>(1 << g_ActiveConfig.iMaxAnisotropy);
		glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, max_anisotropy);
	}
}
void SamplerCache::Clear()
{
for (auto it = m_cache.begin(); it != m_cache.end(); ++it)
{
glDeleteSamplers(1, &it->second.sampler_id);
}
m_cache.clear();
}
}

View File

@ -0,0 +1,80 @@
#ifndef INCLUDE_SAMPLER_CACHE_H_
#define INCLUDE_SAMPLER_CACHE_H_
#include <map>
#include "Render.h"
#include "GLUtil.h"
namespace OGL
{
// Cache of OpenGL sampler objects keyed by a (TexMode0, TexMode1) pair, so a
// sampler is generated once per distinct parameter combination and reused.
// It also remembers which entry was last bound to each sampler unit.
class SamplerCache : NonCopyable
{
public:
SamplerCache();
// Calls Clear().
~SamplerCache();
// Binds a sampler matching tm0/tm1 to sampler unit `stage`, creating it on
// first use. `stage` indexes m_active_samplers, which has 8 entries.
void SetSamplerState(int stage, const TexMode0& tm0, const TexMode1& tm1);
// Deletes every cached sampler object and empties the cache.
void Clear();
private:
// Cache key: the two texture mode registers, also viewable as a single
// 64-bit integer (`hex`) that is used for ordering and comparison.
struct Params
{
union
{
struct
{
TexMode0 tm0;
TexMode1 tm1;
};
u64 hex;
};
// Zero-initialized key.
Params()
: hex()
{}
Params(const TexMode0& _tm0, const TexMode1& _tm1)
: tm0(_tm0)
, tm1(_tm1)
{
// The comparisons below rely on tm0+tm1 exactly filling `hex`.
static_assert(sizeof(Params) == 8, "Assuming I can treat this as a 64bit int.");
}
// Ordering for std::map: compares the raw 64-bit value.
bool operator<(const Params& other) const
{
return hex < other.hex;
}
bool operator!=(const Params& other) const
{
return hex != other.hex;
}
};
// Cached payload: the GL sampler object name. A default-constructed Value
// has sampler_id == 0, which SetSamplerState treats as "no sampler".
struct Value
{
Value()
: sampler_id()
// , last_frame_used()
{}
GLuint sampler_id;
//int last_frame_used;
};
// Applies the state described by `params` to the sampler object `sampler_id`.
void SetParameters(GLuint sampler_id, const Params& params);
// Looks up `params` in m_cache, creating the sampler if it is missing, and
// returns a copy of the entry.
std::pair<Params, Value> GetEntry(const Params& params);
// All sampler objects created so far, keyed by their parameters.
std::map<Params, Value> m_cache;
// Entry most recently bound to each sampler unit, indexed by `stage`.
std::pair<Params, Value> m_active_samplers[8];
// Value of g_ActiveConfig.iMaxAnisotropy seen by the last SetSamplerState
// call; a change makes SetSamplerState call Clear().
int m_last_max_anisotropy;
};
}
#endif

View File

@ -111,34 +111,20 @@ TextureCache::TCacheEntry::~TCacheEntry()
TextureCache::TCacheEntry::TCacheEntry()
{
glGenTextures(1, &texture);
currmode.hex = 0;
currmode1.hex = 0;
GL_REPORT_ERRORD();
}
void TextureCache::TCacheEntry::Bind(unsigned int stage)
{
// TODO: is this already done somewhere else?
TexMode0 &tm0 = bpmem.tex[stage >> 2].texMode0[stage & 3];
TexMode1 &tm1 = bpmem.tex[stage >> 2].texMode1[stage & 3];
if(currmode.hex != tm0.hex || currmode1.hex != tm1.hex)
if (s_Textures[stage] != texture)
{
if(s_ActiveTexture != stage)
if (s_ActiveTexture != stage)
{
glActiveTexture(GL_TEXTURE0 + stage);
if(s_Textures[stage] != texture)
glBindTexture(GL_TEXTURE_2D, texture);
s_ActiveTexture = stage;
}
SetTextureParameters(tm0, tm1);
s_ActiveTexture = stage;
s_Textures[stage] = texture;
}
else if (s_Textures[stage] != texture)
{
if(s_ActiveTexture != stage)
glActiveTexture(GL_TEXTURE0 + stage);
glBindTexture(GL_TEXTURE_2D, texture);
s_ActiveTexture = stage;
s_Textures[stage] = texture;
}
}
@ -225,15 +211,23 @@ TextureCache::TCacheEntryBase* TextureCache::CreateTexture(unsigned int width,
void TextureCache::TCacheEntry::Load(unsigned int stage, unsigned int width, unsigned int height,
unsigned int expanded_width, unsigned int level)
{
if(s_ActiveTexture != stage)
if (s_ActiveTexture != stage)
{
glActiveTexture(GL_TEXTURE0 + stage);
if(s_Textures[stage] != texture)
glBindTexture(GL_TEXTURE_2D, texture);
s_ActiveTexture = stage;
s_Textures[stage] = texture;
s_ActiveTexture = stage;
}
if(level == 0 && m_tex_levels != 0)
if (s_Textures[stage] != texture)
{
glBindTexture(GL_TEXTURE_2D, texture);
s_Textures[stage] = texture;
}
// TODO: sloppy, just do this on creation?
if (level == 0)
{
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, m_tex_levels - 1);
}
if (pcfmt != PC_TEX_FMT_DXT1)
{
@ -417,47 +411,6 @@ void TextureCache::TCacheEntry::FromRenderTarget(u32 dstAddr, unsigned int dstFo
g_renderer->RestoreAPIState();
}
void TextureCache::TCacheEntry::SetTextureParameters(const TexMode0 &newmode, const TexMode1 &newmode1)
{
const GLint c_MinLinearFilter[8] =
{
GL_NEAREST,
GL_NEAREST_MIPMAP_NEAREST,
GL_NEAREST_MIPMAP_LINEAR,
GL_NEAREST,
GL_LINEAR,
GL_LINEAR_MIPMAP_NEAREST,
GL_LINEAR_MIPMAP_LINEAR,
GL_LINEAR,
};
const GLint c_WrapSettings[4] =
{
GL_CLAMP_TO_EDGE,
GL_REPEAT,
GL_MIRRORED_REPEAT,
GL_REPEAT,
};
int filt = newmode.min_filter;
if (g_ActiveConfig.bForceFiltering && newmode.min_filter < 4)
filt += 4; // take equivalent forced linear
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, c_MinLinearFilter[filt & 7]);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, (newmode.mag_filter || g_Config.bForceFiltering) ? GL_LINEAR : GL_NEAREST);
glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_LOD, newmode1.min_lod / 16.0f);
glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_LOD, newmode1.max_lod / 16.0f);
glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_LOD_BIAS, newmode.lod_bias / 32.0f);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, c_WrapSettings[newmode.wrap_s]);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, c_WrapSettings[newmode.wrap_t]);
// TODO: Reset anisotrop when changed to 1
if (g_Config.iMaxAnisotropy >= 1)
glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT,
(float)(1 << g_ActiveConfig.iMaxAnisotropy));
}
TextureCache::TextureCache()
{
const char *pColorMatrixProg =

View File

@ -65,11 +65,6 @@ private:
void Bind(unsigned int stage);
bool Save(const char filename[], unsigned int level);
private:
void SetTextureParameters(const TexMode0 &newmode, const TexMode1 &newmode1);
TexMode0 currmode;
TexMode1 currmode1;
};
~TextureCache();

View File

@ -217,6 +217,7 @@ void VertexManager::vFlush()
{
if (usedtextures & (1 << i))
{
g_renderer->SetSamplerState(i % 4, i / 4);
FourTexUnits &tex = bpmem.tex[i >> 2];
TextureCache::TCacheEntryBase* tentry = TextureCache::Load(i,
(tex.texImage3[i&3].image_base/* & 0x1FFFFF*/) << 5,