Merge branch 'texcache-preload'

For further info, see revision 56ccfc5d9d4defb308e02a71d201aee9eef0a76e.
This commit is contained in:
NeoBrainX 2012-02-04 13:19:32 +01:00
commit 0bbb112298
10 changed files with 91 additions and 49 deletions

View File

@ -85,14 +85,14 @@ void LoadBPReg(const BPCmd &bp, BPMemory &bpMem)
void GetTlutLoadData(u32 &tlutAddr, u32 &memAddr, u32 &tlutXferCount, BPMemory &bpMem) void GetTlutLoadData(u32 &tlutAddr, u32 &memAddr, u32 &tlutXferCount, BPMemory &bpMem)
{ {
tlutAddr = (bpMem.tlutXferDest & 0x3FF) << 9; tlutAddr = (bpMem.tmem_config.tlut_dest & 0x3FF) << 9;
tlutXferCount = (bpMem.tlutXferDest & 0x1FFC00) >> 5; tlutXferCount = (bpMem.tmem_config.tlut_dest & 0x1FFC00) >> 5;
// TODO - figure out a cleaner way. // TODO - figure out a cleaner way.
if (Core::g_CoreStartupParameter.bWii) if (Core::g_CoreStartupParameter.bWii)
memAddr = bpmem.tlutXferSrc << 5; memAddr = bpmem.tmem_config.tlut_src << 5;
else else
memAddr = (bpmem.tlutXferSrc & 0xFFFFF) << 5; memAddr = (bpmem.tmem_config.tlut_src & 0xFFFFF) << 5;
} }
void LoadCPReg(u32 subCmd, u32 value, CPMemory &cpMem) void LoadCPReg(u32 subCmd, u32 value, CPMemory &cpMem)

View File

@ -65,10 +65,10 @@
#define BPMEM_UNKOWN_57 0x57 #define BPMEM_UNKOWN_57 0x57
#define BPMEM_REVBITS 0x58 #define BPMEM_REVBITS 0x58
#define BPMEM_SCISSOROFFSET 0x59 #define BPMEM_SCISSOROFFSET 0x59
#define BPMEM_UNKNOWN_60 0x60 #define BPMEM_PRELOAD_ADDR 0x60
#define BPMEM_UNKNOWN_61 0x61 #define BPMEM_PRELOAD_TMEMEVEN 0x61
#define BPMEM_UNKNOWN_62 0x62 #define BPMEM_PRELOAD_TMEMODD 0x62
#define BPMEM_TEXMODESYNC 0x63 #define BPMEM_PRELOAD_MODE 0x63
#define BPMEM_LOADTLUT0 0x64 #define BPMEM_LOADTLUT0 0x64
#define BPMEM_LOADTLUT1 0x65 #define BPMEM_LOADTLUT1 0x65
#define BPMEM_TEXINVALIDATE 0x66 #define BPMEM_TEXINVALIDATE 0x66
@ -487,10 +487,10 @@ union TexImage1
{ {
struct struct
{ {
u32 tmem_offset : 15; // we ignore texture caching for now, we do it ourselves u32 tmem_even : 15; // tmem line index for even LODs
u32 cache_width : 3; u32 cache_width : 3;
u32 cache_height : 3; u32 cache_height : 3;
u32 image_type : 1; u32 image_type : 1; // 1 if this texture is managed manually (0 means we'll autofetch the texture data whenever it changes)
}; };
u32 hex; u32 hex;
}; };
@ -499,7 +499,7 @@ union TexImage2
{ {
struct struct
{ {
u32 tmem_offset : 15; // we ignore texture caching for now, we do it ourselves u32 tmem_odd : 15; // tmem line index for odd LODs
u32 cache_width : 3; u32 cache_width : 3;
u32 cache_height : 3; u32 cache_height : 3;
}; };
@ -893,6 +893,25 @@ union UPE_Copy
} }
}; };
// Bitfield view of the tile-info word stored in BPS_TmemConfig::preload_tile_info.
// `hex` exposes the raw 32-bit value; the anonymous struct overlays named fields on it.
union BPU_PreloadTileInfo
{
// Raw register value.
u32 hex;
struct {
// Transfer length. The BPMEM_PRELOAD_MODE handler multiplies this by 32 to get
// the number of bytes copied from RAM into texMem.
u32 count : 15;
// NOTE(review): the meaning of this 2-bit field is not established by any code
// visible here - confirm against hardware documentation before relying on it.
u32 type : 2;
};
};
// Texture memory (TMEM) configuration registers. Embedded in BPMemory as `tmem_config`,
// where it is annotated as covering BP registers 0x60-0x66.
// NOTE(review): field order presumably mirrors that register order
// (BPMEM_PRELOAD_ADDR .. BPMEM_TEXINVALIDATE), so do not reorder or resize fields - confirm.
struct BPS_TmemConfig
{
// Preload source; shifted left by 5 to form the RAM address passed to Memory::GetPointer.
u32 preload_addr;
// Preload destination; multiplied by TMEM_LINE_SIZE to get the byte offset into texMem.
u32 preload_tmem_even;
// Not read by the preload handler.
// NOTE(review): presumably the counterpart of preload_tmem_even for odd LODs - confirm.
u32 preload_tmem_odd;
// Preload size/type word; see BPU_PreloadTileInfo.
BPU_PreloadTileInfo preload_tile_info;
// TLUT source; shifted left by 5 to form the RAM address (masked with 0xFFFFF first when not on Wii).
u32 tlut_src;
// TLUT destination and size: (value & 0x3FF) << 9 is the TLUT address in TMEM,
// (value & 0x1FFC00) >> 5 is the transfer count.
u32 tlut_dest;
// NOTE(review): presumably the BPMEM_TEXINVALIDATE (0x66) register value; no reader is visible here - confirm.
u32 texinvalidate;
};
// All of BP memory // All of BP memory
@ -951,10 +970,8 @@ struct BPMemory
u32 boundbox1;//56 u32 boundbox1;//56
u32 unknown7[2];//57,58 u32 unknown7[2];//57,58
X10Y10 scissorOffset; //59 X10Y10 scissorOffset; //59
u32 unknown8[10]; //5a,5b,5c,5d, 5e,5f,60,61, 62, 63 (GXTexModeSync), 0x60-0x63 have to do with preloaded textures? u32 unknown8[6]; //5a,5b,5c,5d, 5e,5f
u32 tlutXferSrc; //64 BPS_TmemConfig tmem_config; // 60-66
u32 tlutXferDest; //65
u32 texinvalidate;//66
u32 metric; //67 u32 metric; //67
FieldMode fieldmode;//68 FieldMode fieldmode;//68
u32 unknown10[7];//69-6F u32 unknown10[7];//69-6F

View File

@ -30,6 +30,7 @@
#include "VertexLoader.h" #include "VertexLoader.h"
#include "VertexShaderManager.h" #include "VertexShaderManager.h"
#include "Thread.h" #include "Thread.h"
#include "HW/Memmap.h"
using namespace BPFunctions; using namespace BPFunctions;
@ -301,14 +302,14 @@ void BPWritten(const BPCmd& bp)
// TODO - figure out a cleaner way. // TODO - figure out a cleaner way.
if (GetConfig(CONFIG_ISWII)) if (GetConfig(CONFIG_ISWII))
ptr = GetPointer(bpmem.tlutXferSrc << 5); ptr = GetPointer(bpmem.tmem_config.tlut_src << 5);
else else
ptr = GetPointer((bpmem.tlutXferSrc & 0xFFFFF) << 5); ptr = GetPointer((bpmem.tmem_config.tlut_src & 0xFFFFF) << 5);
if (ptr) if (ptr)
memcpy_gc(texMem + tlutTMemAddr, ptr, tlutXferCount); memcpy_gc(texMem + tlutTMemAddr, ptr, tlutXferCount);
else else
PanicAlert("Invalid palette pointer %08x %08x %08x", bpmem.tlutXferSrc, bpmem.tlutXferSrc << 5, (bpmem.tlutXferSrc & 0xFFFFF)<< 5); PanicAlert("Invalid palette pointer %08x %08x %08x", bpmem.tmem_config.tlut_src, bpmem.tmem_config.tlut_src << 5, (bpmem.tmem_config.tlut_src & 0xFFFFF)<< 5);
// TODO(ector) : kill all textures that use this palette // TODO(ector) : kill all textures that use this palette
// Not sure if it's a good idea, though. For now, we hash texture palettes // Not sure if it's a good idea, though. For now, we hash texture palettes
@ -466,14 +467,22 @@ void BPWritten(const BPCmd& bp)
DEBUG_LOG(VIDEO, "Uknown BP Reg 0x57: %08x", bp.newvalue); DEBUG_LOG(VIDEO, "Uknown BP Reg 0x57: %08x", bp.newvalue);
break; break;
case BPMEM_UNKNOWN_60: case BPMEM_PRELOAD_ADDR:
case BPMEM_UNKNOWN_61: case BPMEM_PRELOAD_TMEMEVEN:
case BPMEM_UNKNOWN_62: case BPMEM_PRELOAD_TMEMODD: // Used when PRELOAD_MODE is set
// Cases added due to: http://code.google.com/p/dolphin-emu/issues/detail?id=360#c90
// Are these related to BBox?
break; break;
case BPMEM_TEXMODESYNC: // Always set to 0 when GX_TexModeSync() is called. case BPMEM_PRELOAD_MODE: // Set to 0 when GX_TexModeSync() is called.
// if this is different from 0, manual TMEM management is used.
if (bp.newvalue != 0)
{
// NOTE(neobrain): Apparently tmemodd doesn't affect hardware behavior at all (libogc uses it just as a buffer and switches its contents with tmemeven whenever this is called)
BPS_TmemConfig& tmem_cfg = bpmem.tmem_config;
u8* ram_ptr = Memory::GetPointer(tmem_cfg.preload_addr << 5);
u32 tmem_addr = tmem_cfg.preload_tmem_even * TMEM_LINE_SIZE;
u32 size = tmem_cfg.preload_tile_info.count * 32;
memcpy(texMem + tmem_addr, ram_ptr, size);
}
break; break;
// ------------------------------------------------ // ------------------------------------------------

View File

@ -173,12 +173,12 @@ void TextureCache::ClearRenderTargets()
iter = textures.begin(), iter = textures.begin(),
tcend = textures.end(); tcend = textures.end();
for (; iter!=tcend; ++iter) for (; iter!=tcend; ++iter)
iter->second->type = TCET_AUTOFETCH; iter->second->type = TCET_NORMAL;
} }
TextureCache::TCacheEntryBase* TextureCache::Load(unsigned int stage, TextureCache::TCacheEntryBase* TextureCache::Load(unsigned int stage,
u32 address, unsigned int width, unsigned int height, int texformat, u32 address, unsigned int width, unsigned int height, int texformat,
unsigned int tlutaddr, int tlutfmt, bool UseNativeMips, unsigned int maxlevel) unsigned int tlutaddr, int tlutfmt, bool UseNativeMips, unsigned int maxlevel, bool from_tmem)
{ {
if (0 == address) if (0 == address)
return NULL; return NULL;
@ -203,10 +203,12 @@ TextureCache::TCacheEntryBase* TextureCache::Load(unsigned int stage,
if (isPaletteTexture) if (isPaletteTexture)
full_format = texformat | (tlutfmt << 16); full_format = texformat | (tlutfmt << 16);
u8* ptr = Memory::GetPointer(address);
const u32 texture_size = TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, texformat); const u32 texture_size = TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, texformat);
u8* src_data;
if (from_tmem) src_data = &texMem[bpmem.tex[stage/4].texImage1[stage%4].tmem_even * TMEM_LINE_SIZE];
else src_data = Memory::GetPointer(address);
tex_hash = GetHash64(ptr, texture_size, g_ActiveConfig.iSafeTextureCache_ColorSamples); tex_hash = GetHash64(src_data, texture_size, g_ActiveConfig.iSafeTextureCache_ColorSamples);
if (isPaletteTexture) if (isPaletteTexture)
{ {
const u32 palette_size = TexDecoder_GetPaletteSize(texformat); const u32 palette_size = TexDecoder_GetPaletteSize(texformat);
@ -252,7 +254,7 @@ TextureCache::TCacheEntryBase* TextureCache::Load(unsigned int stage,
// //
// TODO: Don't we need to force texture decoding to RGBA8 for dynamic EFB copies? // TODO: Don't we need to force texture decoding to RGBA8 for dynamic EFB copies?
// TODO: Actually, it should be enough if the internal texture format matches... // TODO: Actually, it should be enough if the internal texture format matches...
if ((entry->type == TCET_AUTOFETCH && width == entry->native_width && height == entry->native_height && full_format == entry->format && entry->num_mipmaps == maxlevel) if ((entry->type == TCET_NORMAL && width == entry->native_width && height == entry->native_height && full_format == entry->format && entry->num_mipmaps == maxlevel)
|| (entry->type == TCET_EC_DYNAMIC && entry->native_width == width && entry->native_height == height)) || (entry->type == TCET_EC_DYNAMIC && entry->native_width == width && entry->native_height == height))
{ {
// reuse the texture // reuse the texture
@ -283,8 +285,9 @@ TextureCache::TCacheEntryBase* TextureCache::Load(unsigned int stage,
} }
} }
// TODO: RGBA8 textures are stored non-contiguously in tmem, which might cause problems when preloading is enabled
if (pcfmt == PC_TEX_FMT_NONE) if (pcfmt == PC_TEX_FMT_NONE)
pcfmt = TexDecoder_Decode(temp, ptr, expandedWidth, pcfmt = TexDecoder_Decode(temp, src_data, expandedWidth,
expandedHeight, texformat, tlutaddr, tlutfmt, g_ActiveConfig.backend_info.bUseRGBATextures); expandedHeight, texformat, tlutaddr, tlutfmt, g_ActiveConfig.backend_info.bUseRGBATextures);
bool isPow2; bool isPow2;
@ -301,13 +304,13 @@ TextureCache::TCacheEntryBase* TextureCache::Load(unsigned int stage,
if (NULL == entry) { if (NULL == entry) {
textures[texID] = entry = g_texture_cache->CreateTexture(width, height, expandedWidth, texLevels, pcfmt); textures[texID] = entry = g_texture_cache->CreateTexture(width, height, expandedWidth, texLevels, pcfmt);
// Sometimes, we can get around recreating a texture if only the number of mip levels gets changes // Sometimes, we can get around recreating a texture if only the number of mip levels changes
// e.g. if our texture cache entry got too many mipmap levels we can limit the number of used levels by setting the appropriate render states // e.g. if our texture cache entry got too many mipmap levels we can limit the number of used levels by setting the appropriate render states
// Thus, we don't update this member for every Load, but just whenever the texture gets recreated // Thus, we don't update this member for every Load, but just whenever the texture gets recreated
// //
// TODO: Won't we end up recreating textures all the time because maxlevel doesn't necessarily equal texLevels? // TODO: Won't we end up recreating textures all the time because maxlevel doesn't necessarily equal texLevels?
entry->num_mipmaps = maxlevel; // TODO: Does this actually work? We can't really adjust mipmap settings per-stage... entry->num_mipmaps = maxlevel; // TODO: Does this actually work? We can't really adjust mipmap settings per-stage...
entry->type = TCET_AUTOFETCH; entry->type = TCET_NORMAL;
GFX_DEBUGGER_PAUSE_AT(NEXT_NEW_TEXTURE, true); GFX_DEBUGGER_PAUSE_AT(NEXT_NEW_TEXTURE, true);
} }
@ -315,13 +318,13 @@ TextureCache::TCacheEntryBase* TextureCache::Load(unsigned int stage,
entry->SetGeneralParameters(address, texture_size, full_format, entry->num_mipmaps); entry->SetGeneralParameters(address, texture_size, full_format, entry->num_mipmaps);
entry->SetDimensions(nativeW, nativeH, width, height); entry->SetDimensions(nativeW, nativeH, width, height);
entry->hash = tex_hash; entry->hash = tex_hash;
if (g_ActiveConfig.bCopyEFBToTexture) entry->type = TCET_AUTOFETCH; if (g_ActiveConfig.bCopyEFBToTexture) entry->type = TCET_NORMAL;
else if (entry->IsEfbCopy()) entry->type = TCET_EC_DYNAMIC; else if (entry->IsEfbCopy()) entry->type = TCET_EC_DYNAMIC;
// load texture // load texture
entry->Load(width, height, expandedWidth, 0, (texLevels == 0)); entry->Load(width, height, expandedWidth, 0, (texLevels == 0));
// load mips // load mips - TODO: Loading mipmaps from tmem is untested!
if (texLevels > 1 && pcfmt != PC_TEX_FMT_NONE) if (texLevels > 1 && pcfmt != PC_TEX_FMT_NONE)
{ {
const unsigned int bsdepth = TexDecoder_GetTexelSizeInNibbles(texformat); const unsigned int bsdepth = TexDecoder_GetTexelSizeInNibbles(texformat);
@ -329,20 +332,31 @@ TextureCache::TCacheEntryBase* TextureCache::Load(unsigned int stage,
unsigned int level = 1; unsigned int level = 1;
unsigned int mipWidth = (width + 1) >> 1; unsigned int mipWidth = (width + 1) >> 1;
unsigned int mipHeight = (height + 1) >> 1; unsigned int mipHeight = (height + 1) >> 1;
ptr += texture_size;
u8* ptr_even = NULL, *ptr_odd = NULL;
if (from_tmem)
{
ptr_even = &texMem[bpmem.tex[stage/4].texImage1[stage%4].tmem_even * TMEM_LINE_SIZE + texture_size];
ptr_odd = &texMem[bpmem.tex[stage/4].texImage2[stage%4].tmem_odd * TMEM_LINE_SIZE];
}
src_data += texture_size;
while ((mipHeight || mipWidth) && (level < texLevels)) while ((mipHeight || mipWidth) && (level < texLevels))
{ {
u8** ptr;
if (from_tmem) ptr = (level % 2) ? &ptr_odd : &ptr_even;
else ptr = &src_data;
const unsigned int currentWidth = (mipWidth > 0) ? mipWidth : 1; const unsigned int currentWidth = (mipWidth > 0) ? mipWidth : 1;
const unsigned int currentHeight = (mipHeight > 0) ? mipHeight : 1; const unsigned int currentHeight = (mipHeight > 0) ? mipHeight : 1;
expandedWidth = (currentWidth + bsw) & (~bsw); expandedWidth = (currentWidth + bsw) & (~bsw);
expandedHeight = (currentHeight + bsh) & (~bsh); expandedHeight = (currentHeight + bsh) & (~bsh);
TexDecoder_Decode(temp, ptr, expandedWidth, expandedHeight, texformat, tlutaddr, tlutfmt, g_ActiveConfig.backend_info.bUseRGBATextures); TexDecoder_Decode(temp, *ptr, expandedWidth, expandedHeight, texformat, tlutaddr, tlutfmt, g_ActiveConfig.backend_info.bUseRGBATextures);
entry->Load(currentWidth, currentHeight, expandedWidth, level, false); entry->Load(currentWidth, currentHeight, expandedWidth, level, false);
ptr += ((std::max(mipWidth, bsw) * std::max(mipHeight, bsh) * bsdepth) >> 1); *ptr += ((std::max(mipWidth, bsw) * std::max(mipHeight, bsh) * bsdepth) >> 1);
mipWidth >>= 1; mipWidth >>= 1;
mipHeight >>= 1; mipHeight >>= 1;
++level; ++level;

View File

@ -32,8 +32,7 @@ class TextureCache
public: public:
enum TexCacheEntryType enum TexCacheEntryType
{ {
TCET_AUTOFETCH, // Most textures, automatically fetched whenever they change TCET_NORMAL,
// TCET_PRELOADED, // Textures which reside in TMEM areas which are manually managed by the game
TCET_EC_VRAM, // EFB copy which sits in VRAM and is ready to be used TCET_EC_VRAM, // EFB copy which sits in VRAM and is ready to be used
TCET_EC_DYNAMIC, // EFB copy which sits in RAM and needs to be decoded before being used TCET_EC_DYNAMIC, // EFB copy which sits in RAM and needs to be decoded before being used
}; };
@ -115,7 +114,7 @@ public:
virtual TCacheEntryBase* CreateRenderTargetTexture(unsigned int scaled_tex_w, unsigned int scaled_tex_h) = 0; virtual TCacheEntryBase* CreateRenderTargetTexture(unsigned int scaled_tex_w, unsigned int scaled_tex_h) = 0;
static TCacheEntryBase* Load(unsigned int stage, u32 address, unsigned int width, unsigned int height, static TCacheEntryBase* Load(unsigned int stage, u32 address, unsigned int width, unsigned int height,
int format, unsigned int tlutaddr, int tlutfmt, bool UseNativeMips, unsigned int maxlevel); int format, unsigned int tlutaddr, int tlutfmt, bool UseNativeMips, unsigned int maxlevel, bool from_tmem);
static void CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat, unsigned int srcFormat, static void CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat, unsigned int srcFormat,
const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf); const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf);

View File

@ -20,8 +20,8 @@
#include "Hash.h" #include "Hash.h"
enum enum
{ {
TMEM_SIZE = 1024*1024, TMEM_SIZE = 1024*1024,
HALFTMEM_SIZE = 512*1024 TMEM_LINE_SIZE = 32,
}; };
extern GC_ALIGNED16(u8 texMem[TMEM_SIZE]); extern GC_ALIGNED16(u8 texMem[TMEM_SIZE]);

View File

@ -228,7 +228,8 @@ void VertexManager::vFlush()
tex.texImage0[i&3].format, tex.texTlut[i&3].tmem_offset<<9, tex.texImage0[i&3].format, tex.texTlut[i&3].tmem_offset<<9,
tex.texTlut[i&3].tlut_format, tex.texTlut[i&3].tlut_format,
(tex.texMode0[i&3].min_filter & 3) && (tex.texMode0[i&3].min_filter != 8) && g_ActiveConfig.bUseNativeMips, (tex.texMode0[i&3].min_filter & 3) && (tex.texMode0[i&3].min_filter != 8) && g_ActiveConfig.bUseNativeMips,
(tex.texMode1[i&3].max_lod >> 4)); tex.texMode1[i&3].max_lod >> 4,
tex.texImage1[i&3].image_type);
if (tentry) if (tentry)
{ {

View File

@ -137,7 +137,8 @@ void VertexManager::vFlush()
tex.texImage0[i&3].format, tex.texTlut[i&3].tmem_offset<<9, tex.texImage0[i&3].format, tex.texTlut[i&3].tmem_offset<<9,
tex.texTlut[i&3].tlut_format, tex.texTlut[i&3].tlut_format,
(tex.texMode0[i&3].min_filter & 3) && (tex.texMode0[i&3].min_filter != 8) && g_ActiveConfig.bUseNativeMips, (tex.texMode0[i&3].min_filter & 3) && (tex.texMode0[i&3].min_filter != 8) && g_ActiveConfig.bUseNativeMips,
(tex.texMode1[i&3].max_lod >> 4)); tex.texMode1[i&3].max_lod >> 4,
tex.texImage1[i&3].image_type);
if (tentry) if (tentry)
{ {

View File

@ -155,7 +155,8 @@ void VertexManager::vFlush()
tex.texImage0[i&3].format, tex.texTlut[i&3].tmem_offset<<9, tex.texImage0[i&3].format, tex.texTlut[i&3].tmem_offset<<9,
tex.texTlut[i&3].tlut_format, tex.texTlut[i&3].tlut_format,
(tex.texMode0[i&3].min_filter & 3) && (tex.texMode0[i&3].min_filter != 8) && g_ActiveConfig.bUseNativeMips, (tex.texMode0[i&3].min_filter & 3) && (tex.texMode0[i&3].min_filter != 8) && g_ActiveConfig.bUseNativeMips,
(tex.texMode1[i&3].max_lod >> 4)); tex.texMode1[i&3].max_lod >> 4,
tex.texImage1[i&3].image_type);
if (tentry) if (tentry)
{ {

View File

@ -101,14 +101,14 @@ void SWBPWritten(int address, int newvalue)
// TODO - figure out a cleaner way. // TODO - figure out a cleaner way.
if (Core::g_CoreStartupParameter.bWii) if (Core::g_CoreStartupParameter.bWii)
ptr = Memory::GetPointer(bpmem.tlutXferSrc << 5); ptr = Memory::GetPointer(bpmem.tmem_config.tlut_src << 5);
else else
ptr = Memory::GetPointer((bpmem.tlutXferSrc & 0xFFFFF) << 5); ptr = Memory::GetPointer((bpmem.tmem_config.tlut_src & 0xFFFFF) << 5);
if (ptr) if (ptr)
memcpy_gc(texMem + tlutTMemAddr, ptr, tlutXferCount); memcpy_gc(texMem + tlutTMemAddr, ptr, tlutXferCount);
else else
PanicAlert("Invalid palette pointer %08x %08x %08x", bpmem.tlutXferSrc, bpmem.tlutXferSrc << 5, (bpmem.tlutXferSrc & 0xFFFFF)<< 5); PanicAlert("Invalid palette pointer %08x %08x %08x", bpmem.tmem_config.tlut_src, bpmem.tmem_config.tlut_src << 5, (bpmem.tmem_config.tlut_src & 0xFFFFF)<< 5);
break; break;
} }