Merge pull request #3188 from lioncash/texcache

TextureCacheBase: Change CacheLinesPerRow to BytesPerRow
This commit is contained in:
Scott Mansell 2015-10-25 17:46:43 +13:00
commit bd20dd3962
4 changed files with 23 additions and 20 deletions

View File

@ -101,7 +101,9 @@ void PSTextureEncoder::Encode(u8* dst, const TextureCache::TCacheEntryBase *text
// Set up all the state for EFB encoding // Set up all the state for EFB encoding
{ {
D3D11_VIEWPORT vp = CD3D11_VIEWPORT(0.f, 0.f, FLOAT(texture_entry->CacheLinesPerRow() * 8), FLOAT(texture_entry->NumBlocksY())); const u32 words_per_row = texture_entry->BytesPerRow() / sizeof(u32);
D3D11_VIEWPORT vp = CD3D11_VIEWPORT(0.f, 0.f, FLOAT(words_per_row), FLOAT(texture_entry->NumBlocksY()));
D3D::context->RSSetViewports(1, &vp); D3D::context->RSSetViewports(1, &vp);
EFBRectangle fullSrcRect; EFBRectangle fullSrcRect;
@ -143,7 +145,7 @@ void PSTextureEncoder::Encode(u8* dst, const TextureCache::TCacheEntryBase *text
VertexShaderCache::GetSimpleInputLayout()); VertexShaderCache::GetSimpleInputLayout());
// Copy to staging buffer // Copy to staging buffer
D3D11_BOX srcBox = CD3D11_BOX(0, 0, 0, texture_entry->CacheLinesPerRow() * 8, texture_entry->NumBlocksY(), 1); D3D11_BOX srcBox = CD3D11_BOX(0, 0, 0, words_per_row, texture_entry->NumBlocksY(), 1);
D3D::context->CopySubresourceRegion(m_outStage, 0, 0, 0, 0, m_out, 0, &srcBox); D3D::context->CopySubresourceRegion(m_outStage, 0, 0, 0, 0, m_out, 0, &srcBox);
// Transfer staging buffer to GameCube/Wii RAM // Transfer staging buffer to GameCube/Wii RAM
@ -152,7 +154,7 @@ void PSTextureEncoder::Encode(u8* dst, const TextureCache::TCacheEntryBase *text
CHECK(SUCCEEDED(hr), "map staging buffer (0x%x)", hr); CHECK(SUCCEEDED(hr), "map staging buffer (0x%x)", hr);
u8* src = (u8*)map.pData; u8* src = (u8*)map.pData;
u32 readStride = std::min(texture_entry->CacheLinesPerRow() * 32, map.RowPitch); u32 readStride = std::min(texture_entry->BytesPerRow(), map.RowPitch);
for (unsigned int y = 0; y < texture_entry->NumBlocksY(); ++y) for (unsigned int y = 0; y < texture_entry->NumBlocksY(); ++y)
{ {
memcpy(dst, src, readStride); memcpy(dst, src, readStride);

View File

@ -279,7 +279,7 @@ void EncodeToRamFromTexture(u8 *dest_ptr, const TextureCache::TCacheEntryBase *t
source.left, source.top, texture_entry->native_width, bScaleByHalf ? 2 : 1); source.left, source.top, texture_entry->native_width, bScaleByHalf ? 2 : 1);
EncodeToRamUsingShader(source_texture, EncodeToRamUsingShader(source_texture,
dest_ptr, texture_entry->CacheLinesPerRow() * 32, texture_entry->NumBlocksY(), dest_ptr, texture_entry->BytesPerRow(), texture_entry->NumBlocksY(),
texture_entry->memory_stride, bScaleByHalf > 0 && !bFromZBuffer); texture_entry->memory_stride, bScaleByHalf > 0 && !bFromZBuffer);
} }

View File

@ -482,7 +482,7 @@ TextureCache::TCacheEntryBase* TextureCache::Load(const u32 stage)
TCacheEntryBase* entry = iter->second; TCacheEntryBase* entry = iter->second;
// Do not load strided EFB copies, they are not meant to be used directly // Do not load strided EFB copies, they are not meant to be used directly
if (entry->IsEfbCopy() && entry->native_width == nativeW && entry->native_height == nativeH && if (entry->IsEfbCopy() && entry->native_width == nativeW && entry->native_height == nativeH &&
entry->memory_stride == entry->CacheLinesPerRow() * 32) entry->memory_stride == entry->BytesPerRow())
{ {
// EFB copies have slightly different rules as EFB copy formats have different // EFB copies have slightly different rules as EFB copy formats have different
// meanings from texture formats. // meanings from texture formats.
@ -1068,7 +1068,7 @@ void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat
// Invalidate all textures that overlap the range of our efb copy. // Invalidate all textures that overlap the range of our efb copy.
// Unless our efb copy has a weird stride, then we want avoid invalidating textures which // Unless our efb copy has a weird stride, then we want avoid invalidating textures which
// we might be able to do a partial texture update on. // we might be able to do a partial texture update on.
if (entry->memory_stride == entry->CacheLinesPerRow() * 32) if (entry->memory_stride == entry->BytesPerRow())
{ {
TexCache::iterator iter = textures_by_address.begin(); TexCache::iterator iter = textures_by_address.begin();
while (iter != textures_by_address.end()) while (iter != textures_by_address.end())
@ -1093,7 +1093,7 @@ void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat
u32 address = dstAddr; u32 address = dstAddr;
for (u32 i = 0; i < entry->NumBlocksY(); i++) for (u32 i = 0; i < entry->NumBlocksY(); i++)
{ {
FifoRecorder::GetInstance().UseMemory(address, entry->CacheLinesPerRow() * 32, MemoryUpdate::TEXTURE_MAP, true); FifoRecorder::GetInstance().UseMemory(address, entry->BytesPerRow(), MemoryUpdate::TEXTURE_MAP, true);
address += entry->memory_stride; address += entry->memory_stride;
} }
} }
@ -1136,18 +1136,19 @@ TextureCache::TexCache::iterator TextureCache::FreeTexture(TexCache::iterator it
return textures_by_address.erase(iter); return textures_by_address.erase(iter);
} }
// Side-by-side diff of one function: the left column is the removed CacheLinesPerRow(), the right column is its replacement BytesPerRow().
// Both count the blocks spanning native_width. The old version returned that count (doubled for GX_TF_RGBA8); the new version returns the count multiplied by 32 bytes per block (64 for GX_TF_RGBA8).
u32 TextureCache::TCacheEntryBase::CacheLinesPerRow() const u32 TextureCache::TCacheEntryBase::BytesPerRow() const
{ {
u32 blockW = TexDecoder_GetBlockWidthInTexels(format); const u32 blockW = TexDecoder_GetBlockWidthInTexels(format);
// Round up source width to multiple of block size
u32 actualWidth = ROUND_UP(native_width, blockW);
u32 numBlocksX = actualWidth / blockW; // Round up source width to multiple of block size
const u32 actualWidth = ROUND_UP(native_width, blockW);
const u32 numBlocksX = actualWidth / blockW;
// RGBA takes two cache lines per block; all others take one // RGBA takes two cache lines per block; all others take one
if (format == GX_TF_RGBA8) const u32 bytes_per_block = format == GX_TF_RGBA8 ? 64 : 32;
numBlocksX = numBlocksX * 2;
return numBlocksX; return numBlocksX * bytes_per_block;
} }
u32 TextureCache::TCacheEntryBase::NumBlocksY() const u32 TextureCache::TCacheEntryBase::NumBlocksY() const
@ -1164,7 +1165,7 @@ void TextureCache::TCacheEntryBase::SetEfbCopy(u32 stride)
is_efb_copy = true; is_efb_copy = true;
memory_stride = stride; memory_stride = stride;
_assert_msg_(VIDEO, memory_stride >= CacheLinesPerRow(), "Memory stride is too small"); _assert_msg_(VIDEO, memory_stride >= BytesPerRow(), "Memory stride is too small");
size_in_bytes = memory_stride * NumBlocksY(); size_in_bytes = memory_stride * NumBlocksY();
} }
@ -1174,7 +1175,7 @@ void TextureCache::TCacheEntryBase::Zero(u8* ptr)
{ {
for (u32 i = 0; i < NumBlocksY(); i++) for (u32 i = 0; i < NumBlocksY(); i++)
{ {
memset(ptr, 0, CacheLinesPerRow() * 32); memset(ptr, 0, BytesPerRow());
ptr += memory_stride; ptr += memory_stride;
} }
} }
@ -1182,7 +1183,7 @@ void TextureCache::TCacheEntryBase::Zero(u8* ptr)
u64 TextureCache::TCacheEntryBase::CalculateHash() const u64 TextureCache::TCacheEntryBase::CalculateHash() const
{ {
u8* ptr = Memory::GetPointer(addr); u8* ptr = Memory::GetPointer(addr);
if (memory_stride == CacheLinesPerRow() * 32) if (memory_stride == BytesPerRow())
{ {
return GetHash64(ptr, size_in_bytes, g_ActiveConfig.iSafeTextureCache_ColorSamples); return GetHash64(ptr, size_in_bytes, g_ActiveConfig.iSafeTextureCache_ColorSamples);
} }
@ -1201,7 +1202,7 @@ u64 TextureCache::TCacheEntryBase::CalculateHash() const
for (u32 i = 0; i < blocks; i++) for (u32 i = 0; i < blocks; i++)
{ {
// Multiply by a prime number to mix the hash up a bit. This prevents identical blocks from canceling each other out // Multiply by a prime number to mix the hash up a bit. This prevents identical blocks from canceling each other out
temp_hash = (temp_hash * 397) ^ GetHash64(ptr, CacheLinesPerRow() * 32, samples_per_row); temp_hash = (temp_hash * 397) ^ GetHash64(ptr, BytesPerRow(), samples_per_row);
ptr += memory_stride; ptr += memory_stride;
} }
return temp_hash; return temp_hash;

View File

@ -111,7 +111,7 @@ public:
bool IsEfbCopy() const { return is_efb_copy; } bool IsEfbCopy() const { return is_efb_copy; }
u32 NumBlocksY() const; u32 NumBlocksY() const;
u32 CacheLinesPerRow() const; u32 BytesPerRow() const;
void Zero(u8* ptr); void Zero(u8* ptr);