From 51d287d9b053a9025e96ab48a39c047381594090 Mon Sep 17 00:00:00 2001 From: vlj Date: Tue, 2 Jun 2015 19:18:16 +0200 Subject: [PATCH] d3d12: Take alignment into account in streamBuffer And use it for texture upload --- rpcs3/Emu/RSX/D3D12/D3D12.h | 33 ++++++++++++++++------------ rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp | 2 +- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12.h b/rpcs3/Emu/RSX/D3D12/D3D12.h index 622622a56e..e23cb4f0de 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12.h @@ -31,8 +31,9 @@ void streamToBuffer(void* dst, void* src, size_t sizeInBytes) { for (unsigned i = 0; i < sizeInBytes / 16; i++) { - __m128i *srcPtr = (__m128i*) ((char*)src + i * 16); - _mm_stream_si128((__m128i*)((char*)dst + i * 16), *srcPtr); + + const __m128i &srcPtr = _mm_loadu_si128((__m128i*) ((char*)src + i * 16)); + _mm_stream_si128((__m128i*)((char*)dst + i * 16), srcPtr); } } @@ -44,20 +45,24 @@ inline void streamBuffer(void* dst, void* src, size_t sizeInBytes) { // Assume 64 bytes cache line - assert(powerOf2Align(sizeInBytes, 64)); - for (unsigned i = 0; i < sizeInBytes / 64; i++) + unsigned offset = 0; + bool isAligned = !((size_t)src & 15); + for (; (offset + 64) < sizeInBytes; offset += 64) { - char *line = (char*)src + i * 64; - _mm_prefetch(line, _MM_HINT_NTA); - __m128i *srcPtr = (__m128i*) (line); - _mm_stream_si128((__m128i*)((char*)dst + i * 64), *srcPtr); - srcPtr = (__m128i*) (line + 16); - _mm_stream_si128((__m128i*)((char*)dst + i * 64 + 16), *srcPtr); - srcPtr = (__m128i*) (line + 32); - _mm_stream_si128((__m128i*)((char*)dst + i * 64 + 32), *srcPtr); - srcPtr = (__m128i*) (line + 48); - _mm_stream_si128((__m128i*)((char*)dst + i * 64 + 48), *srcPtr); + char *line = (char*)src + offset; + char *dstline = (char*)dst + offset; + // prefetch next line + _mm_prefetch(line + 16, _MM_HINT_NTA); + __m128i srcPtr = isAligned ? _mm_load_si128((__m128i *)line) : _mm_loadu_si128((__m128i *)line); + _mm_stream_si128((__m128i*)dstline, srcPtr); + srcPtr = isAligned ? _mm_load_si128((__m128i *)(line + 16)) : _mm_loadu_si128((__m128i *)(line + 16)); + _mm_stream_si128((__m128i*)(dstline + 16), srcPtr); + srcPtr = isAligned ? _mm_load_si128((__m128i *)(line + 32)) : _mm_loadu_si128((__m128i *)(line + 32)); + _mm_stream_si128((__m128i*)(dstline + 32), srcPtr); + srcPtr = isAligned ? _mm_load_si128((__m128i *)(line + 48)) : _mm_loadu_si128((__m128i *)(line + 48)); + _mm_stream_si128((__m128i*)(dstline + 48), srcPtr); } + memcpy((char*)dst + offset, (char*)src + offset, sizeInBytes - offset); } inline diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index ae238f1dc2..e9ba950230 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -246,7 +246,7 @@ size_t D3D12GSRender::UploadTextures() } } else - streamToBuffer((char*)textureData + row * rowPitch, (char*)pixels + row * m_texture_pitch, m_texture_pitch); + streamBuffer((char*)textureData + row * rowPitch, (char*)pixels + row * m_texture_pitch, m_texture_pitch); } Texture->Unmap(0, nullptr);