From 9b370cd6028245c810f78f0d57e5eef74dcec997 Mon Sep 17 00:00:00 2001 From: rogerman Date: Wed, 15 Jul 2015 04:34:23 +0000 Subject: [PATCH] Render3D: - Fix possible crash due to aligned access. --- desmume/src/render3D.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/desmume/src/render3D.cpp b/desmume/src/render3D.cpp index 9d08d0dda..7cafb8723 100644 --- a/desmume/src/render3D.cpp +++ b/desmume/src/render3D.cpp @@ -576,12 +576,12 @@ Render3DError Render3D_SSE2::ClearFramebuffer(const GFX3D_State &renderState) { // Copy the colors to the color buffer. Since we can only copy 8 elements at once, // we need to load-store twice. - _mm_store_si128( (__m128i *)(this->clearImageColor16Buffer + i + 8), _mm_load_si128((__m128i *)(clearColorBuffer + i + 8)) ); - _mm_store_si128( (__m128i *)(this->clearImageColor16Buffer + i), _mm_load_si128((__m128i *)(clearColorBuffer + i)) ); + _mm_store_si128( (__m128i *)(this->clearImageColor16Buffer + i + 8), _mm_loadu_si128((__m128i *)(clearColorBuffer + i + 8)) ); + _mm_store_si128( (__m128i *)(this->clearImageColor16Buffer + i), _mm_loadu_si128((__m128i *)(clearColorBuffer + i)) ); // Write the depth values to the depth buffer. - __m128i clearDepthHi_vec128 = _mm_load_si128((__m128i *)(clearDepthBuffer + i + 8)); - __m128i clearDepthLo_vec128 = _mm_load_si128((__m128i *)(clearDepthBuffer + i)); + __m128i clearDepthHi_vec128 = _mm_loadu_si128((__m128i *)(clearDepthBuffer + i + 8)); + __m128i clearDepthLo_vec128 = _mm_loadu_si128((__m128i *)(clearDepthBuffer + i)); clearDepthHi_vec128 = _mm_and_si128(clearDepthHi_vec128, depthBitMask_vec128); clearDepthLo_vec128 = _mm_and_si128(clearDepthLo_vec128, depthBitMask_vec128); @@ -603,8 +603,8 @@ Render3DError Render3D_SSE2::ClearFramebuffer(const GFX3D_State &renderState) this->clearImageDepthBuffer[i+ 0] = dsDepthToD24_LUT[_mm_extract_epi16(clearDepthLo_vec128, 0)]; // Write the fog flags to the fog flag buffer. - clearDepthHi_vec128 = _mm_load_si128((__m128i *)(clearDepthBuffer + i + 8)); - clearDepthLo_vec128 = _mm_load_si128((__m128i *)(clearDepthBuffer + i)); + clearDepthHi_vec128 = _mm_loadu_si128((__m128i *)(clearDepthBuffer + i + 8)); + clearDepthLo_vec128 = _mm_loadu_si128((__m128i *)(clearDepthBuffer + i)); clearDepthHi_vec128 = _mm_and_si128(clearDepthHi_vec128, fogBufferBitMask_vec128); clearDepthLo_vec128 = _mm_and_si128(clearDepthLo_vec128, fogBufferBitMask_vec128); clearDepthHi_vec128 = _mm_srli_si128(clearDepthHi_vec128, 15);