Render3D:

- Fix possible crash due to aligned access.
2015-07-15 04:34:23 +00:00 · 2015-07-15 04:34:23 +00:00 · 9b370cd602
parent 3c9931b1c8
commit 9b370cd602
1 changed files with 6 additions and 6 deletions
--- a/desmume/src/render3D.cpp
+++ b/desmume/src/render3D.cpp
@ -576,12 +576,12 @@ Render3DError Render3D_SSE2::ClearFramebuffer(const GFX3D_State &renderState)
 			{
 				// Copy the colors to the color buffer. Since we can only copy 8 elements at once,
 				// we need to load-store twice.
-				_mm_store_si128( (__m128i *)(this->clearImageColor16Buffer + i + 8), _mm_load_si128((__m128i *)(clearColorBuffer + i + 8)) );
-				_mm_store_si128( (__m128i *)(this->clearImageColor16Buffer + i), _mm_load_si128((__m128i *)(clearColorBuffer + i)) );
+				_mm_store_si128( (__m128i *)(this->clearImageColor16Buffer + i + 8), _mm_loadu_si128((__m128i *)(clearColorBuffer + i + 8)) );
+				_mm_store_si128( (__m128i *)(this->clearImageColor16Buffer + i), _mm_loadu_si128((__m128i *)(clearColorBuffer + i)) );
 				
 				// Write the depth values to the depth buffer.
-				__m128i clearDepthHi_vec128 = _mm_load_si128((__m128i *)(clearDepthBuffer + i + 8));
-				__m128i clearDepthLo_vec128 = _mm_load_si128((__m128i *)(clearDepthBuffer + i));
+				__m128i clearDepthHi_vec128 = _mm_loadu_si128((__m128i *)(clearDepthBuffer + i + 8));
+				__m128i clearDepthLo_vec128 = _mm_loadu_si128((__m128i *)(clearDepthBuffer + i));
 				clearDepthHi_vec128 = _mm_and_si128(clearDepthHi_vec128, depthBitMask_vec128);
 				clearDepthLo_vec128 = _mm_and_si128(clearDepthLo_vec128, depthBitMask_vec128);
 				
@ -603,8 +603,8 @@ Render3DError Render3D_SSE2::ClearFramebuffer(const GFX3D_State &renderState)
 				this->clearImageDepthBuffer[i+ 0] = dsDepthToD24_LUT[_mm_extract_epi16(clearDepthLo_vec128, 0)];
 				
 				// Write the fog flags to the fog flag buffer.
-				clearDepthHi_vec128 = _mm_load_si128((__m128i *)(clearDepthBuffer + i + 8));
-				clearDepthLo_vec128 = _mm_load_si128((__m128i *)(clearDepthBuffer + i));
+				clearDepthHi_vec128 = _mm_loadu_si128((__m128i *)(clearDepthBuffer + i + 8));
+				clearDepthLo_vec128 = _mm_loadu_si128((__m128i *)(clearDepthBuffer + i));
 				clearDepthHi_vec128 = _mm_and_si128(clearDepthHi_vec128, fogBufferBitMask_vec128);
 				clearDepthLo_vec128 = _mm_and_si128(clearDepthLo_vec128, fogBufferBitMask_vec128);
 				clearDepthHi_vec128 = _mm_srli_si128(clearDepthHi_vec128, 15);