diff --git a/desmume/src/GPU.cpp b/desmume/src/GPU.cpp index b632749c6..e7b37e054 100644 --- a/desmume/src/GPU.cpp +++ b/desmume/src/GPU.cpp @@ -2811,8 +2811,7 @@ void GPU_RenderLine(NDS_Screen *screen, const u16 l, bool skip) { __m128i fifoColor = _mm_set_epi32(DISP_FIFOrecv(), DISP_FIFOrecv(), DISP_FIFOrecv(), DISP_FIFOrecv()); fifoColor = _mm_shuffle_epi32(fifoColor, 0x1B); // We need to shuffle the four FIFO values back into the correct order, since they were originally loaded in reverse order. - - ((__m128i *)dstLine)[i] = fifoColor & fifoMask; + _mm_store_si128((__m128i *)dstLine + i, _mm_and_si128(fifoColor, fifoMask)); } #else for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16) / sizeof(u32); i++) diff --git a/desmume/src/render3D.cpp b/desmume/src/render3D.cpp index 9f895870e..9d08d0dda 100644 --- a/desmume/src/render3D.cpp +++ b/desmume/src/render3D.cpp @@ -519,9 +519,9 @@ Render3DError Render3D_SSE2::FlushFramebuffer(FragmentColor *__restrict dstRGBA6 a = _mm_slli_si128(a, 1); // Shift the A bit back to where it needs to be // Assemble the RGB colors, pack the 32-bit color into a signed 16-bit color, then por the alpha bit back in. - color = r | g | b; + color = _mm_or_si128(_mm_or_si128(r, g), b); color = _mm_packs_epi32(color, zero_vec128); - color |= a; + color = _mm_or_si128(color, a); _mm_storel_epi64((__m128i *)(dstRGBA5551 + i), color); }