diff --git a/desmume/src/GPU.cpp b/desmume/src/GPU.cpp index b83f99991..d43b198ee 100644 --- a/desmume/src/GPU.cpp +++ b/desmume/src/GPU.cpp @@ -5972,8 +5972,8 @@ void GPUEngineA::_RenderLine_DispCapture_FIFOToBuffer(u16 *fifoLineBuffer) #ifdef ENABLE_SSE2 for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16) / sizeof(__m128i); i++) { - __m128i fifoColor = _mm_set_epi32(DISP_FIFOrecv(), DISP_FIFOrecv(), DISP_FIFOrecv(), DISP_FIFOrecv()); - _mm_store_si128((__m128i *)fifoLineBuffer + i, _mm_shuffle_epi32(fifoColor, 0x1B)); // We need to shuffle the four FIFO values back into the correct order, since they were originally loaded in reverse order. + const __m128i fifoColor = _mm_setr_epi32(DISP_FIFOrecv(), DISP_FIFOrecv(), DISP_FIFOrecv(), DISP_FIFOrecv()); + _mm_store_si128((__m128i *)fifoLineBuffer + i, fifoColor); } #else for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16) / sizeof(u32); i++) @@ -6592,8 +6592,7 @@ void GPUEngineA::_HandleDisplayModeMainMemory(const size_t l) const __m128i alphaBit = _mm_set1_epi16(0x8000); for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16) / sizeof(__m128i); i++) { - __m128i fifoColor = _mm_set_epi32(DISP_FIFOrecv(), DISP_FIFOrecv(), DISP_FIFOrecv(), DISP_FIFOrecv()); - fifoColor = _mm_shuffle_epi32(fifoColor, 0x1B); // We need to shuffle the four FIFO values back into the correct order, since they were originally loaded in reverse order. + const __m128i fifoColor = _mm_setr_epi32(DISP_FIFOrecv(), DISP_FIFOrecv(), DISP_FIFOrecv(), DISP_FIFOrecv()); _mm_store_si128((__m128i *)dst + i, _mm_or_si128(fifoColor, alphaBit)); } #else