From cd7690730b98a26b8eea39cf7a846e1327622b6e Mon Sep 17 00:00:00 2001 From: rogerman Date: Wed, 3 Aug 2016 18:14:44 +0000 Subject: [PATCH] GPU: - For SSE2 systems, when reading graphics data from main memory, eliminate the extraneous pshufd instruction. --- desmume/src/GPU.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/desmume/src/GPU.cpp b/desmume/src/GPU.cpp index b83f99991..d43b198ee 100644 --- a/desmume/src/GPU.cpp +++ b/desmume/src/GPU.cpp @@ -5972,8 +5972,8 @@ void GPUEngineA::_RenderLine_DispCapture_FIFOToBuffer(u16 *fifoLineBuffer) #ifdef ENABLE_SSE2 for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16) / sizeof(__m128i); i++) { - __m128i fifoColor = _mm_set_epi32(DISP_FIFOrecv(), DISP_FIFOrecv(), DISP_FIFOrecv(), DISP_FIFOrecv()); - _mm_store_si128((__m128i *)fifoLineBuffer + i, _mm_shuffle_epi32(fifoColor, 0x1B)); // We need to shuffle the four FIFO values back into the correct order, since they were originally loaded in reverse order. + const __m128i fifoColor = _mm_setr_epi32(DISP_FIFOrecv(), DISP_FIFOrecv(), DISP_FIFOrecv(), DISP_FIFOrecv()); + _mm_store_si128((__m128i *)fifoLineBuffer + i, fifoColor); } #else for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16) / sizeof(u32); i++) @@ -6592,8 +6592,7 @@ void GPUEngineA::_HandleDisplayModeMainMemory(const size_t l) const __m128i alphaBit = _mm_set1_epi16(0x8000); for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16) / sizeof(__m128i); i++) { - __m128i fifoColor = _mm_set_epi32(DISP_FIFOrecv(), DISP_FIFOrecv(), DISP_FIFOrecv(), DISP_FIFOrecv()); - fifoColor = _mm_shuffle_epi32(fifoColor, 0x1B); // We need to shuffle the four FIFO values back into the correct order, since they were originally loaded in reverse order. + const __m128i fifoColor = _mm_setr_epi32(DISP_FIFOrecv(), DISP_FIFOrecv(), DISP_FIFOrecv(), DISP_FIFOrecv()); _mm_store_si128((__m128i *)dst + i, _mm_or_si128(fifoColor, alphaBit)); } #else