diff --git a/plugins/zzogl-pg/opengl/targets.cpp b/plugins/zzogl-pg/opengl/targets.cpp index 2e8c265d78..cc5b840a0c 100644 --- a/plugins/zzogl-pg/opengl/targets.cpp +++ b/plugins/zzogl-pg/opengl/targets.cpp @@ -26,7 +26,9 @@ #include "zerogs.h" #include "targets.h" #include "ZZoglShaders.h" -#include +#ifdef ZEROGS_SSE2 +#include +#endif #define RHA //#define RW @@ -2301,6 +2303,9 @@ ZeroGS::CMemoryTarget* ZeroGS::CMemoryTargetMngr::GetMemoryTarget(const tex0Info src += 32; dst += 64; } + // It is advised to use a fence instruction after non-temporal move (mm_stream) instructions. + // The store fence ensures that previous stores have finished before any new store is executed. + _mm_sfence(); #else SSE2_UnswizzleZ16Target(dst, src, targ->height * GPU_TEXWIDTH / 16); #endif diff --git a/plugins/zzogl-pg/opengl/x86.cpp b/plugins/zzogl-pg/opengl/x86.cpp index d29442d6f3..a315083e6c 100644 --- a/plugins/zzogl-pg/opengl/x86.cpp +++ b/plugins/zzogl-pg/opengl/x86.cpp @@ -22,7 +22,6 @@ #include "x86.h" #if defined(ZEROGS_SSE2) -#include #include #endif @@ -679,7 +678,7 @@ extern "C" void __fastcall WriteCLUT_T16_I4_CSM1_sse2(u32* vm, u32* clut) // Create a zero register. __m128i zero_128 = _mm_setzero_si128(); - if ((u32)vm & 0x0F) { + if ((u32)clut & 0x0F) { // Unaligned write. u16* clut_word_ptr = (u16*)clut;