- Fix some issues with building on older compilers.
This commit is contained in:
rogerman 2016-07-02 19:47:13 +00:00
parent 9683b7e070
commit 3c5461e786
3 changed files with 45 additions and 82 deletions

View File

@ -38,11 +38,18 @@
#include <smmintrin.h> #include <smmintrin.h>
#endif #endif
// SSE2 substitute for the SSSE3 _mm_alignr_epi8() intrinsic (the palignr instruction). It is only defined
// when ENABLE_SSE2 is set and ENABLE_SSSE3 is not, so SSSE3-enabled builds keep using the real intrinsic.
//
// Note: Technically, the shift count of palignr can be any value in the range [0-255]. But practically
// speaking, the shift count should be a value in the range [0-15]. If we assume that the value will always
// be within [0-15], we can then substitute the palignr instruction with an SSE2 equivalent.
//
// The substitute ORs together a shifted left by (16 - immShiftCount) and b shifted right by immShiftCount.
// Shift counts outside of [0-15] are NOT handled by this substitute.
//
// Usage notes:
// - immShiftCount is expanded twice in the macro body, so do not pass an expression with side effects.
// - NOTE(review): immShiftCount is presumably required to be a compile-time constant, since it is passed
//   straight through to _mm_slli_si128() and _mm_srli_si128() -- confirm against the intrinsics reference.
#if defined(ENABLE_SSE2) && !defined(ENABLE_SSSE3)
#define _mm_alignr_epi8(a, b, immShiftCount) _mm_or_si128(_mm_slli_si128(a, 16-(immShiftCount)), _mm_srli_si128(b, (immShiftCount)))
#endif
// Note: The SSE4.1 version of pblendvb only requires that the MSBs of the 8-bit mask vector are set in order to // Note: The SSE4.1 version of pblendvb only requires that the MSBs of the 8-bit mask vector are set in order to
// pass the b byte through. However, our SSE2 substitute of pblendvb requires that all of the bits of the 8-bit // pass the b byte through. However, our SSE2 substitute of pblendvb requires that all of the bits of the 8-bit
// mask vector are set. So when using this intrinsic in practice, just set/clear all mask bits together, and it // mask vector are set. So when using this intrinsic in practice, just set/clear all mask bits together, and it
// should work fine for both SSE4.1 and SSE2. // should work fine for both SSE4.1 and SSE2.
#if !defined(_SMMINTRIN_H) && defined(__EMMINTRIN_H) #if defined(ENABLE_SSE2) && !defined(ENABLE_SSE4_1)
#define _mm_blendv_epi8(a, b, fullmask) _mm_or_si128(_mm_and_si128((fullmask), (b)), _mm_andnot_si128((fullmask), (a))) #define _mm_blendv_epi8(a, b, fullmask) _mm_or_si128(_mm_and_si128((fullmask), (b)), _mm_andnot_si128((fullmask), (a)))
#endif #endif

View File

@ -26,7 +26,7 @@
#include "slot2.h" #include "slot2.h"
#include "NDSSystem.h" #include "NDSSystem.h"
#include "utils/xstring.h" #include "utils/xstring.h"
#include "compat/getopt.h" #include <compat/getopt.h>
//#include "frontend/modules/mGetOpt.h" //to test with this, make sure global `optind` is initialized to 1 //#include "frontend/modules/mGetOpt.h" //to test with this, make sure global `optind` is initialized to 1
#define printerror(...) fprintf(stderr, __VA_ARGS__) #define printerror(...) fprintf(stderr, __VA_ARGS__)
@ -194,9 +194,9 @@ bool CommandLine::parse(int argc,char **argv)
{ "help", no_argument, &opt_help, 1 }, { "help", no_argument, &opt_help, 1 },
//user settings //user settings
{ "num-cores", required_argument, nullptr, OPT_NUMCORES }, { "num-cores", required_argument, NULL, OPT_NUMCORES },
{ "spu-synch", no_argument, &_spu_sync_mode, 1 }, { "spu-synch", no_argument, &_spu_sync_mode, 1 },
{ "spu-method", required_argument, nullptr, OPT_SPU_METHOD }, { "spu-method", required_argument, NULL, OPT_SPU_METHOD },
#ifndef HOST_WINDOWS #ifndef HOST_WINDOWS
{ "disable-sound", no_argument, &disable_sound, 1}, { "disable-sound", no_argument, &disable_sound, 1},
{ "disable-limiter", no_argument, &disable_limiter, 1}, { "disable-limiter", no_argument, &disable_limiter, 1},
@ -214,41 +214,41 @@ bool CommandLine::parse(int argc,char **argv)
{ "backupmem-db", no_argument, &autodetect_method, 1}, { "backupmem-db", no_argument, &autodetect_method, 1},
//system equipment //system equipment
{ "console-type", required_argument, nullptr, OPT_CONSOLE_TYPE }, { "console-type", required_argument, NULL, OPT_CONSOLE_TYPE },
{ "bios-arm9", required_argument, nullptr, OPT_ARM9}, { "bios-arm9", required_argument, NULL, OPT_ARM9},
{ "bios-arm7", required_argument, nullptr, OPT_ARM7}, { "bios-arm7", required_argument, NULL, OPT_ARM7},
{ "bios-swi", required_argument, &_bios_swi, 1}, { "bios-swi", required_argument, &_bios_swi, 1},
//slot-1 contents //slot-1 contents
{ "slot1", required_argument, nullptr, OPT_SLOT1}, { "slot1", required_argument, NULL, OPT_SLOT1},
{ "preload-rom", no_argument, &_load_to_memory, 1}, { "preload-rom", no_argument, &_load_to_memory, 1},
{ "slot1-fat-dir", required_argument, nullptr, OPT_SLOT1_FAT_DIR}, { "slot1-fat-dir", required_argument, NULL, OPT_SLOT1_FAT_DIR},
//slot-2 contents //slot-2 contents
{ "cflash-image", required_argument, nullptr, OPT_SLOT2_CFLASH_IMAGE}, { "cflash-image", required_argument, NULL, OPT_SLOT2_CFLASH_IMAGE},
{ "cflash-path", required_argument, nullptr, OPT_SLOT2_CFLASH_DIR}, { "cflash-path", required_argument, NULL, OPT_SLOT2_CFLASH_DIR},
{ "gbaslot-rom", required_argument, nullptr, OPT_SLOT2_GBAGAME}, { "gbaslot-rom", required_argument, NULL, OPT_SLOT2_GBAGAME},
//commands //commands
{ "start-paused", no_argument, &start_paused, 1}, { "start-paused", no_argument, &start_paused, 1},
{ "load-slot", required_argument, nullptr, OPT_LOAD_SLOT}, { "load-slot", required_argument, NULL, OPT_LOAD_SLOT},
{ "play-movie", required_argument, nullptr, OPT_PLAY_MOVIE}, { "play-movie", required_argument, NULL, OPT_PLAY_MOVIE},
{ "record-movie", required_argument, nullptr, OPT_RECORD_MOVIE}, { "record-movie", required_argument, NULL, OPT_RECORD_MOVIE},
//video filters //video filters
{ "scanline-filter-a", required_argument, nullptr, OPT_SCANLINES_A}, { "scanline-filter-a", required_argument, NULL, OPT_SCANLINES_A},
{ "scanline-filter-b", required_argument, nullptr, OPT_SCANLINES_B}, { "scanline-filter-b", required_argument, NULL, OPT_SCANLINES_B},
{ "scanline-filter-c", required_argument, nullptr, OPT_SCANLINES_C}, { "scanline-filter-c", required_argument, NULL, OPT_SCANLINES_C},
{ "scanline-filter-d", required_argument, nullptr, OPT_SCANLINES_D}, { "scanline-filter-d", required_argument, NULL, OPT_SCANLINES_D},
//debugging //debugging
#ifdef GDB_STUB #ifdef GDB_STUB
{ "arm9gdb", required_argument, nullptr, OPT_ARM9GDB}, { "arm9gdb", required_argument, NULL, OPT_ARM9GDB},
{ "arm7gdb", required_argument, nullptr, OPT_ARM7GDB}, { "arm7gdb", required_argument, NULL, OPT_ARM7GDB},
#endif #endif
//utilities //utilities
{ "advanscene-import", required_argument, nullptr, OPT_ADVANSCENE}, { "advanscene-import", required_argument, NULL, OPT_ADVANSCENE},
{0,0,0,0} {0,0,0,0}
}; };
@ -355,7 +355,7 @@ bool CommandLine::parse(int argc,char **argv)
free(_bios_arm9); free(_bios_arm9);
free(_bios_arm7); free(_bios_arm7);
_bios_arm9 = _bios_arm7 = nullptr; _bios_arm9 = _bios_arm7 = NULL;
//remaining argument should be an NDS file, and nothing more //remaining argument should be an NDS file, and nothing more
int remain = argc-optind; int remain = argc-optind;

View File

@ -904,42 +904,42 @@ Render3DError Render3D_SSE2::ClearFramebuffer(const GFX3D_State &renderState)
const __m128i clearColor1 = _mm_load_si128((__m128i *)(clearColorBuffer + srcIndex1)); const __m128i clearColor1 = _mm_load_si128((__m128i *)(clearColorBuffer + srcIndex1));
const __m128i clearDepth0 = _mm_load_si128((__m128i *)(clearDepthBuffer + srcIndex0)); const __m128i clearDepth0 = _mm_load_si128((__m128i *)(clearDepthBuffer + srcIndex0));
const __m128i clearDepth1 = _mm_load_si128((__m128i *)(clearDepthBuffer + srcIndex1)); const __m128i clearDepth1 = _mm_load_si128((__m128i *)(clearDepthBuffer + srcIndex1));
#ifdef ENABLE_SSSE3
switch (shiftCount) switch (shiftCount)
{ {
case 1: case 1:
clearColor = _mm_alignr_epi8(clearColor1, clearColor0, 7 * sizeof(u16)); clearColor = _mm_alignr_epi8(clearColor1, clearColor0, 14);
clearDepth_vec128 = _mm_alignr_epi8(clearDepth1, clearDepth0, 7 * sizeof(u16)); clearDepth_vec128 = _mm_alignr_epi8(clearDepth1, clearDepth0, 14);
break; break;
case 2: case 2:
clearColor = _mm_alignr_epi8(clearColor1, clearColor0, 6 * sizeof(u16)); clearColor = _mm_alignr_epi8(clearColor1, clearColor0, 12);
clearDepth_vec128 = _mm_alignr_epi8(clearDepth1, clearDepth0, 6 * sizeof(u16)); clearDepth_vec128 = _mm_alignr_epi8(clearDepth1, clearDepth0, 12);
break; break;
case 3: case 3:
clearColor = _mm_alignr_epi8(clearColor1, clearColor0, 5 * sizeof(u16)); clearColor = _mm_alignr_epi8(clearColor1, clearColor0, 10);
clearDepth_vec128 = _mm_alignr_epi8(clearDepth1, clearDepth0, 5 * sizeof(u16)); clearDepth_vec128 = _mm_alignr_epi8(clearDepth1, clearDepth0, 10);
break; break;
case 4: case 4:
clearColor = _mm_alignr_epi8(clearColor1, clearColor0, 4 * sizeof(u16)); clearColor = _mm_alignr_epi8(clearColor1, clearColor0, 8);
clearDepth_vec128 = _mm_alignr_epi8(clearDepth1, clearDepth0, 4 * sizeof(u16)); clearDepth_vec128 = _mm_alignr_epi8(clearDepth1, clearDepth0, 8);
break; break;
case 5: case 5:
clearColor = _mm_alignr_epi8(clearColor1, clearColor0, 3 * sizeof(u16)); clearColor = _mm_alignr_epi8(clearColor1, clearColor0, 6);
clearDepth_vec128 = _mm_alignr_epi8(clearDepth1, clearDepth0, 3 * sizeof(u16)); clearDepth_vec128 = _mm_alignr_epi8(clearDepth1, clearDepth0, 6);
break; break;
case 6: case 6:
clearColor = _mm_alignr_epi8(clearColor1, clearColor0, 2 * sizeof(u16)); clearColor = _mm_alignr_epi8(clearColor1, clearColor0, 4);
clearDepth_vec128 = _mm_alignr_epi8(clearDepth1, clearDepth0, 2 * sizeof(u16)); clearDepth_vec128 = _mm_alignr_epi8(clearDepth1, clearDepth0, 4);
break; break;
case 7: case 7:
clearColor = _mm_alignr_epi8(clearColor1, clearColor0, 1 * sizeof(u16)); clearColor = _mm_alignr_epi8(clearColor1, clearColor0, 2);
clearDepth_vec128 = _mm_alignr_epi8(clearDepth1, clearDepth0, 1 * sizeof(u16)); clearDepth_vec128 = _mm_alignr_epi8(clearDepth1, clearDepth0, 2);
break; break;
default: default:
@ -947,50 +947,6 @@ Render3DError Render3D_SSE2::ClearFramebuffer(const GFX3D_State &renderState)
clearDepth_vec128 = _mm_setzero_si128(); clearDepth_vec128 = _mm_setzero_si128();
break; break;
} }
#else
switch (shiftCount)
{
case 1:
clearColor = _mm_or_si128( _mm_slli_si128(clearColor1, 1 * sizeof(u16)), _mm_srli_si128(clearColor0, 7 * sizeof(u16)) );
clearDepth_vec128 = _mm_or_si128( _mm_slli_si128(clearDepth1, 1 * sizeof(u16)), _mm_srli_si128(clearDepth0, 7 * sizeof(u16)) );
break;
case 2:
clearColor = _mm_or_si128( _mm_slli_si128(clearColor1, 2 * sizeof(u16)), _mm_srli_si128(clearColor0, 6 * sizeof(u16)) );
clearDepth_vec128 = _mm_or_si128( _mm_slli_si128(clearDepth1, 2 * sizeof(u16)), _mm_srli_si128(clearDepth0, 6 * sizeof(u16)) );
break;
case 3:
clearColor = _mm_or_si128( _mm_slli_si128(clearColor1, 3 * sizeof(u16)), _mm_srli_si128(clearColor0, 5 * sizeof(u16)) );
clearDepth_vec128 = _mm_or_si128( _mm_slli_si128(clearDepth1, 3 * sizeof(u16)), _mm_srli_si128(clearDepth0, 5 * sizeof(u16)) );
break;
case 4:
clearColor = _mm_or_si128( _mm_slli_si128(clearColor1, 4 * sizeof(u16)), _mm_srli_si128(clearColor0, 4 * sizeof(u16)) );
clearDepth_vec128 = _mm_or_si128( _mm_slli_si128(clearDepth1, 4 * sizeof(u16)), _mm_srli_si128(clearDepth0, 4 * sizeof(u16)) );
break;
case 5:
clearColor = _mm_or_si128( _mm_slli_si128(clearColor1, 5 * sizeof(u16)), _mm_srli_si128(clearColor0, 3 * sizeof(u16)) );
clearDepth_vec128 = _mm_or_si128( _mm_slli_si128(clearDepth1, 5 * sizeof(u16)), _mm_srli_si128(clearDepth0, 3 * sizeof(u16)) );
break;
case 6:
clearColor = _mm_or_si128( _mm_slli_si128(clearColor1, 6 * sizeof(u16)), _mm_srli_si128(clearColor0, 2 * sizeof(u16)) );
clearDepth_vec128 = _mm_or_si128( _mm_slli_si128(clearDepth1, 6 * sizeof(u16)), _mm_srli_si128(clearDepth0, 2 * sizeof(u16)) );
break;
case 7:
clearColor = _mm_or_si128( _mm_slli_si128(clearColor1, 7 * sizeof(u16)), _mm_srli_si128(clearColor0, 1 * sizeof(u16)) );
clearDepth_vec128 = _mm_or_si128( _mm_slli_si128(clearDepth1, 7 * sizeof(u16)), _mm_srli_si128(clearDepth0, 1 * sizeof(u16)) );
break;
default:
clearColor = _mm_setzero_si128();
clearDepth_vec128 = _mm_setzero_si128();
break;
}
#endif
} }
const __m128i clearDepthValue = _mm_and_si128(clearDepth_vec128, _mm_set1_epi16(0x7FFF)); const __m128i clearDepthValue = _mm_and_si128(clearDepth_vec128, _mm_set1_epi16(0x7FFF));