diff --git a/Source/Core/Common/Common.h b/Source/Core/Common/Common.h index 8027e31878..c8847cfe57 100644 --- a/Source/Core/Common/Common.h +++ b/Source/Core/Common/Common.h @@ -73,14 +73,6 @@ private: // Memory leak checks #define CHECK_HEAP_INTEGRITY() -// Alignment - #define GC_ALIGNED16(x) __declspec(align(16)) x - #define GC_ALIGNED32(x) __declspec(align(32)) x - #define GC_ALIGNED64(x) __declspec(align(64)) x - #define GC_ALIGNED128(x) __declspec(align(128)) x - #define GC_ALIGNED16_DECL(x) __declspec(align(16)) x - #define GC_ALIGNED64_DECL(x) __declspec(align(64)) x - // Since they are always around on Windows #define HAVE_WX 1 #define HAVE_OPENAL 1 @@ -107,12 +99,6 @@ private: #define MAX_PATH PATH_MAX #define __forceinline inline __attribute__((always_inline)) -#define GC_ALIGNED16(x) __attribute__((aligned(16))) x -#define GC_ALIGNED32(x) __attribute__((aligned(32))) x -#define GC_ALIGNED64(x) __attribute__((aligned(64))) x -#define GC_ALIGNED128(x) __attribute__((aligned(128))) x -#define GC_ALIGNED16_DECL(x) __attribute__((aligned(16))) x -#define GC_ALIGNED64_DECL(x) __attribute__((aligned(64))) x #endif #ifdef _MSC_VER diff --git a/Source/Core/Core/HW/GPFifo.cpp b/Source/Core/Core/HW/GPFifo.cpp index ee09159ee0..befb444110 100644 --- a/Source/Core/Core/HW/GPFifo.cpp +++ b/Source/Core/Core/HW/GPFifo.cpp @@ -22,16 +22,16 @@ namespace GPFifo // 32 Byte gather pipe with extra space // Overfilling is no problem (up to the real limit), CheckGatherPipe will blast the // contents in nicely sized chunks - +// // Other optimizations to think about: - -// If the gp is NOT linked to the fifo, just blast to memory byte by word -// If the gp IS linked to the fifo, use a fast wrapping buffer and skip writing to memory - +// - If the GP is NOT linked to the FIFO, just blast to memory byte by word +// - If the GP IS linked to the FIFO, use a fast wrapping buffer and skip writing to memory +// // Both of these should actually work! 
Only problem is that we have to decide at run time, // the same function could use both methods. Compile 2 different versions of each such block? -u8 GC_ALIGNED32(m_gatherPipe[GATHER_PIPE_SIZE*16]); //more room, for the fastmodes +// More room for the fastmodes +alignas(32) u8 m_gatherPipe[GATHER_PIPE_SIZE * 16]; // pipe counter u32 m_gatherPipeCount = 0; diff --git a/Source/Core/Core/HW/GPFifo.h b/Source/Core/Core/HW/GPFifo.h index 686120daa9..843ce37ee7 100644 --- a/Source/Core/Core/HW/GPFifo.h +++ b/Source/Core/Core/HW/GPFifo.h @@ -17,7 +17,8 @@ enum GATHER_PIPE_SIZE = 32 }; -extern u8 GC_ALIGNED32(m_gatherPipe[GATHER_PIPE_SIZE*16]); //more room, for the fastmodes +// More room for the fastmodes +alignas(32) extern u8 m_gatherPipe[GATHER_PIPE_SIZE * 16]; // pipe counter extern u32 m_gatherPipeCount; diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp index e066b16225..8295c8a230 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp @@ -10,12 +10,12 @@ using namespace Gen; -static const u64 GC_ALIGNED16(psSignBits[2]) = {0x8000000000000000ULL, 0x0000000000000000ULL}; -static const u64 GC_ALIGNED16(psSignBits2[2]) = {0x8000000000000000ULL, 0x8000000000000000ULL}; -static const u64 GC_ALIGNED16(psAbsMask[2]) = {0x7FFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL}; -static const u64 GC_ALIGNED16(psAbsMask2[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL}; -static const u64 GC_ALIGNED16(psGeneratedQNaN[2]) = {0x7FF8000000000000ULL, 0x7FF8000000000000ULL}; -static const double GC_ALIGNED16(half_qnan_and_s32_max[2]) = {0x7FFFFFFF, -0x80000}; +alignas(16) static const u64 psSignBits[2] = {0x8000000000000000ULL, 0x0000000000000000ULL}; +alignas(16) static const u64 psSignBits2[2] = {0x8000000000000000ULL, 0x8000000000000000ULL}; +alignas(16) static const u64 psAbsMask[2] = {0x7FFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL}; +alignas(16) 
static const u64 psAbsMask2[2] = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL}; +alignas(16) static const u64 psGeneratedQNaN[2] = {0x7FF8000000000000ULL, 0x7FF8000000000000ULL}; +alignas(16) static const double half_qnan_and_s32_max[2] = {0x7FFFFFFF, -0x80000}; X64Reg Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool single, void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&), void (XEmitter::*sseOp)(X64Reg, const OpArg&), bool packed, bool preserve_inputs, bool roundRHS) diff --git a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp index b0f38ca1ec..5165339aaf 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp +++ b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp @@ -205,12 +205,12 @@ void CommonAsmRoutines::GenMfcr() } // Safe + Fast Quantizers, originally from JITIL by magumagu -static const float GC_ALIGNED16(m_65535[4]) = {65535.0f, 65535.0f, 65535.0f, 65535.0f}; -static const float GC_ALIGNED16(m_32767) = 32767.0f; -static const float GC_ALIGNED16(m_m32768) = -32768.0f; -static const float GC_ALIGNED16(m_255) = 255.0f; -static const float GC_ALIGNED16(m_127) = 127.0f; -static const float GC_ALIGNED16(m_m128) = -128.0f; +alignas(16) static const float m_65535[4] = {65535.0f, 65535.0f, 65535.0f, 65535.0f}; +alignas(16) static const float m_32767 = 32767.0f; +alignas(16) static const float m_m32768 = -32768.0f; +alignas(16) static const float m_255 = 255.0f; +alignas(16) static const float m_127 = 127.0f; +alignas(16) static const float m_m128 = -128.0f; #define QUANTIZE_OVERFLOW_SAFE diff --git a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp index 399f1cbd6c..f21b12b944 100644 --- a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp +++ b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp @@ -104,7 +104,7 @@ static unsigned regReadUse(RegInfo& R, InstLoc I) } static u64 SlotSet[1000]; -static u8 GC_ALIGNED16(FSlotSet[16*1000]); 
+alignas(16) static u8 FSlotSet[16 * 1000]; static OpArg regLocForSlot(RegInfo& RI, unsigned slot) { @@ -760,7 +760,7 @@ static void regWriteExit(RegInfo& RI, InstLoc dest) } // Helper function to check floating point exceptions -static double GC_ALIGNED16(isSNANTemp[2][2]); +alignas(16) static double isSNANTemp[2][2]; static bool checkIsSNAN() { return MathUtil::IsSNAN(isSNANTemp[0][0]) || MathUtil::IsSNAN(isSNANTemp[1][0]); @@ -1742,7 +1742,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) break; X64Reg reg = fregURegWithMov(RI, I); - static const u32 GC_ALIGNED16(ssSignBits[4]) = {0x80000000}; + alignas(16) static const u32 ssSignBits[4] = {0x80000000}; Jit->PXOR(reg, M(ssSignBits)); RI.fregs[reg] = I; fregNormalRegClear(RI, I); @@ -1754,7 +1754,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) break; X64Reg reg = fregURegWithMov(RI, I); - static const u64 GC_ALIGNED16(sdSignBits[2]) = {0x8000000000000000ULL}; + alignas(16) static const u64 sdSignBits[2] = {0x8000000000000000ULL}; Jit->PXOR(reg, M(sdSignBits)); RI.fregs[reg] = I; fregNormalRegClear(RI, I); @@ -1766,7 +1766,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) break; X64Reg reg = fregURegWithMov(RI, I); - static const u32 GC_ALIGNED16(psSignBits[4]) = {0x80000000, 0x80000000}; + alignas(16) static const u32 psSignBits[4] = {0x80000000, 0x80000000}; Jit->PXOR(reg, M(psSignBits)); RI.fregs[reg] = I; fregNormalRegClear(RI, I); diff --git a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp index 0f3884b475..428e3e5232 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp @@ -4,10 +4,10 @@ #include "Core/PowerPC/JitCommon/JitAsmCommon.h" -const u8 GC_ALIGNED16(pbswapShuffle1x4[16]) = { 3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; -const u8 GC_ALIGNED16(pbswapShuffle2x4[16]) = { 3, 2, 1, 0, 7, 
6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15 }; +alignas(16) const u8 pbswapShuffle1x4[16] = { 3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; +alignas(16) const u8 pbswapShuffle2x4[16] = { 3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15 }; -const float GC_ALIGNED16(m_quantizeTableS[]) = +alignas(16) const float m_quantizeTableS[] = { (1ULL << 0), (1ULL << 0), (1ULL << 1), (1ULL << 1), (1ULL << 2), (1ULL << 2), (1ULL << 3), (1ULL << 3), (1ULL << 4), (1ULL << 4), (1ULL << 5), (1ULL << 5), (1ULL << 6), (1ULL << 6), (1ULL << 7), (1ULL << 7), @@ -35,7 +35,7 @@ const float GC_ALIGNED16(m_quantizeTableS[]) = 1.0 / (1ULL << 2), 1.0 / (1ULL << 2), 1.0 / (1ULL << 1), 1.0 / (1ULL << 1), }; -const float GC_ALIGNED16(m_dequantizeTableS[]) = +alignas(16) const float m_dequantizeTableS[] = { 1.0 / (1ULL << 0), 1.0 / (1ULL << 0), 1.0 / (1ULL << 1), 1.0 / (1ULL << 1), 1.0 / (1ULL << 2), 1.0 / (1ULL << 2), 1.0 / (1ULL << 3), 1.0 / (1ULL << 3), @@ -63,4 +63,4 @@ const float GC_ALIGNED16(m_dequantizeTableS[]) = (1ULL << 4), (1ULL << 4), (1ULL << 3), (1ULL << 3), (1ULL << 2), (1ULL << 2), (1ULL << 1), (1ULL << 1), }; -const float GC_ALIGNED16(m_one[]) = { 1.0f, 0.0f, 0.0f, 0.0f }; +alignas(16) const float m_one[] = { 1.0f, 0.0f, 0.0f, 0.0f }; diff --git a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h index 9f67ac4ffd..66d933687a 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h @@ -4,13 +4,13 @@ #pragma once -#include "Common/Common.h" +#include "Common/CommonTypes.h" -extern const u8 GC_ALIGNED16(pbswapShuffle1x4[16]); -extern const u8 GC_ALIGNED16(pbswapShuffle2x4[16]); -extern const float GC_ALIGNED16(m_one[]); -extern const float GC_ALIGNED16(m_quantizeTableS[]); -extern const float GC_ALIGNED16(m_dequantizeTableS[]); +alignas(16) extern const u8 pbswapShuffle1x4[16]; +alignas(16) extern const u8 pbswapShuffle2x4[16]; +alignas(16) extern const float 
m_one[]; +alignas(16) extern const float m_quantizeTableS[]; +alignas(16) extern const float m_dequantizeTableS[]; class CommonAsmRoutinesBase { diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp index af6233f0fd..c4350624c0 100644 --- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp @@ -691,8 +691,8 @@ void EmuCodeBlock::avx_op(void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&, } } -static const u64 GC_ALIGNED16(psMantissaTruncate[2]) = {0xFFFFFFFFF8000000ULL, 0xFFFFFFFFF8000000ULL}; -static const u64 GC_ALIGNED16(psRoundBit[2]) = {0x8000000, 0x8000000}; +alignas(16) static const u64 psMantissaTruncate[2] = {0xFFFFFFFFF8000000ULL, 0xFFFFFFFFF8000000ULL}; +alignas(16) static const u64 psRoundBit[2] = {0x8000000, 0x8000000}; // Emulate the odd truncation/rounding that the PowerPC does on the RHS operand before // a single precision multiply. To be precise, it drops the low 28 bits of the mantissa, @@ -724,8 +724,8 @@ void EmuCodeBlock::Force25BitPrecision(X64Reg output, const OpArg& input, X64Reg } } -static u32 GC_ALIGNED16(temp32); -static u64 GC_ALIGNED16(temp64); +alignas(16) static u32 temp32; +alignas(16) static u64 temp64; // Since the following float conversion functions are used in non-arithmetic PPC float instructions, // they must convert floats bitexact and never flush denormals to zero or turn SNaNs into QNaNs. 
@@ -740,12 +740,12 @@ static u64 GC_ALIGNED16(temp64); //#define MORE_ACCURATE_DOUBLETOSINGLE #ifdef MORE_ACCURATE_DOUBLETOSINGLE -static const __m128i GC_ALIGNED16(double_exponent) = _mm_set_epi64x(0, 0x7ff0000000000000); -static const __m128i GC_ALIGNED16(double_fraction) = _mm_set_epi64x(0, 0x000fffffffffffff); -static const __m128i GC_ALIGNED16(double_sign_bit) = _mm_set_epi64x(0, 0x8000000000000000); -static const __m128i GC_ALIGNED16(double_explicit_top_bit) = _mm_set_epi64x(0, 0x0010000000000000); -static const __m128i GC_ALIGNED16(double_top_two_bits) = _mm_set_epi64x(0, 0xc000000000000000); -static const __m128i GC_ALIGNED16(double_bottom_bits) = _mm_set_epi64x(0, 0x07ffffffe0000000); +alignas(16) static const __m128i double_exponent = _mm_set_epi64x(0, 0x7ff0000000000000); +alignas(16) static const __m128i double_fraction = _mm_set_epi64x(0, 0x000fffffffffffff); +alignas(16) static const __m128i double_sign_bit = _mm_set_epi64x(0, 0x8000000000000000); +alignas(16) static const __m128i double_explicit_top_bit = _mm_set_epi64x(0, 0x0010000000000000); +alignas(16) static const __m128i double_top_two_bits = _mm_set_epi64x(0, 0xc000000000000000); +alignas(16) static const __m128i double_bottom_bits = _mm_set_epi64x(0, 0x07ffffffe0000000); // This is the same algorithm used in the interpreter (and actual hardware) // The documentation states that the conversion of a double with an outside the @@ -816,12 +816,12 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src) #else // MORE_ACCURATE_DOUBLETOSINGLE -static const __m128i GC_ALIGNED16(double_sign_bit) = _mm_set_epi64x(0xffffffffffffffff, 0x7fffffffffffffff); -static const __m128i GC_ALIGNED16(single_qnan_bit) = _mm_set_epi64x(0xffffffffffffffff, 0xffffffffffbfffff); -static const __m128i GC_ALIGNED16(double_qnan_bit) = _mm_set_epi64x(0xffffffffffffffff, 0xfff7ffffffffffff); +alignas(16) static const __m128i double_sign_bit = _mm_set_epi64x(0xffffffffffffffff, 0x7fffffffffffffff); +alignas(16) 
static const __m128i single_qnan_bit = _mm_set_epi64x(0xffffffffffffffff, 0xffffffffffbfffff); +alignas(16) static const __m128i double_qnan_bit = _mm_set_epi64x(0xffffffffffffffff, 0xfff7ffffffffffff); // Smallest positive double that results in a normalized single. -static const double GC_ALIGNED16(min_norm_single) = std::numeric_limits<double>::min(); +alignas(16) static const double min_norm_single = std::numeric_limits<double>::min(); void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src) { @@ -895,9 +895,9 @@ void EmuCodeBlock::ConvertSingleToDouble(X64Reg dst, X64Reg src, bool src_is_gpr MOVDDUP(dst, R(dst)); } -static const u64 GC_ALIGNED16(psDoubleExp[2]) = {0x7FF0000000000000ULL, 0}; -static const u64 GC_ALIGNED16(psDoubleFrac[2]) = {0x000FFFFFFFFFFFFFULL, 0}; -static const u64 GC_ALIGNED16(psDoubleNoSign[2]) = {0x7FFFFFFFFFFFFFFFULL, 0}; +alignas(16) static const u64 psDoubleExp[2] = {0x7FF0000000000000ULL, 0}; +alignas(16) static const u64 psDoubleFrac[2] = {0x000FFFFFFFFFFFFFULL, 0}; +alignas(16) static const u64 psDoubleNoSign[2] = {0x7FFFFFFFFFFFFFFFULL, 0}; // TODO: it might be faster to handle FPRF in the same way as CR is currently handled for integer, storing // the result of each floating point op and calculating it when needed. 
This is trickier than for integers diff --git a/Source/Core/Core/PowerPC/PowerPC.cpp b/Source/Core/Core/PowerPC/PowerPC.cpp index 057d41730f..0c7ccbee86 100644 --- a/Source/Core/Core/PowerPC/PowerPC.cpp +++ b/Source/Core/Core/PowerPC/PowerPC.cpp @@ -29,7 +29,7 @@ namespace PowerPC { // STATE_TO_SAVE -PowerPCState GC_ALIGNED16(ppcState); +PowerPCState ppcState; static volatile CPUState state = CPU_POWERDOWN; Interpreter * const interpreter = Interpreter::getInstance(); diff --git a/Source/Core/Core/PowerPC/PowerPC.h b/Source/Core/Core/PowerPC/PowerPC.h index dc91d488c8..eca88fb713 100644 --- a/Source/Core/Core/PowerPC/PowerPC.h +++ b/Source/Core/Core/PowerPC/PowerPC.h @@ -57,7 +57,7 @@ struct tlb_entry }; // This contains the entire state of the emulated PowerPC "Gekko" CPU. -struct GC_ALIGNED64(PowerPCState) +struct PowerPCState { u32 gpr[32]; // General purpose registers. r1 = stack pointer. @@ -108,7 +108,7 @@ struct GC_ALIGNED64(PowerPCState) // The paired singles are strange : PS0 is stored in the full 64 bits of each FPR // but ps calculations are only done in 32-bit precision, and PS1 is only 32 bits. // Since we want to use SIMD, SSE2 is the only viable alternative - 2x double. - GC_ALIGNED16(u64 ps[32][2]); + alignas(16) u64 ps[32][2]; u32 sr[16]; // Segment registers. 
diff --git a/Source/Core/VideoCommon/TextureCacheBase.cpp b/Source/Core/VideoCommon/TextureCacheBase.cpp index 9dcd960037..430004b640 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.cpp +++ b/Source/Core/VideoCommon/TextureCacheBase.cpp @@ -25,9 +25,9 @@ static const int TEXTURE_KILL_THRESHOLD = 60; static const int TEXTURE_POOL_KILL_THRESHOLD = 3; static const int FRAMECOUNT_INVALID = 0; -TextureCache *g_texture_cache; +TextureCache* g_texture_cache; -GC_ALIGNED16(u8 *TextureCache::temp) = nullptr; +alignas(16) u8* TextureCache::temp = nullptr; size_t TextureCache::temp_size; TextureCache::TexCache TextureCache::textures_by_address; diff --git a/Source/Core/VideoCommon/TextureCacheBase.h b/Source/Core/VideoCommon/TextureCacheBase.h index c012cad0fc..b0cd5f02b6 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.h +++ b/Source/Core/VideoCommon/TextureCacheBase.h @@ -142,7 +142,7 @@ public: protected: TextureCache(); - static GC_ALIGNED16(u8 *temp); + alignas(16) static u8* temp; static size_t temp_size; private: diff --git a/Source/Core/VideoCommon/TextureDecoder.h b/Source/Core/VideoCommon/TextureDecoder.h index be0c8f44e0..249024fa3a 100644 --- a/Source/Core/VideoCommon/TextureDecoder.h +++ b/Source/Core/VideoCommon/TextureDecoder.h @@ -12,7 +12,7 @@ enum TMEM_SIZE = 1024 * 1024, TMEM_LINE_SIZE = 32, }; -extern GC_ALIGNED16(u8 texMem[TMEM_SIZE]); +alignas(16) extern u8 texMem[TMEM_SIZE]; enum TextureFormat { diff --git a/Source/Core/VideoCommon/TextureDecoder_Common.cpp b/Source/Core/VideoCommon/TextureDecoder_Common.cpp index 8fc1631b93..a1c7bff0c2 100644 --- a/Source/Core/VideoCommon/TextureDecoder_Common.cpp +++ b/Source/Core/VideoCommon/TextureDecoder_Common.cpp @@ -5,7 +5,9 @@ #include #include -#include "Common/Common.h" +#include "Common/CommonFuncs.h" +#include "Common/CommonTypes.h" +#include "Common/Logging/Log.h" #include "VideoCommon/LookUpTables.h" #include "VideoCommon/sfont.inc" @@ -16,7 +18,7 @@ static bool TexFmt_Overlay_Center = false; 
// TRAM // STATE_TO_SAVE -GC_ALIGNED16(u8 texMem[TMEM_SIZE]); +alignas(16) u8 texMem[TMEM_SIZE]; int TexDecoder_GetTexelSizeInNibbles(int format) { diff --git a/Source/Core/VideoCommon/TextureDecoder_x64.cpp b/Source/Core/VideoCommon/TextureDecoder_x64.cpp index f68d35f842..3064b0f3c1 100644 --- a/Source/Core/VideoCommon/TextureDecoder_x64.cpp +++ b/Source/Core/VideoCommon/TextureDecoder_x64.cpp @@ -1065,8 +1065,8 @@ void _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int height, in const __m128i dxt = _mm_loadu_si128((__m128i *)(src + sizeof(struct DXTBlock) * 2 * xStep)); // Copy the 2-bit indices from each DXT block: - GC_ALIGNED16( u32 dxttmp[4] ); - _mm_store_si128((__m128i *)dxttmp, dxt); + alignas(16) u32 dxttmp[4]; + _mm_store_si128((__m128i*)dxttmp, dxt); u32 dxt0sel = dxttmp[1]; u32 dxt1sel = dxttmp[3]; @@ -1204,10 +1204,10 @@ void _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int height, in u32 *dst32 = ( dst + (y + z*4) * width + x ); // Copy the colors here: - GC_ALIGNED16( u32 colors0[4] ); - GC_ALIGNED16( u32 colors1[4] ); - _mm_store_si128((__m128i *)colors0, mmcolors0); - _mm_store_si128((__m128i *)colors1, mmcolors1); + alignas(16) u32 colors0[4]; + alignas(16) u32 colors1[4]; + _mm_store_si128((__m128i*)colors0, mmcolors0); + _mm_store_si128((__m128i*)colors1, mmcolors1); // Row 0: dst32[(width * 0) + 0] = colors0[(dxt0sel >> ((0*8)+6)) & 3]; diff --git a/Source/Core/VideoCommon/VertexLoaderARM64.cpp b/Source/Core/VideoCommon/VertexLoaderARM64.cpp index ebd0026237..4251acaaa0 100644 --- a/Source/Core/VideoCommon/VertexLoaderARM64.cpp +++ b/Source/Core/VideoCommon/VertexLoaderARM64.cpp @@ -21,7 +21,7 @@ ARM64Reg stride_reg = X11; ARM64Reg arraybase_reg = X10; ARM64Reg scale_reg = X9; -static const float GC_ALIGNED16(scale_factors[]) = +alignas(16) static const float scale_factors[] = { 1.0 / (1ULL << 0), 1.0 / (1ULL << 1), 1.0 / (1ULL << 2), 1.0 / (1ULL << 3), 1.0 / (1ULL << 4), 1.0 / (1ULL << 5), 1.0 / (1ULL << 6), 
1.0 / (1ULL << 7), diff --git a/Source/Core/VideoCommon/VertexShaderManager.cpp b/Source/Core/VideoCommon/VertexShaderManager.cpp index 603fc2efb8..93d248938b 100644 --- a/Source/Core/VideoCommon/VertexShaderManager.cpp +++ b/Source/Core/VideoCommon/VertexShaderManager.cpp @@ -22,7 +22,7 @@ #include "VideoCommon/VideoConfig.h" #include "VideoCommon/XFMemory.h" -static float GC_ALIGNED16(g_fProjectionMatrix[16]); +alignas(16) static float g_fProjectionMatrix[16]; // track changes static bool bTexMatricesChanged[2], bPosNormalMatrixChanged, bProjectionChanged, bViewportChanged; diff --git a/Source/UnitTests/Common/BitFieldTest.cpp b/Source/UnitTests/Common/BitFieldTest.cpp index f3a6a4ca2a..cb49e24cf3 100644 --- a/Source/UnitTests/Common/BitFieldTest.cpp +++ b/Source/UnitTests/Common/BitFieldTest.cpp @@ -137,7 +137,7 @@ TEST(BitField, Alignment) }; #pragma pack() - GC_ALIGNED16(OddlyAlignedTestStruct test_struct); + alignas(16) OddlyAlignedTestStruct test_struct; TestUnion& object = test_struct.obj; static_assert(alignof(decltype(test_struct.obj.signed_1bit)) == 1, "Incorrect variable alignment");