Merge pull request #2972 from lioncash/align
General: Replace GC_ALIGN macros with alignas

commit c5685ba53a
@@ -73,14 +73,6 @@ private:
 // Memory leak checks
 #define CHECK_HEAP_INTEGRITY()
 
-// Alignment
-#define GC_ALIGNED16(x) __declspec(align(16)) x
-#define GC_ALIGNED32(x) __declspec(align(32)) x
-#define GC_ALIGNED64(x) __declspec(align(64)) x
-#define GC_ALIGNED128(x) __declspec(align(128)) x
-#define GC_ALIGNED16_DECL(x) __declspec(align(16)) x
-#define GC_ALIGNED64_DECL(x) __declspec(align(64)) x
-
 // Since they are always around on Windows
 #define HAVE_WX 1
 #define HAVE_OPENAL 1

@@ -107,12 +99,6 @@ private:
 #define MAX_PATH PATH_MAX
 
 #define __forceinline inline __attribute__((always_inline))
-#define GC_ALIGNED16(x) __attribute__((aligned(16))) x
-#define GC_ALIGNED32(x) __attribute__((aligned(32))) x
-#define GC_ALIGNED64(x) __attribute__((aligned(64))) x
-#define GC_ALIGNED128(x) __attribute__((aligned(128))) x
-#define GC_ALIGNED16_DECL(x) __attribute__((aligned(16))) x
-#define GC_ALIGNED64_DECL(x) __attribute__((aligned(64))) x
 #endif
 
 #ifdef _MSC_VER
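In short, the macros above existed only to paper over two compiler dialects; C++11's alignas is the standard spelling of the same thing and works on MSVC, GCC, and Clang alike. A minimal self-contained sketch (kSignBits is a made-up name for illustration):

```cpp
#include <cassert>
#include <cstdint>

// One portable spelling instead of two compiler-specific macros:
//   old MSVC: __declspec(align(16)) ...
//   old GCC:  __attribute__((aligned(16))) ...
alignas(16) static const uint64_t kSignBits[2] = {0x8000000000000000ULL, 0};

int main()
{
    // alignas applies to the object, so its address is a multiple of 16.
    assert(reinterpret_cast<uintptr_t>(&kSignBits) % 16 == 0);
    return 0;
}
```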
@@ -22,16 +22,16 @@ namespace GPFifo
 // 32 Byte gather pipe with extra space
 // Overfilling is no problem (up to the real limit), CheckGatherPipe will blast the
 // contents in nicely sized chunks
-
+//
 // Other optimizations to think about:
-
-// If the gp is NOT linked to the fifo, just blast to memory byte by word
-// If the gp IS linked to the fifo, use a fast wrapping buffer and skip writing to memory
-
+// - If the GP is NOT linked to the FIFO, just blast to memory byte by word
+// - If the GP IS linked to the FIFO, use a fast wrapping buffer and skip writing to memory
+//
 // Both of these should actually work! Only problem is that we have to decide at run time,
 // the same function could use both methods. Compile 2 different versions of each such block?
 
-u8 GC_ALIGNED32(m_gatherPipe[GATHER_PIPE_SIZE*16]); //more room, for the fastmodes
+// More room for the fastmodes
+alignas(32) u8 m_gatherPipe[GATHER_PIPE_SIZE * 16];
 
 // pipe counter
 u32 m_gatherPipeCount = 0;

@@ -17,7 +17,8 @@ enum
 GATHER_PIPE_SIZE = 32
 };
 
-extern u8 GC_ALIGNED32(m_gatherPipe[GATHER_PIPE_SIZE*16]); //more room, for the fastmodes
+// More room for the fastmodes
+alignas(32) extern u8 m_gatherPipe[GATHER_PIPE_SIZE * 16];
 
 // pipe counter
 extern u32 m_gatherPipeCount;
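The comments describe an overfill-then-drain design: the buffer is 16x the real 32-byte pipe, so callers may write past a chunk boundary and a later check flushes whole 32-byte chunks. A rough sketch of that pattern under stated assumptions; Flush32 is a hypothetical sink, not Dolphin's actual FIFO write:

```cpp
#include <cstddef>
#include <cstdint>
#include <cstring>

constexpr size_t GATHER_PIPE_SIZE = 32;

// Extra room (16x the pipe size) lets callers overfill before a check runs.
alignas(32) static uint8_t s_pipe[GATHER_PIPE_SIZE * 16];
static size_t s_count = 0;

// Hypothetical sink standing in for the real FIFO write.
static void Flush32(const uint8_t* /*chunk*/) {}

static void CheckGatherPipe()
{
    // Drain whole 32-byte chunks, then slide any remainder to the front.
    size_t processed = 0;
    while (s_count - processed >= GATHER_PIPE_SIZE)
    {
        Flush32(s_pipe + processed);
        processed += GATHER_PIPE_SIZE;
    }
    std::memmove(s_pipe, s_pipe + processed, s_count - processed);
    s_count -= processed;
}

int main()
{
    std::memset(s_pipe, 0xAB, 100);
    s_count = 100;
    CheckGatherPipe(); // flushes 96 bytes, keeps the trailing 4
    return static_cast<int>(s_count);
}
```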
@@ -10,12 +10,12 @@
 
 using namespace Gen;
 
-static const u64 GC_ALIGNED16(psSignBits[2]) = {0x8000000000000000ULL, 0x0000000000000000ULL};
-static const u64 GC_ALIGNED16(psSignBits2[2]) = {0x8000000000000000ULL, 0x8000000000000000ULL};
-static const u64 GC_ALIGNED16(psAbsMask[2]) = {0x7FFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL};
-static const u64 GC_ALIGNED16(psAbsMask2[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL};
-static const u64 GC_ALIGNED16(psGeneratedQNaN[2]) = {0x7FF8000000000000ULL, 0x7FF8000000000000ULL};
-static const double GC_ALIGNED16(half_qnan_and_s32_max[2]) = {0x7FFFFFFF, -0x80000};
+alignas(16) static const u64 psSignBits[2] = {0x8000000000000000ULL, 0x0000000000000000ULL};
+alignas(16) static const u64 psSignBits2[2] = {0x8000000000000000ULL, 0x8000000000000000ULL};
+alignas(16) static const u64 psAbsMask[2] = {0x7FFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL};
+alignas(16) static const u64 psAbsMask2[2] = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL};
+alignas(16) static const u64 psGeneratedQNaN[2] = {0x7FF8000000000000ULL, 0x7FF8000000000000ULL};
+alignas(16) static const double half_qnan_and_s32_max[2] = {0x7FFFFFFF, -0x80000};
 
 X64Reg Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool single, void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&),
 void (XEmitter::*sseOp)(X64Reg, const OpArg&), bool packed, bool preserve_inputs, bool roundRHS)
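These tables are the standard SSE bit-mask idiom: XOR against a sign-bit mask negates a lane, AND against an abs mask clears the sign, and the half-set variants (psSignBits, psAbsMask) touch only one lane of a paired single. A standalone intrinsics sketch of the trick; the JIT emits the equivalent instructions directly rather than calling intrinsics:

```cpp
#include <cstdio>
#include <emmintrin.h> // SSE2

alignas(16) static const unsigned long long psSignBits2[2] = {0x8000000000000000ULL, 0x8000000000000000ULL};
alignas(16) static const unsigned long long psAbsMask2[2]  = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL};

int main()
{
    __m128d v = _mm_set_pd(-2.5, 3.0); // hi = -2.5, lo = 3.0
    // XOR flips the sign bit of both lanes (negate); AND clears it (fabs).
    __m128d neg = _mm_xor_pd(v, _mm_load_pd(reinterpret_cast<const double*>(psSignBits2)));
    __m128d abs = _mm_and_pd(v, _mm_load_pd(reinterpret_cast<const double*>(psAbsMask2)));

    double out[2];
    _mm_storeu_pd(out, neg);
    std::printf("neg: %g %g\n", out[0], out[1]); // -3 2.5
    _mm_storeu_pd(out, abs);
    std::printf("abs: %g %g\n", out[0], out[1]); // 3 2.5
    return 0;
}
```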
@@ -205,12 +205,12 @@ void CommonAsmRoutines::GenMfcr()
 }
 
 // Safe + Fast Quantizers, originally from JITIL by magumagu
-static const float GC_ALIGNED16(m_65535[4]) = {65535.0f, 65535.0f, 65535.0f, 65535.0f};
-static const float GC_ALIGNED16(m_32767) = 32767.0f;
-static const float GC_ALIGNED16(m_m32768) = -32768.0f;
-static const float GC_ALIGNED16(m_255) = 255.0f;
-static const float GC_ALIGNED16(m_127) = 127.0f;
-static const float GC_ALIGNED16(m_m128) = -128.0f;
+alignas(16) static const float m_65535[4] = {65535.0f, 65535.0f, 65535.0f, 65535.0f};
+alignas(16) static const float m_32767 = 32767.0f;
+alignas(16) static const float m_m32768 = -32768.0f;
+alignas(16) static const float m_255 = 255.0f;
+alignas(16) static const float m_127 = 127.0f;
+alignas(16) static const float m_m128 = -128.0f;
 
 #define QUANTIZE_OVERFLOW_SAFE
 
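The constants are saturation bounds for the paired-single quantizers (u16, s16, u8, and s8 targets). A plain scalar sketch of the clamp they implement, assuming the same bounds; the real routines do this with SSE min/max:

```cpp
#include <algorithm>
#include <cstdint>

// Clamp a scaled float into the s16 range before conversion,
// mirroring the m_32767 / m_m32768 constants above.
static int16_t QuantizeToS16(float value, float scale)
{
    float scaled = value * scale;
    scaled = std::min(scaled, 32767.0f);  // m_32767
    scaled = std::max(scaled, -32768.0f); // m_m32768
    return static_cast<int16_t>(scaled);
}
```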
@@ -104,7 +104,7 @@ static unsigned regReadUse(RegInfo& R, InstLoc I)
 }
 
 static u64 SlotSet[1000];
-static u8 GC_ALIGNED16(FSlotSet[16*1000]);
+alignas(16) static u8 FSlotSet[16 * 1000];
 
 static OpArg regLocForSlot(RegInfo& RI, unsigned slot)
 {
@@ -760,7 +760,7 @@ static void regWriteExit(RegInfo& RI, InstLoc dest)
 }
 
 // Helper function to check floating point exceptions
-static double GC_ALIGNED16(isSNANTemp[2][2]);
+alignas(16) static double isSNANTemp[2][2];
 static bool checkIsSNAN()
 {
 return MathUtil::IsSNAN(isSNANTemp[0][0]) || MathUtil::IsSNAN(isSNANTemp[1][0]);
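For reference, a signaling NaN has an all-ones exponent, a clear quiet bit (bit 51 of a double), and a non-zero mantissa. A hedged sketch of such a test; Dolphin's real helper is MathUtil::IsSNAN, whose exact implementation may differ:

```cpp
#include <cstdint>
#include <cstring>

static bool IsSNAN(double value)
{
    uint64_t bits;
    std::memcpy(&bits, &value, sizeof(bits));
    const uint64_t exponent  = bits & 0x7FF0000000000000ULL;
    const uint64_t quiet_bit = bits & 0x0008000000000000ULL;
    const uint64_t mantissa  = bits & 0x000FFFFFFFFFFFFFULL;
    // All-ones exponent, quiet bit clear, mantissa non-zero => signaling NaN.
    return exponent == 0x7FF0000000000000ULL && quiet_bit == 0 && mantissa != 0;
}
```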
@@ -1742,7 +1742,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
 break;
 
 X64Reg reg = fregURegWithMov(RI, I);
-static const u32 GC_ALIGNED16(ssSignBits[4]) = {0x80000000};
+alignas(16) static const u32 ssSignBits[4] = {0x80000000};
 Jit->PXOR(reg, M(ssSignBits));
 RI.fregs[reg] = I;
 fregNormalRegClear(RI, I);

@@ -1754,7 +1754,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
 break;
 
 X64Reg reg = fregURegWithMov(RI, I);
-static const u64 GC_ALIGNED16(sdSignBits[2]) = {0x8000000000000000ULL};
+alignas(16) static const u64 sdSignBits[2] = {0x8000000000000000ULL};
 Jit->PXOR(reg, M(sdSignBits));
 RI.fregs[reg] = I;
 fregNormalRegClear(RI, I);

@@ -1766,7 +1766,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
 break;
 
 X64Reg reg = fregURegWithMov(RI, I);
-static const u32 GC_ALIGNED16(psSignBits[4]) = {0x80000000, 0x80000000};
+alignas(16) static const u32 psSignBits[4] = {0x80000000, 0x80000000};
 Jit->PXOR(reg, M(psSignBits));
 RI.fregs[reg] = I;
 fregNormalRegClear(RI, I);
@@ -4,10 +4,10 @@
 
 #include "Core/PowerPC/JitCommon/JitAsmCommon.h"
 
-const u8 GC_ALIGNED16(pbswapShuffle1x4[16]) = { 3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
-const u8 GC_ALIGNED16(pbswapShuffle2x4[16]) = { 3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15 };
+alignas(16) const u8 pbswapShuffle1x4[16] = { 3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
+alignas(16) const u8 pbswapShuffle2x4[16] = { 3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15 };
 
-const float GC_ALIGNED16(m_quantizeTableS[]) =
+alignas(16) const float m_quantizeTableS[] =
 {
 (1ULL << 0), (1ULL << 0), (1ULL << 1), (1ULL << 1), (1ULL << 2), (1ULL << 2), (1ULL << 3), (1ULL << 3),
 (1ULL << 4), (1ULL << 4), (1ULL << 5), (1ULL << 5), (1ULL << 6), (1ULL << 6), (1ULL << 7), (1ULL << 7),

@@ -35,7 +35,7 @@ const float GC_ALIGNED16(m_quantizeTableS[]) =
 1.0 / (1ULL << 2), 1.0 / (1ULL << 2), 1.0 / (1ULL << 1), 1.0 / (1ULL << 1),
 };
 
-const float GC_ALIGNED16(m_dequantizeTableS[]) =
+alignas(16) const float m_dequantizeTableS[] =
 {
 1.0 / (1ULL << 0), 1.0 / (1ULL << 0), 1.0 / (1ULL << 1), 1.0 / (1ULL << 1),
 1.0 / (1ULL << 2), 1.0 / (1ULL << 2), 1.0 / (1ULL << 3), 1.0 / (1ULL << 3),

@@ -63,4 +63,4 @@ const float GC_ALIGNED16(m_dequantizeTableS[]) =
 (1ULL << 4), (1ULL << 4), (1ULL << 3), (1ULL << 3), (1ULL << 2), (1ULL << 2), (1ULL << 1), (1ULL << 1),
 };
 
-const float GC_ALIGNED16(m_one[]) = { 1.0f, 0.0f, 0.0f, 0.0f };
+alignas(16) const float m_one[] = { 1.0f, 0.0f, 0.0f, 0.0f };
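pbswapShuffle1x4 is a PSHUFB control mask: it byte-reverses the first 32-bit lane and passes the other bytes through, converting the GameCube's big-endian words to host order. A standalone sketch, assuming SSSE3 is available:

```cpp
#include <cstdint>
#include <cstdio>
#include <tmmintrin.h> // SSSE3: _mm_shuffle_epi8

alignas(16) static const uint8_t pbswapShuffle1x4[16] =
    { 3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };

int main()
{
    alignas(16) uint32_t data[4] = {0x11223344, 0, 0, 0};
    __m128i v    = _mm_load_si128(reinterpret_cast<const __m128i*>(data));
    __m128i mask = _mm_load_si128(reinterpret_cast<const __m128i*>(pbswapShuffle1x4));
    v = _mm_shuffle_epi8(v, mask); // byte-swap only the low 32-bit lane
    _mm_store_si128(reinterpret_cast<__m128i*>(data), v);
    std::printf("%08x\n", data[0]); // 44332211
    return 0;
}
```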
@@ -4,13 +4,13 @@
 
 #pragma once
 
 #include "Common/Common.h"
 #include "Common/CommonTypes.h"
 
-extern const u8 GC_ALIGNED16(pbswapShuffle1x4[16]);
-extern const u8 GC_ALIGNED16(pbswapShuffle2x4[16]);
-extern const float GC_ALIGNED16(m_one[]);
-extern const float GC_ALIGNED16(m_quantizeTableS[]);
-extern const float GC_ALIGNED16(m_dequantizeTableS[]);
+alignas(16) extern const u8 pbswapShuffle1x4[16];
+alignas(16) extern const u8 pbswapShuffle2x4[16];
+alignas(16) extern const float m_one[];
+alignas(16) extern const float m_quantizeTableS[];
+alignas(16) extern const float m_dequantizeTableS[];
 
 class CommonAsmRoutinesBase
 {
@@ -691,8 +691,8 @@ void EmuCodeBlock::avx_op(void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&,
 }
 }
 
-static const u64 GC_ALIGNED16(psMantissaTruncate[2]) = {0xFFFFFFFFF8000000ULL, 0xFFFFFFFFF8000000ULL};
-static const u64 GC_ALIGNED16(psRoundBit[2]) = {0x8000000, 0x8000000};
+alignas(16) static const u64 psMantissaTruncate[2] = {0xFFFFFFFFF8000000ULL, 0xFFFFFFFFF8000000ULL};
+alignas(16) static const u64 psRoundBit[2] = {0x8000000, 0x8000000};
 
 // Emulate the odd truncation/rounding that the PowerPC does on the RHS operand before
 // a single precision multiply. To be precise, it drops the low 28 bits of the mantissa,
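The comment describes rounding to 25 bits of mantissa: add bit 27 as a round bit (psRoundBit), then clear the low 28 bits (psMantissaTruncate). A scalar sketch of the same arithmetic on the raw bit pattern; the JIT does it with PADDQ/PAND on XMM registers, and any edge cases it handles are omitted here:

```cpp
#include <cstdint>
#include <cstring>

static double Force25BitPrecision(double value)
{
    uint64_t bits;
    std::memcpy(&bits, &value, sizeof(bits));
    bits += 0x8000000ULL;          // round: add mantissa bit 27 (psRoundBit)
    bits &= 0xFFFFFFFFF8000000ULL; // truncate the low 28 bits (psMantissaTruncate)
    std::memcpy(&value, &bits, sizeof(value));
    return value;
}
```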
@@ -724,8 +724,8 @@ void EmuCodeBlock::Force25BitPrecision(X64Reg output, const OpArg& input, X64Reg
 }
 }
 
-static u32 GC_ALIGNED16(temp32);
-static u64 GC_ALIGNED16(temp64);
+alignas(16) static u32 temp32;
+alignas(16) static u64 temp64;
 
 // Since the following float conversion functions are used in non-arithmetic PPC float instructions,
 // they must convert floats bitexact and never flush denormals to zero or turn SNaNs into QNaNs.
@@ -740,12 +740,12 @@ static u64 GC_ALIGNED16(temp64);
 //#define MORE_ACCURATE_DOUBLETOSINGLE
 #ifdef MORE_ACCURATE_DOUBLETOSINGLE
 
-static const __m128i GC_ALIGNED16(double_exponent) = _mm_set_epi64x(0, 0x7ff0000000000000);
-static const __m128i GC_ALIGNED16(double_fraction) = _mm_set_epi64x(0, 0x000fffffffffffff);
-static const __m128i GC_ALIGNED16(double_sign_bit) = _mm_set_epi64x(0, 0x8000000000000000);
-static const __m128i GC_ALIGNED16(double_explicit_top_bit) = _mm_set_epi64x(0, 0x0010000000000000);
-static const __m128i GC_ALIGNED16(double_top_two_bits) = _mm_set_epi64x(0, 0xc000000000000000);
-static const __m128i GC_ALIGNED16(double_bottom_bits) = _mm_set_epi64x(0, 0x07ffffffe0000000);
+alignas(16) static const __m128i double_exponent = _mm_set_epi64x(0, 0x7ff0000000000000);
+alignas(16) static const __m128i double_fraction = _mm_set_epi64x(0, 0x000fffffffffffff);
+alignas(16) static const __m128i double_sign_bit = _mm_set_epi64x(0, 0x8000000000000000);
+alignas(16) static const __m128i double_explicit_top_bit = _mm_set_epi64x(0, 0x0010000000000000);
+alignas(16) static const __m128i double_top_two_bits = _mm_set_epi64x(0, 0xc000000000000000);
+alignas(16) static const __m128i double_bottom_bits = _mm_set_epi64x(0, 0x07ffffffe0000000);
 
 // This is the same algorithm used in the interpreter (and actual hardware)
 // The documentation states that the conversion of a double with an outside the
@@ -816,12 +816,12 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src)
 
 #else // MORE_ACCURATE_DOUBLETOSINGLE
 
-static const __m128i GC_ALIGNED16(double_sign_bit) = _mm_set_epi64x(0xffffffffffffffff, 0x7fffffffffffffff);
-static const __m128i GC_ALIGNED16(single_qnan_bit) = _mm_set_epi64x(0xffffffffffffffff, 0xffffffffffbfffff);
-static const __m128i GC_ALIGNED16(double_qnan_bit) = _mm_set_epi64x(0xffffffffffffffff, 0xfff7ffffffffffff);
+alignas(16) static const __m128i double_sign_bit = _mm_set_epi64x(0xffffffffffffffff, 0x7fffffffffffffff);
+alignas(16) static const __m128i single_qnan_bit = _mm_set_epi64x(0xffffffffffffffff, 0xffffffffffbfffff);
+alignas(16) static const __m128i double_qnan_bit = _mm_set_epi64x(0xffffffffffffffff, 0xfff7ffffffffffff);
 
 // Smallest positive double that results in a normalized single.
-static const double GC_ALIGNED16(min_norm_single) = std::numeric_limits<float>::min();
+alignas(16) static const double min_norm_single = std::numeric_limits<float>::min();
 
 void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src)
 {
@@ -895,9 +895,9 @@ void EmuCodeBlock::ConvertSingleToDouble(X64Reg dst, X64Reg src, bool src_is_gpr
 MOVDDUP(dst, R(dst));
 }
 
-static const u64 GC_ALIGNED16(psDoubleExp[2]) = {0x7FF0000000000000ULL, 0};
-static const u64 GC_ALIGNED16(psDoubleFrac[2]) = {0x000FFFFFFFFFFFFFULL, 0};
-static const u64 GC_ALIGNED16(psDoubleNoSign[2]) = {0x7FFFFFFFFFFFFFFFULL, 0};
+alignas(16) static const u64 psDoubleExp[2] = {0x7FF0000000000000ULL, 0};
+alignas(16) static const u64 psDoubleFrac[2] = {0x000FFFFFFFFFFFFFULL, 0};
+alignas(16) static const u64 psDoubleNoSign[2] = {0x7FFFFFFFFFFFFFFFULL, 0};
 
 // TODO: it might be faster to handle FPRF in the same way as CR is currently handled for integer, storing
 // the result of each floating point op and calculating it when needed. This is trickier than for integers
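psDoubleExp, psDoubleFrac, and psDoubleNoSign isolate the IEEE-754 fields needed to classify a double for FPRF (zero, denormal, normal, infinity, NaN). A scalar sketch of that classification:

```cpp
#include <cstdint>
#include <cstring>

enum class FPClass { Zero, Denormal, Normal, Infinity, NaN };

static FPClass Classify(double value)
{
    uint64_t bits;
    std::memcpy(&bits, &value, sizeof(bits));
    const uint64_t exp  = bits & 0x7FF0000000000000ULL; // psDoubleExp
    const uint64_t frac = bits & 0x000FFFFFFFFFFFFFULL; // psDoubleFrac

    if (exp == 0x7FF0000000000000ULL)
        return frac ? FPClass::NaN : FPClass::Infinity;
    if (exp == 0)
        return frac ? FPClass::Denormal : FPClass::Zero;
    return FPClass::Normal;
}
```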
@@ -29,7 +29,7 @@ namespace PowerPC
 {
 
 // STATE_TO_SAVE
-PowerPCState GC_ALIGNED16(ppcState);
+PowerPCState ppcState;
 static volatile CPUState state = CPU_POWERDOWN;
 
 Interpreter * const interpreter = Interpreter::getInstance();
@@ -57,7 +57,7 @@ struct tlb_entry
 };
 
 // This contains the entire state of the emulated PowerPC "Gekko" CPU.
-struct GC_ALIGNED64(PowerPCState)
+struct PowerPCState
 {
 u32 gpr[32]; // General purpose registers. r1 = stack pointer.
 
@@ -108,7 +108,7 @@ struct GC_ALIGNED64(PowerPCState)
 // The paired singles are strange : PS0 is stored in the full 64 bits of each FPR
 // but ps calculations are only done in 32-bit precision, and PS1 is only 32 bits.
 // Since we want to use SIMD, SSE2 is the only viable alternative - 2x double.
-GC_ALIGNED16(u64 ps[32][2]);
+alignas(16) u64 ps[32][2];
 
 u32 sr[16]; // Segment registers.
 
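The comment explains the layout choice: storing each paired single as 2x double means a single SSE2 instruction covers both halves, and the alignas(16) makes the aligned 128-bit load legal. A hedged sketch of what that enables; ps_add is a made-up helper, not the emulator's code path:

```cpp
#include <cstdint>
#include <emmintrin.h>

// Hypothetical mirror of the ps layout: 32 registers, PS0/PS1 each as a double.
alignas(16) static uint64_t ps[32][2];

static void ps_add(int d, int a, int b)
{
    // One aligned 128-bit load covers both halves of a paired single.
    __m128d va = _mm_load_pd(reinterpret_cast<const double*>(ps[a]));
    __m128d vb = _mm_load_pd(reinterpret_cast<const double*>(ps[b]));
    _mm_store_pd(reinterpret_cast<double*>(ps[d]), _mm_add_pd(va, vb));
}
```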
@@ -26,9 +26,9 @@ static const int TEXTURE_KILL_THRESHOLD = 60;
 static const int TEXTURE_POOL_KILL_THRESHOLD = 3;
 static const int FRAMECOUNT_INVALID = 0;
 
-TextureCache *g_texture_cache;
+TextureCache* g_texture_cache;
 
-GC_ALIGNED16(u8 *TextureCache::temp) = nullptr;
+alignas(16) u8* TextureCache::temp = nullptr;
 size_t TextureCache::temp_size;
 
 TextureCache::TexCache TextureCache::textures_by_address;
@@ -142,7 +142,7 @@ public:
 protected:
 TextureCache();
 
-static GC_ALIGNED16(u8 *temp);
+alignas(16) static u8* temp;
 static size_t temp_size;
 
 private:
@@ -12,7 +12,7 @@ enum
 TMEM_SIZE = 1024 * 1024,
 TMEM_LINE_SIZE = 32,
 };
-extern GC_ALIGNED16(u8 texMem[TMEM_SIZE]);
+alignas(16) extern u8 texMem[TMEM_SIZE];
 
 enum TextureFormat
 {
@@ -5,7 +5,9 @@
 #include <algorithm>
 #include <cmath>
 
-#include "Common/Common.h"
+#include "Common/CommonFuncs.h"
+#include "Common/CommonTypes.h"
+#include "Common/Logging/Log.h"
 
 #include "VideoCommon/LookUpTables.h"
 #include "VideoCommon/sfont.inc"
@@ -16,7 +18,7 @@ static bool TexFmt_Overlay_Center = false;
 
 // TRAM
 // STATE_TO_SAVE
-GC_ALIGNED16(u8 texMem[TMEM_SIZE]);
+alignas(16) u8 texMem[TMEM_SIZE];
 
 int TexDecoder_GetTexelSizeInNibbles(int format)
 {
@@ -1065,8 +1065,8 @@ void _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int height, in
 const __m128i dxt = _mm_loadu_si128((__m128i *)(src + sizeof(struct DXTBlock) * 2 * xStep));
 
 // Copy the 2-bit indices from each DXT block:
-GC_ALIGNED16( u32 dxttmp[4] );
-_mm_store_si128((__m128i *)dxttmp, dxt);
+alignas(16) u32 dxttmp[4];
+_mm_store_si128((__m128i*)dxttmp, dxt);
 
 u32 dxt0sel = dxttmp[1];
 u32 dxt1sel = dxttmp[3];
@@ -1204,10 +1204,10 @@ void _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int height, in
 u32 *dst32 = ( dst + (y + z*4) * width + x );
 
 // Copy the colors here:
-GC_ALIGNED16( u32 colors0[4] );
-GC_ALIGNED16( u32 colors1[4] );
-_mm_store_si128((__m128i *)colors0, mmcolors0);
-_mm_store_si128((__m128i *)colors1, mmcolors1);
+alignas(16) u32 colors0[4];
+alignas(16) u32 colors1[4];
+_mm_store_si128((__m128i*)colors0, mmcolors0);
+_mm_store_si128((__m128i*)colors1, mmcolors1);
 
 // Row 0:
 dst32[(width * 0) + 0] = colors0[(dxt0sel >> ((0*8)+6)) & 3];
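Worth noting why the alignas matters in these two hunks: _mm_store_si128 requires a 16-byte-aligned destination and faults on an unaligned one. A minimal illustration:

```cpp
#include <cstdint>
#include <emmintrin.h>

int main()
{
    alignas(16) uint32_t colors[4];
    __m128i v = _mm_set1_epi32(0x12345678);
    // Legal: the aligned store matches the alignas(16) on the array.
    _mm_store_si128(reinterpret_cast<__m128i*>(colors), v);
    // Without alignas, the unaligned _mm_storeu_si128 would be required.
    return colors[0] == 0x12345678 ? 0 : 1;
}
```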
@@ -21,7 +21,7 @@ ARM64Reg stride_reg = X11;
 ARM64Reg arraybase_reg = X10;
 ARM64Reg scale_reg = X9;
 
-static const float GC_ALIGNED16(scale_factors[]) =
+alignas(16) static const float scale_factors[] =
 {
 1.0 / (1ULL << 0), 1.0 / (1ULL << 1), 1.0 / (1ULL << 2), 1.0 / (1ULL << 3),
 1.0 / (1ULL << 4), 1.0 / (1ULL << 5), 1.0 / (1ULL << 6), 1.0 / (1ULL << 7),
@@ -22,7 +22,7 @@
 #include "VideoCommon/VideoConfig.h"
 #include "VideoCommon/XFMemory.h"
 
-static float GC_ALIGNED16(g_fProjectionMatrix[16]);
+alignas(16) static float g_fProjectionMatrix[16];
 
 // track changes
 static bool bTexMatricesChanged[2], bPosNormalMatrixChanged, bProjectionChanged, bViewportChanged;
@@ -137,7 +137,7 @@ TEST(BitField, Alignment)
 };
 #pragma pack()
 
-GC_ALIGNED16(OddlyAlignedTestStruct test_struct);
+alignas(16) OddlyAlignedTestStruct test_struct;
 TestUnion& object = test_struct.obj;
 static_assert(alignof(decltype(test_struct.obj.signed_1bit)) == 1,
 "Incorrect variable alignment");
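The test over-aligns an instance of a #pragma pack'd struct to confirm that alignas on the object moves its address without disturbing member layout or alignment inside it. A condensed standalone version of the same idea (Packed is a made-up type):

```cpp
#include <cstdint>

#pragma pack(1)
struct Packed
{
    std::uint8_t a;
    std::uint32_t b; // no padding before this member under pack(1)
};
#pragma pack()

int main()
{
    // Over-aligning the object changes where it lives, not its layout.
    alignas(16) Packed p{};
    static_assert(sizeof(Packed) == 5, "pack(1) removes interior padding");
    return reinterpret_cast<std::uintptr_t>(&p) % 16 == 0 ? 0 : 1;
}
```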