Replace rotate utils with std::rotl

More include cleanup.
This commit is contained in:
Nekotekina 2020-04-13 22:25:33 +03:00
parent f72af2973d
commit 4d8bfe328b
8 changed files with 49 additions and 179 deletions

View File

@ -7,94 +7,6 @@ namespace utils
// Rotate helpers
#if defined(__GNUG__)
// Rotate the 8-bit value x left by n bit positions (GNU-compatible compilers).
// Uses __builtin_rotateleft8 when __has_builtin reports it; otherwise falls
// back to an x86 `rolb` instruction issued through inline asm.
inline u8 rol8(u8 x, u8 n)
{
#if __has_builtin(__builtin_rotateleft8)
return __builtin_rotateleft8(x, n);
#else
u8 result = x;
// "c" places the count in the C register (CL for a byte operand); "+g" marks result as read-write.
__asm__("rolb %[n], %[result]" : [result] "+g"(result) : [n] "c"(n));
return result;
#endif
}
// Rotate the 8-bit value x right by n bit positions (GNU-compatible compilers).
// Uses __builtin_rotateright8 when __has_builtin reports it; otherwise falls
// back to an x86 `rorb` instruction issued through inline asm.
inline u8 ror8(u8 x, u8 n)
{
#if __has_builtin(__builtin_rotateright8)
return __builtin_rotateright8(x, n);
#else
u8 result = x;
// "c" places the count in the C register (CL for a byte operand); "+g" marks result as read-write.
__asm__("rorb %[n], %[result]" : [result] "+g"(result) : [n] "c"(n));
return result;
#endif
}
// Rotate the 16-bit value x left by n bit positions (GNU-compatible compilers).
// Uses __builtin_rotateleft16 when __has_builtin reports it; otherwise falls
// back to an x86 `rolw` instruction issued through inline asm.
inline u16 rol16(u16 x, u16 n)
{
#if __has_builtin(__builtin_rotateleft16)
return __builtin_rotateleft16(x, n);
#else
u16 result = x;
// The count lives in the C register ("c"); the %b modifier names its low byte (CL) as the shift operand.
__asm__("rolw %b[n], %[result]" : [result] "+g"(result) : [n] "c"(n));
return result;
#endif
}
// Rotate the 16-bit value x right by n bit positions (GNU-compatible compilers).
// Uses __builtin_rotateright16 when __has_builtin reports it; otherwise falls
// back to an x86 `rorw` instruction issued through inline asm.
inline u16 ror16(u16 x, u16 n)
{
#if __has_builtin(__builtin_rotateright16)
return __builtin_rotateright16(x, n);
#else
u16 result = x;
// The count lives in the C register ("c"); the %b modifier names its low byte (CL) as the shift operand.
__asm__("rorw %b[n], %[result]" : [result] "+g"(result) : [n] "c"(n));
return result;
#endif
}
// Rotate the 32-bit value x left by n bit positions (GNU-compatible compilers).
// Uses __builtin_rotateleft32 when __has_builtin reports it; otherwise falls
// back to an x86 `roll` instruction issued through inline asm.
inline u32 rol32(u32 x, u32 n)
{
#if __has_builtin(__builtin_rotateleft32)
return __builtin_rotateleft32(x, n);
#else
u32 result = x;
// The count lives in the C register ("c"); the %b modifier names its low byte (CL) as the shift operand.
__asm__("roll %b[n], %[result]" : [result] "+g"(result) : [n] "c"(n));
return result;
#endif
}
// Rotate the 32-bit value x right by n bit positions (GNU-compatible compilers).
// Uses __builtin_rotateright32 when __has_builtin reports it; otherwise falls
// back to an x86 `rorl` instruction issued through inline asm.
inline u32 ror32(u32 x, u32 n)
{
#if __has_builtin(__builtin_rotateright32)
return __builtin_rotateright32(x, n);
#else
u32 result = x;
// The count lives in the C register ("c"); the %b modifier names its low byte (CL) as the shift operand.
__asm__("rorl %b[n], %[result]" : [result] "+g"(result) : [n] "c"(n));
return result;
#endif
}
// Rotate the 64-bit value x left by n bit positions (GNU-compatible compilers).
// Uses __builtin_rotateleft64 when __has_builtin reports it; otherwise falls
// back to an x86-64 `rolq` instruction issued through inline asm.
inline u64 rol64(u64 x, u64 n)
{
#if __has_builtin(__builtin_rotateleft64)
return __builtin_rotateleft64(x, n);
#else
u64 result = x;
// The count lives in the C register ("c"); the %b modifier names its low byte (CL) as the shift operand.
__asm__("rolq %b[n], %[result]" : [result] "+g"(result) : [n] "c"(n));
return result;
#endif
}
// Rotate the 64-bit value x right by n bit positions (GNU-compatible compilers).
// Uses __builtin_rotateright64 when __has_builtin reports it; otherwise falls
// back to an x86-64 `rorq` instruction issued through inline asm.
inline u64 ror64(u64 x, u64 n)
{
#if __has_builtin(__builtin_rotateright64)
return __builtin_rotateright64(x, n);
#else
u64 result = x;
// The count lives in the C register ("c"); the %b modifier names its low byte (CL) as the shift operand.
__asm__("rorq %b[n], %[result]" : [result] "+g"(result) : [n] "c"(n));
return result;
#endif
}
constexpr u64 umulh64(u64 a, u64 b)
{
const __uint128_t x = a;
@ -136,45 +48,6 @@ namespace utils
}
#elif defined(_MSC_VER)
// MSVC variant: rotate the 8-bit value x left by n bits via the _rotl8 intrinsic.
inline u8 rol8(u8 x, u8 n)
{
return _rotl8(x, n);
}
// MSVC variant: rotate the 8-bit value x right by n bits via the _rotr8 intrinsic.
inline u8 ror8(u8 x, u8 n)
{
return _rotr8(x, n);
}
// MSVC variant: rotate the 16-bit value x left by n bits via the _rotl16 intrinsic.
// The count is narrowed to u8 before it is handed to the intrinsic.
inline u16 rol16(u16 x, u16 n)
{
return _rotl16(x, (u8)n);
}
// MSVC variant: rotate the 16-bit value x right by n bits via the _rotr16 intrinsic.
// The count is narrowed to u8 before it is handed to the intrinsic.
inline u16 ror16(u16 x, u16 n)
{
return _rotr16(x, (u8)n);
}
// MSVC variant: rotate the 32-bit value x left by n bits via the _rotl intrinsic.
// The count is converted to int before it is handed to the intrinsic.
inline u32 rol32(u32 x, u32 n)
{
return _rotl(x, (int)n);
}
// MSVC variant: rotate the 32-bit value x right by n bits via the _rotr intrinsic.
// The count is converted to int before it is handed to the intrinsic.
inline u32 ror32(u32 x, u32 n)
{
return _rotr(x, (int)n);
}
// MSVC variant: rotate the 64-bit value x left by n bits via the _rotl64 intrinsic.
// The count is converted to int before it is handed to the intrinsic.
inline u64 rol64(u64 x, u64 n)
{
return _rotl64(x, (int)n);
}
// MSVC variant: rotate the 64-bit value x right by n bits via the _rotr64 intrinsic.
// The count is converted to int before it is handed to the intrinsic.
inline u64 ror64(u64 x, u64 n)
{
return _rotr64(x, (int)n);
}
inline u64 umulh64(u64 x, u64 y)
{

View File

@ -2245,14 +2245,14 @@ void ppu_acontext::RLWIMI(ppu_opcode_t op)
if (op.mb32 <= op.me32)
{
// 32-bit op, including mnemonics: INSLWI, INSRWI (TODO)
min = utils::rol32(static_cast<u32>(min), op.sh32) & mask;
max = utils::rol32(static_cast<u32>(max), op.sh32) & mask;
min = std::rotl(static_cast<u32>(min), op.sh32) & mask;
max = std::rotl(static_cast<u32>(max), op.sh32) & mask;
}
else
{
// Full 64-bit op with duplication
min = utils::rol64(static_cast<u32>(min) | min << 32, op.sh32) & mask;
max = utils::rol64(static_cast<u32>(max) | max << 32, op.sh32) & mask;
min = std::rotl<u64>(static_cast<u32>(min) | min << 32, op.sh32) & mask;
max = std::rotl<u64>(static_cast<u32>(max) | max << 32, op.sh32) & mask;
}
if (mask != umax)
@ -2301,14 +2301,14 @@ void ppu_acontext::RLWINM(ppu_opcode_t op)
// EXTRWI and other possible mnemonics
}
min = utils::rol32(static_cast<u32>(min), op.sh32) & mask;
max = utils::rol32(static_cast<u32>(max), op.sh32) & mask;
min = std::rotl(static_cast<u32>(min), op.sh32) & mask;
max = std::rotl(static_cast<u32>(max), op.sh32) & mask;
}
else
{
// Full 64-bit op with duplication
min = utils::rol64(static_cast<u32>(min) | min << 32, op.sh32) & mask;
max = utils::rol64(static_cast<u32>(max) | max << 32, op.sh32) & mask;
min = std::rotl<u64>(static_cast<u32>(min) | min << 32, op.sh32) & mask;
max = std::rotl<u64>(static_cast<u32>(max) | max << 32, op.sh32) & mask;
}
gpr[op.ra] = spec_gpr::approx(min, max);
@ -2396,8 +2396,8 @@ void ppu_acontext::RLDICL(ppu_opcode_t op)
return;
}
min = utils::rol64(min, sh) & mask;
max = utils::rol64(max, sh) & mask;
min = std::rotl(min, sh) & mask;
max = std::rotl(max, sh) & mask;
gpr[op.ra] = spec_gpr::approx(min, max);
}
@ -2425,8 +2425,8 @@ void ppu_acontext::RLDICR(ppu_opcode_t op)
return;
}
min = utils::rol64(min, sh) & mask;
max = utils::rol64(max, sh) & mask;
min = std::rotl(min, sh) & mask;
max = std::rotl(max, sh) & mask;
gpr[op.ra] = spec_gpr::approx(min, max);
}
@ -2451,8 +2451,8 @@ void ppu_acontext::RLDIC(ppu_opcode_t op)
return;
}
min = utils::rol64(min, sh) & mask;
max = utils::rol64(max, sh) & mask;
min = std::rotl(min, sh) & mask;
max = std::rotl(max, sh) & mask;
gpr[op.ra] = spec_gpr::approx(min, max);
}
@ -2474,8 +2474,8 @@ void ppu_acontext::RLDIMI(ppu_opcode_t op)
// INSRDI mnemonic
}
min = utils::rol64(min, sh) & mask;
max = utils::rol64(max, sh) & mask;
min = std::rotl(min, sh) & mask;
max = std::rotl(max, sh) & mask;
if (mask != umax)
{

View File

@ -1873,7 +1873,7 @@ bool ppu_interpreter::VRLB(ppu_thread& ppu, ppu_opcode_t op)
for (uint i = 0; i < 16; i++)
{
d._u8[i] = utils::rol8(a._u8[i], b._u8[i]);
d._u8[i] = std::rotl(a._u8[i], b._u8[i]);
}
return true;
}
@ -1886,7 +1886,7 @@ bool ppu_interpreter::VRLH(ppu_thread& ppu, ppu_opcode_t op)
for (uint i = 0; i < 8; i++)
{
d._u16[i] = utils::rol16(a._u16[i], b._u8[i * 2] & 0xf);
d._u16[i] = std::rotl(a._u16[i], b._u8[i * 2] & 0xf);
}
return true;
}
@ -1899,7 +1899,7 @@ bool ppu_interpreter::VRLW(ppu_thread& ppu, ppu_opcode_t op)
for (uint w = 0; w < 4; w++)
{
d._u32[w] = utils::rol32(a._u32[w], b._u8[w * 4] & 0x1f);
d._u32[w] = std::rotl(a._u32[w], b._u8[w * 4] & 0x1f);
}
return true;
}
@ -3063,21 +3063,21 @@ bool ppu_interpreter::BCCTR(ppu_thread& ppu, ppu_opcode_t op)
bool ppu_interpreter::RLWIMI(ppu_thread& ppu, ppu_opcode_t op)
{
const u64 mask = ppu_rotate_mask(32 + op.mb32, 32 + op.me32);
ppu.gpr[op.ra] = (ppu.gpr[op.ra] & ~mask) | (dup32(utils::rol32(static_cast<u32>(ppu.gpr[op.rs]), op.sh32)) & mask);
ppu.gpr[op.ra] = (ppu.gpr[op.ra] & ~mask) | (dup32(std::rotl(static_cast<u32>(ppu.gpr[op.rs]), op.sh32)) & mask);
if (op.rc) [[unlikely]] ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
return true;
}
bool ppu_interpreter::RLWINM(ppu_thread& ppu, ppu_opcode_t op)
{
ppu.gpr[op.ra] = dup32(utils::rol32(static_cast<u32>(ppu.gpr[op.rs]), op.sh32)) & ppu_rotate_mask(32 + op.mb32, 32 + op.me32);
ppu.gpr[op.ra] = dup32(std::rotl(static_cast<u32>(ppu.gpr[op.rs]), op.sh32)) & ppu_rotate_mask(32 + op.mb32, 32 + op.me32);
if (op.rc) [[unlikely]] ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
return true;
}
bool ppu_interpreter::RLWNM(ppu_thread& ppu, ppu_opcode_t op)
{
ppu.gpr[op.ra] = dup32(utils::rol32(static_cast<u32>(ppu.gpr[op.rs]), ppu.gpr[op.rb] & 0x1f)) & ppu_rotate_mask(32 + op.mb32, 32 + op.me32);
ppu.gpr[op.ra] = dup32(std::rotl(static_cast<u32>(ppu.gpr[op.rs]), ppu.gpr[op.rb] & 0x1f)) & ppu_rotate_mask(32 + op.mb32, 32 + op.me32);
if (op.rc) [[unlikely]] ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
return true;
}
@ -3122,21 +3122,21 @@ bool ppu_interpreter::ANDIS(ppu_thread& ppu, ppu_opcode_t op)
bool ppu_interpreter::RLDICL(ppu_thread& ppu, ppu_opcode_t op)
{
ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], op.sh64) & (~0ull >> op.mbe64);
ppu.gpr[op.ra] = std::rotl(ppu.gpr[op.rs], op.sh64) & (~0ull >> op.mbe64);
if (op.rc) [[unlikely]] ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
return true;
}
bool ppu_interpreter::RLDICR(ppu_thread& ppu, ppu_opcode_t op)
{
ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], op.sh64) & (~0ull << (op.mbe64 ^ 63));
ppu.gpr[op.ra] = std::rotl(ppu.gpr[op.rs], op.sh64) & (~0ull << (op.mbe64 ^ 63));
if (op.rc) [[unlikely]] ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
return true;
}
bool ppu_interpreter::RLDIC(ppu_thread& ppu, ppu_opcode_t op)
{
ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], op.sh64) & ppu_rotate_mask(op.mbe64, op.sh64 ^ 63);
ppu.gpr[op.ra] = std::rotl(ppu.gpr[op.rs], op.sh64) & ppu_rotate_mask(op.mbe64, op.sh64 ^ 63);
if (op.rc) [[unlikely]] ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
return true;
}
@ -3144,21 +3144,21 @@ bool ppu_interpreter::RLDIC(ppu_thread& ppu, ppu_opcode_t op)
bool ppu_interpreter::RLDIMI(ppu_thread& ppu, ppu_opcode_t op)
{
const u64 mask = ppu_rotate_mask(op.mbe64, op.sh64 ^ 63);
ppu.gpr[op.ra] = (ppu.gpr[op.ra] & ~mask) | (utils::rol64(ppu.gpr[op.rs], op.sh64) & mask);
ppu.gpr[op.ra] = (ppu.gpr[op.ra] & ~mask) | (std::rotl(ppu.gpr[op.rs], op.sh64) & mask);
if (op.rc) [[unlikely]] ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
return true;
}
bool ppu_interpreter::RLDCL(ppu_thread& ppu, ppu_opcode_t op)
{
ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], ppu.gpr[op.rb] & 0x3f) & (~0ull >> op.mbe64);
ppu.gpr[op.ra] = std::rotl(ppu.gpr[op.rs], ppu.gpr[op.rb] & 0x3f) & (~0ull >> op.mbe64);
if (op.rc) [[unlikely]] ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
return true;
}
bool ppu_interpreter::RLDCR(ppu_thread& ppu, ppu_opcode_t op)
{
ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], ppu.gpr[op.rb] & 0x3f) & (~0ull << (op.mbe64 ^ 63));
ppu.gpr[op.ra] = std::rotl(ppu.gpr[op.rs], ppu.gpr[op.rb] & 0x3f) & (~0ull << (op.mbe64 ^ 63));
if (op.rc) [[unlikely]] ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
return true;
}

View File

@ -1,7 +1,6 @@
#pragma once
#include "Utilities/BitField.h"
#include "Utilities/asm.h"
template<typename T, u32 I, u32 N> using ppu_bf_t = bf_t<T, sizeof(T) * 8 - N - I, N>;
@ -62,14 +61,14 @@ union ppu_opcode_t
cf_t<ppu_bf_t<s32, 6, 24>, ff_t<u32, 0, 2>> bt24;
};
// Build a 64-bit rotate mask from the bit positions mb and me:
// `~0ull << (~(me - mb) & 63)` leaves ((me - mb) & 63) + 1 set bits at the
// most-significant end, and that run is then rotated RIGHT by mb.
// (Diff view: each removed line is followed by the line that replaces it.)
inline u64 ppu_rotate_mask(u32 mb, u32 me)
constexpr u64 ppu_rotate_mask(u32 mb, u32 me)
{
return utils::ror64(~0ull << (~(me - mb) & 63), mb);
// The replacement for utils::ror64 must be std::rotr; std::rotl would rotate
// the run the opposite way and yield a wrong mask whenever mb % 64 != 0.
return std::rotr<u64>(~0ull << (~(me - mb) & 63), mb);
}
inline u32 ppu_decode(u32 inst)
constexpr u32 ppu_decode(u32 inst)
{
return (inst >> 26 | inst << (32 - 26)) & 0x1ffff; // Rotate + mask
return std::rotr<u32>(inst, 26) & 0x1ffff; // Rotate + mask
}
// PPU decoder object. D provides functions. T is function pointer type returned.

View File

@ -8,7 +8,6 @@
#include "SPUThread.h"
#include "SPUInterpreter.h"
#include "Utilities/sysinfo.h"
#include "Utilities/asm.h"
#include "PPUAnalyser.h"
#include "Crypto/sha1.h"
@ -3276,7 +3275,7 @@ void spu_recompiler::ROTQBYI(spu_opcode_t op)
}
else if (s == 4 || s == 8 || s == 12)
{
c->pshufd(va, va, utils::rol8(0xE4, s / 2));
c->pshufd(va, va, std::rotl<u8>(0xE4, s / 2));
}
else if (utils::has_ssse3())
{

View File

@ -3,7 +3,6 @@
#include "Utilities/JIT.h"
#include "Utilities/sysinfo.h"
#include "Utilities/asm.h"
#include "SPUThread.h"
#include "Emu/Cell/Common.h"
@ -232,7 +231,7 @@ bool spu_interpreter::ROT(spu_thread& spu, spu_opcode_t op)
for (u32 i = 0; i < 4; i++)
{
spu.gpr[op.rt]._u32[i] = utils::rol32(a._u32[i], b._u32[i]);
spu.gpr[op.rt]._u32[i] = std::rotl(a._u32[i], b._u32[i]);
}
return true;
}
@ -283,7 +282,7 @@ bool spu_interpreter::ROTH(spu_thread& spu, spu_opcode_t op)
for (u32 i = 0; i < 8; i++)
{
spu.gpr[op.rt]._u16[i] = utils::rol16(a._u16[i], b._u16[i]);
spu.gpr[op.rt]._u16[i] = std::rotl(a._u16[i], b._u16[i]);
}
return true;
}

View File

@ -1525,7 +1525,7 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args)
bool spu_thread::do_dma_check(const spu_mfc_cmd& args)
{
const u32 mask = utils::rol32(1, args.tag);
const u32 mask = std::rotl<u32>(1, args.tag);
if (mfc_barrier & mask || (args.cmd & (MFC_BARRIER_MASK | MFC_FENCE_MASK) && mfc_fence & mask)) [[unlikely]]
{
@ -1541,13 +1541,13 @@ bool spu_thread::do_dma_check(const spu_mfc_cmd& args)
if ((mfc_queue[i].cmd & ~0xc) == MFC_BARRIER_CMD)
{
mfc_barrier |= -1;
mfc_fence |= utils::rol32(1, mfc_queue[i].tag);
mfc_fence |= std::rotl<u32>(1, mfc_queue[i].tag);
continue;
}
if (true)
{
const u32 _mask = utils::rol32(1u, mfc_queue[i].tag);
const u32 _mask = std::rotl<u32>(1u, mfc_queue[i].tag);
// A command with barrier hard blocks that tag until it's been dealt with
if (mfc_queue[i].cmd & MFC_BARRIER_MASK)
@ -1648,14 +1648,14 @@ bool spu_thread::do_list_transfer(spu_mfc_cmd& args)
if (items[index].sb & 0x8000) [[unlikely]]
{
ch_stall_mask |= utils::rol32(1, args.tag);
ch_stall_mask |= std::rotl<u32>(1, args.tag);
if (!ch_stall_stat.get_count())
{
ch_event_stat |= SPU_EVENT_SN;
}
ch_stall_stat.set_value(utils::rol32(1, args.tag) | ch_stall_stat.get_value());
ch_stall_stat.set_value(std::rotl<u32>(1, args.tag) | ch_stall_stat.get_value());
args.tag |= 0x80; // Set stalled status
return false;
@ -1750,7 +1750,7 @@ void spu_thread::do_mfc(bool wait)
static_cast<void>(std::remove_if(mfc_queue + 0, mfc_queue + mfc_size, [&](spu_mfc_cmd& args)
{
// Select tag bit in the tag mask or the stall mask
const u32 mask = utils::rol32(1, args.tag);
const u32 mask = std::rotl<u32>(1, args.tag);
if ((args.cmd & ~0xc) == MFC_BARRIER_CMD)
{
@ -2107,7 +2107,7 @@ bool spu_thread::process_mfc_cmd()
}
case MFC_PUTQLLUC_CMD:
{
const u32 mask = utils::rol32(1, ch_mfc_cmd.tag);
const u32 mask = std::rotl<u32>(1, ch_mfc_cmd.tag);
if ((mfc_barrier | mfc_fence) & mask) [[unlikely]]
{
@ -2155,11 +2155,11 @@ bool spu_thread::process_mfc_cmd()
}
mfc_queue[mfc_size++] = ch_mfc_cmd;
mfc_fence |= utils::rol32(1, ch_mfc_cmd.tag);
mfc_fence |= std::rotl<u32>(1, ch_mfc_cmd.tag);
if (ch_mfc_cmd.cmd & MFC_BARRIER_MASK)
{
mfc_barrier |= utils::rol32(1, ch_mfc_cmd.tag);
mfc_barrier |= std::rotl<u32>(1, ch_mfc_cmd.tag);
}
return true;
@ -2191,11 +2191,11 @@ bool spu_thread::process_mfc_cmd()
}
mfc_size++;
mfc_fence |= utils::rol32(1, cmd.tag);
mfc_fence |= std::rotl<u32>(1, cmd.tag);
if (cmd.cmd & MFC_BARRIER_MASK)
{
mfc_barrier |= utils::rol32(1, cmd.tag);
mfc_barrier |= std::rotl<u32>(1, cmd.tag);
}
return true;
@ -2215,7 +2215,7 @@ bool spu_thread::process_mfc_cmd()
{
mfc_queue[mfc_size++] = ch_mfc_cmd;
mfc_barrier |= -1;
mfc_fence |= utils::rol32(1, ch_mfc_cmd.tag);
mfc_fence |= std::rotl<u32>(1, ch_mfc_cmd.tag);
}
return true;
@ -2813,7 +2813,7 @@ bool spu_thread::set_ch_value(u32 ch, u32 value)
case MFC_WrListStallAck:
{
// Reset stall status for specified tag
const u32 tag_mask = utils::rol32(1, value);
const u32 tag_mask = std::rotl<u32>(1, value);
if (ch_stall_mask & tag_mask)
{

View File

@ -2413,7 +2413,7 @@ namespace rsx
for (u32 ea = address >> 20, end = ea + (size >> 20); ea < end; ea++)
{
const u32 io = utils::ror32(iomap_table.io[ea], 20);
const u32 io = std::rotr<u32>(iomap_table.io[ea], 20);
if (io + 1)
{