Reimplement utils::popcnt64

Implement utils::popcnt128
This commit is contained in:
Nekotekina 2020-12-29 15:28:02 +03:00
parent 57621d1c4e
commit 6b96807112
3 changed files with 37 additions and 17 deletions

View File

@ -15,6 +15,7 @@
#include "sysPrxForUser.h"
#include "cellSpurs.h"
#include "util/asm.hpp"
#include "util/v128.hpp"
#include "util/v128sse.hpp"
@ -3930,14 +3931,8 @@ s32 _spurs::create_task(vm::ptr<CellSpursTaskset> taskset, vm::ptr<u32> task_id,
if (ls_pattern)
{
v128 ls_pattern_128 = v128::from64r(ls_pattern->_u64[0], ls_pattern->_u64[1]);
u32 ls_blocks = 0;
for (auto i = 0; i < 128; i++)
{
if (ls_pattern_128._bit[i])
{
ls_blocks++;
}
}
const u32 ls_blocks = utils::popcnt128(ls_pattern_128._u);
if (ls_blocks > alloc_ls_blocks)
{

View File

@ -19,6 +19,7 @@ extern "C"
ushort _rotl16(ushort, uchar);
uint _rotl(uint, int);
u64 _rotl64(u64, int);
u64 __popcnt64(u64);
s64 __mulh(s64, s64);
u64 __umulh(u64, u64);
@ -211,6 +212,38 @@ namespace utils
#endif
}
// Count the number of set bits in a 64-bit value.
//
// Usable in constant expressions. Outside of constant evaluation the work is
// handed to a compiler intrinsic (__popcnt64 under _MSC_VER, otherwise
// __builtin_popcountll). When _MSC_VER is defined without __SSE4_2__, the
// intrinsic path is compiled out and the arithmetic fallback is always used.
constexpr u32 popcnt64(u64 v)
{
#if !defined(_MSC_VER) || defined(__SSE4_2__)
	// Runtime evaluation: use the intrinsic and leave early
	if (!std::is_constant_evaluated())
	{
#ifdef _MSC_VER
		return static_cast<u32>(__popcnt64(v));
#else
		return __builtin_popcountll(v);
#endif
	}
#endif

	// Arithmetic fallback: repeatedly add neighbouring bit fields, doubling the
	// field width on every step (1 -> 2 -> 4 -> 8 -> 16 -> 32 bits).
	u64 acc = v;
	acc = ((acc >> 1) & 0x5555555555555555) + (acc & 0x5555555555555555);
	acc = ((acc >> 2) & 0x3333333333333333) + (acc & 0x3333333333333333);
	acc = ((acc >> 4) & 0x0f0f0f0f0f0f0f0f) + (acc & 0x0f0f0f0f0f0f0f0f);
	acc = ((acc >> 8) & 0x00ff00ff00ff00ff) + (acc & 0x00ff00ff00ff00ff);
	acc = ((acc >> 16) & 0x0000ffff0000ffff) + (acc & 0x0000ffff0000ffff);

	// The low 32 bits now hold the sum of both halves; the narrowing cast
	// drops the leftover partial count that remains in the high 32 bits.
	return static_cast<u32>((acc >> 32) + acc);
}
// Count the number of set bits in a 128-bit value by summing the bit counts
// of its two 64-bit halves (each computed with popcnt64).
constexpr u32 popcnt128(const u128& v)
{
#ifndef _MSC_VER
	// Here u128 is narrowed and shifted directly to obtain each half
	const u64 lower = static_cast<u64>(v);
	const u64 upper = static_cast<u64>(v >> 64);
	return popcnt64(lower) + popcnt64(upper);
#else
	// Under _MSC_VER the halves are read from the lo/hi members of u128
	return popcnt64(v.lo) + popcnt64(v.hi);
#endif
}
constexpr u64 umulh64(u64 x, u64 y)
{
#ifdef _MSC_VER

View File

@ -162,15 +162,7 @@ static NEVER_INLINE bool ptr_cmp(const void* data, u32 _size, u128 old128, u128
// Count is taken from least significant byte and ignores some flags
const u64 count = static_cast<u64>(old128) & 0xff;
u64 bitc = new_value;
bitc = (bitc & 0xaaaaaaaaaaaaaaaa) / 2 + (bitc & 0x5555555555555555);
bitc = (bitc & 0xcccccccccccccccc) / 4 + (bitc & 0x3333333333333333);
bitc = (bitc & 0xf0f0f0f0f0f0f0f0) / 16 + (bitc & 0x0f0f0f0f0f0f0f0f);
bitc = (bitc & 0xff00ff00ff00ff00) / 256 + (bitc & 0x00ff00ff00ff00ff);
bitc = ((bitc & 0xffff0000ffff0000) >> 16) + (bitc & 0x0000ffff0000ffff);
bitc = (bitc >> 32) + bitc;
result = count < bitc;
result = count < utils::popcnt64(new_value);
break;
}
default: