SPU: Copy with memcpy() instead of hand-rolled SSE2

In a very unscientific benchmark, spu_thread::do_dma_transfer() went from
taking 2.27% of my CPU time to 0.07%, while __memmove_avx_unaligned_erms()
went from 1.47% to 2.88%. Added together, that is a net saving of about
0.8%.
This commit is contained in:
Emmanuel Gil Peyrot 2019-11-20 16:53:22 +01:00 committed by Nekotekina
parent 5261886449
commit 425e032a62
1 changed files with 8 additions and 0 deletions

View File

@ -1433,6 +1433,9 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args)
auto lock = vm::passive_lock(eal & -128, ::align(eal + size, 128));
#ifdef __GNUG__
std::memcpy(dst, src, size);
#else
while (size >= 128)
{
mov_rdata(*reinterpret_cast<decltype(spu_thread::rdata)*>(dst), *reinterpret_cast<const decltype(spu_thread::rdata)*>(src));
@ -1450,6 +1453,7 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args)
src += 16;
size -= 16;
}
#endif
lock->release(0);
break;
@ -1483,6 +1487,9 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args)
}
default:
{
#ifdef __GNUG__
std::memcpy(dst, src, size);
#else
while (size >= 128)
{
mov_rdata(*reinterpret_cast<decltype(spu_thread::rdata)*>(dst), *reinterpret_cast<const decltype(spu_thread::rdata)*>(src));
@ -1500,6 +1507,7 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args)
src += 16;
size -= 16;
}
#endif
break;
}