mirror of https://github.com/xemu-project/xemu.git
migration:
* add avx2 instruction optimization, speeds up zero-page checking on compatible architectures and compilers (gcc 4.9+) * add additional postcopy stats to 'info migrate' output -----BEGIN PGP SIGNATURE----- Version: GnuPG v1 iQIcBAABAgAGBQJW3resAAoJEB6aO1+FQIO2DDAQAJOooSKFVH0BmnUanpDu7sw9 8xMPRSXCVN6IiK1cmL8Gm3vD20vyg0GwE6V5P8VBVZWedKpnSMgVNLDs/D8PH0zq oGqc+kLKJJkUePRZ5AxTZe2BPzjsd+TB17TdqML21854X5ysJRNA3RTGjntvSQtD ANFVwrZWmh8Br7+4rtJAoyF5GCta8ti8ctOnAUoKpxpn0NsjC8CgkP03AIXJcart fZ5gDAzZkXtgxhu/Dws1oIaVbXbMrdYWata3ruMX9OCmPe+DaU75l8Q/t7q2iVPj 5VFvFSDu7czxIGWpWPqZC5SYEPyupOcixWlR770U9OpYKlrQMLO1CEAHm4VIBIK8 cKIPnuQgAATj6eQ1oE6k0vvSHgzXhFOw7nvSkI3bS6JPq2KY1lx8mAYAUSZtM8/w DJ6zFw/VDKY0eFc42u1/ja6Ojv3oe5YbbRdc5L9reHWC4Fnt5/yLVmr6PNj8nROM AjyyjZ5+RuFljIzvsrFYdx+0673mmSLMzGPDQsfFcddIcxX87aoVG6FS+JC4wCQ+ iS52J/vyKGeVTucIklfigRFiSBFEHbyzX5uO62zA/zzl1CTkfZ3AaRjekz0Vr+Wu cNIkA506+MC4E30LGsrm2zBLO5Nt1uco6pNSdLQsw4UZjkE+ZJ9VhSTgHR5FIMll 1PBs0B+tSUJMaqXfpWDs =7PTp -----END PGP SIGNATURE----- Merge remote-tracking branch 'remotes/amit-migration/tags/migration-for-2.6-6' into staging migration: * add avx2 instruction optimization, speeds up zero-page checking on compatible architectures and compilers (gcc 4.9+) * add additional postcopy stats to 'info migrate' output # gpg: Signature made Tue 08 Mar 2016 11:29:48 GMT using RSA key ID 854083B6 # gpg: Good signature from "Amit Shah <amit@amitshah.net>" # gpg: aka "Amit Shah <amit@kernel.org>" # gpg: aka "Amit Shah <amitshah@gmx.net>" * remotes/amit-migration/tags/migration-for-2.6-6: cutils: add avx2 instruction optimization configure: detect ifunc and avx2 attribute Postcopy: Fix sync count in info migrate Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
commit
8519c8e073
|
@ -280,6 +280,7 @@ libusb=""
|
|||
usb_redir=""
|
||||
opengl=""
|
||||
opengl_dmabuf="no"
|
||||
avx2_opt="no"
|
||||
zlib="yes"
|
||||
lzo=""
|
||||
snappy=""
|
||||
|
@ -1773,6 +1774,21 @@ EOF
|
|||
fi
|
||||
|
||||
##########################################
|
||||
# avx2 optimization requirement check
|
||||
|
||||
cat > $TMPC << EOF
|
||||
static void bar(void) {}
|
||||
static void *bar_ifunc(void) {return (void*) bar;}
|
||||
static void foo(void) __attribute__((ifunc("bar_ifunc")));
|
||||
int main(void) { foo(); return 0; }
|
||||
EOF
|
||||
if compile_prog "-mavx2" "" ; then
|
||||
if readelf --syms $TMPE |grep "IFUNC.*foo" >/dev/null 2>&1; then
|
||||
avx2_opt="yes"
|
||||
fi
|
||||
fi
|
||||
|
||||
#########################################
|
||||
# zlib check
|
||||
|
||||
if test "$zlib" != "no" ; then
|
||||
|
@ -4790,6 +4806,7 @@ echo "bzip2 support $bzip2"
|
|||
echo "NUMA host support $numa"
|
||||
echo "tcmalloc support $tcmalloc"
|
||||
echo "jemalloc support $jemalloc"
|
||||
echo "avx2 optimization $avx2_opt"
|
||||
|
||||
if test "$sdl_too_old" = "yes"; then
|
||||
echo "-> Your SDL version is too old - please upgrade to have SDL support"
|
||||
|
@ -5178,6 +5195,10 @@ if test "$opengl" = "yes" ; then
|
|||
fi
|
||||
fi
|
||||
|
||||
if test "$avx2_opt" = "yes" ; then
|
||||
echo "CONFIG_AVX2_OPT=y" >> $config_host_mak
|
||||
fi
|
||||
|
||||
if test "$lzo" = "yes" ; then
|
||||
echo "CONFIG_LZO=y" >> $config_host_mak
|
||||
fi
|
||||
|
|
|
@ -476,13 +476,7 @@ void qemu_hexdump(const char *buf, FILE *fp, const char *prefix, size_t size);
|
|||
#endif
|
||||
|
||||
#define BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR 8
|
||||
static inline bool
|
||||
can_use_buffer_find_nonzero_offset(const void *buf, size_t len)
|
||||
{
|
||||
return (len % (BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR
|
||||
* sizeof(VECTYPE)) == 0
|
||||
&& ((uintptr_t) buf) % sizeof(VECTYPE) == 0);
|
||||
}
|
||||
bool can_use_buffer_find_nonzero_offset(const void *buf, size_t len);
|
||||
size_t buffer_find_nonzero_offset(const void *buf, size_t len);
|
||||
|
||||
/*
|
||||
|
|
|
@ -635,6 +635,7 @@ MigrationInfo *qmp_query_migrate(Error **errp)
|
|||
info->ram->normal_bytes = norm_mig_bytes_transferred();
|
||||
info->ram->dirty_pages_rate = s->dirty_pages_rate;
|
||||
info->ram->mbps = s->mbps;
|
||||
info->ram->dirty_sync_count = s->dirty_sync_count;
|
||||
|
||||
if (blk_mig_active()) {
|
||||
info->has_disk = true;
|
||||
|
|
124
util/cutils.c
124
util/cutils.c
|
@ -160,6 +160,14 @@ int qemu_fdatasync(int fd)
|
|||
#endif
|
||||
}
|
||||
|
||||
static bool
|
||||
can_use_buffer_find_nonzero_offset_inner(const void *buf, size_t len)
|
||||
{
|
||||
return (len % (BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR
|
||||
* sizeof(VECTYPE)) == 0
|
||||
&& ((uintptr_t) buf) % sizeof(VECTYPE) == 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Searches for an area with non-zero content in a buffer
|
||||
*
|
||||
|
@ -168,8 +176,8 @@ int qemu_fdatasync(int fd)
|
|||
* and addr must be a multiple of sizeof(VECTYPE) due to
|
||||
* restriction of optimizations in this function.
|
||||
*
|
||||
* can_use_buffer_find_nonzero_offset() can be used to check
|
||||
* these requirements.
|
||||
* can_use_buffer_find_nonzero_offset_inner() can be used to
|
||||
* check these requirements.
|
||||
*
|
||||
* The return value is the offset of the non-zero area rounded
|
||||
* down to a multiple of sizeof(VECTYPE) for the first
|
||||
|
@ -180,13 +188,13 @@ int qemu_fdatasync(int fd)
|
|||
* If the buffer is all zero the return value is equal to len.
|
||||
*/
|
||||
|
||||
size_t buffer_find_nonzero_offset(const void *buf, size_t len)
|
||||
static size_t buffer_find_nonzero_offset_inner(const void *buf, size_t len)
|
||||
{
|
||||
const VECTYPE *p = buf;
|
||||
const VECTYPE zero = (VECTYPE){0};
|
||||
size_t i;
|
||||
|
||||
assert(can_use_buffer_find_nonzero_offset(buf, len));
|
||||
assert(can_use_buffer_find_nonzero_offset_inner(buf, len));
|
||||
|
||||
if (!len) {
|
||||
return 0;
|
||||
|
@ -215,6 +223,114 @@ size_t buffer_find_nonzero_offset(const void *buf, size_t len)
|
|||
return i * sizeof(VECTYPE);
|
||||
}
|
||||
|
||||
/*
|
||||
* GCC before version 4.9 has a bug which will cause the target
|
||||
* attribute work incorrectly and failed to compile in some case,
|
||||
* restrict the gcc version to 4.9+ to prevent the failure.
|
||||
*/
|
||||
|
||||
#if defined CONFIG_AVX2_OPT && QEMU_GNUC_PREREQ(4, 9)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx2")
|
||||
#include <cpuid.h>
|
||||
#include <immintrin.h>
|
||||
|
||||
#define AVX2_VECTYPE __m256i
|
||||
#define AVX2_SPLAT(p) _mm256_set1_epi8(*(p))
|
||||
#define AVX2_ALL_EQ(v1, v2) \
|
||||
(_mm256_movemask_epi8(_mm256_cmpeq_epi8(v1, v2)) == 0xFFFFFFFF)
|
||||
#define AVX2_VEC_OR(v1, v2) (_mm256_or_si256(v1, v2))
|
||||
|
||||
static bool
|
||||
can_use_buffer_find_nonzero_offset_avx2(const void *buf, size_t len)
|
||||
{
|
||||
return (len % (BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR
|
||||
* sizeof(AVX2_VECTYPE)) == 0
|
||||
&& ((uintptr_t) buf) % sizeof(AVX2_VECTYPE) == 0);
|
||||
}
|
||||
|
||||
static size_t buffer_find_nonzero_offset_avx2(const void *buf, size_t len)
|
||||
{
|
||||
const AVX2_VECTYPE *p = buf;
|
||||
const AVX2_VECTYPE zero = (AVX2_VECTYPE){0};
|
||||
size_t i;
|
||||
|
||||
assert(can_use_buffer_find_nonzero_offset_avx2(buf, len));
|
||||
|
||||
if (!len) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
for (i = 0; i < BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR; i++) {
|
||||
if (!AVX2_ALL_EQ(p[i], zero)) {
|
||||
return i * sizeof(AVX2_VECTYPE);
|
||||
}
|
||||
}
|
||||
|
||||
for (i = BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR;
|
||||
i < len / sizeof(AVX2_VECTYPE);
|
||||
i += BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR) {
|
||||
AVX2_VECTYPE tmp0 = AVX2_VEC_OR(p[i + 0], p[i + 1]);
|
||||
AVX2_VECTYPE tmp1 = AVX2_VEC_OR(p[i + 2], p[i + 3]);
|
||||
AVX2_VECTYPE tmp2 = AVX2_VEC_OR(p[i + 4], p[i + 5]);
|
||||
AVX2_VECTYPE tmp3 = AVX2_VEC_OR(p[i + 6], p[i + 7]);
|
||||
AVX2_VECTYPE tmp01 = AVX2_VEC_OR(tmp0, tmp1);
|
||||
AVX2_VECTYPE tmp23 = AVX2_VEC_OR(tmp2, tmp3);
|
||||
if (!AVX2_ALL_EQ(AVX2_VEC_OR(tmp01, tmp23), zero)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return i * sizeof(AVX2_VECTYPE);
|
||||
}
|
||||
|
||||
static bool avx2_support(void)
|
||||
{
|
||||
int a, b, c, d;
|
||||
|
||||
if (__get_cpuid_max(0, NULL) < 7) {
|
||||
return false;
|
||||
}
|
||||
|
||||
__cpuid_count(7, 0, a, b, c, d);
|
||||
|
||||
return b & bit_AVX2;
|
||||
}
|
||||
|
||||
bool can_use_buffer_find_nonzero_offset(const void *buf, size_t len) \
|
||||
__attribute__ ((ifunc("can_use_buffer_find_nonzero_offset_ifunc")));
|
||||
size_t buffer_find_nonzero_offset(const void *buf, size_t len) \
|
||||
__attribute__ ((ifunc("buffer_find_nonzero_offset_ifunc")));
|
||||
|
||||
static void *buffer_find_nonzero_offset_ifunc(void)
|
||||
{
|
||||
typeof(buffer_find_nonzero_offset) *func = (avx2_support()) ?
|
||||
buffer_find_nonzero_offset_avx2 : buffer_find_nonzero_offset_inner;
|
||||
|
||||
return func;
|
||||
}
|
||||
|
||||
static void *can_use_buffer_find_nonzero_offset_ifunc(void)
|
||||
{
|
||||
typeof(can_use_buffer_find_nonzero_offset) *func = (avx2_support()) ?
|
||||
can_use_buffer_find_nonzero_offset_avx2 :
|
||||
can_use_buffer_find_nonzero_offset_inner;
|
||||
|
||||
return func;
|
||||
}
|
||||
#pragma GCC pop_options
|
||||
#else
|
||||
bool can_use_buffer_find_nonzero_offset(const void *buf, size_t len)
|
||||
{
|
||||
return can_use_buffer_find_nonzero_offset_inner(buf, len);
|
||||
}
|
||||
|
||||
size_t buffer_find_nonzero_offset(const void *buf, size_t len)
|
||||
{
|
||||
return buffer_find_nonzero_offset_inner(buf, len);
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Checks if a buffer is all zeroes
|
||||
*
|
||||
|
|
Loading…
Reference in New Issue