Migration Pull request (take3)

commit f670b3eec7

Merge tag 'migration-20230213-pull-request' of https://gitlab.com/juan.quintela/qemu into staging

Migration Pull request (take3)

Hi

In this PULL request:
- Added to leonardo fixes:
  Fixes: b5eea99ec2 ("migration: Add yank feature")
  Reported-by: Li Xiaohui <xiaohli@redhat.com>

Please apply.

[take 2]
- rebase to latest upstream
- fix compilation of linux-user (if have_system was missing) (me)
- cleanup multifd_load_cleanup (leonardo)
- Document RAM flags (me)

Please apply.

[take 1]
These are all the reviewed patches for migration:
- AVX512 support for xbzrle (Ling Xu)
- /dev/userfaultfd support (Peter Xu)
- Improve ordering of channels (Peter Xu)
- multifd cleanups (Li Zhang)
- Remove spurious files from last merge (me); a rebase does that for you
- Fix mixup between state_pending_{exact,estimate} (me)
- Cache RAM size during migration (me)
- cleanup several functions (me)

Please apply.

# -----BEGIN PGP SIGNATURE-----
#
# iQIzBAABCAAdFiEEGJn/jt6/WMzuA0uC9IfvGFhy1yMFAmPppZYACgkQ9IfvGFhy
# 1yPLvQ//f8D6txzFawaxrfzpSAHnq70Gx+B5GkIwgwB8nlPIC3QELEf5uooM/RGA
# nSaUctUNOJUWqVGK3vp3jDIep02DzdIUrlOfy96h+pnTMpyMWFC2BexDfveVMUId
# dw8WCWZkGCFDfIWuKF+GA8eTu6HM1ouzgCJrRmPtCqsikzAPkogPm60hQSTAQxm9
# Kzdp1SXV1HmyA440vy8rtYf71BKpvb9OJFmwgZ+ICy0rc1aUmgJbKxkwyOgiI2lq
# ONekpbOg7lzlFVAQu1QHTmYN13bsn4uzwUgdifn1PixFQyRE3AVs4jdTmqeLnoPe
# Ac6j8v3pDOw/Xf4kpRWUmhkqTMEJt8/lyneJzu1mQkw0wwiUtDvknFgPG8wJsa+J
# ZQr1cBXQj4IjtkN6+ixF7XYvx3T6pWz0L+/w2+TbFBdLWIrPgFH0yPYjhx7FdDid
# cjUHyS1a0w9ngnXOxRG8+UNHWCpPOUhXeeiyNioogYZNKu77PFxJVDMe3eB6dXAB
# pDfl4P129PloKAPafcz9E6Sxr+lIgrETZmsRJlRz4czg18TxlIukMlDtyrepNWti
# GtIf9xTpP3JKjpHnKbWLaxP5VeFC7kQd0qas4VxD+tDjbJdUZdZMfHcOSS0SMRGe
# q5LVEzMMIPCQJQIqiLEJ0HTUUOtB8i+bGoirNEbDqhLa/oZwPP8=
# =TDnO
# -----END PGP SIGNATURE-----
# gpg: Signature made Mon 13 Feb 2023 02:51:02 GMT
# gpg:                using RSA key 1899FF8EDEBF58CCEE034B82F487EF185872D723
# gpg: Good signature from "Juan Quintela <quintela@redhat.com>" [full]
# gpg:                 aka "Juan Quintela <quintela@trasno.org>" [full]
# Primary key fingerprint: 1899 FF8E DEBF 58CC EE03 4B82 F487 EF18 5872 D723

* tag 'migration-20230213-pull-request' of https://gitlab.com/juan.quintela/qemu: (22 commits)
  ram: Document migration ram flags
  migration/multifd: Move load_cleanup inside incoming_state_destroy
  migration/multifd: Join all multifd threads in order to avoid leaks
  migration/multifd: Remove unnecessary assignment on multifd_load_cleanup()
  migration/multifd: Change multifd_load_cleanup() signature and usage
  migration: Postpone postcopy preempt channel to be after main
  migration: Add a semaphore to count PONGs
  migration: Cleanup postcopy_preempt_setup()
  migration: Rework multi-channel checks on URI
  Update bench-code for addressing CI problem
  AVX512 support for xbzrle_encode_buffer
  migration: I messed state_pending_exact/estimate
  migration: Make ram_save_target_page() a pointer
  migration: Calculate ram size once
  migration: Split ram_bytes_total_common() in two functions
  migration: Make find_dirty_block() return a single parameter
  migration: Simplify ram_find_and_save_block()
  util/userfaultfd: Support /dev/userfaultfd
  linux-headers: Update to v6.1
  multifd: Remove some redundant code
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
@@ -1,7 +0,0 @@
-# Boards:
-#
-CONFIG_ISAPC=n
-CONFIG_I440FX=n
-CONFIG_Q35=n
-CONFIG_MICROVM=y
-
@@ -1,6 +0,0 @@
-# Boards:
-#
-CONFIG_ISAPC=y
-CONFIG_I440FX=y
-CONFIG_Q35=y
-CONFIG_MICROVM=y
@@ -98,18 +98,42 @@ extern "C" {
 #define DRM_FORMAT_INVALID	0
 
 /* color index */
+#define DRM_FORMAT_C1		fourcc_code('C', '1', ' ', ' ') /* [7:0] C0:C1:C2:C3:C4:C5:C6:C7 1:1:1:1:1:1:1:1 eight pixels/byte */
+#define DRM_FORMAT_C2		fourcc_code('C', '2', ' ', ' ') /* [7:0] C0:C1:C2:C3 2:2:2:2 four pixels/byte */
+#define DRM_FORMAT_C4		fourcc_code('C', '4', ' ', ' ') /* [7:0] C0:C1 4:4 two pixels/byte */
 #define DRM_FORMAT_C8		fourcc_code('C', '8', ' ', ' ') /* [7:0] C */
 
-/* 8 bpp Red */
+/* 1 bpp Darkness (inverse relationship between channel value and brightness) */
+#define DRM_FORMAT_D1		fourcc_code('D', '1', ' ', ' ') /* [7:0] D0:D1:D2:D3:D4:D5:D6:D7 1:1:1:1:1:1:1:1 eight pixels/byte */
+
+/* 2 bpp Darkness (inverse relationship between channel value and brightness) */
+#define DRM_FORMAT_D2		fourcc_code('D', '2', ' ', ' ') /* [7:0] D0:D1:D2:D3 2:2:2:2 four pixels/byte */
+
+/* 4 bpp Darkness (inverse relationship between channel value and brightness) */
+#define DRM_FORMAT_D4		fourcc_code('D', '4', ' ', ' ') /* [7:0] D0:D1 4:4 two pixels/byte */
+
+/* 8 bpp Darkness (inverse relationship between channel value and brightness) */
+#define DRM_FORMAT_D8		fourcc_code('D', '8', ' ', ' ') /* [7:0] D */
+
+/* 1 bpp Red (direct relationship between channel value and brightness) */
+#define DRM_FORMAT_R1		fourcc_code('R', '1', ' ', ' ') /* [7:0] R0:R1:R2:R3:R4:R5:R6:R7 1:1:1:1:1:1:1:1 eight pixels/byte */
+
+/* 2 bpp Red (direct relationship between channel value and brightness) */
+#define DRM_FORMAT_R2		fourcc_code('R', '2', ' ', ' ') /* [7:0] R0:R1:R2:R3 2:2:2:2 four pixels/byte */
+
+/* 4 bpp Red (direct relationship between channel value and brightness) */
+#define DRM_FORMAT_R4		fourcc_code('R', '4', ' ', ' ') /* [7:0] R0:R1 4:4 two pixels/byte */
+
+/* 8 bpp Red (direct relationship between channel value and brightness) */
 #define DRM_FORMAT_R8		fourcc_code('R', '8', ' ', ' ') /* [7:0] R */
 
-/* 10 bpp Red */
+/* 10 bpp Red (direct relationship between channel value and brightness) */
 #define DRM_FORMAT_R10		fourcc_code('R', '1', '0', ' ') /* [15:0] x:R 6:10 little endian */
 
-/* 12 bpp Red */
+/* 12 bpp Red (direct relationship between channel value and brightness) */
 #define DRM_FORMAT_R12		fourcc_code('R', '1', '2', ' ') /* [15:0] x:R 4:12 little endian */
 
-/* 16 bpp Red */
+/* 16 bpp Red (direct relationship between channel value and brightness) */
 #define DRM_FORMAT_R16		fourcc_code('R', '1', '6', ' ') /* [15:0] R little endian */
 
 /* 16 bpp RG */
@@ -204,7 +228,9 @@
 #define DRM_FORMAT_VYUY		fourcc_code('V', 'Y', 'U', 'Y') /* [31:0] Y1:Cb0:Y0:Cr0 8:8:8:8 little endian */
 
 #define DRM_FORMAT_AYUV		fourcc_code('A', 'Y', 'U', 'V') /* [31:0] A:Y:Cb:Cr 8:8:8:8 little endian */
+#define DRM_FORMAT_AVUY8888	fourcc_code('A', 'V', 'U', 'Y') /* [31:0] A:Cr:Cb:Y 8:8:8:8 little endian */
 #define DRM_FORMAT_XYUV8888	fourcc_code('X', 'Y', 'U', 'V') /* [31:0] X:Y:Cb:Cr 8:8:8:8 little endian */
+#define DRM_FORMAT_XVUY8888	fourcc_code('X', 'V', 'U', 'Y') /* [31:0] X:Cr:Cb:Y 8:8:8:8 little endian */
 #define DRM_FORMAT_VUY888	fourcc_code('V', 'U', '2', '4') /* [23:0] Cr:Cb:Y 8:8:8 little endian */
 #define DRM_FORMAT_VUY101010	fourcc_code('V', 'U', '3', '0') /* Y followed by U then V, 10:10:10. Non-linear modifier only */
 
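An aside on the fourcc_code() macro used throughout these hunks: it packs four ASCII bytes little-endian into a 32-bit format code. A minimal standalone sketch (fourcc_code is re-declared here with uint32_t in place of the kernel's __u32):

#include <stdint.h>
#include <stdio.h>

/* Mirrors the drm_fourcc.h packing: four ASCII characters, little endian. */
#define fourcc_code(a, b, c, d) ((uint32_t)(a) | ((uint32_t)(b) << 8) | \
                                 ((uint32_t)(c) << 16) | ((uint32_t)(d) << 24))

int main(void)
{
    /* DRM_FORMAT_R8 from the hunk above */
    uint32_t r8 = fourcc_code('R', '8', ' ', ' ');
    printf("DRM_FORMAT_R8 = 0x%08x\n", r8); /* prints 0x20203852 */
    return 0;
}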
@@ -736,6 +736,51 @@ enum ethtool_module_power_mode {
 	ETHTOOL_MODULE_POWER_MODE_HIGH,
 };
 
+/**
+ * enum ethtool_podl_pse_admin_state - operational state of the PoDL PSE
+ *	functions. IEEE 802.3-2018 30.15.1.1.2 aPoDLPSEAdminState
+ * @ETHTOOL_PODL_PSE_ADMIN_STATE_UNKNOWN: state of PoDL PSE functions are
+ *	unknown
+ * @ETHTOOL_PODL_PSE_ADMIN_STATE_DISABLED: PoDL PSE functions are disabled
+ * @ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED: PoDL PSE functions are enabled
+ */
+enum ethtool_podl_pse_admin_state {
+	ETHTOOL_PODL_PSE_ADMIN_STATE_UNKNOWN = 1,
+	ETHTOOL_PODL_PSE_ADMIN_STATE_DISABLED,
+	ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED,
+};
+
+/**
+ * enum ethtool_podl_pse_pw_d_status - power detection status of the PoDL PSE.
+ *	IEEE 802.3-2018 30.15.1.1.3 aPoDLPSEPowerDetectionStatus:
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_UNKNOWN: PoDL PSE
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_DISABLED: "The enumeration “disabled” is
+ *	asserted true when the PoDL PSE state diagram variable mr_pse_enable is
+ *	false"
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_SEARCHING: "The enumeration “searching” is
+ *	asserted true when either of the PSE state diagram variables
+ *	pi_detecting or pi_classifying is true."
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_DELIVERING: "The enumeration “deliveringPower”
+ *	is asserted true when the PoDL PSE state diagram variable pi_powered is
+ *	true."
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_SLEEP: "The enumeration “sleep” is asserted
+ *	true when the PoDL PSE state diagram variable pi_sleeping is true."
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_IDLE: "The enumeration “idle” is asserted true
+ *	when the logical combination of the PoDL PSE state diagram variables
+ *	pi_prebiased*!pi_sleeping is true."
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_ERROR: "The enumeration “error” is asserted
+ *	true when the PoDL PSE state diagram variable overload_held is true."
+ */
+enum ethtool_podl_pse_pw_d_status {
+	ETHTOOL_PODL_PSE_PW_D_STATUS_UNKNOWN = 1,
+	ETHTOOL_PODL_PSE_PW_D_STATUS_DISABLED,
+	ETHTOOL_PODL_PSE_PW_D_STATUS_SEARCHING,
+	ETHTOOL_PODL_PSE_PW_D_STATUS_DELIVERING,
+	ETHTOOL_PODL_PSE_PW_D_STATUS_SLEEP,
+	ETHTOOL_PODL_PSE_PW_D_STATUS_IDLE,
+	ETHTOOL_PODL_PSE_PW_D_STATUS_ERROR,
+};
+
 /**
  * struct ethtool_gstrings - string set for data tagging
  * @cmd: Command number = %ETHTOOL_GSTRINGS
@@ -1840,6 +1885,20 @@ static inline int ethtool_validate_duplex(uint8_t duplex)
 #define MASTER_SLAVE_STATE_SLAVE		3
 #define MASTER_SLAVE_STATE_ERR			4
 
+/* These are used to throttle the rate of data on the phy interface when the
+ * native speed of the interface is higher than the link speed. These should
+ * not be used for phy interfaces which natively support multiple speeds (e.g.
+ * MII or SGMII).
+ */
+/* No rate matching performed. */
+#define RATE_MATCH_NONE		0
+/* The phy sends pause frames to throttle the MAC. */
+#define RATE_MATCH_PAUSE	1
+/* The phy asserts CRS to prevent the MAC from transmitting. */
+#define RATE_MATCH_CRS		2
+/* The MAC is programmed with a sufficiently-large IPG. */
+#define RATE_MATCH_OPEN_LOOP	3
+
 /* Which connector port. */
 #define PORT_TP			0x00
 #define PORT_AUI		0x01
@@ -2033,8 +2092,8 @@ enum ethtool_reset_flags {
  *	reported consistently by PHYLIB.  Read-only.
  * @master_slave_cfg: Master/slave port mode.
  * @master_slave_state: Master/slave port state.
+ * @rate_matching: Rate adaptation performed by the PHY
  * @reserved: Reserved for future use; see the note on reserved space.
- * @reserved1: Reserved for future use; see the note on reserved space.
  * @link_mode_masks: Variable length bitmaps.
  *
  * If autonegotiation is disabled, the speed and @duplex represent the
@@ -2085,7 +2144,7 @@ struct ethtool_link_settings {
 	uint8_t	transceiver;
 	uint8_t	master_slave_cfg;
 	uint8_t	master_slave_state;
-	uint8_t	reserved1[1];
+	uint8_t	rate_matching;
 	uint32_t	reserved[7];
 	uint32_t	link_mode_masks[];
 	/* layout of link_mode_masks fields:
@@ -194,6 +194,9 @@
  *  - add FUSE_SECURITY_CTX init flag
  *  - add security context to create, mkdir, symlink, and mknod requests
  *  - add FUSE_HAS_INODE_DAX, FUSE_ATTR_DAX
+ *
+ *  7.37
+ *  - add FUSE_TMPFILE
  */
 
 #ifndef _LINUX_FUSE_H
@@ -225,7 +228,7 @@
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 36
+#define FUSE_KERNEL_MINOR_VERSION 37
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -533,6 +536,7 @@ enum fuse_opcode {
 	FUSE_SETUPMAPPING	= 48,
 	FUSE_REMOVEMAPPING	= 49,
 	FUSE_SYNCFS		= 50,
+	FUSE_TMPFILE		= 51,
 
 	/* CUSE specific operations */
 	CUSE_INIT		= 4096,
@@ -862,6 +862,7 @@
 #define ABS_TOOL_WIDTH		0x1c
 
 #define ABS_VOLUME		0x20
+#define ABS_PROFILE		0x21
 
 #define ABS_MISC		0x28
 
@@ -40,6 +40,7 @@
 #define VIRTIO_BLK_F_MQ		12	/* support more than one vq */
 #define VIRTIO_BLK_F_DISCARD	13	/* DISCARD is supported */
 #define VIRTIO_BLK_F_WRITE_ZEROES	14	/* WRITE ZEROES is supported */
+#define VIRTIO_BLK_F_SECURE_ERASE	16	/* Secure Erase is supported */
 
 /* Legacy feature bits */
 #ifndef VIRTIO_BLK_NO_LEGACY
@@ -119,6 +120,21 @@ struct virtio_blk_config {
 	uint8_t write_zeroes_may_unmap;
 
 	uint8_t unused1[3];
+
+	/* the next 3 entries are guarded by VIRTIO_BLK_F_SECURE_ERASE */
+	/*
+	 * The maximum secure erase sectors (in 512-byte sectors) for
+	 * one segment.
+	 */
+	__virtio32 max_secure_erase_sectors;
+	/*
+	 * The maximum number of secure erase segments in a
+	 * secure erase command.
+	 */
+	__virtio32 max_secure_erase_seg;
+	/* Secure erase commands must be aligned to this number of sectors. */
+	__virtio32 secure_erase_sector_alignment;
+
 } QEMU_PACKED;
 
 /*
@@ -153,6 +169,9 @@ struct virtio_blk_config {
 /* Write zeroes command */
 #define VIRTIO_BLK_T_WRITE_ZEROES	13
 
+/* Secure erase command */
+#define VIRTIO_BLK_T_SECURE_ERASE	14
+
 #ifndef VIRTIO_BLK_NO_LEGACY
 /* Barrier before this op. */
 #define VIRTIO_BLK_T_BARRIER	0x80000000
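As a hedged sketch of how a driver might consume the new secure-erase config fields above: the fields are only meaningful once VIRTIO_BLK_F_SECURE_ERASE has been negotiated. has_feature() here is a hypothetical stand-in for QEMU's virtio_has_feature() helper; the 512-byte sector unit comes from the comment in the hunk.

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical helper mirroring a feature-bit test. */
static bool has_feature(uint64_t features, unsigned int fbit)
{
    return (features & (1ULL << fbit)) != 0;
}

/* Largest secure-erase request one segment may carry, in bytes. */
static uint64_t max_secure_erase_bytes(uint64_t features,
                                       uint32_t max_secure_erase_sectors)
{
    if (!has_feature(features, 16 /* VIRTIO_BLK_F_SECURE_ERASE */)) {
        return 0; /* feature not negotiated: config fields are not valid */
    }
    return (uint64_t)max_secure_erase_sectors * 512;
}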
@@ -20,18 +20,18 @@
 #define HUGETLB_FLAG_ENCODE_SHIFT	26
 #define HUGETLB_FLAG_ENCODE_MASK	0x3f
 
-#define HUGETLB_FLAG_ENCODE_16KB	(14 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_64KB	(16 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_512KB	(19 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_1MB		(20 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_2MB		(21 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_8MB		(23 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_16MB	(24 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_32MB	(25 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_256MB	(28 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_512MB	(29 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_1GB		(30 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_2GB		(31 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_16GB	(34 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_16KB	(14U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_64KB	(16U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_512KB	(19U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_1MB		(20U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_2MB		(21U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_8MB		(23U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_16MB	(24U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_32MB	(25U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_256MB	(28U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_512MB	(29U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_1GB		(30U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_2GB		(31U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_16GB	(34U << HUGETLB_FLAG_ENCODE_SHIFT)
 
 #endif /* _ASM_GENERIC_HUGETLB_ENCODE_H_ */
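A note on why the U suffix above plausibly matters (my reading, not stated in the hunk): 34 << 26 is 2,281,701,376, which exceeds INT_MAX, so the unsuffixed 16GB encoding overflows a signed int shift, which is undefined behaviour in C. A tiny sketch:

#include <stdint.h>
#include <stdio.h>

#define HUGETLB_FLAG_ENCODE_SHIFT 26

int main(void)
{
    /* 34 << 26 = 2281701376 > 2147483647 (INT_MAX): the unsigned-suffixed
     * form keeps the shift in unsigned int and avoids the overflow. */
    uint32_t sixteen_gb = 34U << HUGETLB_FLAG_ENCODE_SHIFT;
    printf("HUGETLB_FLAG_ENCODE_16GB = 0x%08x\n", sixteen_gb);
    return 0;
}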
@@ -77,6 +77,8 @@
 
 #define MADV_DONTNEED_LOCKED	24	/* like DONTNEED, but drop locked pages too */
 
+#define MADV_COLLAPSE	25		/* Synchronous hugepage collapse */
+
 /* compatibility flags */
 #define MAP_FILE	0
 
@@ -103,6 +103,8 @@
 
 #define MADV_DONTNEED_LOCKED	24	/* like DONTNEED, but drop locked pages too */
 
+#define MADV_COLLAPSE	25		/* Synchronous hugepage collapse */
+
 /* compatibility flags */
 #define MAP_FILE	0
 
@@ -48,6 +48,7 @@ struct kvm_sregs {
 /* CONFIG registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
 struct kvm_riscv_config {
 	unsigned long isa;
+	unsigned long zicbom_block_size;
 };
 
 /* CORE registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
@@ -98,6 +99,9 @@ enum KVM_RISCV_ISA_EXT_ID {
 	KVM_RISCV_ISA_EXT_M,
 	KVM_RISCV_ISA_EXT_SVPBMT,
 	KVM_RISCV_ISA_EXT_SSTC,
+	KVM_RISCV_ISA_EXT_SVINVAL,
+	KVM_RISCV_ISA_EXT_ZIHINTPAUSE,
+	KVM_RISCV_ISA_EXT_ZICBOM,
 	KVM_RISCV_ISA_EXT_MAX,
 };
 
@@ -1175,6 +1175,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_VM_DISABLE_NX_HUGE_PAGES 220
 #define KVM_CAP_S390_ZPCI_OP 221
 #define KVM_CAP_S390_CPU_TOPOLOGY 222
+#define KVM_CAP_DIRTY_LOG_RING_ACQ_REL 223
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -48,12 +48,26 @@
 #define PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU	PSCI_0_2_FN64(7)
 
 #define PSCI_1_0_FN_PSCI_FEATURES		PSCI_0_2_FN(10)
+#define PSCI_1_0_FN_CPU_FREEZE			PSCI_0_2_FN(11)
+#define PSCI_1_0_FN_CPU_DEFAULT_SUSPEND		PSCI_0_2_FN(12)
+#define PSCI_1_0_FN_NODE_HW_STATE		PSCI_0_2_FN(13)
 #define PSCI_1_0_FN_SYSTEM_SUSPEND		PSCI_0_2_FN(14)
 #define PSCI_1_0_FN_SET_SUSPEND_MODE		PSCI_0_2_FN(15)
-#define PSCI_1_1_FN_SYSTEM_RESET2		PSCI_0_2_FN(18)
+#define PSCI_1_0_FN_STAT_RESIDENCY		PSCI_0_2_FN(16)
+#define PSCI_1_0_FN_STAT_COUNT			PSCI_0_2_FN(17)
+
+#define PSCI_1_1_FN_SYSTEM_RESET2		PSCI_0_2_FN(18)
+#define PSCI_1_1_FN_MEM_PROTECT			PSCI_0_2_FN(19)
+#define PSCI_1_1_FN_MEM_PROTECT_CHECK_RANGE	PSCI_0_2_FN(19)
 
+#define PSCI_1_0_FN64_CPU_DEFAULT_SUSPEND	PSCI_0_2_FN64(12)
+#define PSCI_1_0_FN64_NODE_HW_STATE		PSCI_0_2_FN64(13)
 #define PSCI_1_0_FN64_SYSTEM_SUSPEND		PSCI_0_2_FN64(14)
+#define PSCI_1_0_FN64_STAT_RESIDENCY		PSCI_0_2_FN64(16)
+#define PSCI_1_0_FN64_STAT_COUNT		PSCI_0_2_FN64(17)
 
 #define PSCI_1_1_FN64_SYSTEM_RESET2		PSCI_0_2_FN64(18)
+#define PSCI_1_1_FN64_MEM_PROTECT_CHECK_RANGE	PSCI_0_2_FN64(19)
 
 /* PSCI v0.2 power state encoding for CPU_SUSPEND function */
 #define PSCI_0_2_POWER_STATE_ID_MASK		0xffff
@@ -12,6 +12,10 @@
 
 #include <linux/types.h>
 
+/* ioctls for /dev/userfaultfd */
+#define USERFAULTFD_IOC 0xAA
+#define USERFAULTFD_IOC_NEW _IO(USERFAULTFD_IOC, 0x00)
+
 /*
  * If the UFFDIO_API is upgraded someday, the UFFDIO_UNREGISTER and
  * UFFDIO_WAKE ioctls should be defined as _IOW and not as _IOR.  In
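The ioctls above back the "util/userfaultfd: Support /dev/userfaultfd" commit in this pull: instead of the userfaultfd() syscall, a userfaultfd can be minted from the device node. A hedged userspace sketch (requires Linux 6.1+ and access to /dev/userfaultfd; USERFAULTFD_IOC_NEW takes the same flags the syscall accepts):

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/userfaultfd.h>

int main(void)
{
    int dev = open("/dev/userfaultfd", O_RDWR | O_CLOEXEC);
    if (dev < 0) {
        perror("open /dev/userfaultfd");
        return 1;
    }
    /* Returns a fresh userfaultfd, gated by permissions on the device
     * node rather than a sysctl. */
    int uffd = ioctl(dev, USERFAULTFD_IOC_NEW, O_CLOEXEC | O_NONBLOCK);
    if (uffd < 0) {
        perror("USERFAULTFD_IOC_NEW");
    } else {
        close(uffd);
    }
    close(dev);
    return uffd < 0;
}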
@@ -986,6 +986,148 @@ enum vfio_device_mig_state {
 	VFIO_DEVICE_STATE_RUNNING_P2P = 5,
 };
 
+/*
+ * Upon VFIO_DEVICE_FEATURE_SET, allow the device to be moved into a low power
+ * state with the platform-based power management.  Device use of lower power
+ * states depends on factors managed by the runtime power management core,
+ * including system level support and coordinating support among dependent
+ * devices.  Enabling device low power entry does not guarantee lower power
+ * usage by the device, nor is a mechanism provided through this feature to
+ * know the current power state of the device.  If any device access happens
+ * (either from the host or through the vfio uAPI) when the device is in the
+ * low power state, then the host will move the device out of the low power
+ * state as necessary prior to the access.  Once the access is completed, the
+ * device may re-enter the low power state.  For single shot low power support
+ * with wake-up notification, see
+ * VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP below.  Access to mmap'd
+ * device regions is disabled on LOW_POWER_ENTRY and may only be resumed after
+ * calling LOW_POWER_EXIT.
+ */
+#define VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY 3
+
+/*
+ * This device feature has the same behavior as
+ * VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY with the exception that the user
+ * provides an eventfd for wake-up notification.  When the device moves out of
+ * the low power state for the wake-up, the host will not allow the device to
+ * re-enter a low power state without a subsequent user call to one of the low
+ * power entry device feature IOCTLs.  Access to mmap'd device regions is
+ * disabled on LOW_POWER_ENTRY_WITH_WAKEUP and may only be resumed after the
+ * low power exit.  The low power exit can happen either through LOW_POWER_EXIT
+ * or through any other access (where the wake-up notification has been
+ * generated).  The access to mmap'd device regions will not trigger low power
+ * exit.
+ *
+ * The notification through the provided eventfd will be generated only when
+ * the device has entered and is resumed from a low power state after
+ * calling this device feature IOCTL.  A device that has not entered low power
+ * state, as managed through the runtime power management core, will not
+ * generate a notification through the provided eventfd on access.  Calling the
+ * LOW_POWER_EXIT feature is optional in the case where notification has been
+ * signaled on the provided eventfd that a resume from low power has occurred.
+ */
+struct vfio_device_low_power_entry_with_wakeup {
+	__s32 wakeup_eventfd;
+	__u32 reserved;
+};
+
+#define VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP 4
+
+/*
+ * Upon VFIO_DEVICE_FEATURE_SET, disallow use of device low power states as
+ * previously enabled via VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY or
+ * VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP device features.
+ * This device feature IOCTL may itself generate a wakeup eventfd notification
+ * in the latter case if the device had previously entered a low power state.
+ */
+#define VFIO_DEVICE_FEATURE_LOW_POWER_EXIT 5
+
+/*
+ * Upon VFIO_DEVICE_FEATURE_SET start/stop device DMA logging.
+ * VFIO_DEVICE_FEATURE_PROBE can be used to detect if the device supports
+ * DMA logging.
+ *
+ * DMA logging allows a device to internally record what DMAs the device is
+ * initiating and report them back to userspace.  It is part of the VFIO
+ * migration infrastructure that allows implementing dirty page tracking
+ * during the pre copy phase of live migration.  Only DMA WRITEs are logged,
+ * and this API is not connected to VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE.
+ *
+ * When DMA logging is started a range of IOVAs to monitor is provided and the
+ * device can optimize its logging to cover only the IOVA range given.  Each
+ * DMA that the device initiates inside the range will be logged by the device
+ * for later retrieval.
+ *
+ * page_size is an input that hints what tracking granularity the device
+ * should try to achieve.  If the device cannot do the hinted page size then
+ * it's the driver choice which page size to pick based on its support.
+ * On output the device will return the page size it selected.
+ *
+ * ranges is a pointer to an array of
+ * struct vfio_device_feature_dma_logging_range.
+ *
+ * The core kernel code guarantees to support by minimum num_ranges that fit
+ * into a single kernel page.  User space can try higher values but should give
+ * up if the above can't be achieved as of some driver limitations.
+ *
+ * A single call to start device DMA logging can be issued and a matching stop
+ * should follow at the end.  Another start is not allowed in the meantime.
+ */
+struct vfio_device_feature_dma_logging_control {
+	__aligned_u64 page_size;
+	__u32 num_ranges;
+	__u32 __reserved;
+	__aligned_u64 ranges;
+};
+
+struct vfio_device_feature_dma_logging_range {
+	__aligned_u64 iova;
+	__aligned_u64 length;
+};
+
+#define VFIO_DEVICE_FEATURE_DMA_LOGGING_START 6
+
+/*
+ * Upon VFIO_DEVICE_FEATURE_SET stop device DMA logging that was started
+ * by VFIO_DEVICE_FEATURE_DMA_LOGGING_START
+ */
+#define VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP 7
+
+/*
+ * Upon VFIO_DEVICE_FEATURE_GET read back and clear the device DMA log
+ *
+ * Query the device's DMA log for written pages within the given IOVA range.
+ * During querying the log is cleared for the IOVA range.
+ *
+ * bitmap is a pointer to an array of u64s that will hold the output bitmap
+ * with 1 bit reporting a page_size unit of IOVA.  The mapping of IOVA to bits
+ * is given by:
+ *  bitmap[(addr - iova)/page_size] & (1ULL << (addr % 64))
+ *
+ * The input page_size can be any power of two value and does not have to
+ * match the value given to VFIO_DEVICE_FEATURE_DMA_LOGGING_START.  The driver
+ * will format its internal logging to match the reporting page size, possibly
+ * by replicating bits if the internal page size is lower than requested.
+ *
+ * The LOGGING_REPORT will only set bits in the bitmap and never clear or
+ * perform any initialization of the user provided bitmap.
+ *
+ * If any error is returned userspace should assume that the dirty log is
+ * corrupted.  Error recovery is to consider all memory dirty and try to
+ * restart the dirty tracking, or to abort/restart the whole migration.
+ *
+ * If DMA logging is not enabled, an error will be returned.
+ *
+ */
+struct vfio_device_feature_dma_logging_report {
+	__aligned_u64 iova;
+	__aligned_u64 length;
+	__aligned_u64 page_size;
+	__aligned_u64 bitmap;
+};
+
+#define VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT 8
+
 /* -------- API for Type1 VFIO IOMMU -------- */
 
 /**
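The DMA logging structs above are carried inside the generic VFIO_DEVICE_FEATURE ioctl wrapper (struct vfio_device_feature with argsz/flags and a trailing data payload). A hedged sketch of reading back one IOVA range of the dirty log; device_fd, iova, length and page_size are assumed to come from the caller, and the bitmap indexing follows the comment in the header:

#include <linux/vfio.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>

/* Read and clear the device dirty log for [iova, iova + length).
 * bitmap must hold one bit per page_size unit of the range. */
static int dma_log_report(int device_fd, uint64_t iova, uint64_t length,
                          uint64_t page_size, uint64_t *bitmap)
{
    size_t sz = sizeof(struct vfio_device_feature) +
                sizeof(struct vfio_device_feature_dma_logging_report);
    struct vfio_device_feature *feature = calloc(1, sz);
    struct vfio_device_feature_dma_logging_report *report =
        (void *)feature->data;
    int ret;

    feature->argsz = sz;
    feature->flags = VFIO_DEVICE_FEATURE_GET |
                     VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT;
    report->iova = iova;
    report->length = length;
    report->page_size = page_size; /* any power of two */
    report->bitmap = (uintptr_t)bitmap;

    ret = ioctl(device_fd, VFIO_DEVICE_FEATURE, feature);
    free(feature);
    return ret;
}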
meson.build (17 lines changed)
@@ -2351,6 +2351,22 @@ config_host_data.set('CONFIG_AVX512F_OPT', get_option('avx512f') \
     int main(int argc, char *argv[]) { return bar(argv[argc - 1]); }
   '''), error_message: 'AVX512F not available').allowed())
 
+config_host_data.set('CONFIG_AVX512BW_OPT', get_option('avx512bw') \
+  .require(have_cpuid_h, error_message: 'cpuid.h not available, cannot enable AVX512BW') \
+  .require(cc.links('''
+    #pragma GCC push_options
+    #pragma GCC target("avx512bw")
+    #include <cpuid.h>
+    #include <immintrin.h>
+    static int bar(void *a) {
+
+      __m512i *x = a;
+      __m512i res= _mm512_abs_epi8(*x);
+      return res[1];
+    }
+    int main(int argc, char *argv[]) { return bar(argv[0]); }
+  '''), error_message: 'AVX512BW not available').allowed())
+
 have_pvrdma = get_option('pvrdma') \
   .require(rdma.found(), error_message: 'PVRDMA requires OpenFabrics libraries') \
   .require(cc.compiles(gnu_source_prefix + '''
@@ -3783,6 +3799,7 @@ summary_info += {'debug stack usage': get_option('debug_stack_usage')}
 summary_info += {'mutex debugging':   get_option('debug_mutex')}
 summary_info += {'memory allocator':  get_option('malloc')}
 summary_info += {'avx2 optimization': config_host_data.get('CONFIG_AVX2_OPT')}
+summary_info += {'avx512bw optimization': config_host_data.get('CONFIG_AVX512BW_OPT')}
 summary_info += {'avx512f optimization': config_host_data.get('CONFIG_AVX512F_OPT')}
 summary_info += {'gprof enabled':     get_option('gprof')}
 summary_info += {'gcov':              get_option('b_coverage')}
@@ -104,6 +104,8 @@ option('avx2', type: 'feature', value: 'auto',
        description: 'AVX2 optimizations')
 option('avx512f', type: 'feature', value: 'disabled',
        description: 'AVX512F optimizations')
+option('avx512bw', type: 'feature', value: 'auto',
+       description: 'AVX512BW optimizations')
 option('keyring', type: 'feature', value: 'auto',
        description: 'Linux keyring support')
 
@@ -184,16 +184,27 @@ static int migration_maybe_pause(MigrationState *s,
                                  int new_state);
 static void migrate_fd_cancel(MigrationState *s);
 
-static bool migrate_allow_multi_channels = true;
-
-void migrate_protocol_allow_multi_channels(bool allow)
+static bool migration_needs_multiple_sockets(void)
 {
-    migrate_allow_multi_channels = allow;
+    return migrate_use_multifd() || migrate_postcopy_preempt();
 }
 
-bool migrate_multi_channels_is_allowed(void)
+static bool uri_supports_multi_channels(const char *uri)
 {
-    return migrate_allow_multi_channels;
+    return strstart(uri, "tcp:", NULL) || strstart(uri, "unix:", NULL) ||
+           strstart(uri, "vsock:", NULL);
 }
 
+static bool
+migration_channels_and_uri_compatible(const char *uri, Error **errp)
+{
+    if (migration_needs_multiple_sockets() &&
+        !uri_supports_multi_channels(uri)) {
+        error_setg(errp, "Migration requires multi-channel URIs (e.g. tcp)");
+        return false;
+    }
+
+    return true;
+}
+
 static gint page_request_addr_cmp(gconstpointer ap, gconstpointer bp)
@@ -224,6 +235,8 @@ void migration_object_init(void)
     qemu_sem_init(&current_incoming->postcopy_pause_sem_dst, 0);
     qemu_sem_init(&current_incoming->postcopy_pause_sem_fault, 0);
     qemu_sem_init(&current_incoming->postcopy_pause_sem_fast_load, 0);
+    qemu_sem_init(&current_incoming->postcopy_qemufile_dst_done, 0);
+
     qemu_mutex_init(&current_incoming->page_request_mutex);
     current_incoming->page_requested = g_tree_new(page_request_addr_cmp);
 
@@ -302,6 +315,8 @@ void migration_incoming_state_destroy(void)
 {
     struct MigrationIncomingState *mis = migration_incoming_get_current();
 
+    multifd_load_cleanup();
+
     if (mis->to_src_file) {
         /* Tell source that we are done */
         migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);
@@ -493,12 +508,15 @@ static void qemu_start_incoming_migration(const char *uri, Error **errp)
 {
     const char *p = NULL;
 
-    migrate_protocol_allow_multi_channels(false); /* reset it anyway */
+    /* URI is not suitable for migration? */
+    if (!migration_channels_and_uri_compatible(uri, errp)) {
+        return;
+    }
+
     qapi_event_send_migration(MIGRATION_STATUS_SETUP);
     if (strstart(uri, "tcp:", &p) ||
         strstart(uri, "unix:", NULL) ||
         strstart(uri, "vsock:", NULL)) {
-        migrate_protocol_allow_multi_channels(true);
         socket_start_incoming_migration(p ? p : uri, errp);
 #ifdef CONFIG_RDMA
     } else if (strstart(uri, "rdma:", &p)) {
@@ -543,13 +561,7 @@ static void process_incoming_migration_bh(void *opaque)
      */
     qemu_announce_self(&mis->announce_timer, migrate_announce_params());
 
-    if (multifd_load_cleanup(&local_err) != 0) {
-        error_report_err(local_err);
-        autostart = false;
-    }
-    /* If global state section was not received or we are in running
-       state, we need to obey autostart. Any other state is set with
-       runstate_set. */
+    multifd_load_shutdown();
 
     dirty_bitmap_mig_before_vm_start();
 
@@ -649,9 +661,9 @@ fail:
     migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                       MIGRATION_STATUS_FAILED);
     qemu_fclose(mis->from_src_file);
-    if (multifd_load_cleanup(&local_err) != 0) {
-        error_report_err(local_err);
-    }
+
+    multifd_load_cleanup();
+
     exit(EXIT_FAILURE);
 }
 
@@ -723,9 +735,29 @@ void migration_fd_process_incoming(QEMUFile *f, Error **errp)
     migration_incoming_process();
 }
 
-static bool migration_needs_multiple_sockets(void)
+/*
+ * Returns true when we want to start a new incoming migration process,
+ * false otherwise.
+ */
+static bool migration_should_start_incoming(bool main_channel)
 {
-    return migrate_use_multifd() || migrate_postcopy_preempt();
+    /* Multifd doesn't start unless all channels are established */
+    if (migrate_use_multifd()) {
+        return migration_has_all_channels();
+    }
+
+    /* Preempt channel only starts when the main channel is created */
+    if (migrate_postcopy_preempt()) {
+        return main_channel;
+    }
+
+    /*
+     * For all the rest types of migration, we should only reach here when
+     * it's the main channel that's being created, and we should always
+     * proceed with this channel.
+     */
+    assert(main_channel);
+    return true;
 }
 
 void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
@@ -789,7 +821,7 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
         }
     }
 
-    if (migration_has_all_channels()) {
+    if (migration_should_start_incoming(default_channel)) {
         /* If it's a recovery, we're done */
         if (postcopy_try_recover()) {
             return;
@@ -1378,15 +1410,6 @@ static bool migrate_caps_check(bool *cap_list,
     }
 #endif
 
-
-    /* incoming side only */
-    if (runstate_check(RUN_STATE_INMIGRATE) &&
-        !migrate_multi_channels_is_allowed() &&
-        cap_list[MIGRATION_CAPABILITY_MULTIFD]) {
-        error_setg(errp, "multifd is not supported by current protocol");
-        return false;
-    }
-
     if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]) {
         if (!cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) {
             error_setg(errp, "Postcopy preempt requires postcopy-ram");
@@ -2471,6 +2494,11 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
     MigrationState *s = migrate_get_current();
     const char *p = NULL;
 
+    /* URI is not suitable for migration? */
+    if (!migration_channels_and_uri_compatible(uri, errp)) {
+        return;
+    }
+
     if (!migrate_prepare(s, has_blk && blk, has_inc && inc,
                          has_resume && resume, errp)) {
         /* Error detected, put into errp */
@@ -2483,11 +2511,9 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
         }
     }
 
-    migrate_protocol_allow_multi_channels(false);
     if (strstart(uri, "tcp:", &p) ||
         strstart(uri, "unix:", NULL) ||
         strstart(uri, "vsock:", NULL)) {
-        migrate_protocol_allow_multi_channels(true);
         socket_start_outgoing_migration(s, p ? p : uri, &local_err);
 #ifdef CONFIG_RDMA
     } else if (strstart(uri, "rdma:", &p)) {
@@ -3022,6 +3048,7 @@ retry:
         case MIG_RP_MSG_PONG:
             tmp32 = ldl_be_p(buf);
             trace_source_return_path_thread_pong(tmp32);
+            qemu_sem_post(&ms->rp_state.rp_pong_acks);
             break;
 
         case MIG_RP_MSG_REQ_PAGES:
@@ -3155,6 +3182,13 @@ static int await_return_path_close_on_source(MigrationState *ms)
     return ms->rp_state.error;
 }
 
+static inline void
+migration_wait_main_channel(MigrationState *ms)
+{
+    /* Wait until one PONG message received */
+    qemu_sem_wait(&ms->rp_state.rp_pong_acks);
+}
+
 /*
  * Switch from normal iteration to postcopy
  * Returns non-0 on error
@@ -3169,9 +3203,12 @@ static int postcopy_start(MigrationState *ms)
     bool restart_block = false;
     int cur_state = MIGRATION_STATUS_ACTIVE;
 
-    if (postcopy_preempt_wait_channel(ms)) {
-        migrate_set_state(&ms->state, ms->state, MIGRATION_STATUS_FAILED);
-        return -1;
+    if (migrate_postcopy_preempt()) {
+        migration_wait_main_channel(ms);
+        if (postcopy_preempt_establish_channel(ms)) {
+            migrate_set_state(&ms->state, ms->state, MIGRATION_STATUS_FAILED);
+            return -1;
+        }
     }
 
     if (!migrate_pause_before_switchover()) {
@@ -3582,6 +3619,20 @@ static int postcopy_do_resume(MigrationState *s)
         return ret;
     }
 
+    /*
+     * If preempt is enabled, re-establish the preempt channel.  Note that
+     * we do it after resume prepare to make sure the main channel will be
+     * created before the preempt channel.  E.g. with weak network, the
+     * dest QEMU may get messed up with the preempt and main channels on
+     * the order of connection setup.  This guarantees the correct order.
+     */
+    ret = postcopy_preempt_establish_channel(s);
+    if (ret) {
+        error_report("%s: postcopy_preempt_establish_channel(): %d",
+                     __func__, ret);
+        return ret;
+    }
+
     /*
      * Last handshake with destination on the resume (destination will
      * switch to postcopy-active afterwards)
@@ -3643,14 +3694,6 @@ static MigThrError postcopy_pause(MigrationState *s)
         if (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
             /* Woken up by a recover procedure. Give it a shot */
 
-            if (postcopy_preempt_wait_channel(s)) {
-                /*
-                 * Preempt enabled, and new channel create failed; loop
-                 * back to wait for another recovery.
-                 */
-                continue;
-            }
-
             /*
              * Firstly, let's wake up the return path now, with a new
              * return path channel.
@@ -4343,15 +4386,6 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
         }
     }
 
-    /* This needs to be done before resuming a postcopy */
-    if (postcopy_preempt_setup(s, &local_err)) {
-        error_report_err(local_err);
-        migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
-                          MIGRATION_STATUS_FAILED);
-        migrate_fd_cleanup(s);
-        return;
-    }
-
     if (resume) {
         /* Wakeup the main migration thread to do the recovery */
         migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
@@ -4525,6 +4559,7 @@ static void migration_instance_finalize(Object *obj)
     qemu_sem_destroy(&ms->postcopy_pause_sem);
     qemu_sem_destroy(&ms->postcopy_pause_rp_sem);
     qemu_sem_destroy(&ms->rp_state.rp_sem);
+    qemu_sem_destroy(&ms->rp_state.rp_pong_acks);
    qemu_sem_destroy(&ms->postcopy_qemufile_src_sem);
     error_free(ms->error);
 }
@@ -4571,6 +4606,7 @@ static void migration_instance_init(Object *obj)
     qemu_sem_init(&ms->postcopy_pause_sem, 0);
     qemu_sem_init(&ms->postcopy_pause_rp_sem, 0);
     qemu_sem_init(&ms->rp_state.rp_sem, 0);
+    qemu_sem_init(&ms->rp_state.rp_pong_acks, 0);
     qemu_sem_init(&ms->rate_limit_sem, 0);
     qemu_sem_init(&ms->wait_unplug_sem, 0);
     qemu_sem_init(&ms->postcopy_qemufile_src_sem, 0);
@@ -116,6 +116,12 @@ struct MigrationIncomingState {
     unsigned int postcopy_channels;
     /* QEMUFile for postcopy only; it'll be handled by a separate thread */
     QEMUFile *postcopy_qemufile_dst;
+    /*
+     * When postcopy_qemufile_dst is properly setup, this sem is posted.
+     * One can wait on this semaphore to wait until the preempt channel is
+     * properly setup.
+     */
+    QemuSemaphore postcopy_qemufile_dst_done;
     /* Postcopy priority thread is used to receive postcopy requested pages */
     QemuThread postcopy_prio_thread;
     bool postcopy_prio_thread_created;
@@ -276,6 +282,12 @@ struct MigrationState {
          */
         bool rp_thread_created;
         QemuSemaphore rp_sem;
+        /*
+         * We post to this when we got one PONG from dest. So far it's an
+         * easy way to know the main channel has successfully established
+         * on dest QEMU.
+         */
+        QemuSemaphore rp_pong_acks;
     } rp_state;
 
     double mbps;
@@ -474,7 +486,4 @@ void migration_cancel(const Error *error);
 void populate_vfio_info(MigrationInfo *info);
 void postcopy_temp_page_reset(PostcopyTmpPage *tmp_page);
 
-bool migrate_multi_channels_is_allowed(void);
-void migrate_protocol_allow_multi_channels(bool allow);
-
 #endif
@@ -516,7 +516,7 @@ void multifd_save_cleanup(void)
 {
     int i;
 
-    if (!migrate_use_multifd() || !migrate_multi_channels_is_allowed()) {
+    if (!migrate_use_multifd()) {
         return;
     }
     multifd_send_terminate_threads(NULL);
@@ -843,30 +843,29 @@ static bool multifd_channel_connect(MultiFDSendParams *p,
                ioc, object_get_typename(OBJECT(ioc)),
                migrate_get_current()->hostname, error);
 
-    if (!error) {
-        if (migrate_channel_requires_tls_upgrade(ioc)) {
-            multifd_tls_channel_connect(p, ioc, &error);
-            if (!error) {
-                /*
-                 * tls_channel_connect will call back to this
-                 * function after the TLS handshake,
-                 * so we mustn't call multifd_send_thread until then
-                 */
-                return true;
-            } else {
-                return false;
-            }
-        } else {
-            migration_ioc_register_yank(ioc);
-            p->registered_yank = true;
-            p->c = ioc;
-            qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
-                               QEMU_THREAD_JOINABLE);
-        }
-        return true;
+    if (error) {
+        return false;
     }
 
-    return false;
+    if (migrate_channel_requires_tls_upgrade(ioc)) {
+        multifd_tls_channel_connect(p, ioc, &error);
+        if (!error) {
+            /*
+             * tls_channel_connect will call back to this
+             * function after the TLS handshake,
+             * so we mustn't call multifd_send_thread until then
+             */
+            return true;
+        } else {
+            return false;
+        }
+    } else {
+        migration_ioc_register_yank(ioc);
+        p->registered_yank = true;
+        p->c = ioc;
+        qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
+                           QEMU_THREAD_JOINABLE);
+    }
+    return true;
 }
 
 static void multifd_new_send_channel_cleanup(MultiFDSendParams *p,
@@ -893,19 +892,15 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
     Error *local_err = NULL;
 
     trace_multifd_new_send_channel_async(p->id);
-    if (qio_task_propagate_error(task, &local_err)) {
-        goto cleanup;
-    } else {
+    if (!qio_task_propagate_error(task, &local_err)) {
         p->c = QIO_CHANNEL(sioc);
         qio_channel_set_delay(p->c, false);
         p->running = true;
-        if (!multifd_channel_connect(p, sioc, local_err)) {
-            goto cleanup;
+        if (multifd_channel_connect(p, sioc, local_err)) {
+            return;
         }
-        return;
     }
 
-cleanup:
     multifd_new_send_channel_cleanup(p, sioc, local_err);
 }
 
@@ -918,10 +913,6 @@ int multifd_save_setup(Error **errp)
     if (!migrate_use_multifd()) {
         return 0;
     }
-    if (!migrate_multi_channels_is_allowed()) {
-        error_setg(errp, "multifd is not supported by current protocol");
-        return -1;
-    }
 
     thread_count = migrate_multifd_channels();
     multifd_send_state = g_malloc0(sizeof(*multifd_send_state));
@@ -1022,26 +1013,33 @@ static void multifd_recv_terminate_threads(Error *err)
     }
 }
 
-int multifd_load_cleanup(Error **errp)
+void multifd_load_shutdown(void)
+{
+    if (migrate_use_multifd()) {
+        multifd_recv_terminate_threads(NULL);
+    }
+}
+
+void multifd_load_cleanup(void)
 {
     int i;
 
-    if (!migrate_use_multifd() || !migrate_multi_channels_is_allowed()) {
-        return 0;
+    if (!migrate_use_multifd()) {
+        return;
     }
     multifd_recv_terminate_threads(NULL);
     for (i = 0; i < migrate_multifd_channels(); i++) {
         MultiFDRecvParams *p = &multifd_recv_state->params[i];
 
         if (p->running) {
             p->quit = true;
             /*
              * multifd_recv_thread may hung at MULTIFD_FLAG_SYNC handle code,
              * however try to wakeup it without harm in cleanup phase.
              */
             qemu_sem_post(&p->sem_sync);
-            qemu_thread_join(&p->thread);
         }
 
+        qemu_thread_join(&p->thread);
     }
     for (i = 0; i < migrate_multifd_channels(); i++) {
         MultiFDRecvParams *p = &multifd_recv_state->params[i];
@@ -1067,8 +1065,6 @@ int multifd_load_cleanup(Error **errp)
     multifd_recv_state->params = NULL;
     g_free(multifd_recv_state);
     multifd_recv_state = NULL;
-
-    return 0;
 }
 
 void multifd_recv_sync_main(void)
@@ -1116,10 +1112,7 @@ static void *multifd_recv_thread(void *opaque)
 
         ret = qio_channel_read_all_eof(p->c, (void *)p->packet,
                                        p->packet_len, &local_err);
-        if (ret == 0) {   /* EOF */
-            break;
-        }
-        if (ret == -1) {   /* Error */
+        if (ret == 0 || ret == -1) {   /* 0: EOF  -1: Error */
             break;
         }
 
@@ -1180,10 +1173,6 @@ int multifd_load_setup(Error **errp)
         return 0;
     }
 
-    if (!migrate_multi_channels_is_allowed()) {
-        error_setg(errp, "multifd is not supported by current protocol");
-        return -1;
-    }
     thread_count = migrate_multifd_channels();
     multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state));
     multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count);
(One file's diff was suppressed because it is too large.)
@@ -16,7 +16,8 @@
 int multifd_save_setup(Error **errp);
 void multifd_save_cleanup(void);
 int multifd_load_setup(Error **errp);
-int multifd_load_cleanup(Error **errp);
+void multifd_load_cleanup(void);
+void multifd_load_shutdown(void);
 bool multifd_recv_all_channels_created(void);
 void multifd_recv_new_channel(QIOChannel *ioc, Error **errp);
 void multifd_recv_sync_main(void);
@@ -1197,6 +1197,11 @@ int postcopy_ram_incoming_setup(MigrationIncomingState *mis)
     }
 
     if (migrate_postcopy_preempt()) {
+        /*
+         * The preempt channel is established in asynchronous way.  Wait
+         * for its completion.
+         */
+        qemu_sem_wait(&mis->postcopy_qemufile_dst_done);
         /*
          * This thread needs to be created after the temp pages because
          * it'll fetch RAM_CHANNEL_POSTCOPY PostcopyTmpPage immediately.
@@ -1544,6 +1549,7 @@ void postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file)
      */
     qemu_file_set_blocking(file, true);
     mis->postcopy_qemufile_dst = file;
+    qemu_sem_post(&mis->postcopy_qemufile_dst_done);
     trace_postcopy_preempt_new_channel();
 }
 
@@ -1612,14 +1618,21 @@ out:
     postcopy_preempt_send_channel_done(s, ioc, local_err);
 }
 
-/* Returns 0 if channel established, -1 for error. */
-int postcopy_preempt_wait_channel(MigrationState *s)
+/*
+ * This function will kick off an async task to establish the preempt
+ * channel, and wait until the connection setup completed.  Returns 0 if
+ * channel established, -1 for error.
+ */
+int postcopy_preempt_establish_channel(MigrationState *s)
 {
     /* If preempt not enabled, no need to wait */
     if (!migrate_postcopy_preempt()) {
         return 0;
     }
 
+    /* Kick off async task to establish preempt channel */
+    postcopy_preempt_setup(s);
+
     /*
      * We need the postcopy preempt channel to be established before
      * starting doing anything.
@@ -1629,22 +1642,10 @@ int postcopy_preempt_wait_channel(MigrationState *s)
     return s->postcopy_qemufile_src ? 0 : -1;
 }
 
-int postcopy_preempt_setup(MigrationState *s, Error **errp)
+void postcopy_preempt_setup(MigrationState *s)
 {
-    if (!migrate_postcopy_preempt()) {
-        return 0;
-    }
-
-    if (!migrate_multi_channels_is_allowed()) {
-        error_setg(errp, "Postcopy preempt is not supported as current "
-                   "migration stream does not support multi-channels.");
-        return -1;
-    }
-
     /* Kick an async task to connect */
     socket_send_channel_create(postcopy_preempt_send_channel_new, s);
-
-    return 0;
 }
 
 static void postcopy_pause_ram_fast_load(MigrationIncomingState *mis)
@@ -191,7 +191,7 @@ enum PostcopyChannels {
 };
 
 void postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file);
-int postcopy_preempt_setup(MigrationState *s, Error **errp);
-int postcopy_preempt_wait_channel(MigrationState *s);
+void postcopy_preempt_setup(MigrationState *s);
+int postcopy_preempt_establish_channel(MigrationState *s);
 
 #endif
migration/ram.c (148 lines changed)
@@ -67,21 +67,53 @@
 /***********************************************************/
 /* ram save/restore */
 
-/* RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS, it
- * worked for pages that where filled with the same char.  We switched
+/*
+ * RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS, it
+ * worked for pages that were filled with the same char.  We switched
  * it to only search for the zero value.  And to avoid confusion with
- * RAM_SSAVE_FLAG_COMPRESS_PAGE just rename it.
+ * RAM_SAVE_FLAG_COMPRESS_PAGE just rename it.
  */
-
-#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
+/*
+ * RAM_SAVE_FLAG_FULL was obsoleted in 2009, it can be reused now
+ */
+#define RAM_SAVE_FLAG_FULL     0x01
 #define RAM_SAVE_FLAG_ZERO     0x02
 #define RAM_SAVE_FLAG_MEM_SIZE 0x04
 #define RAM_SAVE_FLAG_PAGE     0x08
 #define RAM_SAVE_FLAG_EOS      0x10
 #define RAM_SAVE_FLAG_CONTINUE 0x20
 #define RAM_SAVE_FLAG_XBZRLE   0x40
-/* 0x80 is reserved in migration.h start with 0x100 next */
+/* 0x80 is reserved in qemu-file.h for RAM_SAVE_FLAG_HOOK */
 #define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100
+/* We can't use any flag that is bigger than 0x200 */
+
+int (*xbzrle_encode_buffer_func)(uint8_t *, uint8_t *, int,
+     uint8_t *, int) = xbzrle_encode_buffer;
+#if defined(CONFIG_AVX512BW_OPT)
+#include "qemu/cpuid.h"
+static void __attribute__((constructor)) init_cpu_flag(void)
+{
+    unsigned max = __get_cpuid_max(0, NULL);
+    int a, b, c, d;
+    if (max >= 1) {
+        __cpuid(1, a, b, c, d);
+         /* We must check that AVX is not just available, but usable. */
+        if ((c & bit_OSXSAVE) && (c & bit_AVX) && max >= 7) {
+            int bv;
+            __asm("xgetbv" : "=a"(bv), "=d"(d) : "c"(0));
+            __cpuid_count(7, 0, a, b, c, d);
+           /* 0xe6:
+            *  XCR0[7:5] = 111b (OPMASK state, upper 256-bit of ZMM0-ZMM15
+            *                    and ZMM16-ZMM31 state are enabled by OS)
+            *  XCR0[2:1] = 11b (XMM state and YMM state are enabled by OS)
+            */
+            if ((bv & 0xe6) == 0xe6 && (b & bit_AVX512BW)) {
+                xbzrle_encode_buffer_func = xbzrle_encode_buffer_avx512;
+            }
+        }
+    }
+}
+#endif
 
 XBZRLECacheStats xbzrle_counters;
 
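The constructor above only swaps in the AVX512 encoder when the CPU advertises AVX512BW and the OS has enabled the needed XCR0 state, so callers always go through xbzrle_encode_buffer_func. As a rough, hedged illustration of the encode/decode pair being dispatched, here is a sketch of a round trip using xbzrle_encode_buffer()/xbzrle_decode_buffer() (signatures as in QEMU's migration/xbzrle.h; the include path and PAGE constant are illustrative):

#include <stdint.h>
#include <string.h>
#include "xbzrle.h" /* QEMU-internal header; sketch only */

#define PAGE 4096

/* Encode the delta between two versions of a page, then apply it on top
 * of the old copy; after decode the buffers must match. */
static int xbzrle_roundtrip(uint8_t *old_page, uint8_t *new_page)
{
    uint8_t encoded[PAGE];
    int len = xbzrle_encode_buffer(old_page, new_page, PAGE,
                                   encoded, sizeof(encoded));
    if (len < 0) {
        return -1; /* overflow: delta bigger than a page, send it raw */
    }
    /* decode reconstructs the new page in place over the old copy */
    if (xbzrle_decode_buffer(encoded, len, old_page, PAGE) < 0) {
        return -1;
    }
    return memcmp(old_page, new_page, PAGE) == 0 ? 0 : -1;
}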
@@ -330,6 +362,8 @@ struct RAMState {
     PageSearchStatus pss[RAM_CHANNEL_MAX];
     /* UFFD file descriptor, used in 'write-tracking' migration */
     int uffdio_fd;
+    /* total ram size in bytes */
+    uint64_t ram_bytes_total;
     /* Last block that we have visited searching for dirty pages */
     RAMBlock *last_seen_block;
     /* Last dirty target page we have sent */
@@ -450,6 +484,13 @@ void dirty_sync_missed_zero_copy(void)
     ram_counters.dirty_sync_missed_zero_copy++;
 }
 
+struct MigrationOps {
+    int (*ram_save_target_page)(RAMState *rs, PageSearchStatus *pss);
+};
+typedef struct MigrationOps MigrationOps;
+
+MigrationOps *migration_ops;
+
 CompressionStats compression_counters;
 
 struct CompressParam {
@@ -797,9 +838,9 @@ static int save_xbzrle_page(RAMState *rs, PageSearchStatus *pss,
     memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
 
     /* XBZRLE encoding (if there is no overflow) */
-    encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
-                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
-                                       TARGET_PAGE_SIZE);
+    encoded_len = xbzrle_encode_buffer_func(prev_cached_page, XBZRLE.current_buf,
+                                            TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
+                                            TARGET_PAGE_SIZE);
 
     /*
      * Update the cache contents, so that it corresponds to the data
@@ -1546,17 +1587,23 @@ retry:
     return pages;
 }
 
+#define PAGE_ALL_CLEAN 0
+#define PAGE_TRY_AGAIN 1
+#define PAGE_DIRTY_FOUND 2
 /**
  * find_dirty_block: find the next dirty page and update any state
  * associated with the search process.
  *
- * Returns true if a page is found
+ * Returns:
+ *         PAGE_ALL_CLEAN: no dirty page found, give up
+ *         PAGE_TRY_AGAIN: no dirty page found, retry for next block
+ *         PAGE_DIRTY_FOUND: dirty page found
+ *
  * @rs: current RAM state
  * @pss: data about the state of the current dirty page scan
- * @again: set to false if the search has scanned the whole of RAM
  */
-static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
+static int find_dirty_block(RAMState *rs, PageSearchStatus *pss)
 {
     /* Update pss->page for the next dirty bit in ramblock */
     pss_find_next_dirty(pss);
@@ -1567,8 +1614,7 @@ static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
          * We've been once around the RAM and haven't found anything.
          * Give up.
          */
-        *again = false;
-        return false;
+        return PAGE_ALL_CLEAN;
     }
     if (!offset_in_ramblock(pss->block,
                             ((ram_addr_t)pss->page) << TARGET_PAGE_BITS)) {
@@ -1597,13 +1643,10 @@ static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
             }
         }
         /* Didn't find anything this time, but try again on the new block */
-        *again = true;
-        return false;
+        return PAGE_TRY_AGAIN;
     } else {
-        /* Can go around again, but... */
-        *again = true;
-        /* We've found something so probably don't need to */
-        return true;
+        /* We've found something */
+        return PAGE_DIRTY_FOUND;
     }
 }
 
@@ -2291,14 +2334,14 @@ static bool save_compress_page(RAMState *rs, PageSearchStatus *pss,
 }
 
 /**
- * ram_save_target_page: save one target page
+ * ram_save_target_page_legacy: save one target page
  *
  * Returns the number of pages written
  *
  * @rs: current RAM state
  * @pss: data about the page we want to send
  */
-static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss)
+static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss)
 {
     RAMBlock *block = pss->block;
     ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
@@ -2424,7 +2467,7 @@ static int ram_save_host_page_urgent(PageSearchStatus *pss)
 
         if (page_dirty) {
             /* Be strict to return code; it must be 1, or what else? */
-            if (ram_save_target_page(rs, pss) != 1) {
+            if (migration_ops->ram_save_target_page(rs, pss) != 1) {
                 error_report_once("%s: ram_save_target_page failed", __func__);
                 ret = -1;
                 goto out;
@@ -2493,7 +2536,7 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss)
         if (preempt_active) {
             qemu_mutex_unlock(&rs->bitmap_mutex);
         }
-        tmppages = ram_save_target_page(rs, pss);
+        tmppages = migration_ops->ram_save_target_page(rs, pss);
         if (tmppages >= 0) {
             pages += tmppages;
             /*
@@ -2542,10 +2585,9 @@ static int ram_find_and_save_block(RAMState *rs)
 {
     PageSearchStatus *pss = &rs->pss[RAM_CHANNEL_PRECOPY];
     int pages = 0;
-    bool again, found;
 
     /* No dirty page as there is zero RAM */
-    if (!ram_bytes_total()) {
+    if (!rs->ram_bytes_total) {
         return pages;
     }
 
@@ -2563,19 +2605,23 @@ static int ram_find_and_save_block(RAMState *rs)
 
     pss_init(pss, rs->last_seen_block, rs->last_page);
 
-    do {
-        again = true;
-        found = get_queued_page(rs, pss);
-
-        if (!found) {
+    while (true) {
+        if (!get_queued_page(rs, pss)) {
             /* priority queue empty, so just search for something dirty */
-            found = find_dirty_block(rs, pss, &again);
+            int res = find_dirty_block(rs, pss);
+            if (res != PAGE_DIRTY_FOUND) {
+                if (res == PAGE_ALL_CLEAN) {
+                    break;
+                } else if (res == PAGE_TRY_AGAIN) {
+                    continue;
+                }
+            }
         }
-
-        if (found) {
-            pages = ram_save_host_page(rs, pss);
+        pages = ram_save_host_page(rs, pss);
+        if (pages) {
+            break;
         }
-    } while (!pages && again);
+    }
 
     rs->last_seen_block = pss->block;
     rs->last_page = pss->page;
@@ -2596,28 +2642,30 @@ void acct_update_position(QEMUFile *f, size_t size, bool zero)
     }
 }
 
-static uint64_t ram_bytes_total_common(bool count_ignored)
+static uint64_t ram_bytes_total_with_ignored(void)
 {
     RAMBlock *block;
     uint64_t total = 0;
 
     RCU_READ_LOCK_GUARD();
 
-    if (count_ignored) {
-        RAMBLOCK_FOREACH_MIGRATABLE(block) {
-            total += block->used_length;
-        }
-    } else {
-        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
-            total += block->used_length;
-        }
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
+        total += block->used_length;
     }
     return total;
 }
 
 uint64_t ram_bytes_total(void)
 {
-    return ram_bytes_total_common(false);
+    RAMBlock *block;
+    uint64_t total = 0;
+
+    RCU_READ_LOCK_GUARD();
+
+    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
+        total += block->used_length;
+    }
+    return total;
 }
 
 static void xbzrle_load_setup(void)
@@ -2688,6 +2736,8 @@ static void ram_save_cleanup(void *opaque)
     xbzrle_cleanup();
     compress_threads_save_cleanup();
     ram_state_cleanup(rsp);
+    g_free(migration_ops);
+    migration_ops = NULL;
 }
 
 static void ram_state_reset(RAMState *rs)
@@ -3002,13 +3052,14 @@ static int ram_state_init(RAMState **rsp)
     qemu_mutex_init(&(*rsp)->bitmap_mutex);
     qemu_mutex_init(&(*rsp)->src_page_req_mutex);
     QSIMPLEQ_INIT(&(*rsp)->src_page_requests);
+    (*rsp)->ram_bytes_total = ram_bytes_total();
 
     /*
      * Count the total number of pages used by ram blocks not including any
     * gaps due to alignment or unplugs.
     * This must match with the initial values of dirty bitmap.
     */
-    (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
+    (*rsp)->migration_dirty_pages = (*rsp)->ram_bytes_total >> TARGET_PAGE_BITS;
     ram_state_reset(*rsp);
 
     return 0;
@@ -3222,7 +3273,8 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
     (*rsp)->pss[RAM_CHANNEL_PRECOPY].pss_channel = f;
 
     WITH_RCU_READ_LOCK_GUARD() {
-        qemu_put_be64(f, ram_bytes_total_common(true) | RAM_SAVE_FLAG_MEM_SIZE);
+        qemu_put_be64(f, ram_bytes_total_with_ignored()
+                         | RAM_SAVE_FLAG_MEM_SIZE);
 
         RAMBLOCK_FOREACH_MIGRATABLE(block) {
             qemu_put_byte(f, strlen(block->idstr));
@@ -3241,6 +3293,8 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
     ram_control_before_iterate(f, RAM_CONTROL_SETUP);
     ram_control_after_iterate(f, RAM_CONTROL_SETUP);
 
+    migration_ops = g_malloc0(sizeof(MigrationOps));
+    migration_ops->ram_save_target_page = ram_save_target_page_legacy;
     ret = multifd_send_sync_main(f);
     if (ret < 0) {
         return ret;
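With the hunks above, every save path now dispatches through migration_ops->ram_save_target_page() instead of calling ram_save_target_page() directly, and ram_save_setup() installs the legacy implementation into the table. A minimal stand-alone sketch of this ops-table pattern (all names here are hypothetical stand-ins, not QEMU's):

    #include <stdio.h>
    #include <stdlib.h>

    typedef struct State { int dummy; } State;   /* stand-in for RAMState */

    /* One hook per pluggable operation, mirroring struct MigrationOps. */
    struct Ops {
        int (*save_page)(State *s);
    };

    static int save_page_legacy(State *s)
    {
        (void)s;
        return 1;                         /* "one page written" */
    }

    static struct Ops *ops;

    int main(void)
    {
        ops = calloc(1, sizeof(*ops));    /* like g_malloc0() at setup time */
        ops->save_page = save_page_legacy;
        printf("%d\n", ops->save_page(NULL));  /* callers always go via the table */
        free(ops);
        return 0;
    }

The point of the indirection is that a later back end only has to install a different function pointer at setup; no call site changes.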
@@ -1552,7 +1552,7 @@ void qemu_savevm_state_pending_estimate(uint64_t *res_precopy_only,
     *res_postcopy_only = 0;
 
     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
-        if (!se->ops || !se->ops->state_pending_exact) {
+        if (!se->ops || !se->ops->state_pending_estimate) {
             continue;
         }
         if (se->ops->is_active) {
@@ -1560,9 +1560,9 @@ void qemu_savevm_state_pending_estimate(uint64_t *res_precopy_only,
                 continue;
             }
         }
-        se->ops->state_pending_exact(se->opaque,
-                                     res_precopy_only, res_compatible,
-                                     res_postcopy_only);
+        se->ops->state_pending_estimate(se->opaque,
+                                        res_precopy_only, res_compatible,
+                                        res_postcopy_only);
     }
 }
 
@@ -1577,7 +1577,7 @@ void qemu_savevm_state_pending_exact(uint64_t *res_precopy_only,
     *res_postcopy_only = 0;
 
     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
-        if (!se->ops || !se->ops->state_pending_estimate) {
+        if (!se->ops || !se->ops->state_pending_exact) {
             continue;
         }
         if (se->ops->is_active) {
@@ -1585,9 +1585,9 @@ void qemu_savevm_state_pending_exact(uint64_t *res_precopy_only,
                 continue;
             }
         }
-        se->ops->state_pending_estimate(se->opaque,
-                                        res_precopy_only, res_compatible,
-                                        res_postcopy_only);
+        se->ops->state_pending_exact(se->opaque,
+                                     res_precopy_only, res_compatible,
+                                     res_postcopy_only);
    }
 }
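The two hunks above fix a copy-and-paste mixup: the *_estimate function had been invoking the exact hook and vice versa. The distinction matters because an estimate is meant to stay cheap and callable at any point in the migration loop, while the exact query may do expensive work (for RAM it implies a dirty-bitmap sync). A hedged sketch of the idea with hypothetical names, not QEMU's API:

    #include <stdio.h>

    static unsigned long dirty_cached = 128;   /* last known dirty-page count */

    /* Cheap: report whatever is already known, possibly stale. */
    static void pending_estimate(unsigned long *precopy)
    {
        *precopy += dirty_cached;
    }

    /* Expensive but authoritative: refresh the counter first. */
    static void pending_exact(unsigned long *precopy)
    {
        dirty_cached = 200;                    /* stand-in for a bitmap sync */
        *precopy += dirty_cached;
    }

    int main(void)
    {
        unsigned long est = 0, exact = 0;
        pending_estimate(&est);   /* called freely while iterating */
        pending_exact(&exact);    /* called when deciding convergence */
        printf("estimate=%lu exact=%lu\n", est, exact);
        return 0;
    }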
@@ -2200,7 +2200,11 @@ static int loadvm_postcopy_handle_resume(MigrationIncomingState *mis)
     qemu_sem_post(&mis->postcopy_pause_sem_fault);
 
     if (migrate_postcopy_preempt()) {
-        /* The channel should already be setup again; make sure of it */
+        /*
+         * The preempt channel will be created in async manner, now let's
+         * wait for it and make sure it's created.
+         */
+        qemu_sem_wait(&mis->postcopy_qemufile_dst_done);
         assert(mis->postcopy_qemufile_dst);
         /* Kick the fast ram load thread too */
         qemu_sem_post(&mis->postcopy_pause_sem_fast_load);
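The hunk above replaces a bare assertion with a wait on postcopy_qemufile_dst_done; the assumption (from the rest of this series, not visible in this hunk) is that whichever path finishes creating the preempt channel posts that same semaphore, so the resume path can no longer observe a half-created channel. The same synchronization pattern in plain C with POSIX primitives, hypothetical names:

    #include <pthread.h>
    #include <semaphore.h>
    #include <stdio.h>

    static sem_t chan_done;     /* like mis->postcopy_qemufile_dst_done */
    static int channel = -1;    /* like mis->postcopy_qemufile_dst */

    static void *creator(void *arg)
    {
        (void)arg;
        channel = 42;           /* asynchronous channel creation */
        sem_post(&chan_done);   /* publish "channel is ready" */
        return NULL;
    }

    int main(void)
    {
        pthread_t t;
        sem_init(&chan_done, 0, 0);
        pthread_create(&t, NULL, creator, NULL);
        sem_wait(&chan_done);   /* blocks until the creator has posted */
        printf("channel=%d\n", channel);   /* safe: channel is initialized */
        pthread_join(t, NULL);
        return 0;
    }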
@@ -174,3 +174,127 @@ int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen)
 
     return d;
 }
+
+#if defined(CONFIG_AVX512BW_OPT)
+#pragma GCC push_options
+#pragma GCC target("avx512bw")
+#include <immintrin.h>
+int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
+                                uint8_t *dst, int dlen)
+{
+    uint32_t zrun_len = 0, nzrun_len = 0;
+    int d = 0, i = 0, num = 0;
+    uint8_t *nzrun_start = NULL;
+    /* add 1 to include residual part in main loop */
+    uint32_t count512s = (slen >> 6) + 1;
+    /* countResidual is tail of data, i.e., countResidual = slen % 64 */
+    uint32_t count_residual = slen & 0b111111;
+    bool never_same = true;
+    uint64_t mask_residual = 1;
+    mask_residual <<= count_residual;
+    mask_residual -= 1;
+    __m512i r = _mm512_set1_epi32(0);
+
+    while (count512s) {
+        if (d + 2 > dlen) {
+            return -1;
+        }
+
+        int bytes_to_check = 64;
+        uint64_t mask = 0xffffffffffffffff;
+        if (count512s == 1) {
+            bytes_to_check = count_residual;
+            mask = mask_residual;
+        }
+        __m512i old_data = _mm512_mask_loadu_epi8(r,
+                                                  mask, old_buf + i);
+        __m512i new_data = _mm512_mask_loadu_epi8(r,
+                                                  mask, new_buf + i);
+        uint64_t comp = _mm512_cmpeq_epi8_mask(old_data, new_data);
+        count512s--;
+
+        bool is_same = (comp & 0x1);
+        while (bytes_to_check) {
+            if (is_same) {
+                if (nzrun_len) {
+                    d += uleb128_encode_small(dst + d, nzrun_len);
+                    if (d + nzrun_len > dlen) {
+                        return -1;
+                    }
+                    nzrun_start = new_buf + i - nzrun_len;
+                    memcpy(dst + d, nzrun_start, nzrun_len);
+                    d += nzrun_len;
+                    nzrun_len = 0;
+                }
+                /* 64 data at a time for speed */
+                if (count512s && (comp == 0xffffffffffffffff)) {
+                    i += 64;
+                    zrun_len += 64;
+                    break;
+                }
+                never_same = false;
+                num = __builtin_ctzll(~comp);
+                num = (num < bytes_to_check) ? num : bytes_to_check;
+                zrun_len += num;
+                bytes_to_check -= num;
+                comp >>= num;
+                i += num;
+                if (bytes_to_check) {
+                    /* still has different data after same data */
+                    d += uleb128_encode_small(dst + d, zrun_len);
+                    zrun_len = 0;
+                } else {
+                    break;
+                }
+            }
+            if (never_same || zrun_len) {
+                /*
+                 * never_same only acts if
+                 * data begins with diff in first count512s
+                 */
+                d += uleb128_encode_small(dst + d, zrun_len);
+                zrun_len = 0;
+                never_same = false;
+            }
+            /* has diff, 64 data at a time for speed */
+            if ((bytes_to_check == 64) && (comp == 0x0)) {
+                i += 64;
+                nzrun_len += 64;
+                break;
+            }
+            num = __builtin_ctzll(comp);
+            num = (num < bytes_to_check) ? num : bytes_to_check;
+            nzrun_len += num;
+            bytes_to_check -= num;
+            comp >>= num;
+            i += num;
+            if (bytes_to_check) {
+                /* mask like 111000 */
+                d += uleb128_encode_small(dst + d, nzrun_len);
+                /* overflow */
+                if (d + nzrun_len > dlen) {
+                    return -1;
+                }
+                nzrun_start = new_buf + i - nzrun_len;
+                memcpy(dst + d, nzrun_start, nzrun_len);
+                d += nzrun_len;
+                nzrun_len = 0;
+                is_same = true;
+            }
+        }
+    }
+
+    if (nzrun_len != 0) {
+        d += uleb128_encode_small(dst + d, nzrun_len);
+        /* overflow */
+        if (d + nzrun_len > dlen) {
+            return -1;
+        }
+        nzrun_start = new_buf + i - nzrun_len;
+        memcpy(dst + d, nzrun_start, nzrun_len);
+        d += nzrun_len;
+    }
+    return d;
+}
+#pragma GCC pop_options
+#endif
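Both the scalar and the AVX512 encoder emit the same XBZRLE stream: alternating ULEB128-encoded zero-run and non-zero-run lengths over the byte-wise difference of the two pages, with each non-zero run's literal bytes following its length. A short usage sketch of the portable entry points (the include path matches the in-tree tests; the page size here is an illustrative choice):

    #include <stdio.h>
    #include <string.h>
    #include <stdint.h>
    #include "../migration/xbzrle.h"

    #define PAGE 4096

    int main(void)
    {
        static uint8_t old_page[PAGE], new_page[PAGE], enc[PAGE], out[PAGE];

        memcpy(new_page, old_page, PAGE);
        new_page[100] = 0xab;                 /* one changed byte */

        /* Returns the encoded length, 0 for identical pages, -1 on overflow. */
        int elen = xbzrle_encode_buffer(old_page, new_page, PAGE, enc, PAGE);

        /* Decoding applies the delta on top of a copy of the old page. */
        memcpy(out, old_page, PAGE);
        int dlen = xbzrle_decode_buffer(enc, elen, out, PAGE);

        printf("encoded=%d decoded=%d match=%d\n",
               elen, dlen, memcmp(out, new_page, PAGE) == 0);
        return 0;
    }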
@@ -18,4 +18,8 @@ int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
                          uint8_t *dst, int dlen);
 
 int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen);
+#if defined(CONFIG_AVX512BW_OPT)
+int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
+                                uint8_t *dst, int dlen);
+#endif
 #endif
@@ -70,6 +70,7 @@ meson_options_help() {
   printf "%s\n" '  attr            attr/xattr support'
   printf "%s\n" '  auth-pam        PAM access control'
   printf "%s\n" '  avx2            AVX2 optimizations'
+  printf "%s\n" '  avx512bw        AVX512BW optimizations'
   printf "%s\n" '  avx512f         AVX512F optimizations'
   printf "%s\n" '  blkio           libblkio block device driver'
   printf "%s\n" '  bochs           bochs image format support'
@@ -198,6 +199,8 @@ _meson_option_parse() {
     --disable-auth-pam) printf "%s" -Dauth_pam=disabled ;;
     --enable-avx2) printf "%s" -Davx2=enabled ;;
     --disable-avx2) printf "%s" -Davx2=disabled ;;
+    --enable-avx512bw) printf "%s" -Davx512bw=enabled ;;
+    --disable-avx512bw) printf "%s" -Davx512bw=disabled ;;
     --enable-avx512f) printf "%s" -Davx512f=enabled ;;
     --disable-avx512f) printf "%s" -Davx512f=disabled ;;
     --enable-gcov) printf "%s" -Db_coverage=true ;;
@@ -3,6 +3,12 @@ qht_bench = executable('qht-bench',
                        sources: 'qht-bench.c',
                        dependencies: [qemuutil])
 
+if have_system
+  xbzrle_bench = executable('xbzrle-bench',
+                            sources: 'xbzrle-bench.c',
+                            dependencies: [qemuutil, migration])
+endif
+
 executable('atomic_add-bench',
            sources: files('atomic_add-bench.c'),
            dependencies: [qemuutil],
@@ -0,0 +1,469 @@
+/*
+ * Xor Based Zero Run Length Encoding unit tests.
+ *
+ * Copyright 2013 Red Hat, Inc. and/or its affiliates
+ *
+ * Authors:
+ *  Orit Wasserman <owasserm@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+#include "qemu/osdep.h"
+#include "qemu/cutils.h"
+#include "../migration/xbzrle.h"
+
+#if defined(CONFIG_AVX512BW_OPT)
+#define XBZRLE_PAGE_SIZE 4096
+static bool is_cpu_support_avx512bw;
+#include "qemu/cpuid.h"
+static void __attribute__((constructor)) init_cpu_flag(void)
+{
+    unsigned max = __get_cpuid_max(0, NULL);
+    int a, b, c, d;
+    is_cpu_support_avx512bw = false;
+    if (max >= 1) {
+        __cpuid(1, a, b, c, d);
+        /* We must check that AVX is not just available, but usable. */
+        if ((c & bit_OSXSAVE) && (c & bit_AVX) && max >= 7) {
+            int bv;
+            __asm("xgetbv" : "=a"(bv), "=d"(d) : "c"(0));
+            __cpuid_count(7, 0, a, b, c, d);
+            /* 0xe6:
+             * XCR0[7:5] = 111b (OPMASK state, upper 256-bit of ZMM0-ZMM15
+             *             and ZMM16-ZMM31 state are enabled by OS)
+             * XCR0[2:1] = 11b (XMM state and YMM state are enabled by OS)
+             */
+            if ((bv & 0xe6) == 0xe6 && (b & bit_AVX512BW)) {
+                is_cpu_support_avx512bw = true;
+            }
+        }
+    }
+    return ;
+}
+
+struct ResTime {
+    float t_raw;
+    float t_512;
+};
+
+
+/* Function prototypes
+int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
+                                uint8_t *dst, int dlen);
+*/
+static void encode_decode_zero(struct ResTime *res)
+{
+    uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    int i = 0;
+    int dlen = 0, dlen512 = 0;
+    int diff_len = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1006);
+
+    for (i = diff_len; i > 0; i--) {
+        buffer[1000 + i] = i;
+        buffer512[1000 + i] = i;
+    }
+
+    buffer[1000 + diff_len + 3] = 103;
+    buffer[1000 + diff_len + 5] = 105;
+
+    buffer512[1000 + diff_len + 3] = 103;
+    buffer512[1000 + diff_len + 5] = 105;
+
+    /* encode zero page */
+    time_t t_start, t_end, t_start512, t_end512;
+    t_start = clock();
+    dlen = xbzrle_encode_buffer(buffer, buffer, XBZRLE_PAGE_SIZE, compressed,
+                                XBZRLE_PAGE_SIZE);
+    t_end = clock();
+    float time_val = difftime(t_end, t_start);
+    g_assert(dlen == 0);
+
+    t_start512 = clock();
+    dlen512 = xbzrle_encode_buffer_avx512(buffer512, buffer512, XBZRLE_PAGE_SIZE,
+                                          compressed512, XBZRLE_PAGE_SIZE);
+    t_end512 = clock();
+    float time_val512 = difftime(t_end512, t_start512);
+    g_assert(dlen512 == 0);
+
+    res->t_raw = time_val;
+    res->t_512 = time_val512;
+
+    g_free(buffer);
+    g_free(compressed);
+    g_free(buffer512);
+    g_free(compressed512);
+
+}
+
+static void test_encode_decode_zero_avx512(void)
+{
+    int i;
+    float time_raw = 0.0, time_512 = 0.0;
+    struct ResTime res;
+    for (i = 0; i < 10000; i++) {
+        encode_decode_zero(&res);
+        time_raw += res.t_raw;
+        time_512 += res.t_512;
+    }
+    printf("Zero test:\n");
+    printf("Raw xbzrle_encode time is %f ms\n", time_raw);
+    printf("512 xbzrle_encode time is %f ms\n", time_512);
+}
+
+static void encode_decode_unchanged(struct ResTime *res)
+{
+    uint8_t *compressed = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    int i = 0;
+    int dlen = 0, dlen512 = 0;
+    int diff_len = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1006);
+
+    for (i = diff_len; i > 0; i--) {
+        test[1000 + i] = i + 4;
+        test512[1000 + i] = i + 4;
+    }
+
+    test[1000 + diff_len + 3] = 107;
+    test[1000 + diff_len + 5] = 109;
+
+    test512[1000 + diff_len + 3] = 107;
+    test512[1000 + diff_len + 5] = 109;
+
+    /* test unchanged buffer */
+    time_t t_start, t_end, t_start512, t_end512;
+    t_start = clock();
+    dlen = xbzrle_encode_buffer(test, test, XBZRLE_PAGE_SIZE, compressed,
+                                XBZRLE_PAGE_SIZE);
+    t_end = clock();
+    float time_val = difftime(t_end, t_start);
+    g_assert(dlen == 0);
+
+    t_start512 = clock();
+    dlen512 = xbzrle_encode_buffer_avx512(test512, test512, XBZRLE_PAGE_SIZE,
+                                          compressed512, XBZRLE_PAGE_SIZE);
+    t_end512 = clock();
+    float time_val512 = difftime(t_end512, t_start512);
+    g_assert(dlen512 == 0);
+
+    res->t_raw = time_val;
+    res->t_512 = time_val512;
+
+    g_free(test);
+    g_free(compressed);
+    g_free(test512);
+    g_free(compressed512);
+
+}
+
+static void test_encode_decode_unchanged_avx512(void)
+{
+    int i;
+    float time_raw = 0.0, time_512 = 0.0;
+    struct ResTime res;
+    for (i = 0; i < 10000; i++) {
+        encode_decode_unchanged(&res);
+        time_raw += res.t_raw;
+        time_512 += res.t_512;
+    }
+    printf("Unchanged test:\n");
+    printf("Raw xbzrle_encode time is %f ms\n", time_raw);
+    printf("512 xbzrle_encode time is %f ms\n", time_512);
+}
+
+static void encode_decode_1_byte(struct ResTime *res)
+{
+    uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed = g_malloc(XBZRLE_PAGE_SIZE);
+    uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed512 = g_malloc(XBZRLE_PAGE_SIZE);
+    int dlen = 0, rc = 0, dlen512 = 0, rc512 = 0;
+    uint8_t buf[2];
+    uint8_t buf512[2];
+
+    test[XBZRLE_PAGE_SIZE - 1] = 1;
+    test512[XBZRLE_PAGE_SIZE - 1] = 1;
+
+    time_t t_start, t_end, t_start512, t_end512;
+    t_start = clock();
+    dlen = xbzrle_encode_buffer(buffer, test, XBZRLE_PAGE_SIZE, compressed,
+                                XBZRLE_PAGE_SIZE);
+    t_end = clock();
+    float time_val = difftime(t_end, t_start);
+    g_assert(dlen == (uleb128_encode_small(&buf[0], 4095) + 2));
+
+    rc = xbzrle_decode_buffer(compressed, dlen, buffer, XBZRLE_PAGE_SIZE);
+    g_assert(rc == XBZRLE_PAGE_SIZE);
+    g_assert(memcmp(test, buffer, XBZRLE_PAGE_SIZE) == 0);
+
+    t_start512 = clock();
+    dlen512 = xbzrle_encode_buffer_avx512(buffer512, test512, XBZRLE_PAGE_SIZE,
+                                          compressed512, XBZRLE_PAGE_SIZE);
+    t_end512 = clock();
+    float time_val512 = difftime(t_end512, t_start512);
+    g_assert(dlen512 == (uleb128_encode_small(&buf512[0], 4095) + 2));
+
+    rc512 = xbzrle_decode_buffer(compressed512, dlen512, buffer512,
+                                 XBZRLE_PAGE_SIZE);
+    g_assert(rc512 == XBZRLE_PAGE_SIZE);
+    g_assert(memcmp(test512, buffer512, XBZRLE_PAGE_SIZE) == 0);
+
+    res->t_raw = time_val;
+    res->t_512 = time_val512;
+
+    g_free(buffer);
+    g_free(compressed);
+    g_free(test);
+    g_free(buffer512);
+    g_free(compressed512);
+    g_free(test512);
+
+}
+
+static void test_encode_decode_1_byte_avx512(void)
+{
+    int i;
+    float time_raw = 0.0, time_512 = 0.0;
+    struct ResTime res;
+    for (i = 0; i < 10000; i++) {
+        encode_decode_1_byte(&res);
+        time_raw += res.t_raw;
+        time_512 += res.t_512;
+    }
+    printf("1 byte test:\n");
+    printf("Raw xbzrle_encode time is %f ms\n", time_raw);
+    printf("512 xbzrle_encode time is %f ms\n", time_512);
+}
+
+static void encode_decode_overflow(struct ResTime *res)
+{
+    uint8_t *compressed = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    int i = 0, rc = 0, rc512 = 0;
+
+    for (i = 0; i < XBZRLE_PAGE_SIZE / 2 - 1; i++) {
+        test[i * 2] = 1;
+        test512[i * 2] = 1;
+    }
+
+    /* encode overflow */
+    time_t t_start, t_end, t_start512, t_end512;
+    t_start = clock();
+    rc = xbzrle_encode_buffer(buffer, test, XBZRLE_PAGE_SIZE, compressed,
+                              XBZRLE_PAGE_SIZE);
+    t_end = clock();
+    float time_val = difftime(t_end, t_start);
+    g_assert(rc == -1);
+
+    t_start512 = clock();
+    rc512 = xbzrle_encode_buffer_avx512(buffer512, test512, XBZRLE_PAGE_SIZE,
+                                        compressed512, XBZRLE_PAGE_SIZE);
+    t_end512 = clock();
+    float time_val512 = difftime(t_end512, t_start512);
+    g_assert(rc512 == -1);
+
+    res->t_raw = time_val;
+    res->t_512 = time_val512;
+
+    g_free(buffer);
+    g_free(compressed);
+    g_free(test);
+    g_free(buffer512);
+    g_free(compressed512);
+    g_free(test512);
+
+}
+
+static void test_encode_decode_overflow_avx512(void)
+{
+    int i;
+    float time_raw = 0.0, time_512 = 0.0;
+    struct ResTime res;
+    for (i = 0; i < 10000; i++) {
+        encode_decode_overflow(&res);
+        time_raw += res.t_raw;
+        time_512 += res.t_512;
+    }
+    printf("Overflow test:\n");
+    printf("Raw xbzrle_encode time is %f ms\n", time_raw);
+    printf("512 xbzrle_encode time is %f ms\n", time_512);
+}
+
+static void encode_decode_range_avx512(struct ResTime *res)
+{
+    uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed = g_malloc(XBZRLE_PAGE_SIZE);
+    uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed512 = g_malloc(XBZRLE_PAGE_SIZE);
+    uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    int i = 0, rc = 0, rc512 = 0;
+    int dlen = 0, dlen512 = 0;
+
+    int diff_len = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1006);
+
+    for (i = diff_len; i > 0; i--) {
+        buffer[1000 + i] = i;
+        test[1000 + i] = i + 4;
+        buffer512[1000 + i] = i;
+        test512[1000 + i] = i + 4;
+    }
+
+    buffer[1000 + diff_len + 3] = 103;
+    test[1000 + diff_len + 3] = 107;
+
+    buffer[1000 + diff_len + 5] = 105;
+    test[1000 + diff_len + 5] = 109;
+
+    buffer512[1000 + diff_len + 3] = 103;
+    test512[1000 + diff_len + 3] = 107;
+
+    buffer512[1000 + diff_len + 5] = 105;
+    test512[1000 + diff_len + 5] = 109;
+
+    /* test encode/decode */
+    time_t t_start, t_end, t_start512, t_end512;
+    t_start = clock();
+    dlen = xbzrle_encode_buffer(test, buffer, XBZRLE_PAGE_SIZE, compressed,
+                                XBZRLE_PAGE_SIZE);
+    t_end = clock();
+    float time_val = difftime(t_end, t_start);
+    rc = xbzrle_decode_buffer(compressed, dlen, test, XBZRLE_PAGE_SIZE);
+    g_assert(rc < XBZRLE_PAGE_SIZE);
+    g_assert(memcmp(test, buffer, XBZRLE_PAGE_SIZE) == 0);
+
+    t_start512 = clock();
+    dlen512 = xbzrle_encode_buffer_avx512(test512, buffer512, XBZRLE_PAGE_SIZE,
+                                          compressed512, XBZRLE_PAGE_SIZE);
+    t_end512 = clock();
+    float time_val512 = difftime(t_end512, t_start512);
+    rc512 = xbzrle_decode_buffer(compressed512, dlen512, test512, XBZRLE_PAGE_SIZE);
+    g_assert(rc512 < XBZRLE_PAGE_SIZE);
+    g_assert(memcmp(test512, buffer512, XBZRLE_PAGE_SIZE) == 0);
+
+    res->t_raw = time_val;
+    res->t_512 = time_val512;
+
+    g_free(buffer);
+    g_free(compressed);
+    g_free(test);
+    g_free(buffer512);
+    g_free(compressed512);
+    g_free(test512);
+
+}
+
+static void test_encode_decode_avx512(void)
+{
+    int i;
+    float time_raw = 0.0, time_512 = 0.0;
+    struct ResTime res;
+    for (i = 0; i < 10000; i++) {
+        encode_decode_range_avx512(&res);
+        time_raw += res.t_raw;
+        time_512 += res.t_512;
+    }
+    printf("Encode decode test:\n");
+    printf("Raw xbzrle_encode time is %f ms\n", time_raw);
+    printf("512 xbzrle_encode time is %f ms\n", time_512);
+}
+
+static void encode_decode_random(struct ResTime *res)
+{
+    uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed = g_malloc(XBZRLE_PAGE_SIZE);
+    uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed512 = g_malloc(XBZRLE_PAGE_SIZE);
+    uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    int i = 0, rc = 0, rc512 = 0;
+    int dlen = 0, dlen512 = 0;
+
+    int diff_len = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1);
+    /* store the index of diff */
+    int dirty_index[diff_len];
+    for (int j = 0; j < diff_len; j++) {
+        dirty_index[j] = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1);
+    }
+    for (i = diff_len - 1; i >= 0; i--) {
+        buffer[dirty_index[i]] = i;
+        test[dirty_index[i]] = i + 4;
+        buffer512[dirty_index[i]] = i;
+        test512[dirty_index[i]] = i + 4;
+    }
+
+    time_t t_start, t_end, t_start512, t_end512;
+    t_start = clock();
+    dlen = xbzrle_encode_buffer(test, buffer, XBZRLE_PAGE_SIZE, compressed,
+                                XBZRLE_PAGE_SIZE);
+    t_end = clock();
+    float time_val = difftime(t_end, t_start);
+    rc = xbzrle_decode_buffer(compressed, dlen, test, XBZRLE_PAGE_SIZE);
+    g_assert(rc < XBZRLE_PAGE_SIZE);
+
+    t_start512 = clock();
+    dlen512 = xbzrle_encode_buffer_avx512(test512, buffer512, XBZRLE_PAGE_SIZE,
+                                          compressed512, XBZRLE_PAGE_SIZE);
+    t_end512 = clock();
+    float time_val512 = difftime(t_end512, t_start512);
+    rc512 = xbzrle_decode_buffer(compressed512, dlen512, test512, XBZRLE_PAGE_SIZE);
+    g_assert(rc512 < XBZRLE_PAGE_SIZE);
+
+    res->t_raw = time_val;
+    res->t_512 = time_val512;
+
+    g_free(buffer);
+    g_free(compressed);
+    g_free(test);
+    g_free(buffer512);
+    g_free(compressed512);
+    g_free(test512);
+
+}
+
+static void test_encode_decode_random_avx512(void)
+{
+    int i;
+    float time_raw = 0.0, time_512 = 0.0;
+    struct ResTime res;
+    for (i = 0; i < 10000; i++) {
+        encode_decode_random(&res);
+        time_raw += res.t_raw;
+        time_512 += res.t_512;
+    }
+    printf("Random test:\n");
+    printf("Raw xbzrle_encode time is %f ms\n", time_raw);
+    printf("512 xbzrle_encode time is %f ms\n", time_512);
+}
+#endif
+
+int main(int argc, char **argv)
+{
+    g_test_init(&argc, &argv, NULL);
+    g_test_rand_int();
+#if defined(CONFIG_AVX512BW_OPT)
+    if (likely(is_cpu_support_avx512bw)) {
+        g_test_add_func("/xbzrle/encode_decode_zero", test_encode_decode_zero_avx512);
+        g_test_add_func("/xbzrle/encode_decode_unchanged",
+                        test_encode_decode_unchanged_avx512);
+        g_test_add_func("/xbzrle/encode_decode_1_byte", test_encode_decode_1_byte_avx512);
+        g_test_add_func("/xbzrle/encode_decode_overflow",
+                        test_encode_decode_overflow_avx512);
+        g_test_add_func("/xbzrle/encode_decode", test_encode_decode_avx512);
+        g_test_add_func("/xbzrle/encode_decode_random", test_encode_decode_random_avx512);
+    }
+#endif
+    return g_test_run();
+}
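One caveat about the timing in the benchmark above: difftime() is defined on time_t values in seconds, while clock() returns processor ticks, so the figures printed as "ms" are really tick deltas rather than milliseconds. If millisecond output is wanted, the conventional conversion looks like this (a sketch, not part of the patch):

    #include <stdio.h>
    #include <time.h>

    int main(void)
    {
        clock_t t0 = clock();
        /* ... code under test ... */
        clock_t t1 = clock();
        double ms = (double)(t1 - t0) * 1000.0 / CLOCKS_PER_SEC;
        printf("took %f ms\n", ms);
        return 0;
    }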
@@ -16,6 +16,35 @@
 
 #define XBZRLE_PAGE_SIZE 4096
 
+int (*xbzrle_encode_buffer_func)(uint8_t *, uint8_t *, int,
+     uint8_t *, int) = xbzrle_encode_buffer;
+#if defined(CONFIG_AVX512BW_OPT)
+#include "qemu/cpuid.h"
+static void __attribute__((constructor)) init_cpu_flag(void)
+{
+    unsigned max = __get_cpuid_max(0, NULL);
+    int a, b, c, d;
+    if (max >= 1) {
+        __cpuid(1, a, b, c, d);
+        /* We must check that AVX is not just available, but usable. */
+        if ((c & bit_OSXSAVE) && (c & bit_AVX) && max >= 7) {
+            int bv;
+            __asm("xgetbv" : "=a"(bv), "=d"(d) : "c"(0));
+            __cpuid_count(7, 0, a, b, c, d);
+            /* 0xe6:
+             * XCR0[7:5] = 111b (OPMASK state, upper 256-bit of ZMM0-ZMM15
+             *             and ZMM16-ZMM31 state are enabled by OS)
+             * XCR0[2:1] = 11b (XMM state and YMM state are enabled by OS)
+             */
+            if ((bv & 0xe6) == 0xe6 && (b & bit_AVX512BW)) {
+                xbzrle_encode_buffer_func = xbzrle_encode_buffer_avx512;
+            }
+        }
+    }
+    return ;
+}
+#endif
+
 static void test_uleb(void)
 {
     uint32_t i, val;
@@ -54,7 +83,7 @@ static void test_encode_decode_zero(void)
     buffer[1000 + diff_len + 5] = 105;
 
     /* encode zero page */
-    dlen = xbzrle_encode_buffer(buffer, buffer, XBZRLE_PAGE_SIZE, compressed,
+    dlen = xbzrle_encode_buffer_func(buffer, buffer, XBZRLE_PAGE_SIZE, compressed,
                                 XBZRLE_PAGE_SIZE);
     g_assert(dlen == 0);
 
@@ -78,7 +107,7 @@ static void test_encode_decode_unchanged(void)
     test[1000 + diff_len + 5] = 109;
 
     /* test unchanged buffer */
-    dlen = xbzrle_encode_buffer(test, test, XBZRLE_PAGE_SIZE, compressed,
+    dlen = xbzrle_encode_buffer_func(test, test, XBZRLE_PAGE_SIZE, compressed,
                                 XBZRLE_PAGE_SIZE);
     g_assert(dlen == 0);
 
@@ -96,7 +125,7 @@ static void test_encode_decode_1_byte(void)
 
     test[XBZRLE_PAGE_SIZE - 1] = 1;
 
-    dlen = xbzrle_encode_buffer(buffer, test, XBZRLE_PAGE_SIZE, compressed,
+    dlen = xbzrle_encode_buffer_func(buffer, test, XBZRLE_PAGE_SIZE, compressed,
                                 XBZRLE_PAGE_SIZE);
     g_assert(dlen == (uleb128_encode_small(&buf[0], 4095) + 2));
 
@@ -121,7 +150,7 @@ static void test_encode_decode_overflow(void)
     }
 
     /* encode overflow */
-    rc = xbzrle_encode_buffer(buffer, test, XBZRLE_PAGE_SIZE, compressed,
+    rc = xbzrle_encode_buffer_func(buffer, test, XBZRLE_PAGE_SIZE, compressed,
                               XBZRLE_PAGE_SIZE);
     g_assert(rc == -1);
 
@@ -152,7 +181,7 @@ static void encode_decode_range(void)
     test[1000 + diff_len + 5] = 109;
 
     /* test encode/decode */
-    dlen = xbzrle_encode_buffer(test, buffer, XBZRLE_PAGE_SIZE, compressed,
+    dlen = xbzrle_encode_buffer_func(test, buffer, XBZRLE_PAGE_SIZE, compressed,
                                 XBZRLE_PAGE_SIZE);
 
     rc = xbzrle_decode_buffer(compressed, dlen, test, XBZRLE_PAGE_SIZE);
@@ -93,6 +93,7 @@ qemu_vfio_region_info(const char *desc, uint64_t region_ofs, uint64_t region_siz
 qemu_vfio_pci_map_bar(int index, uint64_t region_ofs, uint64_t region_size, int ofs, void *host) "map region bar#%d addr 0x%"PRIx64" size 0x%"PRIx64" ofs 0x%x host %p"
 
 # userfaultfd.c
+uffd_detect_open_mode(int mode) "%d"
 uffd_query_features_nosys(int err) "errno: %i"
 uffd_query_features_api_failed(int err) "errno: %i"
 uffd_create_fd_nosys(int err) "errno: %i"
@@ -18,10 +18,42 @@
 #include <poll.h>
 #include <sys/syscall.h>
 #include <sys/ioctl.h>
+#include <fcntl.h>
+
+typedef enum {
+    UFFD_UNINITIALIZED = 0,
+    UFFD_USE_DEV_PATH,
+    UFFD_USE_SYSCALL,
+} uffd_open_mode;
+
 int uffd_open(int flags)
 {
 #if defined(__NR_userfaultfd)
+    static uffd_open_mode open_mode;
+    static int uffd_dev;
+
+    /* Detect how to generate uffd desc when run the 1st time */
+    if (open_mode == UFFD_UNINITIALIZED) {
+        /*
+         * Make /dev/userfaultfd the default approach because it has better
+         * permission controls, meanwhile allows kernel faults without any
+         * privilege requirement (e.g. SYS_CAP_PTRACE).
+         */
+        uffd_dev = open("/dev/userfaultfd", O_RDWR | O_CLOEXEC);
+        if (uffd_dev >= 0) {
+            open_mode = UFFD_USE_DEV_PATH;
+        } else {
+            /* Fallback to the system call */
+            open_mode = UFFD_USE_SYSCALL;
+        }
+        trace_uffd_detect_open_mode(open_mode);
+    }
+
+    if (open_mode == UFFD_USE_DEV_PATH) {
+        assert(uffd_dev >= 0);
+        return ioctl(uffd_dev, USERFAULTFD_IOC_NEW, flags);
+    }
+
+    return syscall(__NR_userfaultfd, flags);
 #else
     return -EINVAL;
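For reference, a sketch of how a caller would use a descriptor obtained this way: after uffd_open() the mandatory UFFDIO_API handshake follows, which is standard userfaultfd kernel API. The stand-in helper below only implements the syscall path, so it is an assumption-laden simplification of the function above:

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <linux/userfaultfd.h>

    /* Minimal stand-in for the helper above: syscall path only. */
    static int uffd_open(int flags)
    {
        return (int)syscall(__NR_userfaultfd, flags);
    }

    int main(void)
    {
        int uffd = uffd_open(O_CLOEXEC | O_NONBLOCK);
        if (uffd < 0) {
            perror("uffd_open");
            return 1;
        }
        struct uffdio_api api = { .api = UFFD_API, .features = 0 };
        if (ioctl(uffd, UFFDIO_API, &api) < 0) {   /* mandatory handshake */
            perror("UFFDIO_API");
            return 1;
        }
        printf("userfaultfd ready, features=0x%llx\n",
               (unsigned long long)api.features);
        close(uffd);
        return 0;
    }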