diff --git a/MAINTAINERS b/MAINTAINERS index 10af212632..3584d6a6c6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2465,7 +2465,7 @@ S: Maintained F: hw/net/rocker/ F: qapi/rocker.json F: tests/rocker/ -F: docs/specs/rocker.txt +F: docs/specs/rocker.rst e1000x M: Dmitry Fleytman @@ -3871,7 +3871,7 @@ F: nbd/ F: include/block/nbd* F: qemu-nbd.* F: blockdev-nbd.c -F: docs/interop/nbd.txt +F: docs/interop/nbd.rst F: docs/tools/qemu-nbd.rst F: tests/qemu-iotests/tests/*nbd* T: git https://repo.or.cz/qemu/ericb.git nbd @@ -3964,7 +3964,8 @@ L: qemu-block@nongnu.org S: Supported F: block/parallels.c F: block/parallels-ext.c -F: docs/interop/parallels.txt +F: docs/interop/parallels.rst +F: docs/interop/prl-xml.rst T: git https://src.openvz.org/scm/~den/qemu.git parallels qed diff --git a/docs/interop/index.rst b/docs/interop/index.rst index ed65395bfb..999e44eae1 100644 --- a/docs/interop/index.rst +++ b/docs/interop/index.rst @@ -14,6 +14,9 @@ are useful for making QEMU interoperate with other software. dbus-vmstate dbus-display live-block-operations + nbd + parallels + prl-xml pr-helper qmp-spec qemu-ga diff --git a/docs/interop/live-block-operations.rst b/docs/interop/live-block-operations.rst index 691429c7af..6b549ede7c 100644 --- a/docs/interop/live-block-operations.rst +++ b/docs/interop/live-block-operations.rst @@ -931,8 +931,8 @@ Shutdown the guest, by issuing the ``quit`` QMP command:: } -Live disk backup --- ``blockdev-backup`` and the deprecated``drive-backup`` ---------------------------------------------------------------------------- +Live disk backup --- ``blockdev-backup`` and the deprecated ``drive-backup`` +---------------------------------------------------------------------------- The ``blockdev-backup`` (and the deprecated ``drive-backup``) allows you to create a point-in-time snapshot. 
diff --git a/docs/interop/nbd.rst b/docs/interop/nbd.rst new file mode 100644 index 0000000000..de079d31fd --- /dev/null +++ b/docs/interop/nbd.rst @@ -0,0 +1,89 @@ +QEMU NBD protocol support +========================= + +QEMU supports the NBD protocol, and has an internal NBD client (see +``block/nbd.c``), an internal NBD server (see ``blockdev-nbd.c``), and an +external NBD server tool (see ``qemu-nbd.c``). The common code is placed +in ``nbd/*``. + +The NBD protocol is specified here: +https://github.com/NetworkBlockDevice/nbd/blob/master/doc/proto.md + +The following paragraphs describe some specific properties of NBD +protocol realization in QEMU. + +Metadata namespaces +------------------- + +QEMU supports the ``base:allocation`` metadata context as defined in the +NBD protocol specification, and also defines an additional metadata +namespace ``qemu``. + +``qemu`` namespace +------------------ + +The ``qemu`` namespace currently contains two available metadata context +types. The first is related to exposing the contents of a dirty +bitmap alongside the associated disk contents. That metadata context +is named with the following form:: + + qemu:dirty-bitmap: + +Each dirty-bitmap metadata context defines only one flag for extents +in reply for ``NBD_CMD_BLOCK_STATUS``: + +bit 0: + ``NBD_STATE_DIRTY``, set when the extent is "dirty" + +The second is related to exposing the source of various extents within +the image, with a single metadata context named:: + + qemu:allocation-depth + +In the allocation depth context, the entire 32-bit value represents a +depth of which layer in a thin-provisioned backing chain provided the +data (0 for unallocated, 1 for the active layer, 2 for the first +backing layer, and so forth). 
+ +For ``NBD_OPT_LIST_META_CONTEXT`` the following queries are supported +in addition to the specific ``qemu:allocation-depth`` and +``qemu:dirty-bitmap:``: + +``qemu:`` + returns list of all available metadata contexts in the namespace +``qemu:dirty-bitmap:`` + returns list of all available dirty-bitmap metadata contexts + +Features by version +------------------- + +The following list documents which qemu version first implemented +various features (both as a server exposing the feature, and as a +client taking advantage of the feature when present), to make it +easier to plan for cross-version interoperability. Note that in +several cases, the initial release containing a feature may require +additional patches from the corresponding stable branch to fix bugs in +the operation of that feature. + +2.6 + ``NBD_OPT_STARTTLS`` with TLS X.509 Certificates +2.8 + ``NBD_CMD_WRITE_ZEROES`` +2.10 + ``NBD_OPT_GO``, ``NBD_INFO_BLOCK`` +2.11 + ``NBD_OPT_STRUCTURED_REPLY`` +2.12 + ``NBD_CMD_BLOCK_STATUS`` for ``base:allocation`` +3.0 + ``NBD_OPT_STARTTLS`` with TLS Pre-Shared Keys (PSK), + ``NBD_CMD_BLOCK_STATUS`` for ``qemu:dirty-bitmap:``, ``NBD_CMD_CACHE`` +4.2 + ``NBD_FLAG_CAN_MULTI_CONN`` for shareable read-only exports, + ``NBD_CMD_FLAG_FAST_ZERO`` +5.2 + ``NBD_CMD_BLOCK_STATUS`` for ``qemu:allocation-depth`` +7.1 + ``NBD_FLAG_CAN_MULTI_CONN`` for shareable writable exports +8.2 + ``NBD_OPT_EXTENDED_HEADERS``, ``NBD_FLAG_BLOCK_STATUS_PAYLOAD`` diff --git a/docs/interop/nbd.txt b/docs/interop/nbd.txt deleted file mode 100644 index 18efb251de..0000000000 --- a/docs/interop/nbd.txt +++ /dev/null @@ -1,72 +0,0 @@ -QEMU supports the NBD protocol, and has an internal NBD client (see -block/nbd.c), an internal NBD server (see blockdev-nbd.c), and an -external NBD server tool (see qemu-nbd.c). The common code is placed -in nbd/*. 
- -The NBD protocol is specified here: -https://github.com/NetworkBlockDevice/nbd/blob/master/doc/proto.md - -The following paragraphs describe some specific properties of NBD -protocol realization in QEMU. - -= Metadata namespaces = - -QEMU supports the "base:allocation" metadata context as defined in the -NBD protocol specification, and also defines an additional metadata -namespace "qemu". - -== "qemu" namespace == - -The "qemu" namespace currently contains two available metadata context -types. The first is related to exposing the contents of a dirty -bitmap alongside the associated disk contents. That metadata context -is named with the following form: - - qemu:dirty-bitmap: - -Each dirty-bitmap metadata context defines only one flag for extents -in reply for NBD_CMD_BLOCK_STATUS: - - bit 0: NBD_STATE_DIRTY, set when the extent is "dirty" - -The second is related to exposing the source of various extents within -the image, with a single metadata context named: - - qemu:allocation-depth - -In the allocation depth context, the entire 32-bit value represents a -depth of which layer in a thin-provisioned backing chain provided the -data (0 for unallocated, 1 for the active layer, 2 for the first -backing layer, and so forth). - -For NBD_OPT_LIST_META_CONTEXT the following queries are supported -in addition to the specific "qemu:allocation-depth" and -"qemu:dirty-bitmap:": - -* "qemu:" - returns list of all available metadata contexts in the - namespace. -* "qemu:dirty-bitmap:" - returns list of all available dirty-bitmap - metadata contexts. - -= Features by version = - -The following list documents which qemu version first implemented -various features (both as a server exposing the feature, and as a -client taking advantage of the feature when present), to make it -easier to plan for cross-version interoperability. 
Note that in -several cases, the initial release containing a feature may require -additional patches from the corresponding stable branch to fix bugs in -the operation of that feature. - -* 2.6: NBD_OPT_STARTTLS with TLS X.509 Certificates -* 2.8: NBD_CMD_WRITE_ZEROES -* 2.10: NBD_OPT_GO, NBD_INFO_BLOCK -* 2.11: NBD_OPT_STRUCTURED_REPLY -* 2.12: NBD_CMD_BLOCK_STATUS for "base:allocation" -* 3.0: NBD_OPT_STARTTLS with TLS Pre-Shared Keys (PSK), -NBD_CMD_BLOCK_STATUS for "qemu:dirty-bitmap:", NBD_CMD_CACHE -* 4.2: NBD_FLAG_CAN_MULTI_CONN for shareable read-only exports, -NBD_CMD_FLAG_FAST_ZERO -* 5.2: NBD_CMD_BLOCK_STATUS for "qemu:allocation-depth" -* 7.1: NBD_FLAG_CAN_MULTI_CONN for shareable writable exports -* 8.2: NBD_OPT_EXTENDED_HEADERS, NBD_FLAG_BLOCK_STATUS_PAYLOAD diff --git a/docs/interop/parallels.txt b/docs/interop/parallels.rst similarity index 72% rename from docs/interop/parallels.txt rename to docs/interop/parallels.rst index bb3fadf369..7b328a40c8 100644 --- a/docs/interop/parallels.txt +++ b/docs/interop/parallels.rst @@ -1,41 +1,46 @@ -= License = +Parallels Expandable Image File Format +====================================== -Copyright (c) 2015 Denis Lunev -Copyright (c) 2015 Vladimir Sementsov-Ogievskiy +.. + Copyright (c) 2015 Denis Lunev + Copyright (c) 2015 Vladimir Sementsov-Ogievskiy -This work is licensed under the terms of the GNU GPL, version 2 or later. -See the COPYING file in the top-level directory. + This work is licensed under the terms of the GNU GPL, version 2 or later. + See the COPYING file in the top-level directory. -= Parallels Expandable Image File Format = A Parallels expandable image file consists of three consecutive parts: - * header - * BAT - * data area + +* header +* BAT +* data area All numbers in a Parallels expandable image are stored in little-endian byte order. -== Definitions == +Definitions +----------- - Sector A 512-byte data chunk. +Sector + A 512-byte data chunk. 
- Cluster A data chunk of the size specified in the image header. - Currently, the default size is 1MiB (2048 sectors). In previous - versions, cluster sizes of 63 sectors, 256 and 252 kilobytes were - used. +Cluster + A data chunk of the size specified in the image header. + Currently, the default size is 1MiB (2048 sectors). In previous + versions, cluster sizes of 63 sectors, 256 and 252 kilobytes were used. - BAT Block Allocation Table, an entity that contains information for - guest-to-host I/O data address translation. +BAT + Block Allocation Table, an entity that contains information for + guest-to-host I/O data address translation. - -== Header == +Header +------ The header is placed at the start of an image and contains the following -fields: +fields:: -Bytes: + Bytes: 0 - 15: magic Must contain "WithoutFreeSpace" or "WithouFreSpacExt". @@ -103,44 +108,46 @@ Bytes: ext_off must meet the same requirements as cluster offsets defined by BAT entries (see below). - -== BAT == +BAT +--- BAT is placed immediately after the image header. In the file, BAT is a contiguous array of 32-bit unsigned little-endian integers with -(bat_entries * 4) bytes size. +``(bat_entries * 4)`` bytes size. Each BAT entry contains an offset from the start of the file to the -corresponding cluster. The offset set in clusters for "WithouFreSpacExt" images -and in sectors for "WithoutFreeSpace" images. +corresponding cluster. The offset is set in clusters for ``WithouFreSpacExt`` +images and in sectors for ``WithoutFreeSpace`` images. If a BAT entry is zero, the corresponding cluster is not allocated and should be considered as filled with zeroes. 
Cluster offsets specified by BAT entries must meet the following requirements: - - the value must not be lower than data offset (provided by header.data_off - or calculated as specified above), - - the value must be lower than the desired file size, - - the value must be unique among all BAT entries, - - the result of (cluster offset - data offset) must be aligned to cluster - size. +- the value must not be lower than data offset (provided by ``header.data_off`` + or calculated as specified above) +- the value must be lower than the desired file size +- the value must be unique among all BAT entries +- the result of ``(cluster offset - data offset)`` must be aligned to + cluster size -== Data Area == +Data Area +--------- -The data area is an area from the data offset (provided by header.data_off or -calculated as specified above) to the end of the file. It represents a +The data area is an area from the data offset (provided by ``header.data_off`` +or calculated as specified above) to the end of the file. It represents a contiguous array of clusters. Most of them are allocated by the BAT, some may -be allocated by the ext_off field in the header while other may be allocated by -extensions. All clusters allocated by ext_off and extensions should meet the -same requirements as clusters specified by BAT entries. +be allocated by the ``ext_off`` field in the header while others may be +allocated by extensions. All clusters allocated by ``ext_off`` and extensions +should meet the same requirements as clusters specified by BAT entries. -== Format Extension == +Format Extension +---------------- The Format Extension is an area 1 cluster in size that provides additional format features. This cluster is addressed by the ext_off field in the header. 
-The format of the Format Extension area is the following: +The format of the Format Extension area is the following:: 0 - 7: magic Must be 0xAB234CEF23DCEA87 @@ -149,10 +156,10 @@ The format of the Format Extension area is the following: The MD5 checksum of the entire Header Extension cluster except the first 24 bytes. - The above are followed by feature sections or "extensions". The last - extension must be "End of features" (see below). +The above are followed by feature sections or "extensions". The last +extension must be "End of features" (see below). -Each feature section has the following format: +Each feature section has the following format:: 0 - 7: magic The identifier of the feature: @@ -183,16 +190,17 @@ Each feature section has the following format: variable: data (data_size bytes) - The above is followed by padding to the next 8 bytes boundary, then the - next extension starts. +The above is followed by padding to the next 8 bytes boundary, then the +next extension starts. - The last extension must be "End of features" with all the fields set to 0. +The last extension must be "End of features" with all the fields set to 0. -=== Dirty bitmaps feature === +Dirty bitmaps feature +--------------------- This feature provides a way of storing dirty bitmaps in the image. The fields -of its data area are: +of its data area are:: 0 - 7: size The bitmap size, should be equal to disk size in sectors. @@ -215,7 +223,7 @@ clusters inside the Parallels image file. The offsets of these clusters are saved in the L1 offset table specified by the feature extension. Each L1 table entry is a 64 bit integer as described below: -Given an offset in bytes into the bitmap data, corresponding L1 entry is +Given an offset in bytes into the bitmap data, corresponding L1 entry is:: l1_table[offset / cluster_size] @@ -227,6 +235,6 @@ are assumed to be 1. If an L1 table entry is not 0 or 1, it contains the corresponding cluster offset (in 512b sectors). 
Given an offset in bytes into the bitmap data the -offset in bytes into the image file can be obtained as follows: +offset in bytes into the image file can be obtained as follows:: offset = l1_table[offset / cluster_size] * 512 + (offset % cluster_size) diff --git a/docs/interop/prl-xml.rst b/docs/interop/prl-xml.rst new file mode 100644 index 0000000000..5bb63bb93a --- /dev/null +++ b/docs/interop/prl-xml.rst @@ -0,0 +1,192 @@ +Parallels Disk Format +===================== + +.. + Copyright (c) 2015-2017, Virtuozzo, Inc. + Authors: + 2015 Denis Lunev + 2015 Vladimir Sementsov-Ogievskiy + 2016-2017 Klim Kireev + 2016-2017 Edgar Kaziakhmedov + + This work is licensed under the terms of the GNU GPL, version 2 or later. + See the COPYING file in the top-level directory. + +This specification contains minimal information about Parallels Disk Format, +which is enough to properly work with QEMU. Nevertheless, Parallels Cloud Server +and Parallels Desktop are able to add some unspecified nodes to the xml and use +them, but they are for internal work and don't affect functionality. Also it +uses auxiliary xml ``Snapshot.xml``, which allows storage of optional snapshot +information, but this doesn't influence open/read/write functionality. QEMU and +other software should not use fields not covered in this document or the +``Snapshot.xml`` file, and must leave them as is. + +A Parallels disk consists of two parts: the set of snapshots and the disk +descriptor file, which stores information about all files and snapshots. + +Definitions +----------- + +Snapshot + a record of the contents captured at a particular time, capable + of storing current state. A snapshot has a UUID and a parent UUID. + +Snapshot image + an overlay representing the difference between this + snapshot and some earlier snapshot. + +Overlay + an image storing the different sectors between two captured states. + +Root image + a snapshot image with no parent, the root of the snapshot tree. 
+ +Storage + the backing storage for a subset of the virtual disk. When + there is more than one storage in a Parallels disk then that + is referred to as a split image. In this case every storage + covers a specific address space area of the disk and has its + particular root image. Split images are not considered here + and are not supported. Each storage consists of disk + parameters and a list of images. The list of images always + contains a root image and may also contain overlays. The + root image can be an expandable Parallels image file or + plain. Overlays must be expandable. + +Description file + ``DiskDescriptor.xml`` stores information about disk parameters, + snapshots, and storages. + +Top Snapshot + The overlay between actual state and some previous snapshot. + It is not a snapshot in the classical sense because it + serves as the active image that the guest writes to. + +Sector + a 512-byte data chunk. + +Description file +---------------- + +All information is placed in a single XML element +``Parallels_disk_image``. +The element has only one attribute, ``Version``, which must be ``1.0``. + +The schema of ``DiskDescriptor.xml``:: + + + + ... + + + ... + + + ... + + + +``Disk_Parameters`` element +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``Disk_Parameters`` element describes the physical layout of the +virtual disk and some general settings. + +The ``Disk_Parameters`` element MUST contain the following child elements: + +* ``Disk_size`` - number of sectors in the disk, + desired size of the disk. +* ``Cylinders`` - number of the disk cylinders. +* ``Heads`` - number of the disk heads. +* ``Sectors`` - number of the disk sectors per cylinder + (sector size is 512 bytes) + Limitation: The product of the ``Heads``, ``Sectors`` and ``Cylinders`` + values MUST be equal to the value of the Disk_size parameter. +* ``Padding`` - must be 0. 
Parallels Cloud Server and Parallels Desktop may + use padding set to 1; however this case is not covered + by this specification. QEMU and other software should not open + such disks and should not create them. + +``StorageData`` element +^^^^^^^^^^^^^^^^^^^^^^^ + +This element of the file describes the root image and all snapshot images. + +The ``StorageData`` element consists of the ``Storage`` child element, +as shown below:: + + + + ... + + + +A ``Storage`` element has the following child elements: + +* ``Start`` - start sector of the storage, in case of non split storage + equals to 0. +* ``End`` - number of the sector following the last sector, in case of non + split storage equals to ``Disk_size``. +* ``Blocksize`` - storage cluster size, number of sectors per one cluster. + The cluster size for each "Compressed" (see below) image in + a parallels disk must be equal to this field. Note: the cluster + size for a Parallels Expandable Image is in the ``tracks`` field of + its header (see :doc:`parallels`). +* Several ``Image`` child elements. + +Each ``Image`` element has the following child elements: + +* ``GUID`` - image identifier, UUID in curly brackets. + For instance, ``{12345678-9abc-def1-2345-6789abcdef12}``. + The GUID is used by the Snapshots element to reference images + (see below) +* ``Type`` - image type of the element. It can be: + + * ``Plain`` for raw files. + * ``Compressed`` for expanding disks. + +* ``File`` - path to image file. The path can be relative to + ``DiskDescriptor.xml`` or absolute. + +``Snapshots`` element +^^^^^^^^^^^^^^^^^^^^^ + +The ``Snapshots`` element describes the snapshot relations with the snapshot tree. + +The element contains the set of ``Shot`` child elements, as shown below:: + + + ... /* Optional child element */ + + ... + + + ... + + ... + + +Each ``Shot`` element contains the following child elements: + +* ``GUID`` - an image GUID. +* ``ParentGUID`` - GUID of the image of the parent snapshot. 
+ +The software may traverse snapshots from child to parent using the +``<ParentGUID>`` field as reference. The ``ParentGUID`` of the root +snapshot is ``{00000000-0000-0000-0000-000000000000}``. +There should be only one root snapshot. + +The Top snapshot could be +described via two ways: via the ``TopGUID`` child +element of the ``Snapshots`` element, or via the predefined GUID +``{5fbaabe3-6958-40ff-92a7-860e329aab41}``. If ``TopGUID`` is defined, +the predefined GUID is interpreted as a normal GUID. All snapshot images +(except the Top Snapshot) should be +opened read-only. + +There is another predefined GUID, +``BackupID = {704718e1-2314-44c8-9087-d78ed36b0f4e}``, which is used by +original and some third-party software for backup. QEMU and other +software may operate with images with ``GUID = BackupID`` as usual. +However, it is not recommended to use this +GUID for new disks. The Top snapshot cannot have this GUID. diff --git a/docs/interop/prl-xml.txt b/docs/interop/prl-xml.txt deleted file mode 100644 index cf9b3fba26..0000000000 --- a/docs/interop/prl-xml.txt +++ /dev/null @@ -1,158 +0,0 @@ -= License = - -Copyright (c) 2015-2017, Virtuozzo, Inc. -Authors: - 2015 Denis Lunev - 2015 Vladimir Sementsov-Ogievskiy - 2016-2017 Klim Kireev - 2016-2017 Edgar Kaziakhmedov - -This work is licensed under the terms of the GNU GPL, version 2 or later. -See the COPYING file in the top-level directory. - -This specification contains minimal information about Parallels Disk Format, -which is enough to proper work with QEMU. Nevertheless, Parallels Cloud Server -and Parallels Desktop are able to add some unspecified nodes to xml and use -them, but they are for internal work and don't affect functionality. Also it -uses auxiliary xml "Snapshot.xml", which allows to store optional snapshot -information, but it doesn't influence open/read/write functionality. QEMU and -other software should not use fields not covered in this document and -Snapshot.xml file and must leave them as is. 
- -= Parallels Disk Format = - -Parallels disk consists of two parts: the set of snapshots and the disk -descriptor file, which stores information about all files and snapshots. - -== Definitions == - Snapshot a record of the contents captured at a particular time, - capable of storing current state. A snapshot has UUID and - parent UUID. - - Snapshot image an overlay representing the difference between this - snapshot and some earlier snapshot. - - Overlay an image storing the different sectors between two captured - states. - - Root image snapshot image with no parent, the root of snapshot tree. - - Storage the backing storage for a subset of the virtual disk. When - there is more than one storage in a Parallels disk then that - is referred to as a split image. In this case every storage - covers specific address space area of the disk and has its - particular root image. Split images are not considered here - and are not supported. Each storage consists of disk - parameters and a list of images. The list of images always - contains a root image and may also contain overlays. The - root image can be an expandable Parallels image file or - plain. Overlays must be expandable. - - Description DiskDescriptor.xml stores information about disk parameters, - file snapshots, storages. - - Top The overlay between actual state and some previous snapshot. - Snapshot It is not a snapshot in the classical sense because it - serves as the active image that the guest writes to. - - Sector a 512-byte data chunk. - -== Description file == -All information is placed in a single XML element Parallels_disk_image. -The element has only one attribute "Version", that must be 1.0. -Schema of DiskDescriptor.xml: - - - - ... - - - ... - - - ... - - - -== Disk_Parameters element == -The Disk_Parameters element describes the physical layout of the virtual disk -and some general settings. 
- -The Disk_Parameters element MUST contain the following child elements: - * Disk_size - number of sectors in the disk, - desired size of the disk. - * Cylinders - number of the disk cylinders. - * Heads - number of the disk heads. - * Sectors - number of the disk sectors per cylinder - (sector size is 512 bytes) - Limitation: Product of the Heads, Sectors and Cylinders - values MUST be equal to the value of the Disk_size parameter. - * Padding - must be 0. Parallels Cloud Server and Parallels Desktop may - use padding set to 1, however this case is not covered - by this spec, QEMU and other software should not open - such disks and should not create them. - -== StorageData element == -This element of the file describes the root image and all snapshot images. - -The StorageData element consists of the Storage child element, as shown below: - - - ... - - - -A Storage element has following child elements: - * Start - start sector of the storage, in case of non split storage - equals to 0. - * End - number of sector following the last sector, in case of non - split storage equals to Disk_size. - * Blocksize - storage cluster size, number of sectors per one cluster. - Cluster size for each "Compressed" (see below) image in - parallels disk must be equal to this field. Note: cluster - size for Parallels Expandable Image is in 'tracks' field of - its header (see docs/interop/parallels.txt). - * Several Image child elements. - -Each Image element has following child elements: - * GUID - image identifier, UUID in curly brackets. - For instance, {12345678-9abc-def1-2345-6789abcdef12}. - The GUID is used by the Snapshots element to reference images - (see below) - * Type - image type of the element. It can be: - "Plain" for raw files. - "Compressed" for expanding disks. - * File - path to image file. Path can be relative to DiskDescriptor.xml or - absolute. - -== Snapshots element == -The Snapshots element describes the snapshot relations with the snapshot tree. 
- -The element contains the set of Shot child elements, as shown below: - - ... /* Optional child element */ - - ... - - - ... - - ... - - -Each Shot element contains the following child elements: - * GUID - an image GUID. - * ParentGUID - GUID of the image of the parent snapshot. - -The software may traverse snapshots from child to parent using -field as reference. ParentGUID of root snapshot is -{00000000-0000-0000-0000-000000000000}. There should be only one root -snapshot. Top snapshot could be described via two ways: via TopGUID child -element of the Snapshots element or via predefined GUID -{5fbaabe3-6958-40ff-92a7-860e329aab41}. If TopGUID is defined, predefined GUID is -interpreted as usual GUID. All snapshot images (except Top Snapshot) should be -opened read-only. There is another predefined GUID, -BackupID = {704718e1-2314-44c8-9087-d78ed36b0f4e}, which is used by original and -some third-party software for backup, QEMU and other software may operate with -images with GUID = BackupID as usual, however, it is not recommended to use this -GUID for new disks. Top snapshot cannot have this GUID. diff --git a/docs/specs/index.rst b/docs/specs/index.rst index be899b49c2..6495ed5ed9 100644 --- a/docs/specs/index.rst +++ b/docs/specs/index.rst @@ -35,3 +35,4 @@ guest hardware that is specific to QEMU. vmcoreinfo vmgenid rapl-msr + rocker diff --git a/docs/specs/rocker.txt b/docs/specs/rocker.rst similarity index 91% rename from docs/specs/rocker.txt rename to docs/specs/rocker.rst index 1857b31703..3a7fc6a7e0 100644 --- a/docs/specs/rocker.txt +++ b/docs/specs/rocker.rst @@ -1,23 +1,23 @@ Rocker Network Switch Register Programming Guide -Copyright (c) Scott Feldman -Copyright (c) Neil Horman -Version 0.11, 12/29/2014 +************************************************ -LICENSE -======= +.. 
+ Copyright (c) Scott Feldman + Copyright (c) Neil Horman + Version 0.11, 12/29/2014 -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. -SECTION 1: Introduction -======================= +Introduction +============ Overview -------- @@ -29,25 +29,25 @@ software. Notations and Conventions ------------------------- -o In register descriptions, [n:m] indicates a range from bit n to bit m, -inclusive. -o Use of leading 0x indicates a hexadecimal number. -o Use of leading 0b indicates a binary number. -o The use of RSVD or Reserved indicates that a bit or field is reserved for -future use. -o Field width is in bytes, unless otherwise noted. -o Register are (R) read-only, (R/W) read/write, (W) write-only, or (COR) clear -on read -o TLV values in network-byte-order are designated with (N). +* In register descriptions, [n:m] indicates a range from bit n to bit m, + inclusive. +* Use of leading 0x indicates a hexadecimal number. +* Use of leading 0b indicates a binary number. +* The use of RSVD or Reserved indicates that a bit or field is reserved for + future use. 
+* Field width is in bytes, unless otherwise noted. +* Registers are (R) read-only, (R/W) read/write, (W) write-only, or (COR) clear + on read. +* TLV values in network-byte-order are designated with (N). -SECTION 2: PCI Configuration Registers -====================================== +PCI Configuration Registers +=========================== PCI Configuration Space ----------------------- -Each switch instance registers as a PCI device with PCI configuration space: +Each switch instance registers as a PCI device with PCI configuration space:: offset width description value --------------------------------------------- @@ -74,11 +74,10 @@ Each switch instance registers as a PCI device with PCI configuration space: 0x41 1 Retry count 0x42 2 Reserved + * Assigned by sub-system implementation -* Assigned by sub-system implementation - -SECTION 3: Memory-Mapped Register Space -======================================= +Memory-Mapped Register Space +============================ There are two memory-mapped BARs. BAR0 maps device register space and is 0x2000 in size. BAR1 maps MSI-X vector and PBA tables and is also 0x2000 in @@ -89,7 +88,7 @@ byte registers with one 4-byte access, and 8 byte registers with either two 4-byte accesses or a single 8-byte access. In the case of two 4-byte accesses, access must be lower and then upper 4-bytes, in that order. -BAR0 device register space is organized as follows: +BAR0 device register space is organized as follows:: offset description ------------------------------------------------------ @@ -105,7 +104,7 @@ Reads to reserved registers read back as 0. No fancy stuff like write-combining is enabled on any of the registers. 
-BAR1 MSI-X register space is organized as follows: +BAR1 MSI-X register space is organized as follows:: offset description ------------------------------------------------------ @@ -113,8 +112,8 @@ BAR1 MSI-X register space is organized as follows: 0x1000-0x1fff MSI-X PBA table -SECTION 4: Interrupts, DMA, and Endianness -========================================== +Interrupts, DMA, and Endianness +=============================== PCI Interrupts -------------- @@ -122,7 +121,7 @@ PCI Interrupts The device supports only MSI-X interrupts. BAR1 memory-mapped region contains the MSI-X vector and PBA tables, with support for up to 256 MSI-X vectors. -The vector assignment is: +The vector assignment is:: vector description ----------------------------------------------------- @@ -134,7 +133,7 @@ The vector assignment is: Tx vector is even Rx vector is odd -A MSI-X vector table entry is 16 bytes: +A MSI-X vector table entry is 16 bytes:: field offset width description ------------------------------------------------------------- @@ -170,7 +169,7 @@ ring, and hardware will set this bit when the descriptor is complete. Descriptor ring sizes must be a power of 2 and range from 2 to 64K entries. Descriptor rings' base address must be 8-byte aligned. Descriptors must be packed within ring. Each descriptor in each ring must also be aligned on an 8 -byte boundary. Each descriptor ring will have these registers: +byte boundary. Each descriptor ring will have these registers:: DMA_DESC_xxx_BASE_ADDR, offset 0x1000 + (x * 32), 64-bit, (R/W) DMA_DESC_xxx_SIZE, offset 0x1008 + (x * 32), 32-bit, (R/W) @@ -180,7 +179,7 @@ byte boundary. Each descriptor ring will have these registers: DMA_DESC_xxx_CREDITS, offset 0x1018 + (x * 32), 32-bit, (R/W) DMA_DESC_xxx_RSVD1, offset 0x101c + (x * 32), 32-bit, (R/W) -Where x is descriptor ring index: +Where x is descriptor ring index:: index ring -------------------- @@ -203,14 +202,14 @@ written past TAIL. To do so would wrap the ring. 
An empty ring is when HEAD == TAIL. A full ring is when HEAD is one position behind TAIL. Both HEAD and TAIL increment and modulo wrap at the ring size. -CTRL register bits: +CTRL register bits:: bit name description ------------------------------------------------------------------------ [0] CTRL_RESET Reset the descriptor ring [1:31] Reserved -All descriptor types share some common fields: +All descriptor types share some common fields:: field width description ------------------------------------------------------------------- @@ -234,7 +233,7 @@ filled in by the switch. Likewise, the switch will ignore unknown fields filled in by software. Descriptor payload buffer is 8-byte aligned and TLVs are 8-byte aligned. The -value within a TLV is also 8-byte aligned. The (packed, 8 byte) TLV header is: +value within a TLV is also 8-byte aligned. The (packed, 8 byte) TLV header is:: field width description ----------------------------- @@ -246,7 +245,7 @@ The alignment requirements for descriptors and TLVs are to avoid unaligned access exceptions in software. Note that the payload for each TLV is also 8 byte aligned. -Figure 1 shows an example descriptor buffer with two TLVs. +Figure 1 shows an example descriptor buffer with two TLVs:: <------- 8 bytes -------> @@ -316,11 +315,11 @@ network packet data. All non-network-packet TLV multi-byte values will be LE. TLV values in network-byte-order are designated with (N). -SECTION 5: Test Registers -========================= +Test Registers +============== Rocker has several test registers to support troubleshooting register access, -interrupt generation, and DMA operations: +interrupt generation, and DMA operations:: TEST_REG, offset 0x0010, 32-bit (R/W) TEST_REG64, offset 0x0018, 64-bit (R/W) @@ -338,7 +337,7 @@ for that vector. To test basic DMA operations, allocate a DMA-able host buffer and put the buffer address into TEST_DMA_ADDR and size into TEST_DMA_SIZE. 
Then, write to -TEST_DMA_CTRL to manipulate the buffer contents. TEST_DMA_CTRL operations are: +TEST_DMA_CTRL to manipulate the buffer contents. TEST_DMA_CTRL operations are:: operation value description ----------------------------------------------------------- @@ -351,14 +350,14 @@ issue exists. In particular, buffers that start on odd-8-byte boundary and/or span multiple PAGE sizes should be tested. -SECTION 6: Ports -================ +Ports +===== Physical and Logical Ports ------------------------------------ The switch supports up to 62 physical (front-panel) ports. Register -PORT_PHYS_COUNT returns the actual number of physical ports available: +PORT_PHYS_COUNT returns the actual number of physical ports available:: PORT_PHYS_COUNT, offset 0x0304, 32-bit, (R) @@ -369,7 +368,7 @@ Front-panel ports and logical tunnel ports are mapped into a single 32-bit port space. A special CPU port is assigned port 0. The front-panel ports are mapped to ports 1-62. A special loopback port is assigned port 63. Logical tunnel ports are assigned ports 0x0001000-0x0001ffff. -To summarize the port assignments: +To summarize the port assignments:: port mapping ------------------------------------------------------- @@ -391,14 +390,14 @@ set/get the mode for front-panel ports, see port settings, below. Port Settings ------------- -Link status for all front-panel ports is available via PORT_PHYS_LINK_STATUS: +Link status for all front-panel ports is available via PORT_PHYS_LINK_STATUS:: PORT_PHYS_LINK_STATUS, offset 0x0310, 64-bit, (R) Value is port bitmap. Bits 0 and 63 always read 0. Bits 1-62 read 1 for link UP and 0 for link DOWN for respective front-panel ports. 
-Other properties for front-panel ports are available via DMA CMD descriptors: +Other properties for front-panel ports are available via DMA CMD descriptors:: Get PORT_SETTINGS descriptor: @@ -438,7 +437,7 @@ Port Enable ----------- Front-panel ports are initially disabled, which means port ingress and egress -packets will be dropped. To enable or disable a port, use PORT_PHYS_ENABLE: +packets will be dropped. To enable or disable a port, use PORT_PHYS_ENABLE:: PORT_PHYS_ENABLE: offset 0x0318, 64-bit, (R/W) @@ -447,15 +446,15 @@ packets will be dropped. To enable or disable a port, use PORT_PHYS_ENABLE: Default is 0. -SECTION 7: Switch Control -========================= +Switch Control +============== This section covers switch-wide register settings. Control ------- -This register is used for low level control of the switch. +This register is used for low level control of the switch:: CONTROL: offset 0x0300, 32-bit, (W) @@ -468,18 +467,18 @@ Switch ID --------- The switch has a SWITCH_ID to be used by software to uniquely identify the -switch: +switch:: SWITCH_ID: offset 0x0320, 64-bit, (R) Value is opaque to switch software and no special encoding is implied. -SECTION 8: Events -================= +Events +====== Non-I/O asynchronous events from the device are notified to the host using the -event ring. The TLV structure for events is: +event ring. The TLV structure for events is:: field width description --------------------------------------------------- @@ -491,7 +490,7 @@ event ring. The TLV structure for events is: Link Changed Event ------------------ -When link status changes on a physical port, this event is generated. +When link status changes on a physical port, this event is generated:: field width description --------------------------------------------------- @@ -510,6 +509,8 @@ driver should install to the device the MAC/VLAN on the port into the bridge table. 
Once installed, the MAC/VLAN is known on the port and this event will no longer be generated. +:: + field width description --------------------------------------------------- INFO @@ -518,8 +519,8 @@ no longer be generated. VLAN 2 VLAN ID -SECTION 9: CPU Packet Processing -================================ +CPU Packet Processing +===================== Ingress packets directed to the host CPU for further processing are delivered in the DMA RX ring. Likewise, host CPU originating packets destined to egress @@ -540,7 +541,7 @@ software that Tx is complete and software resources (e.g. skb) backing packet can be released. Figure 2 shows an example 3-fragment packet queued with one Tx descriptor. A -TLV is used for each packet fragment. +TLV is used for each packet fragment:: pkt frag 1 +–––––––+ +–+ @@ -570,7 +571,7 @@ TLV is used for each packet fragment. fig 2. -The TLVs for Tx descriptor buffer are: +The TLVs for Tx descriptor buffer are:: field width description --------------------------------------------------------------------- @@ -600,7 +601,7 @@ The TLVs for Tx descriptor buffer are: TX_FRAG_ADDR 8 DMA address of packet fragment TX_FRAG_LEN 2 Packet fragment length -Possible status return codes in descriptor on completion are: +Possible status return codes in descriptor on completion are:: DESC_COMP_ERR reason -------------------------------------------------------------------- @@ -623,7 +624,7 @@ worst-case packet size. A single Rx descriptor will contain the entire Rx packet data in one RX_FRAG. Other Rx TLVs describe and hardware offloads performed on the packet, such as checksum validation. -The TLVs for Rx descriptor buffer are: +The TLVs for Rx descriptor buffer are:: field width description --------------------------------------------------- @@ -649,7 +650,7 @@ The TLVs for Rx descriptor buffer are: Offload forward RX_FLAG indicates the device has already forwarded the packet so the host CPU should not also forward the packet. 
-Possible status return codes in descriptor on completion are: +Possible status return codes in descriptor on completion are:: DESC_COMP_ERR reason -------------------------------------------------------------------- @@ -660,14 +661,14 @@ Possible status return codes in descriptor on completion are: packet data TLV and other TLVs. -SECTION 10: OF-DPA Mode -====================== +OF-DPA Mode +=========== OF-DPA mode allows the switch to offload flow packet processing functions to hardware. An OpenFlow controller would communicate with an OpenFlow agent installed on the switch. The OpenFlow agent would (directly or indirectly) communicate with the Rocker switch driver, which in turn would program switch -hardware with flow functionality, as defined in OF-DPA. The block diagram is: +hardware with flow functionality, as defined in OF-DPA. The block diagram is:: +–––––––––––––––----–––+ | OF | @@ -696,14 +697,14 @@ OF-DPA Flow Table Interface There are commands to add, modify, delete, and get stats of flow table entries. The commands are issued using the DMA CMD descriptor ring. 
The following -commands are defined: +commands are defined:: CMD_ADD: add an entry to flow table CMD_MOD: modify an entry in flow table CMD_DEL: delete an entry from flow table CMD_GET_STATS: get stats for flow entry -TLVs for add and modify commands are: +TLVs for add and modify commands are:: field width description ---------------------------------------------------- @@ -723,14 +724,14 @@ TLVs for add and modify commands are: Additional TLVs based on flow table ID: -Table ID 0: ingress port +Table ID 0: ingress port:: field width description ---------------------------------------------------- OF_DPA_IN_PPORT 4 ingress physical port number OF_DPA_GOTO_TBL 2 goto table ID; zero to drop -Table ID 10: vlan +Table ID 10: vlan:: field width description ---------------------------------------------------- @@ -740,7 +741,7 @@ Table ID 10: vlan OF_DPA_GOTO_TBL 2 goto table ID; zero to drop OF_DPA_NEW_VLAN_ID 2 (N) new vlan ID -Table ID 20: termination mac +Table ID 20: termination mac:: field width description ---------------------------------------------------- @@ -757,7 +758,7 @@ Table ID 20: termination mac OF_DPA_OUT_PPORT 2 if specified, must be controller, set zero otherwise -Table ID 30: unicast routing +Table ID 30: unicast routing:: field width description ---------------------------------------------------- @@ -772,7 +773,7 @@ Table ID 30: unicast routing OF_DPA_GROUP_ID 4 data for GROUP action must be an L3 Unicast group entry -Table ID 40: multicast routing +Table ID 40: multicast routing:: field width description ---------------------------------------------------- @@ -797,7 +798,7 @@ Table ID 40: multicast routing OF_DPA_GROUP_ID 4 data for GROUP action must be an L3 multicast group entry -Table ID 50: bridging +Table ID 50: bridging:: field width description ---------------------------------------------------- @@ -818,7 +819,7 @@ Table ID 50: bridging restricted to CONTROLLER, set to 0 otherwise -Table ID 60: acl policy +Table ID 60: acl policy:: field 
width description ---------------------------------------------------- @@ -890,7 +891,7 @@ Table ID 60: acl policy dropped (all other instructions ignored) -TLVs for flow delete and get stats command are: +TLVs for flow delete and get stats command are:: field width description --------------------------------------------------- @@ -898,7 +899,7 @@ TLVs for flow delete and get stats command are: OF_DPA_COOKIE 8 Cookie On completion of get stats command, the descriptor buffer is written back with -the following TLVs: +the following TLVs:: field width description --------------------------------------------------- @@ -906,7 +907,7 @@ the following TLVs: OF_DPA_STAT_RX_PKTS 8 Received packets OF_DPA_STAT_TX_PKTS 8 Transmit packets -Possible status return codes in descriptor on completion are: +Possible status return codes in descriptor on completion are:: DESC_COMP_ERR command reason -------------------------------------------------------------------- @@ -928,14 +929,14 @@ Group Table Interface There are commands to add, modify, delete, and get stats of group table entries. The commands are issued using the DMA CMD descriptor ring. 
The -following commands are defined: +following commands are defined:: CMD_ADD: add an entry to group table CMD_MOD: modify an entry in group table CMD_DEL: delete an entry from group table CMD_GET_STATS: get stats for group entry -TLVs for add and modify commands are: +TLVs for add and modify commands are:: field width description ----------------------------------------------------------- @@ -969,7 +970,7 @@ TLVs for add and modify commands are: FLOW_SRC_MAC 6 (types 1, 2, 5) FLOW_DST_MAC 6 (types 1, 2) -TLVs for flow delete and get stats command are: +TLVs for flow delete and get stats command are:: field width description ----------------------------------------------------------- @@ -977,7 +978,7 @@ TLVs for flow delete and get stats command are: FLOW_GROUP_ID 2 Flow group ID On completion of get stats command, the descriptor buffer is written back with -the following TLVs: +the following TLVs:: field width description --------------------------------------------------- @@ -986,7 +987,7 @@ the following TLVs: FLOW_STAT_REF_COUNT 4 Flow reference count FLOW_STAT_BUCKET_COUNT 4 Flow bucket count -Possible status return codes in descriptor on completion are: +Possible status return codes in descriptor on completion are:: DESC_COMP_ERR command reason -------------------------------------------------------------------- diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c index e10cad86dd..f76fb117ad 100644 --- a/hw/arm/virt-acpi-build.c +++ b/hw/arm/virt-acpi-build.c @@ -154,10 +154,10 @@ static void acpi_dsdt_add_gpio(Aml *scope, const MemMapEntry *gpio_memmap, aml_append(dev, aml_name_decl("_CRS", crs)); Aml *aei = aml_resource_template(); - /* Pin 3 for power button */ - const uint32_t pin_list[1] = {3}; + + const uint32_t pin = GPIO_PIN_POWER_BUTTON; aml_append(aei, aml_gpio_int(AML_CONSUMER, AML_EDGE, AML_ACTIVE_HIGH, - AML_EXCLUSIVE, AML_PULL_UP, 0, pin_list, 1, + AML_EXCLUSIVE, AML_PULL_UP, 0, &pin, 1, "GPO0", NULL, 0)); aml_append(dev, 
aml_name_decl("_AEI", aei)); diff --git a/hw/arm/virt.c b/hw/arm/virt.c index 719e83e6a1..687fe0bb8b 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -1004,7 +1004,7 @@ static void virt_powerdown_req(Notifier *n, void *opaque) if (s->acpi_dev) { acpi_send_event(s->acpi_dev, ACPI_POWER_DOWN_STATUS); } else { - /* use gpio Pin 3 for power button event */ + /* use gpio Pin for power button event */ qemu_set_irq(qdev_get_gpio_in(gpio_key_dev, 0), 1); } } @@ -1013,7 +1013,8 @@ static void create_gpio_keys(char *fdt, DeviceState *pl061_dev, uint32_t phandle) { gpio_key_dev = sysbus_create_simple("gpio-key", -1, - qdev_get_gpio_in(pl061_dev, 3)); + qdev_get_gpio_in(pl061_dev, + GPIO_PIN_POWER_BUTTON)); qemu_fdt_add_subnode(fdt, "/gpio-keys"); qemu_fdt_setprop_string(fdt, "/gpio-keys", "compatible", "gpio-keys"); @@ -1024,7 +1025,7 @@ static void create_gpio_keys(char *fdt, DeviceState *pl061_dev, qemu_fdt_setprop_cell(fdt, "/gpio-keys/poweroff", "linux,code", KEY_POWER); qemu_fdt_setprop_cells(fdt, "/gpio-keys/poweroff", - "gpios", phandle, 3, 0); + "gpios", phandle, GPIO_PIN_POWER_BUTTON, 0); } #define SECURE_GPIO_POWEROFF 0 diff --git a/hw/core/ptimer.c b/hw/core/ptimer.c index b1517592c6..1d8964d804 100644 --- a/hw/core/ptimer.c +++ b/hw/core/ptimer.c @@ -83,7 +83,7 @@ static void ptimer_reload(ptimer_state *s, int delta_adjust) delta = s->delta = s->limit; } - if (s->period == 0) { + if (s->period == 0 && s->period_frac == 0) { if (!qtest_enabled()) { fprintf(stderr, "Timer with period zero, disabling\n"); } @@ -309,7 +309,7 @@ void ptimer_run(ptimer_state *s, int oneshot) assert(s->in_transaction); - if (was_disabled && s->period == 0) { + if (was_disabled && s->period == 0 && s->period_frac == 0) { if (!qtest_enabled()) { fprintf(stderr, "Timer with period zero, disabling\n"); } diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h index ab961bb6a9..a4d937ed45 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h @@ -47,6 +47,9 @@ /* See Linux kernel 
arch/arm64/include/asm/pvclock-abi.h */ #define PVTIME_SIZE_PER_CPU 64 +/* GPIO pins */ +#define GPIO_PIN_POWER_BUTTON 3 + enum { VIRT_FLASH, VIRT_MEM, diff --git a/include/qapi/qmp/qobject.h b/include/qapi/qmp/qobject.h index 89b97d88bc..256d782688 100644 --- a/include/qapi/qmp/qobject.h +++ b/include/qapi/qmp/qobject.h @@ -54,7 +54,7 @@ struct QObject { typeof(obj) _obj = (obj); \ _obj ? container_of(&_obj->base, QObject, base) : NULL; \ }) -#define QOBJECT(obj) QOBJECT_INTERNAL((obj), MAKE_IDENTFIER(_obj)) +#define QOBJECT(obj) QOBJECT_INTERNAL((obj), MAKE_IDENTIFIER(_obj)) /* Required for qobject_to() */ #define QTYPE_CAST_TO_QNull QTYPE_QNULL diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h index dc4118ddd9..7a3f2e6576 100644 --- a/include/qemu/atomic.h +++ b/include/qemu/atomic.h @@ -128,7 +128,7 @@ _val; \ }) #define qatomic_rcu_read(ptr) \ - qatomic_rcu_read_internal((ptr), MAKE_IDENTFIER(_val)) + qatomic_rcu_read_internal((ptr), MAKE_IDENTIFIER(_val)) #define qatomic_rcu_set(ptr, i) do { \ qemu_build_assert(sizeof(*ptr) <= ATOMIC_REG_SIZE); \ diff --git a/include/qemu/compiler.h b/include/qemu/compiler.h index 554c5ce7df..c06954ccb4 100644 --- a/include/qemu/compiler.h +++ b/include/qemu/compiler.h @@ -38,7 +38,7 @@ #endif /* Expands into an identifier stemN, where N is another number each time */ -#define MAKE_IDENTFIER(stem) glue(stem, __COUNTER__) +#define MAKE_IDENTIFIER(stem) glue(stem, __COUNTER__) #ifndef likely #define likely(x) __builtin_expect(!!(x), 1) diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index 4cc4c32b14..fe7c3c5f67 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -399,7 +399,7 @@ void QEMU_ERROR("code path is reachable") }) #undef MIN #define MIN(a, b) \ - MIN_INTERNAL((a), (b), MAKE_IDENTFIER(_a), MAKE_IDENTFIER(_b)) + MIN_INTERNAL((a), (b), MAKE_IDENTIFIER(_a), MAKE_IDENTIFIER(_b)) #define MAX_INTERNAL(a, b, _a, _b) \ ({ \ @@ -408,7 +408,7 @@ void QEMU_ERROR("code path is reachable") }) #undef MAX 
#define MAX(a, b) \ - MAX_INTERNAL((a), (b), MAKE_IDENTFIER(_a), MAKE_IDENTFIER(_b)) + MAX_INTERNAL((a), (b), MAKE_IDENTIFIER(_a), MAKE_IDENTIFIER(_b)) #ifdef __COVERITY__ # define MIN_CONST(a, b) ((a) < (b) ? (a) : (b)) @@ -440,7 +440,7 @@ void QEMU_ERROR("code path is reachable") _a == 0 ? _b : (_b == 0 || _b > _a) ? _a : _b; \ }) #define MIN_NON_ZERO(a, b) \ - MIN_NON_ZERO_INTERNAL((a), (b), MAKE_IDENTFIER(_a), MAKE_IDENTFIER(_b)) + MIN_NON_ZERO_INTERNAL((a), (b), MAKE_IDENTIFIER(_a), MAKE_IDENTIFIER(_b)) /* * Round number down to multiple. Safe when m is not a power of 2 (see diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c index c60d2a7ec9..21a9abd90a 100644 --- a/target/arm/tcg/helper-a64.c +++ b/target/arm/tcg/helper-a64.c @@ -1877,3 +1877,42 @@ void HELPER(cpyfe)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, { do_cpye(env, syndrome, wdesc, rdesc, false, GETPC()); } + +static bool is_guarded_page(CPUARMState *env, target_ulong addr, uintptr_t ra) +{ +#ifdef CONFIG_USER_ONLY + return page_get_flags(addr) & PAGE_BTI; +#else + CPUTLBEntryFull *full; + void *host; + int mmu_idx = cpu_mmu_index(env_cpu(env), true); + int flags = probe_access_full(env, addr, 0, MMU_INST_FETCH, mmu_idx, + false, &host, &full, ra); + + assert(!(flags & TLB_INVALID_MASK)); + return full->extra.arm.guarded; +#endif +} + +void HELPER(guarded_page_check)(CPUARMState *env) +{ + /* + * We have already verified that bti is enabled, and that the + * instruction at PC is not ok for BTYPE. This is always at + * the beginning of a block, so PC is always up-to-date and + * no unwind is required. + */ + if (is_guarded_page(env, env->pc, 0)) { + raise_exception(env, EXCP_UDEF, syn_btitrap(env->btype), + exception_target_el(env)); + } +} + +void HELPER(guarded_page_br)(CPUARMState *env, target_ulong pc) +{ + /* + * We have already checked for branch via x16 and x17. + * What remains for choosing BTYPE is checking for a guarded page. 
+ */ + env->btype = is_guarded_page(env, pc, GETPC()) ? 3 : 1; +} diff --git a/target/arm/tcg/helper-a64.h b/target/arm/tcg/helper-a64.h index 371388f61b..481007bf39 100644 --- a/target/arm/tcg/helper-a64.h +++ b/target/arm/tcg/helper-a64.h @@ -133,6 +133,9 @@ DEF_HELPER_4(cpyfp, void, env, i32, i32, i32) DEF_HELPER_4(cpyfm, void, env, i32, i32, i32) DEF_HELPER_4(cpyfe, void, env, i32, i32, i32) +DEF_HELPER_FLAGS_1(guarded_page_check, TCG_CALL_NO_WG, void, env) +DEF_HELPER_FLAGS_2(guarded_page_br, TCG_CALL_NO_RWG, void, env, tl) + DEF_HELPER_FLAGS_5(gvec_fdiv_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fdiv_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fdiv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c index 148be2826e..28a1013503 100644 --- a/target/arm/tcg/translate-a64.c +++ b/target/arm/tcg/translate-a64.c @@ -1507,7 +1507,14 @@ static void set_btype_for_br(DisasContext *s, int rn) { if (dc_isar_feature(aa64_bti, s)) { /* BR to {x16,x17} or !guard -> 1, else 3. */ - set_btype(s, rn == 16 || rn == 17 || !s->guarded_page ? 
1 : 3); + if (rn == 16 || rn == 17) { + set_btype(s, 1); + } else { + TCGv_i64 pc = tcg_temp_new_i64(); + gen_pc_plus_diff(s, pc, 0); + gen_helper_guarded_page_br(tcg_env, pc); + s->btype = -1; + } } } @@ -1521,8 +1528,8 @@ static void set_btype_for_blr(DisasContext *s) static bool trans_BR(DisasContext *s, arg_r *a) { - gen_a64_set_pc(s, cpu_reg(s, a->rn)); set_btype_for_br(s, a->rn); + gen_a64_set_pc(s, cpu_reg(s, a->rn)); s->base.is_jmp = DISAS_JUMP; return true; } @@ -1581,8 +1588,8 @@ static bool trans_BRAZ(DisasContext *s, arg_braz *a) } dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m); - gen_a64_set_pc(s, dst); set_btype_for_br(s, a->rn); + gen_a64_set_pc(s, dst); s->base.is_jmp = DISAS_JUMP; return true; } @@ -11878,37 +11885,6 @@ static bool trans_FAIL(DisasContext *s, arg_OK *a) return true; } -/** - * is_guarded_page: - * @env: The cpu environment - * @s: The DisasContext - * - * Return true if the page is guarded. - */ -static bool is_guarded_page(CPUARMState *env, DisasContext *s) -{ - uint64_t addr = s->base.pc_first; -#ifdef CONFIG_USER_ONLY - return page_get_flags(addr) & PAGE_BTI; -#else - CPUTLBEntryFull *full; - void *host; - int mmu_idx = arm_to_core_mmu_idx(s->mmu_idx); - int flags; - - /* - * We test this immediately after reading an insn, which means - * that the TLB entry must be present and valid, and thus this - * access will never raise an exception. - */ - flags = probe_access_full(env, addr, 0, MMU_INST_FETCH, mmu_idx, - false, &host, &full, 0); - assert(!(flags & TLB_INVALID_MASK)); - - return full->extra.arm.guarded; -#endif -} - /** * btype_destination_ok: * @insn: The instruction at the branch destination @@ -12151,19 +12127,6 @@ static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) if (dc_isar_feature(aa64_bti, s)) { if (s->base.num_insns == 1) { - /* - * At the first insn of the TB, compute s->guarded_page. 
- * We delayed computing this until successfully reading - * the first insn of the TB, above. This (mostly) ensures - * that the softmmu tlb entry has been populated, and the - * page table GP bit is available. - * - * Note that we need to compute this even if btype == 0, - * because this value is used for BR instructions later - * where ENV is not available. - */ - s->guarded_page = is_guarded_page(env, s); - /* First insn can have btype set to non-zero. */ tcg_debug_assert(s->btype >= 0); @@ -12172,12 +12135,13 @@ static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) * priority -- below debugging exceptions but above most * everything else. This allows us to handle this now * instead of waiting until the insn is otherwise decoded. + * + * We can check all but the guarded page check here; + * defer the latter to a helper. */ if (s->btype != 0 - && s->guarded_page && !btype_destination_ok(insn, s->bt, s->btype)) { - gen_exception_insn(s, 0, EXCP_UDEF, syn_btitrap(s->btype)); - return; + gen_helper_guarded_page_check(tcg_env); } } else { /* Not the first insn: btype must be 0. */ diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h index a8672c857c..01c217f4a4 100644 --- a/target/arm/tcg/translate.h +++ b/target/arm/tcg/translate.h @@ -163,8 +163,6 @@ typedef struct DisasContext { uint8_t dcz_blocksize; /* A copy of cpu->gm_blocksize. */ uint8_t gm_blocksize; - /* True if this page is guarded. */ - bool guarded_page; /* True if the current insn_start has been updated. 
*/ bool insn_start_updated; /* Bottom two bits of XScale c15_cpar coprocessor access control reg */ diff --git a/tests/unit/ptimer-test.c b/tests/unit/ptimer-test.c index 04b5f4e3d0..08240594bb 100644 --- a/tests/unit/ptimer-test.c +++ b/tests/unit/ptimer-test.c @@ -763,6 +763,33 @@ static void check_oneshot_with_load_0(gconstpointer arg) ptimer_free(ptimer); } +static void check_freq_more_than_1000M(gconstpointer arg) +{ + const uint8_t *policy = arg; + ptimer_state *ptimer = ptimer_init(ptimer_trigger, NULL, *policy); + bool no_round_down = (*policy & PTIMER_POLICY_NO_COUNTER_ROUND_DOWN); + + triggered = false; + + ptimer_transaction_begin(ptimer); + ptimer_set_freq(ptimer, 2000000000); + ptimer_set_limit(ptimer, 8, 1); + ptimer_run(ptimer, 1); + ptimer_transaction_commit(ptimer); + + qemu_clock_step(3); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, no_round_down ? 3 : 2); + g_assert_false(triggered); + + qemu_clock_step(1); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, 0); + g_assert_true(triggered); + + ptimer_free(ptimer); +} + static void add_ptimer_tests(uint8_t policy) { char policy_name[256] = ""; @@ -857,6 +884,12 @@ static void add_ptimer_tests(uint8_t policy) policy_name), g_memdup2(&policy, 1), check_oneshot_with_load_0, g_free); g_free(tmp); + + g_test_add_data_func_full( + tmp = g_strdup_printf("/ptimer/freq_more_than_1000M policy=%s", + policy_name), + g_memdup2(&policy, 1), check_freq_more_than_1000M, g_free); + g_free(tmp); } static void add_all_ptimer_policies_comb_tests(void)