Merge tag 'v1.6.0' into xbox

Conflicts:
	block/Makefile.objs
	configure
	hw/Makefile.objs
	hw/audio/ac97.c
	hw/i386/Makefile.objs
	hw/ide/core.c
	ui/console.c
commit ff3f9be0e2
Author: espes
Date:   2013-08-19 22:43:52 +10:00

1628 changed files with 92887 additions and 37767 deletions

.gitignore (vendored) | 8

@ -6,7 +6,9 @@ config-target.*
trace/generated-tracers.h
trace/generated-tracers.c
trace/generated-tracers-dtrace.h
trace/generated-tracers-dtrace.dtrace
trace/generated-tracers.dtrace
trace/generated-events.h
trace/generated-events.c
libcacard/trace/generated-tracers.c
*-timestamp
*-softmmu
@ -80,6 +82,8 @@ fsdev/virtfs-proxy-helper.pod
*.swp
*.orig
.pc
*.gcda
*.gcno
patches
pc-bios/bios-pq/status
pc-bios/vgabios-pq/status
@ -95,6 +99,8 @@ pc-bios/optionrom/kvmvapic.asm
pc-bios/optionrom/kvmvapic.bin
pc-bios/optionrom/kvmvapic.raw
pc-bios/optionrom/kvmvapic.img
pc-bios/s390-ccw/s390-ccw.elf
pc-bios/s390-ccw/s390-ccw.img
.stgit-*
cscope.*
tags

.gitmodules (vendored) | 5

@ -15,10 +15,13 @@
url = git://git.qemu.org/openbios.git
[submodule "roms/qemu-palcode"]
path = roms/qemu-palcode
url = git://repo.or.cz/qemu-palcode.git
url = git://github.com/rth7680/qemu-palcode.git
[submodule "roms/sgabios"]
path = roms/sgabios
url = git://git.qemu.org/sgabios.git
[submodule "pixman"]
path = pixman
url = git://anongit.freedesktop.org/pixman
[submodule "dtc"]
path = dtc
url = git://git.qemu.org/dtc.git

HACKING | 26

@ -40,8 +40,23 @@ speaking, the size of guest memory can always fit into ram_addr_t but
it would not be correct to store an actual guest physical address in a
ram_addr_t.
Use target_ulong (or abi_ulong) for CPU virtual addresses, however
devices should not need to use target_ulong.
For CPU virtual addresses there are several possible types.
vaddr is the best type to use to hold a CPU virtual address in
target-independent code. It is guaranteed to be large enough to hold a
virtual address for any target, and it does not change size from target
to target. It is always unsigned.
target_ulong is a type the size of a virtual address on the CPU; this means
it may be 32 or 64 bits depending on which target is being built. It should
therefore be used only in target-specific code, and in some
performance-critical built-per-target core code such as the TLB code.
There is also a signed version, target_long.
abi_ulong is for the *-user targets, and represents a type the size of
'void *' in that target's ABI. (This may not be the same as the size of a
full CPU virtual address in the case of target ABIs which use 32 bit pointers
on 64 bit CPUs, like sparc32plus.) Definitions of structures that must match
the target's ABI must use this type for anything that on the target is defined
to be an 'unsigned long' or a pointer type.
There is also a signed version, abi_long.
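For illustration, here is a rough sketch of how these types tend to be used. The helper names are invented, and the includes are simply the headers such code would plausibly pull in; treat it as a sketch rather than code from the tree.

    /* Target-independent code (built once and shared by all targets): vaddr
     * is unsigned and wide enough for any target's virtual address. */
    #include <inttypes.h>
    #include "qemu/log.h"       /* qemu_log() */
    #include "qom/cpu.h"        /* CPUState, vaddr */

    static void log_guest_access(CPUState *cpu, vaddr addr)
    {
        qemu_log("CPU %d touched guest VA 0x%" PRIx64 "\n",
                 cpu->cpu_index, (uint64_t)addr);
    }

    /* By contrast, target_ulong only exists in per-target code (for example
     * under a target-<arch>/ directory), where it matches the guest pointer
     * size and so may be 32 or 64 bits depending on the build. */
    static inline target_ulong guest_page_base(target_ulong va)
    {
        return va & TARGET_PAGE_MASK;
    }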
Of course, take all of the above with a grain of salt. If you're about
to use some system interface that requires a type like size_t, pid_t or
@ -78,16 +93,15 @@ avoided.
Use of the malloc/free/realloc/calloc/valloc/memalign/posix_memalign
APIs is not allowed in the QEMU codebase. Instead of these routines,
use the GLib memory allocation routines g_malloc/g_malloc0/g_new/
g_new0/g_realloc/g_free or QEMU's qemu_vmalloc/qemu_memalign/qemu_vfree
g_new0/g_realloc/g_free or QEMU's qemu_memalign/qemu_blockalign/qemu_vfree
APIs.
Please note that g_malloc will exit on allocation failure, so there
is no need to test for failure (as you would have to with malloc).
Calling g_malloc with a zero size is valid and will return NULL.
Memory allocated by qemu_vmalloc or qemu_memalign must be freed with
qemu_vfree, since breaking this will cause problems on Win32 and user
emulators.
Memory allocated by qemu_memalign or qemu_blockalign must be freed with
qemu_vfree, since breaking this will cause problems on Win32.
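As a concrete sketch of these pairing rules (the function below is made up for illustration; qemu_memalign() and qemu_vfree() come from QEMU's osdep layer via qemu-common.h):

    #include <glib.h>
    #include "qemu-common.h"    /* qemu_memalign(), qemu_vfree() */

    static void allocation_example(size_t len)
    {
        /* Ordinary allocation: g_malloc0() aborts on failure rather than
         * returning NULL, so no error check is needed.  Pair with g_free(). */
        void *tmp = g_malloc0(len);
        g_free(tmp);

        /* Aligned buffer: memory from qemu_memalign() (or qemu_blockalign())
         * must be released with qemu_vfree(), never with g_free() or free(). */
        void *buf = qemu_memalign(4096, len);
        qemu_vfree(buf);
    }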
4. String manipulation

LICENSE | 15

@ -1,16 +1,21 @@
The following points clarify the QEMU license:
1) QEMU as a whole is released under the GNU General Public License
1) QEMU as a whole is released under the GNU General Public License,
version 2.
2) Parts of QEMU have specific licenses which are compatible with the
GNU General Public License. Hence each source file contains its own
licensing information.
GNU General Public License, version 2. Hence each source file contains
its own licensing information. Source files with no licensing information
are released under the GNU General Public License, version 2 or (at your
option) any later version.
Many hardware device emulation sources are released under the BSD license.
As of July 2013, contributions under version 2 of the GNU General Public
License (and no later version) are only accepted for the following files
or directories: bsd-user/, linux-user/, hw/misc/vfio.c, hw/xen/xen_pt*.
3) The Tiny Code Generator (TCG) is released under the BSD license
(see license headers in files).
4) QEMU is a trademark of Fabrice Bellard.
Fabrice Bellard.
Fabrice Bellard and the QEMU team

MAINTAINERS

@ -59,82 +59,110 @@ Alpha
M: Richard Henderson <rth@twiddle.net>
S: Maintained
F: target-alpha/
F: hw/alpha/
ARM
M: Paul Brook <paul@codesourcery.com>
M: Peter Maydell <peter.maydell@linaro.org>
S: Maintained
F: target-arm/
F: hw/arm/
F: hw/cpu/a*mpcore.c
CRIS
M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
S: Maintained
F: target-cris/
F: hw/cris/
LM32
M: Michael Walle <michael@walle.cc>
S: Maintained
F: target-lm32/
F: hw/lm32/
F: hw/char/lm32_*
M68K
M: Paul Brook <paul@codesourcery.com>
S: Odd Fixes
F: target-m68k/
F: hw/m68k/
MicroBlaze
M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
S: Maintained
F: target-microblaze/
F: hw/microblaze/
MIPS
M: Aurelien Jarno <aurelien@aurel32.net>
S: Odd Fixes
F: target-mips/
F: hw/mips/
Moxie
M: Anthony Green <green@moxielogic.com>
S: Maintained
F: target-moxie/
OpenRISC
M: Jia Liu <proljc@gmail.com>
S: Maintained
F: target-openrisc/
F: hw/openrisc/
PowerPC
M: Alexander Graf <agraf@suse.de>
L: qemu-ppc@nongnu.org
S: Maintained
F: target-ppc/
F: hw/ppc/
S390
M: Richard Henderson <rth@twiddle.net>
M: Alexander Graf <agraf@suse.de>
S: Maintained
F: target-s390x/
F: hw/s390x/
SH4
M: Aurelien Jarno <aurelien@aurel32.net>
S: Odd Fixes
F: target-sh4/
F: hw/sh4/
SPARC
M: Blue Swirl <blauwirbel@gmail.com>
S: Maintained
F: target-sparc/
F: hw/sparc/
F: hw/sparc64/
UniCore32
M: Guan Xuetao <gxt@mprc.pku.edu.cn>
S: Maintained
F: target-unicore32/
F: hw/unicore32/
X86
M: qemu-devel@nongnu.org
S: Odd Fixes
F: target-i386/
F: hw/i386/
Xtensa
M: Max Filippov <jcmvbkbc@gmail.com>
W: http://wiki.osll.spb.ru/doku.php?id=etc:users:jcmvbkbc:qemu-target-xtensa
S: Maintained
F: target-xtensa/
F: hw/xtensa/
Guest CPU Cores (KVM):
----------------------
Overall
M: Gleb Natapov <gleb@redhat.com>
M: Marcelo Tosatti <mtosatti@redhat.com>
M: Paolo Bonzini <pbonzini@redhat.com>
L: kvm@vger.kernel.org
S: Supported
F: kvm-*
@ -197,159 +225,162 @@ ARM Machines
Exynos
M: Evgeny Voevodin <e.voevodin@samsung.com>
M: Maksim Kozlov <m.kozlov@samsung.com>
M: Igor Mitsyanko <i.mitsyanko@samsung.com>
M: Igor Mitsyanko <i.mitsyanko@gmail.com>
M: Dmitry Solodkiy <d.solodkiy@samsung.com>
S: Maintained
F: hw/exynos*
F: hw/*/exynos*
Calxeda Highbank
M: Mark Langsdorf <mark.langsdorf@calxeda.com>
S: Supported
F: hw/highbank.c
F: hw/xgmac.c
F: hw/arm/highbank.c
F: hw/net/xgmac.c
Gumstix
M: qemu-devel@nongnu.org
S: Orphan
F: hw/gumstix.c
F: hw/arm/gumstix.c
i.MX31
M: Peter Chubb <peter.chubb@nicta.com.au>
S: Odd fixes
F: hw/imx*
F: hw/kzm.c
F: hw/*/imx*
F: hw/arm/kzm.c
Integrator CP
M: Paul Brook <paul@codesourcery.com>
M: Peter Maydell <peter.maydell@linaro.org>
S: Maintained
F: hw/integratorcp.c
F: hw/arm/integratorcp.c
Mainstone
M: qemu-devel@nongnu.org
S: Orphan
F: hw/mainstone.c
F: hw/arm/mainstone.c
Musicpal
M: Jan Kiszka <jan.kiszka@web.de>
S: Maintained
F: hw/musicpal.c
F: hw/arm/musicpal.c
nSeries
M: Andrzej Zaborowski <balrogg@gmail.com>
S: Maintained
F: hw/nseries.c
F: hw/arm/nseries.c
Palm
M: Andrzej Zaborowski <balrogg@gmail.com>
S: Maintained
F: hw/palm.c
F: hw/arm/palm.c
Real View
M: Paul Brook <paul@codesourcery.com>
M: Peter Maydell <peter.maydell@linaro.org>
S: Maintained
F: hw/realview*
F: hw/arm/realview*
Spitz
M: Andrzej Zaborowski <balrogg@gmail.com>
S: Maintained
F: hw/spitz.c
F: hw/arm/spitz.c
Stellaris
M: Paul Brook <paul@codesourcery.com>
M: Peter Maydell <peter.maydell@linaro.org>
S: Maintained
F: hw/stellaris.c
F: hw/*/stellaris*
Versatile PB
M: Paul Brook <paul@codesourcery.com>
M: Peter Maydell <peter.maydell@linaro.org>
S: Maintained
F: hw/versatilepb.c
F: hw/*/versatile*
Xilinx Zynq
M: Peter Crosthwaite <peter.crosthwaite@petalogix.com>
S: Maintained
F: hw/xilinx_zynq.c
F: hw/zynq_slcr.c
F: hw/cadence_*
F: hw/xilinx_spips.c
F: hw/arm/xilinx_zynq.c
F: hw/misc/zynq_slcr.c
F: hw/*/cadence_*
F: hw/ssi/xilinx_spips.c
CRIS Machines
-------------
Axis Dev88
M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
S: Maintained
F: hw/axis_dev88.c
etraxfs
M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
S: Maintained
F: hw/etraxfs.c
F: hw/cris/axis_dev88.c
F: hw/*/etraxfs_*.c
LM32 Machines
-------------
EVR32 and uclinux BSP
M: Michael Walle <michael@walle.cc>
S: Maintained
F: hw/lm32_boards.c
F: hw/lm32/lm32_boards.c
milkymist
M: Michael Walle <michael@walle.cc>
S: Maintained
F: hw/milkymist.c
F: hw/lm32/milkymist.c
M68K Machines
-------------
an5206
M: Paul Brook <paul@codesourcery.com>
S: Maintained
F: hw/an5206.c
F: hw/m68k/an5206.c
dummy_m68k
M: Paul Brook <paul@codesourcery.com>
S: Maintained
F: hw/dummy_m68k.c
F: hw/m68k/dummy_m68k.c
mcf5208
M: Paul Brook <paul@codesourcery.com>
S: Maintained
F: hw/mcf5208.c
F: hw/m68k/mcf5208.c
MicroBlaze Machines
-------------------
petalogix_s3adsp1800
M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
S: Maintained
F: hw/petalogix_s3adsp1800.c
F: hw/microblaze/petalogix_s3adsp1800_mmu.c
petalogix_ml605
M: Peter Crosthwaite <peter.crosthwaite@petalogix.com>
S: Maintained
F: hw/petalogix_ml605_mmu.c
F: hw/microblaze/petalogix_ml605_mmu.c
MIPS Machines
-------------
Jazz
M: Hervé Poussineau <hpoussin@reactos.org>
S: Maintained
F: hw/mips_jazz.c
F: hw/mips/mips_jazz.c
Malta
M: Aurelien Jarno <aurelien@aurel32.net>
S: Maintained
F: hw/mips_malta.c
F: hw/mips/mips_malta.c
Mipssim
M: qemu-devel@nongnu.org
S: Orphan
F: hw/mips_mipssim.c
F: hw/mips/mips_mipssim.c
R4000
M: Aurelien Jarno <aurelien@aurel32.net>
S: Maintained
F: hw/mips_r4k.c
F: hw/mips/mips_r4k.c
OpenRISC Machines
-----------------
or1k-sim
M: Jia Liu <proljc@gmail.com>
S: Maintained
F: hw/openrisc/openrisc_sim.c
PowerPC Machines
----------------
@ -357,13 +388,13 @@ PowerPC Machines
M: Alexander Graf <agraf@suse.de>
L: qemu-ppc@nongnu.org
S: Odd Fixes
F: hw/ppc405_boards.c
F: hw/ppc/ppc405_boards.c
Bamboo
M: Alexander Graf <agraf@suse.de>
L: qemu-ppc@nongnu.org
S: Odd Fixes
F: hw/ppc440_bamboo.c
F: hw/ppc/ppc440_bamboo.c
e500
M: Alexander Graf <agraf@suse.de>
@ -379,80 +410,85 @@ M: Scott Wood <scottwood@freescale.com>
L: qemu-ppc@nongnu.org
S: Supported
F: hw/ppc/mpc8544ds.c
F: hw/mpc8544_guts.c
F: hw/ppc/mpc8544_guts.c
New World
M: Alexander Graf <agraf@suse.de>
L: qemu-ppc@nongnu.org
S: Maintained
F: hw/ppc/mac_newworld.c
F: hw/unin_pci.c
F: hw/dec_pci.[hc]
F: hw/pci-host/uninorth.c
F: hw/pci-bridge/dec.[hc]
F: hw/misc/macio/
Old World
M: Alexander Graf <agraf@suse.de>
L: qemu-ppc@nongnu.org
S: Maintained
F: hw/ppc/mac_oldworld.c
F: hw/grackle_pci.c
F: hw/pci-host/grackle.c
F: hw/misc/macio/
PReP
M: Andreas Färber <andreas.faerber@web.de>
L: qemu-ppc@nongnu.org
S: Odd Fixes
F: hw/ppc/prep.c
F: hw/prep_pci.[hc]
F: hw/pc87312.[hc]
F: hw/pci-host/prep.[hc]
F: hw/isa/pc87312.[hc]
sPAPR
M: David Gibson <david@gibson.dropbear.id.au>
M: Alexander Graf <agraf@suse.de>
L: qemu-ppc@nongnu.org
S: Supported
F: hw/spapr*
F: hw/*/spapr*
F: include/hw/*/spapr*
F: hw/*/xics*
F: include/hw/*/xics*
F: pc-bios/spapr-rtas/*
virtex_ml507
M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
L: qemu-ppc@nongnu.org
S: Odd Fixes
F: hw/virtex_ml507.c
F: hw/ppc/virtex_ml507.c
SH4 Machines
------------
R2D
M: Magnus Damm <magnus.damm@gmail.com>
S: Maintained
F: hw/r2d.c
F: hw/sh4/r2d.c
Shix
M: Magnus Damm <magnus.damm@gmail.com>
S: Orphan
F: hw/shix.c
F: hw/sh4/shix.c
SPARC Machines
--------------
Sun4m
M: Blue Swirl <blauwirbel@gmail.com>
S: Maintained
F: hw/sun4m.c
F: hw/sparc/sun4m.c
Sun4u
M: Blue Swirl <blauwirbel@gmail.com>
S: Maintained
F: hw/sun4u.c
F: hw/sparc64/sun4u.c
Leon3
M: Fabien Chouteau <chouteau@adacore.com>
S: Maintained
F: hw/leon3.c
F: hw/grlib*
F: hw/sparc/leon3.c
F: hw/*/grlib*
S390 Machines
-------------
S390 Virtio
M: Alexander Graf <agraf@suse.de>
S: Maintained
F: hw/s390-*.c
F: hw/s390x/s390-*.c
S390 Virtio-ccw
M: Cornelia Huck <cornelia.huck@de.ibm.com>
@ -467,7 +503,7 @@ UniCore32 Machines
PKUnity-3 SoC initramfs-with-busybox
M: Guan Xuetao <gxt@mprc.pku.edu.cn>
S: Maintained
F: hw/puv3*
F: hw/*/puv3*
F: hw/unicore32/
X86 Machines
@ -475,90 +511,91 @@ X86 Machines
PC
M: Anthony Liguori <aliguori@us.ibm.com>
S: Supported
F: hw/pc.[ch]
F: hw/pc_piix.c
F: hw/i386/pc.[ch]
F: hw/i386/pc_piix.c
Xtensa Machines
---------------
sim
M: Max Filippov <jcmvbkbc@gmail.com>
S: Maintained
F: hw/xtensa_sim.c
F: hw/xtensa/xtensa_sim.c
Avnet LX60
M: Max Filippov <jcmvbkbc@gmail.com>
S: Maintained
F: hw/xtensa_lx60.c
F: hw/xtensa/xtensa_lx60.c
Devices
-------
IDE
M: Kevin Wolf <kwolf@redhat.com>
S: Odd Fixes
F: include/hw/ide.h
F: hw/ide/
OMAP
M: Peter Maydell <peter.maydell@linaro.org>
S: Maintained
F: hw/omap*
F: hw/*/omap*
PCI
M: Michael S. Tsirkin <mst@redhat.com>
S: Supported
F: include/hw/pci/*
F: hw/pci/*
F: hw/pci*
F: hw/piix*
F: hw/acpi/*
ppc4xx
M: Alexander Graf <agraf@suse.de>
L: qemu-ppc@nongnu.org
S: Odd Fixes
F: hw/ppc4xx*.[hc]
F: hw/ppc/ppc4*.c
ppce500
M: Alexander Graf <agraf@suse.de>
M: Scott Wood <scottwood@freescale.com>
L: qemu-ppc@nongnu.org
S: Supported
F: hw/ppce500_*
F: hw/ppc/e500*
SCSI
M: Paolo Bonzini <pbonzini@redhat.com>
S: Supported
F: hw/virtio-scsi.*
F: hw/scsi*
F: include/hw/scsi*
F: hw/scsi/*
T: git git://github.com/bonzini/qemu.git scsi-next
LSI53C895A
M: Paul Brook <paul@codesourcery.com>
S: Odd Fixes
F: hw/lsi53c895a.c
F: hw/scsi/lsi53c895a.c
SSI
M: Peter Crosthwaite <peter.crosthwaite@petalogix.com>
S: Maintained
F: hw/ssi.*
F: hw/m25p80.c
F: hw/ssi/*
F: hw/block/m25p80.c
USB
M: Gerd Hoffmann <kraxel@redhat.com>
S: Maintained
F: hw/usb*
F: hw/usb/*
VFIO
M: Alex Williamson <alex.williamson@redhat.com>
S: Supported
F: hw/vfio*
F: hw/misc/vfio.c
vhost
M: Michael S. Tsirkin <mst@redhat.com>
S: Supported
F: hw/vhost*
F: hw/*/*vhost*
virtio
M: Anthony Liguori <aliguori@us.ibm.com>
S: Supported
F: hw/virtio*
F: hw/*/virtio*
virtio-9p
M: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
@ -571,7 +608,7 @@ virtio-blk
M: Kevin Wolf <kwolf@redhat.com>
M: Stefan Hajnoczi <stefanha@redhat.com>
S: Supported
F: hw/virtio-blk*
F: hw/block/virtio-blk.c
virtio-ccw
M: Cornelia Huck <cornelia.huck@de.ibm.com>
@ -582,20 +619,20 @@ T: git git://github.com/cohuck/qemu virtio-ccw-upstr
virtio-serial
M: Amit Shah <amit.shah@redhat.com>
S: Supported
F: hw/virtio-serial*
F: hw/virtio-console*
F: hw/char/virtio-serial-bus.c
F: hw/char/virtio-console.c
nvme
M: Keith Busch <keith.busch@intel.com>
S: Supported
F: hw/block/nvme*
Xilinx EDK
M: Peter Crosthwaite <peter.crosthwaite@petalogix.com>
M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
S: Maintained
F: hw/xilinx_axi*
F: hw/xilinx_uartlite.c
F: hw/xilinx_intc.c
F: hw/xilinx_ethlite.c
F: hw/xilinx_timer.c
F: hw/xilinx.h
F: hw/xilinx_spi.c
F: hw/*/xilinx_*
F: include/hw/xilinx.h
Subsystems
----------
@ -603,6 +640,7 @@ Audio
M: Vassili Karpov (malc) <av1474@comtv.ru>
S: Maintained
F: audio/
F: hw/audio/
Block
M: Kevin Wolf <kwolf@redhat.com>
@ -610,6 +648,7 @@ M: Stefan Hajnoczi <stefanha@redhat.com>
S: Supported
F: block*
F: block/
F: hw/block/
Character Devices
M: Anthony Liguori <aliguori@us.ibm.com>
@ -620,14 +659,20 @@ CPU
M: Andreas Färber <afaerber@suse.de>
S: Supported
F: qom/cpu.c
F: include/qemu/cpu.h
F: include/qom/cpu.h
F: target-i386/cpu.c
ICC Bus
M: Igor Mammedov <imammedo@redhat.com>
S: Supported
F: include/hw/cpu/icc_bus.h
F: hw/cpu/icc_bus.c
Device Tree
M: Peter Crosthwaite <peter.crosthwaite@petalogix.com>
M: Alexander Graf <agraf@suse.de>
S: Maintained
F: device-tree.[ch]
F: device_tree.[ch]
GDB stub
M: qemu-devel@nongnu.org
@ -638,10 +683,10 @@ F: gdb-xml/
SPICE
M: Gerd Hoffmann <kraxel@redhat.com>
S: Supported
F: ui/qemu-spice.h
F: include/ui/qemu-spice.h
F: ui/spice-*.c
F: audio/spiceaudio.c
F: hw/qxl*
F: hw/display/qxl*
Graphics
M: Anthony Liguori <aliguori@us.ibm.com>
@ -658,11 +703,12 @@ M: Anthony Liguori <aliguori@us.ibm.com>
S: Supported
F: vl.c
Monitor (QMP/HMP)
Human Monitor (HMP)
M: Luiz Capitulino <lcapitulino@redhat.com>
M: Markus Armbruster <armbru@redhat.com>
S: Supported
F: monitor.c
F: hmp.c
F: hmp-commands.hx
Network device layer
M: Anthony Liguori <aliguori@us.ibm.com>
@ -679,6 +725,27 @@ F: nbd.*
F: qemu-nbd.c
T: git git://github.com/bonzini/qemu.git nbd-next
QAPI
M: Luiz Capitulino <lcapitulino@redhat.com>
M: Michael Roth <mdroth@linux.vnet.ibm.com>
S: Supported
F: qapi/
QAPI Schema
M: Eric Blake <eblake@redhat.com>
M: Luiz Capitulino <lcapitulino@redhat.com>
M: Markus Armbruster <armbru@redhat.com>
S: Supported
F: qapi-schema.json
QMP
M: Luiz Capitulino <lcapitulino@redhat.com>
S: Supported
F: qmp.c
F: monitor.c
F: qmp-commands.hx
F: QMP/
SLIRP
M: Jan Kiszka <jan.kiszka@siemens.com>
S: Maintained
@ -715,9 +782,16 @@ Tiny Code Generator (TCG)
-------------------------
Common code
M: qemu-devel@nongnu.org
M: Richard Henderson <rth@twiddle.net>
S: Maintained
F: tcg/
AArch64 target
M: Claudio Fontana <claudio.fontana@huawei.com>
M: Claudio Fontana <claudio.fontana@gmail.com>
S: Maintained
F: tcg/aarch64/
ARM target
M: Andrzej Zaborowski <balrogg@gmail.com>
S: Maintained

Makefile | 121

@ -19,6 +19,12 @@ seems to have been used for an in-tree build. You can fix this by running \
endif
endif
CONFIG_SOFTMMU := $(if $(filter %-softmmu,$(TARGET_DIRS)),y)
CONFIG_USER_ONLY := $(if $(filter %-user,$(TARGET_DIRS)),y)
CONFIG_ALL=y
-include config-all-devices.mak
-include config-all-disas.mak
include $(SRC_PATH)/rules.mak
config-host.mak: $(SRC_PATH)/configure
@echo $@ is out-of-date, running configure
@ -35,6 +41,9 @@ GENERATED_HEADERS = config-host.h qemu-options.def
GENERATED_HEADERS += qmp-commands.h qapi-types.h qapi-visit.h
GENERATED_SOURCES += qmp-marshal.c qapi-types.c qapi-visit.c
GENERATED_HEADERS += trace/generated-events.h
GENERATED_SOURCES += trace/generated-events.c
GENERATED_HEADERS += trace/generated-tracers.h
ifeq ($(TRACE_BACKEND),dtrace)
GENERATED_HEADERS += trace/generated-tracers-dtrace.h
@ -73,7 +82,10 @@ config-all-devices.mak:
$(call quiet-command,echo '# no devices' > $@," GEN $@")
else
config-all-devices.mak: $(SUBDIR_DEVICES_MAK)
$(call quiet-command,cat $(SUBDIR_DEVICES_MAK) | grep =y | sort -u > $@," GEN $@")
$(call quiet-command, sed -n \
's|^\([^=]*\)=\(.*\)$$|\1:=$$(findstring y,$$(\1)\2)|p' \
$(SUBDIR_DEVICES_MAK) | sort -u > $@, \
" GEN $@")
endif
-include $(SUBDIR_DEVICES_MAK_DEP)
@ -101,12 +113,6 @@ endif
defconfig:
rm -f config-all-devices.mak $(SUBDIR_DEVICES_MAK)
-include config-all-devices.mak
-include config-all-disas.mak
CONFIG_SOFTMMU := $(if $(filter %-softmmu,$(TARGET_DIRS)),y)
CONFIG_USER_ONLY := $(if $(filter %-user,$(TARGET_DIRS)),y)
CONFIG_ALL=y
ifneq ($(wildcard config-host.mak),)
include $(SRC_PATH)/Makefile.objs
include $(SRC_PATH)/tests/Makefile
@ -139,6 +145,16 @@ pixman/Makefile: $(SRC_PATH)/pixman/configure
$(SRC_PATH)/pixman/configure:
(cd $(SRC_PATH)/pixman; autoreconf -v --install)
DTC_MAKE_ARGS=-I$(SRC_PATH)/dtc VPATH=$(SRC_PATH)/dtc -C dtc V="$(V)" LIBFDT_srcdir=$(SRC_PATH)/dtc/libfdt
DTC_CFLAGS=$(CFLAGS) $(QEMU_CFLAGS)
DTC_CPPFLAGS=-I$(BUILD_DIR)/dtc -I$(SRC_PATH)/dtc -I$(SRC_PATH)/dtc/libfdt
subdir-dtc:dtc/libfdt dtc/tests
$(call quiet-command,$(MAKE) $(DTC_MAKE_ARGS) CPPFLAGS="$(DTC_CPPFLAGS)" CFLAGS="$(DTC_CFLAGS)" LDFLAGS="$(LDFLAGS)" ARFLAGS="$(ARFLAGS)" CC="$(CC)" AR="$(AR)" LD="$(LD)" $(SUBDIR_MAKEFLAGS) libfdt/libfdt.a,)
dtc/%:
mkdir -p $@
$(SUBDIR_RULES): libqemuutil.a libqemustub.a $(common-obj-y)
ROMSUBDIR_RULES=$(patsubst %,romsubdir-%, $(ROMS))
@ -151,17 +167,16 @@ recurse-all: $(SUBDIR_RULES) $(ROMSUBDIR_RULES)
bt-host.o: QEMU_CFLAGS += $(BLUEZ_CFLAGS)
version.o: $(SRC_PATH)/version.rc config-host.h
$(call quiet-command,$(WINDRES) -I. -o $@ $<," RC $(TARGET_DIR)$@")
$(BUILD_DIR)/version.o: $(SRC_PATH)/version.rc $(BUILD_DIR)/config-host.h | $(BUILD_DIR)/version.lo
$(BUILD_DIR)/version.lo: $(SRC_PATH)/version.rc $(BUILD_DIR)/config-host.h
version-obj-$(CONFIG_WIN32) += version.o
Makefile: $(version-obj-y)
Makefile: $(version-obj-y) $(version-lobj-y)
######################################################################
# Build libraries
libqemustub.a: $(stub-obj-y)
libqemuutil.a: $(util-obj-y)
libqemuutil.a: $(util-obj-y) qapi-types.o qapi-visit.o
######################################################################
@ -169,7 +184,7 @@ qemu-img.o: qemu-img-cmds.h
qemu-img$(EXESUF): qemu-img.o $(block-obj-y) libqemuutil.a libqemustub.a
qemu-nbd$(EXESUF): qemu-nbd.o $(block-obj-y) libqemuutil.a libqemustub.a
qemu-io$(EXESUF): qemu-io.o cmd.o $(block-obj-y) libqemuutil.a libqemustub.a
qemu-io$(EXESUF): qemu-io.o $(block-obj-y) libqemuutil.a libqemustub.a
qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o
@ -198,10 +213,10 @@ $(SRC_PATH)/qga/qapi-schema.json $(SRC_PATH)/scripts/qapi-commands.py $(qapi-py)
qapi-types.c qapi-types.h :\
$(SRC_PATH)/qapi-schema.json $(SRC_PATH)/scripts/qapi-types.py $(qapi-py)
$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-types.py $(gen-out-type) -o "." < $<, " GEN $@")
$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-types.py $(gen-out-type) -o "." -b < $<, " GEN $@")
qapi-visit.c qapi-visit.h :\
$(SRC_PATH)/qapi-schema.json $(SRC_PATH)/scripts/qapi-visit.py $(qapi-py)
$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-visit.py $(gen-out-type) -o "." < $<, " GEN $@")
$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-visit.py $(gen-out-type) -o "." -b < $<, " GEN $@")
qmp-commands.h qmp-marshal.c :\
$(SRC_PATH)/qapi-schema.json $(SRC_PATH)/scripts/qapi-commands.py $(qapi-py)
$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-commands.py $(gen-out-type) -m -o "." < $<, " GEN $@")
@ -256,6 +271,7 @@ distclean: clean
rm -rf $$d || exit 1 ; \
done
if test -f pixman/config.log; then make -C pixman distclean; fi
if test -f dtc/version_gen.h; then make $(DTC_MAKE_ARGS) clean; fi
KEYMAPS=da en-gb et fr fr-ch is lt modifiers no pt-br sv \
ar de en-us fi fr-be hr it lv nl pl ru th \
@ -269,10 +285,13 @@ acpi-dsdt.aml q35-acpi-dsdt.aml \
ppc_rom.bin openbios-sparc32 openbios-sparc64 openbios-ppc \
pxe-e1000.rom pxe-eepro100.rom pxe-ne2k_pci.rom \
pxe-pcnet.rom pxe-rtl8139.rom pxe-virtio.rom \
qemu-icon.bmp \
efi-e1000.rom efi-eepro100.rom efi-ne2k_pci.rom \
efi-pcnet.rom efi-rtl8139.rom efi-virtio.rom \
qemu-icon.bmp qemu_logo_no_text.svg \
bamboo.dtb petalogix-s3adsp1800.dtb petalogix-ml605.dtb \
multiboot.bin linuxboot.bin kvmvapic.bin \
s390-zipl.rom \
s390-ccw.img \
spapr-rtas.bin slof.bin \
palcode-clipper
else
@ -285,10 +304,13 @@ install-doc: $(DOCS)
$(INSTALL_DATA) QMP/qmp-commands.txt "$(DESTDIR)$(qemu_docdir)"
ifdef CONFIG_POSIX
$(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1"
$(INSTALL_DATA) qemu.1 qemu-img.1 "$(DESTDIR)$(mandir)/man1"
$(INSTALL_DATA) qemu.1 "$(DESTDIR)$(mandir)/man1"
ifneq ($(TOOLS),)
$(INSTALL_DATA) qemu-img.1 "$(DESTDIR)$(mandir)/man1"
$(INSTALL_DIR) "$(DESTDIR)$(mandir)/man8"
$(INSTALL_DATA) qemu-nbd.8 "$(DESTDIR)$(mandir)/man8"
endif
endif
ifdef CONFIG_VIRTFS
$(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1"
$(INSTALL_DATA) fsdev/virtfs-proxy-helper.1 "$(DESTDIR)$(mandir)/man1"
@ -297,13 +319,21 @@ endif
install-datadir:
$(INSTALL_DIR) "$(DESTDIR)$(qemu_datadir)"
install-localstatedir:
ifdef CONFIG_POSIX
ifneq (,$(findstring qemu-ga,$(TOOLS)))
$(INSTALL_DIR) "$(DESTDIR)$(qemu_localstatedir)"/run
endif
endif
install-confdir:
$(INSTALL_DIR) "$(DESTDIR)$(qemu_confdir)"
install-sysconfig: install-datadir install-confdir
$(INSTALL_DATA) $(SRC_PATH)/sysconfigs/target/target-x86_64.conf "$(DESTDIR)$(qemu_confdir)"
install: all $(if $(BUILD_DOCS),install-doc) install-sysconfig install-datadir
install: all $(if $(BUILD_DOCS),install-doc) install-sysconfig \
install-datadir install-localstatedir
$(INSTALL_DIR) "$(DESTDIR)$(bindir)"
ifneq ($(TOOLS),)
$(INSTALL_PROG) $(STRIP_OPT) $(TOOLS) "$(DESTDIR)$(bindir)"
@ -404,6 +434,61 @@ qemu-doc.dvi qemu-doc.html qemu-doc.info qemu-doc.pdf: \
qemu-img.texi qemu-nbd.texi qemu-options.texi \
qemu-monitor.texi qemu-img-cmds.texi
ifdef CONFIG_WIN32
INSTALLER = qemu-setup-$(VERSION)$(EXESUF)
nsisflags = -V2 -NOCD
ifneq ($(wildcard $(SRC_PATH)/dll),)
ifeq ($(ARCH),x86_64)
# 64 bit executables
DLL_PATH = $(SRC_PATH)/dll/w64
nsisflags += -DW64
else
# 32 bit executables
DLL_PATH = $(SRC_PATH)/dll/w32
endif
endif
.PHONY: installer
installer: $(INSTALLER)
INSTDIR=/tmp/qemu-nsis
$(INSTALLER): $(SRC_PATH)/qemu.nsi
make install prefix=${INSTDIR}
ifdef SIGNCODE
(cd ${INSTDIR}; \
for i in *.exe; do \
$(SIGNCODE) $${i}; \
done \
)
endif # SIGNCODE
(cd ${INSTDIR}; \
for i in qemu-system-*.exe; do \
arch=$${i%.exe}; \
arch=$${arch#qemu-system-}; \
echo Section \"$$arch\" Section_$$arch; \
echo SetOutPath \"\$$INSTDIR\"; \
echo File \"\$${BINDIR}\\$$i\"; \
echo SectionEnd; \
done \
) >${INSTDIR}/system-emulations.nsh
makensis $(nsisflags) \
$(if $(BUILD_DOCS),-DCONFIG_DOCUMENTATION="y") \
$(if $(CONFIG_GTK),-DCONFIG_GTK="y") \
-DBINDIR="${INSTDIR}" \
$(if $(DLL_PATH),-DDLLDIR="$(DLL_PATH)") \
-DSRCDIR="$(SRC_PATH)" \
-DOUTFILE="$(INSTALLER)" \
$(SRC_PATH)/qemu.nsi
rm -r ${INSTDIR}
ifdef SIGNCODE
$(SIGNCODE) $(INSTALLER)
endif # SIGNCODE
endif # CONFIG_WIN
# Add a dependency on the generated files, so that they are always
# rebuilt before other object files
ifneq ($(filter-out %clean,$(MAKECMDGOALS)),$(if $(MAKECMDGOALS),,fail))

Makefile.objs

@ -13,19 +13,11 @@ block-obj-$(CONFIG_POSIX) += aio-posix.o
block-obj-$(CONFIG_WIN32) += aio-win32.o
block-obj-y += block/
block-obj-y += qapi-types.o qapi-visit.o
block-obj-y += qemu-io-cmds.o
block-obj-y += qemu-coroutine.o qemu-coroutine-lock.o qemu-coroutine-io.o
block-obj-y += qemu-coroutine-sleep.o
ifeq ($(CONFIG_UCONTEXT_COROUTINE),y)
block-obj-$(CONFIG_POSIX) += coroutine-ucontext.o
else
ifeq ($(CONFIG_SIGALTSTACK_COROUTINE),y)
block-obj-$(CONFIG_POSIX) += coroutine-sigaltstack.o
else
block-obj-$(CONFIG_POSIX) += coroutine-gthread.o
endif
endif
block-obj-$(CONFIG_WIN32) += coroutine-win32.o
block-obj-y += coroutine-$(CONFIG_COROUTINE_BACKEND).o
ifeq ($(CONFIG_VIRTIO)$(CONFIG_VIRTFS)$(CONFIG_PCI),yyy)
# Lots of the fsdev/9pcode is pulled in by vl.c via qemu_fsdev_add.
@ -41,6 +33,7 @@ libcacard-y += libcacard/vcard.o libcacard/vreader.o
libcacard-y += libcacard/vcard_emul_nss.o
libcacard-y += libcacard/vcard_emul_type.o
libcacard-y += libcacard/card_7816.o
libcacard-y += libcacard/vcardt.o
######################################################################
# Target independent part of system emulation. The long term path is to
@ -58,6 +51,7 @@ common-obj-$(CONFIG_POSIX) += os-posix.o
common-obj-$(CONFIG_LINUX) += fsdev/
common-obj-y += migration.o migration-tcp.o
common-obj-$(CONFIG_RDMA) += migration-rdma.o
common-obj-y += qemu-char.o #aio.o
common-obj-y += block-migration.o
common-obj-y += page_cache.o xbzrle.o
@ -74,7 +68,7 @@ common-obj-y += bt-host.o bt-vhci.o
common-obj-y += dma-helpers.o
common-obj-y += vl.o
common-obj-y += tpm/
common-obj-y += tpm.o
common-obj-$(CONFIG_SLIRP) += slirp/
@ -87,10 +81,15 @@ common-obj-$(CONFIG_SMARTCARD_NSS) += $(libcacard-y)
######################################################################
# qapi
common-obj-y += qmp-marshal.o qapi-visit.o qapi-types.o
common-obj-y += qmp-marshal.o
common-obj-y += qmp.o hmp.o
endif
######################################################################
# some qapi visitors are used by both system and user emulation:
common-obj-y += qapi-visit.o qapi-types.o
#######################################################################
# Target-independent parts used in system and user emulation
common-obj-y += qemu-log.o
@ -99,6 +98,11 @@ common-obj-y += hw/
common-obj-y += qom/
common-obj-y += disas/
######################################################################
# Resource file for Windows executables
version-obj-$(CONFIG_WIN32) += $(BUILD_DIR)/version.o
version-lobj-$(CONFIG_WIN32) += $(BUILD_DIR)/version.lo
######################################################################
# guest agent

Makefile.target

@ -1,8 +1,8 @@
# -*- Mode: makefile -*-
include ../config-host.mak
include config-devices.mak
include config-target.mak
include config-devices.mak
include $(SRC_PATH)/rules.mak
$(call set-vpath, $(SRC_PATH))
@ -15,14 +15,14 @@ QEMU_CFLAGS+=-I$(SRC_PATH)/include
ifdef CONFIG_USER_ONLY
# user emulator name
QEMU_PROG=qemu-$(TARGET_ARCH2)
QEMU_PROG=qemu-$(TARGET_NAME)
else
# system emulator name
ifneq (,$(findstring -mwindows,$(LIBS)))
ifneq (,$(findstring -mwindows,$(libs_softmmu)))
# Terminate program name with a 'w' because the linker builds a windows executable.
QEMU_PROGW=qemu-system-$(TARGET_ARCH2)w$(EXESUF)
QEMU_PROGW=qemu-system-$(TARGET_NAME)w$(EXESUF)
endif # windows executable
QEMU_PROG=qemu-system-$(TARGET_ARCH2)$(EXESUF)
QEMU_PROG=qemu-system-$(TARGET_NAME)$(EXESUF)
endif
PROGS=$(QEMU_PROG)
@ -31,15 +31,11 @@ PROGS+=$(QEMU_PROGW)
endif
STPFILES=
ifndef CONFIG_HAIKU
LIBS+=-lm
endif
config-target.h: config-target.h-timestamp
config-target.h-timestamp: config-target.mak
ifdef CONFIG_TRACE_SYSTEMTAP
stap: $(QEMU_PROG).stp
stap: $(QEMU_PROG).stp-installed $(QEMU_PROG).stp
ifdef CONFIG_USER_ONLY
TARGET_TYPE=user
@ -47,14 +43,24 @@ else
TARGET_TYPE=system
endif
$(QEMU_PROG).stp: $(SRC_PATH)/trace-events
$(QEMU_PROG).stp-installed: $(SRC_PATH)/trace-events
$(call quiet-command,$(TRACETOOL) \
--format=stap \
--backend=$(TRACE_BACKEND) \
--binary=$(bindir)/$(QEMU_PROG) \
--target-arch=$(TARGET_ARCH) \
--target-name=$(TARGET_NAME) \
--target-type=$(TARGET_TYPE) \
< $< > $@," GEN $(TARGET_DIR)$(QEMU_PROG).stp-installed")
$(QEMU_PROG).stp: $(SRC_PATH)/trace-events
$(call quiet-command,$(TRACETOOL) \
--format=stap \
--backend=$(TRACE_BACKEND) \
--binary=$(realpath .)/$(QEMU_PROG) \
--target-name=$(TARGET_NAME) \
--target-type=$(TARGET_TYPE) \
< $< > $@," GEN $(TARGET_DIR)$(QEMU_PROG).stp")
else
stap:
endif
@ -64,6 +70,10 @@ all: $(PROGS) stap
# Dummy command so that make thinks it has done something
@true
CONFIG_NO_PCI = $(if $(subst n,,$(CONFIG_PCI)),n,y)
CONFIG_NO_KVM = $(if $(subst n,,$(CONFIG_KVM)),n,y)
CONFIG_NO_XEN = $(if $(subst n,,$(CONFIG_XEN)),n,y)
#########################################################
# cpu emulator library
obj-y = exec.o translate-all.o cpu-exec.o
@ -74,6 +84,7 @@ obj-y += fpu/softfloat.o
obj-y += target-$(TARGET_BASE_ARCH)/
obj-y += disas.o
obj-$(CONFIG_GDBSTUB_XML) += gdbstub-xml.o
obj-$(CONFIG_NO_KVM) += kvm-stub.o
#########################################################
# Linux user emulator target
@ -92,7 +103,7 @@ endif #CONFIG_LINUX_USER
ifdef CONFIG_BSD_USER
QEMU_CFLAGS+=-I$(SRC_PATH)/bsd-user -I$(SRC_PATH)/bsd-user/$(TARGET_ARCH)
QEMU_CFLAGS+=-I$(SRC_PATH)/bsd-user -I$(SRC_PATH)/bsd-user/$(TARGET_ABI_DIR)
obj-y += bsd-user/
obj-y += gdbstub.o user-exec.o
@ -102,31 +113,22 @@ endif #CONFIG_BSD_USER
#########################################################
# System emulator target
ifdef CONFIG_SOFTMMU
CONFIG_NO_PCI = $(if $(subst n,,$(CONFIG_PCI)),n,y)
CONFIG_NO_KVM = $(if $(subst n,,$(CONFIG_KVM)),n,y)
CONFIG_NO_XEN = $(if $(subst n,,$(CONFIG_XEN)),n,y)
CONFIG_NO_GET_MEMORY_MAPPING = $(if $(subst n,,$(CONFIG_HAVE_GET_MEMORY_MAPPING)),n,y)
CONFIG_NO_CORE_DUMP = $(if $(subst n,,$(CONFIG_HAVE_CORE_DUMP)),n,y)
obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o
obj-y += qtest.o
obj-y += hw/
obj-$(CONFIG_FDT) += device_tree.o
obj-$(CONFIG_KVM) += kvm-all.o
obj-$(CONFIG_NO_KVM) += kvm-stub.o
obj-y += memory.o savevm.o cputlb.o
obj-$(CONFIG_HAVE_GET_MEMORY_MAPPING) += memory_mapping.o
obj-$(CONFIG_HAVE_CORE_DUMP) += dump.o
obj-$(CONFIG_NO_GET_MEMORY_MAPPING) += memory_mapping-stub.o
obj-$(CONFIG_NO_CORE_DUMP) += dump-stub.o
LIBS+=-lz
obj-y += memory_mapping.o
obj-y += dump.o
LIBS+=$(libs_softmmu)
# xen support
obj-$(CONFIG_XEN) += xen-all.o xen-mapcache.o
obj-$(CONFIG_NO_XEN) += xen-stub.o
# Hardware support
ifeq ($(TARGET_ARCH), sparc64)
ifeq ($(TARGET_NAME), sparc64)
obj-y += hw/sparc64/
else
obj-y += hw/$(TARGET_BASE_ARCH)/
@ -155,6 +157,10 @@ include $(SRC_PATH)/Makefile.objs
all-obj-y = $(obj-y)
all-obj-y += $(addprefix ../, $(common-obj-y))
ifndef CONFIG_HAIKU
LIBS+=-lm
endif
ifdef QEMU_PROGW
# The linker builds a windows executable. Make also a console executable.
$(QEMU_PROGW): $(all-obj-y) ../libqemuutil.a ../libqemustub.a
@ -192,7 +198,7 @@ endif
endif
ifdef CONFIG_TRACE_SYSTEMTAP
$(INSTALL_DIR) "$(DESTDIR)$(qemu_datadir)/../systemtap/tapset"
$(INSTALL_DATA) $(QEMU_PROG).stp "$(DESTDIR)$(qemu_datadir)/../systemtap/tapset"
$(INSTALL_DATA) $(QEMU_PROG).stp-installed "$(DESTDIR)$(qemu_datadir)/../systemtap/tapset/$(QEMU_PROG).stp"
endif
GENERATED_HEADERS += config-target.h

QMP/qmp-events.txt

@ -136,6 +136,24 @@ Example:
Note: The "ready to complete" status is always reset by a BLOCK_JOB_ERROR
event.
DEVICE_DELETED
-----------------
Emitted whenever the device removal completion is acknowledged
by the guest.
At this point, it's safe to reuse the specified device ID.
Device removal can be initiated by the guest or by HMP/QMP commands.
Data:
- "device": device name (json-string, optional)
- "path": device path (json-string)
{ "event": "DEVICE_DELETED",
"data": { "device": "virtio-net-pci-0",
"path": "/machine/peripheral/virtio-net-pci-0" },
"timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
DEVICE_TRAY_MOVED
-----------------
@ -154,6 +172,23 @@ Data:
},
"timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
NIC_RX_FILTER_CHANGED
-----------------
The event is emitted only once until the corresponding query command is
executed; the first event will always be emitted.
Data:
- "name": net client name (json-string)
- "path": device path (json-string)
{ "event": "NIC_RX_FILTER_CHANGED",
"data": { "name": "vnet0",
"path": "/machine/peripheral/vnet0/virtio-backend" },
"timestamp": { "seconds": 1368697518, "microseconds": 326866 } }
RESET
-----
@ -185,7 +220,8 @@ Emitted when the guest changes the RTC time.
Data:
- "offset": delta against the host UTC in seconds (json-number)
- "offset": Offset between base RTC clock (as specified by -rtc base), and
new RTC clock value (json-number)
Example:
@ -428,3 +464,17 @@ Example:
Note: If action is "reset", "shutdown", or "pause" the WATCHDOG event is
followed respectively by the RESET, SHUTDOWN, or STOP events.
GUEST_PANICKED
--------------
Emitted when guest OS panic is detected.
Data:
- "action": Action that has been taken (json-string, currently always "pause").
Example:
{ "event": "GUEST_PANICKED",
"data": { "action": "pause" } }

QMP/qmp-shell

@ -99,9 +99,16 @@ class QMPShell(qmp.QEMUMonitorProtocol):
for arg in cmdargs[1:]:
opt = arg.split('=')
try:
if(len(opt) > 2):
opt[1] = '='.join(opt[1:])
value = int(opt[1])
except ValueError:
value = opt[1]
if opt[1] == 'true':
value = True
elif opt[1] == 'false':
value = False
else:
value = opt[1]
qmpcmd['arguments'][opt[0]] = value
return qmpcmd

VERSION

@ -1 +1 @@
1.4.50
1.6.0

arch_init.c

@ -35,20 +35,20 @@
#include "qemu/bitmap.h"
#include "sysemu/arch_init.h"
#include "audio/audio.h"
#include "hw/pc.h"
#include "hw/i386/pc.h"
#include "hw/pci/pci.h"
#include "hw/audiodev.h"
#include "hw/audio/audio.h"
#include "sysemu/kvm.h"
#include "migration/migration.h"
#include "exec/gdbstub.h"
#include "hw/smbios.h"
#include "hw/i386/smbios.h"
#include "exec/address-spaces.h"
#include "hw/pcspk.h"
#include "hw/audio/pcspk.h"
#include "migration/page_cache.h"
#include "qemu/config-file.h"
#include "qmp-commands.h"
#include "trace.h"
#include "exec/cpu-all.h"
#include "hw/acpi/acpi.h"
#ifdef DEBUG_ARCH_INIT
#define DPRINTF(fmt, ...) \
@ -65,7 +65,7 @@ int graphic_depth = 8;
#else
int graphic_width = 800;
int graphic_height = 600;
int graphic_depth = 15;
int graphic_depth = 32;
#endif
@ -85,6 +85,8 @@ int graphic_depth = 15;
#define QEMU_ARCH QEMU_ARCH_MICROBLAZE
#elif defined(TARGET_MIPS)
#define QEMU_ARCH QEMU_ARCH_MIPS
#elif defined(TARGET_MOXIE)
#define QEMU_ARCH QEMU_ARCH_MOXIE
#elif defined(TARGET_OPENRISC)
#define QEMU_ARCH QEMU_ARCH_OPENRISC
#elif defined(TARGET_PPC)
@ -102,6 +104,9 @@ int graphic_depth = 15;
#endif
const uint32_t arch_type = QEMU_ARCH;
static bool mig_throttle_on;
static int dirty_rate_high_cnt;
static void check_guest_throttling(void);
/***********************************************************/
/* ram save/restore */
@ -113,26 +118,7 @@ const uint32_t arch_type = QEMU_ARCH;
#define RAM_SAVE_FLAG_EOS 0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
#define RAM_SAVE_FLAG_XBZRLE 0x40
#ifdef __ALTIVEC__
#include <altivec.h>
#define VECTYPE vector unsigned char
#define SPLAT(p) vec_splat(vec_ld(0, p), 0)
#define ALL_EQ(v1, v2) vec_all_eq(v1, v2)
/* altivec.h may redefine the bool macro as vector type.
* Reset it to POSIX semantics. */
#undef bool
#define bool _Bool
#elif defined __SSE2__
#include <emmintrin.h>
#define VECTYPE __m128i
#define SPLAT(p) _mm_set1_epi8(*(p))
#define ALL_EQ(v1, v2) (_mm_movemask_epi8(_mm_cmpeq_epi8(v1, v2)) == 0xFFFF)
#else
#define VECTYPE unsigned long
#define SPLAT(p) (*(p) * (~0UL / 255))
#define ALL_EQ(v1, v2) ((v1) == (v2))
#endif
/* 0x80 is reserved in migration.h start with 0x100 next */
static struct defconfig_file {
@ -141,7 +127,7 @@ static struct defconfig_file {
bool userconfig;
} default_config_files[] = {
{ CONFIG_QEMU_CONFDIR "/qemu.conf", true },
{ CONFIG_QEMU_CONFDIR "/target-" TARGET_ARCH ".conf", true },
{ CONFIG_QEMU_CONFDIR "/target-" TARGET_NAME ".conf", true },
{ NULL }, /* end of list */
};
@ -160,23 +146,14 @@ int qemu_read_default_config_files(bool userconfig)
return ret;
}
}
return 0;
}
static int is_dup_page(uint8_t *page)
static inline bool is_zero_page(uint8_t *p)
{
VECTYPE *p = (VECTYPE *)page;
VECTYPE val = SPLAT(page);
int i;
for (i = 0; i < TARGET_PAGE_SIZE / sizeof(VECTYPE); i++) {
if (!ALL_EQ(val, p[i])) {
return 0;
}
}
return 1;
return buffer_find_nonzero_offset(p, TARGET_PAGE_SIZE) ==
TARGET_PAGE_SIZE;
}
/* struct contains XBZRLE cache and a static page
@ -210,6 +187,7 @@ int64_t xbzrle_cache_resize(int64_t new_size)
/* accounting for migration statistics */
typedef struct AccountingInfo {
uint64_t dup_pages;
uint64_t skipped_pages;
uint64_t norm_pages;
uint64_t iterations;
uint64_t xbzrle_bytes;
@ -235,6 +213,16 @@ uint64_t dup_mig_pages_transferred(void)
return acct_info.dup_pages;
}
uint64_t skipped_mig_bytes_transferred(void)
{
return acct_info.skipped_pages * TARGET_PAGE_SIZE;
}
uint64_t skipped_mig_pages_transferred(void)
{
return acct_info.skipped_pages;
}
uint64_t norm_mig_bytes_transferred(void)
{
return acct_info.norm_pages * TARGET_PAGE_SIZE;
@ -346,6 +334,7 @@ static ram_addr_t last_offset;
static unsigned long *migration_bitmap;
static uint64_t migration_dirty_pages;
static uint32_t last_version;
static bool ram_bulk_stage;
static inline
ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
@ -355,7 +344,13 @@ ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
unsigned long nr = base + (start >> TARGET_PAGE_BITS);
unsigned long size = base + (int128_get64(mr->size) >> TARGET_PAGE_BITS);
unsigned long next = find_next_bit(migration_bitmap, size, nr);
unsigned long next;
if (ram_bulk_stage && nr > base) {
next = nr + 1;
} else {
next = find_next_bit(migration_bitmap, size, nr);
}
if (next < size) {
clear_bit(next, migration_bitmap);
@ -387,15 +382,21 @@ static void migration_bitmap_sync(void)
uint64_t num_dirty_pages_init = migration_dirty_pages;
MigrationState *s = migrate_get_current();
static int64_t start_time;
static int64_t bytes_xfer_prev;
static int64_t num_dirty_pages_period;
int64_t end_time;
int64_t bytes_xfer_now;
if (!bytes_xfer_prev) {
bytes_xfer_prev = ram_bytes_transferred();
}
if (!start_time) {
start_time = qemu_get_clock_ms(rt_clock);
}
trace_migration_bitmap_sync_start();
memory_global_sync_dirty_bitmap(get_system_memory());
address_space_sync_dirty_bitmap(&address_space_memory);
QTAILQ_FOREACH(block, &ram_list.blocks, next) {
for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) {
@ -413,6 +414,25 @@ static void migration_bitmap_sync(void)
/* more than 1 second = 1000 milliseconds */
if (end_time > start_time + 1000) {
if (migrate_auto_converge()) {
/* The following detection logic can be refined later. For now:
Check to see if the dirtied bytes are 50% more than the approx.
amount of bytes that just got transferred since the last time we
were in this routine. If that happens >N times (for now N==4)
we turn on the throttle down logic */
bytes_xfer_now = ram_bytes_transferred();
if (s->dirty_pages_rate &&
(num_dirty_pages_period * TARGET_PAGE_SIZE >
(bytes_xfer_now - bytes_xfer_prev)/2) &&
(dirty_rate_high_cnt++ > 4)) {
trace_migration_throttle();
mig_throttle_on = true;
dirty_rate_high_cnt = 0;
}
bytes_xfer_prev = bytes_xfer_now;
} else {
mig_throttle_on = false;
}
s->dirty_pages_rate = num_dirty_pages_period * 1000
/ (end_time - start_time);
s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
@ -453,8 +473,10 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
if (!block) {
block = QTAILQ_FIRST(&ram_list.blocks);
complete_round = true;
ram_bulk_stage = false;
}
} else {
int ret;
uint8_t *p;
int cont = (block == last_sent_block) ?
RAM_SAVE_FLAG_CONTINUE : 0;
@ -463,13 +485,24 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
/* In doubt sent page as normal */
bytes_sent = -1;
if (is_dup_page(p)) {
ret = ram_control_save_page(f, block->offset,
offset, TARGET_PAGE_SIZE, &bytes_sent);
if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
if (ret != RAM_SAVE_CONTROL_DELAYED) {
if (bytes_sent > 0) {
acct_info.norm_pages++;
} else if (bytes_sent == 0) {
acct_info.dup_pages++;
}
}
} else if (is_zero_page(p)) {
acct_info.dup_pages++;
bytes_sent = save_block_hdr(f, block, offset, cont,
RAM_SAVE_FLAG_COMPRESS);
qemu_put_byte(f, *p);
bytes_sent += 1;
} else if (migrate_use_xbzrle()) {
qemu_put_byte(f, 0);
bytes_sent++;
} else if (!ram_bulk_stage && migrate_use_xbzrle()) {
current_addr = block->offset + offset;
bytes_sent = save_xbzrle_page(f, p, current_addr, block,
offset, cont, last_stage);
@ -481,7 +514,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
/* XBZRLE overflow or normal page */
if (bytes_sent == -1) {
bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
bytes_sent += TARGET_PAGE_SIZE;
acct_info.norm_pages++;
}
@ -501,6 +534,18 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
static uint64_t bytes_transferred;
void acct_update_position(QEMUFile *f, size_t size, bool zero)
{
uint64_t pages = size / TARGET_PAGE_SIZE;
if (zero) {
acct_info.dup_pages += pages;
} else {
acct_info.norm_pages += pages;
bytes_transferred += size;
qemu_update_position(f, size);
}
}
static ram_addr_t ram_save_remaining(void)
{
return migration_dirty_pages;
@ -556,6 +601,7 @@ static void reset_ram_globals(void)
last_sent_block = NULL;
last_offset = 0;
last_version = ram_list.version;
ram_bulk_stage = true;
}
#define MAX_WAIT 50 /* ms, half buffered_file limit */
@ -568,6 +614,8 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
migration_bitmap = bitmap_new(ram_pages);
bitmap_set(migration_bitmap, 0, ram_pages);
migration_dirty_pages = ram_pages;
mig_throttle_on = false;
dirty_rate_high_cnt = 0;
if (migrate_use_xbzrle()) {
XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
@ -600,6 +648,10 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
}
qemu_mutex_unlock_ramlist();
ram_control_before_iterate(f, RAM_CONTROL_SETUP);
ram_control_after_iterate(f, RAM_CONTROL_SETUP);
qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
return 0;
@ -618,6 +670,8 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
reset_ram_globals();
}
ram_control_before_iterate(f, RAM_CONTROL_ROUND);
t0 = qemu_get_clock_ns(rt_clock);
i = 0;
while ((ret = qemu_file_rate_limit(f)) == 0) {
@ -630,6 +684,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
}
total_sent += bytes_sent;
acct_info.iterations++;
check_guest_throttling();
/* we want to check in the 1st loop, just in case it was the 1st time
and we had to sync the dirty bitmap.
qemu_get_clock_ns() is a bit expensive, so we only check each some
@ -648,6 +703,12 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
qemu_mutex_unlock_ramlist();
/*
* Must occur before EOS (or any QEMUFile operation)
* because of RDMA protocol.
*/
ram_control_after_iterate(f, RAM_CONTROL_ROUND);
if (ret < 0) {
bytes_transferred += total_sent;
return ret;
@ -665,6 +726,8 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
qemu_mutex_lock_ramlist();
migration_bitmap_sync();
ram_control_before_iterate(f, RAM_CONTROL_FINISH);
/* try transferring iterative blocks of memory */
/* flush all remaining blocks regardless of rate limiting */
@ -678,6 +741,8 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
}
bytes_transferred += bytes_sent;
}
ram_control_after_iterate(f, RAM_CONTROL_FINISH);
migration_end();
qemu_mutex_unlock_ramlist();
@ -772,6 +837,24 @@ static inline void *host_from_stream_offset(QEMUFile *f,
return NULL;
}
/*
* If a page (or a whole RDMA chunk) has been
* determined to be zero, then zap it.
*/
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
{
if (ch != 0 || !is_zero_page(host)) {
memset(host, ch, size);
#ifndef _WIN32
if (ch == 0 &&
(!kvm_enabled() || kvm_has_sync_mmu()) &&
getpagesize() <= TARGET_PAGE_SIZE) {
qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED);
}
#endif
}
}
static int ram_load(QEMUFile *f, void *opaque, int version_id)
{
ram_addr_t addr;
@ -810,6 +893,10 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
QTAILQ_FOREACH(block, &ram_list.blocks, next) {
if (!strncmp(id, block->idstr, sizeof(id))) {
if (block->length != length) {
fprintf(stderr,
"Length mismatch: %s: " RAM_ADDR_FMT
" in != " RAM_ADDR_FMT "\n", id, length,
block->length);
ret = -EINVAL;
goto done;
}
@ -839,14 +926,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
}
ch = qemu_get_byte(f);
memset(host, ch, TARGET_PAGE_SIZE);
#ifndef _WIN32
if (ch == 0 &&
(!kvm_enabled() || kvm_has_sync_mmu()) &&
getpagesize() <= TARGET_PAGE_SIZE) {
qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED);
}
#endif
ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
} else if (flags & RAM_SAVE_FLAG_PAGE) {
void *host;
@ -866,6 +946,8 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
ret = -EINVAL;
goto done;
}
} else if (flags & RAM_SAVE_FLAG_HOOK) {
ram_control_load_hook(f, flags);
}
error = qemu_file_get_error(f);
if (error) {
@ -889,7 +971,6 @@ SaveVMHandlers savevm_ram_handlers = {
.cancel = ram_migration_cancel,
};
#ifdef HAS_AUDIO
struct soundhw {
const char *name;
const char *descr;
@ -901,96 +982,30 @@ struct soundhw {
} init;
};
static struct soundhw soundhw[] = {
#ifdef HAS_AUDIO_CHOICE
#ifdef CONFIG_PCSPK
{
"pcspk",
"PC speaker",
0,
1,
{ .init_isa = pcspk_audio_init }
},
#endif
static struct soundhw soundhw[9];
static int soundhw_count;
#ifdef CONFIG_SB16
{
"sb16",
"Creative Sound Blaster 16",
0,
1,
{ .init_isa = SB16_init }
},
#endif
void isa_register_soundhw(const char *name, const char *descr,
int (*init_isa)(ISABus *bus))
{
assert(soundhw_count < ARRAY_SIZE(soundhw) - 1);
soundhw[soundhw_count].name = name;
soundhw[soundhw_count].descr = descr;
soundhw[soundhw_count].isa = 1;
soundhw[soundhw_count].init.init_isa = init_isa;
soundhw_count++;
}
#ifdef CONFIG_CS4231A
{
"cs4231a",
"CS4231A",
0,
1,
{ .init_isa = cs4231a_init }
},
#endif
#ifdef CONFIG_ADLIB
{
"adlib",
#ifdef HAS_YMF262
"Yamaha YMF262 (OPL3)",
#else
"Yamaha YM3812 (OPL2)",
#endif
0,
1,
{ .init_isa = Adlib_init }
},
#endif
#ifdef CONFIG_GUS
{
"gus",
"Gravis Ultrasound GF1",
0,
1,
{ .init_isa = GUS_init }
},
#endif
#ifdef CONFIG_AC97
{
"ac97",
"Intel 82801AA AC97 Audio",
0,
0,
{ .init_pci = ac97_init }
},
#endif
#ifdef CONFIG_ES1370
{
"es1370",
"ENSONIQ AudioPCI ES1370",
0,
0,
{ .init_pci = es1370_init }
},
#endif
#ifdef CONFIG_HDA
{
"hda",
"Intel HD Audio",
0,
0,
{ .init_pci = intel_hda_and_codec_init }
},
#endif
#endif /* HAS_AUDIO_CHOICE */
{ NULL, NULL, 0, 0, { NULL } }
};
void pci_register_soundhw(const char *name, const char *descr,
int (*init_pci)(PCIBus *bus))
{
assert(soundhw_count < ARRAY_SIZE(soundhw) - 1);
soundhw[soundhw_count].name = name;
soundhw[soundhw_count].descr = descr;
soundhw[soundhw_count].isa = 0;
soundhw[soundhw_count].init.init_pci = init_pci;
soundhw_count++;
}
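/*
 * Sketch of how a device uses these hooks (the card name, description and
 * init function below are invented for illustration): each audio device now
 * registers itself instead of being listed in a static soundhw[] table.
 */
static int example_isa_audio_init(ISABus *bus)
{
    /* create and wire up the device on 'bus' here */
    return 0;
}

static void example_audio_register(void)
{
    isa_register_soundhw("example", "Example ISA audio card",
                         example_isa_audio_init);
}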
void select_soundhw(const char *optarg)
{
@ -999,16 +1014,16 @@ void select_soundhw(const char *optarg)
if (is_help_option(optarg)) {
show_valid_cards:
#ifdef HAS_AUDIO_CHOICE
printf("Valid sound card names (comma separated):\n");
for (c = soundhw; c->name; ++c) {
printf ("%-11s %s\n", c->name, c->descr);
if (soundhw_count) {
printf("Valid sound card names (comma separated):\n");
for (c = soundhw; c->name; ++c) {
printf ("%-11s %s\n", c->name, c->descr);
}
printf("\n-soundhw all will enable all of the above\n");
} else {
printf("Machine has no user-selectable audio hardware "
"(it may or may not have always-present audio hardware).\n");
}
printf("\n-soundhw all will enable all of the above\n");
#else
printf("Machine has no user-selectable audio hardware "
"(it may or may not have always-present audio hardware).\n");
#endif
exit(!is_help_option(optarg));
}
else {
@ -1056,32 +1071,30 @@ void select_soundhw(const char *optarg)
}
}
void audio_init(ISABus *isa_bus, PCIBus *pci_bus)
void audio_init(void)
{
struct soundhw *c;
ISABus *isa_bus = (ISABus *) object_resolve_path_type("", TYPE_ISA_BUS, NULL);
PCIBus *pci_bus = (PCIBus *) object_resolve_path_type("", TYPE_PCI_BUS, NULL);
for (c = soundhw; c->name; ++c) {
if (c->enabled) {
if (c->isa) {
if (isa_bus) {
c->init.init_isa(isa_bus);
if (!isa_bus) {
fprintf(stderr, "ISA bus not available for %s\n", c->name);
exit(1);
}
c->init.init_isa(isa_bus);
} else {
if (pci_bus) {
c->init.init_pci(pci_bus);
if (!pci_bus) {
fprintf(stderr, "PCI bus not available for %s\n", c->name);
exit(1);
}
c->init.init_pci(pci_bus);
}
}
}
}
#else
void select_soundhw(const char *optarg)
{
}
void audio_init(ISABus *isa_bus, PCIBus *pci_bus)
{
}
#endif
int qemu_uuid_parse(const char *str, uint8_t *uuid)
{
@ -1100,16 +1113,21 @@ int qemu_uuid_parse(const char *str, uint8_t *uuid)
return -1;
}
#ifdef TARGET_I386
smbios_add_field(1, offsetof(struct smbios_type_1, uuid), 16, uuid);
smbios_add_field(1, offsetof(struct smbios_type_1, uuid), uuid, 16);
#endif
return 0;
}
void do_acpitable_option(const char *optarg)
void do_acpitable_option(const QemuOpts *opts)
{
#ifdef TARGET_I386
if (acpi_table_add(optarg) < 0) {
fprintf(stderr, "Wrong acpi table provided\n");
Error *err = NULL;
acpi_table_add(opts, &err);
if (err) {
fprintf(stderr, "Wrong acpi table provided: %s\n",
error_get_pretty(err));
error_free(err);
exit(1);
}
#endif
@ -1119,7 +1137,6 @@ void do_smbios_option(const char *optarg)
{
#ifdef TARGET_I386
if (smbios_entry_add(optarg) < 0) {
fprintf(stderr, "Wrong smbios provided\n");
exit(1);
}
#endif
@ -1132,15 +1149,6 @@ void cpudef_init(void)
#endif
}
int audio_available(void)
{
#ifdef HAS_AUDIO
return 1;
#else
return 0;
#endif
}
int tcg_available(void)
{
return 1;
@ -1169,7 +1177,57 @@ TargetInfo *qmp_query_target(Error **errp)
{
TargetInfo *info = g_malloc0(sizeof(*info));
info->arch = TARGET_TYPE;
info->arch = g_strdup(TARGET_NAME);
return info;
}
/* Stub function that gets run on the vcpu when it's brought out of the
VM to run inside qemu via async_run_on_cpu() */
static void mig_sleep_cpu(void *opq)
{
qemu_mutex_unlock_iothread();
g_usleep(30*1000);
qemu_mutex_lock_iothread();
}
/* To reduce the dirty rate, explicitly disallow the VCPUs from spending
much time in the VM. The migration thread will try to catch up.
The workload will experience a performance drop.
*/
static void mig_throttle_cpu_down(CPUState *cpu, void *data)
{
async_run_on_cpu(cpu, mig_sleep_cpu, NULL);
}
static void mig_throttle_guest_down(void)
{
qemu_mutex_lock_iothread();
qemu_for_each_cpu(mig_throttle_cpu_down, NULL);
qemu_mutex_unlock_iothread();
}
static void check_guest_throttling(void)
{
static int64_t t0;
int64_t t1;
if (!mig_throttle_on) {
return;
}
if (!t0) {
t0 = qemu_get_clock_ns(rt_clock);
return;
}
t1 = qemu_get_clock_ns(rt_clock);
/* If it has been more than 40 ms since the last time the guest
* was throttled then do it again.
*/
if (40 < (t1-t0)/1000000) {
mig_throttle_guest_down();
t0 = t1;
}
}

async.c | 33

@ -47,11 +47,16 @@ QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
bh->ctx = ctx;
bh->cb = cb;
bh->opaque = opaque;
qemu_mutex_lock(&ctx->bh_lock);
bh->next = ctx->first_bh;
/* Make sure that the members are ready before putting bh into list */
smp_wmb();
ctx->first_bh = bh;
qemu_mutex_unlock(&ctx->bh_lock);
return bh;
}
/* Multiple occurrences of aio_bh_poll cannot be called concurrently */
int aio_bh_poll(AioContext *ctx)
{
QEMUBH *bh, **bhp, *next;
@ -61,9 +66,15 @@ int aio_bh_poll(AioContext *ctx)
ret = 0;
for (bh = ctx->first_bh; bh; bh = next) {
/* Make sure that fetching bh happens before accessing its members */
smp_read_barrier_depends();
next = bh->next;
if (!bh->deleted && bh->scheduled) {
bh->scheduled = 0;
/* Paired with write barrier in bh schedule to ensure reading for
* idle & callbacks coming after bh's scheduling.
*/
smp_rmb();
if (!bh->idle)
ret = 1;
bh->idle = 0;
@ -75,6 +86,7 @@ int aio_bh_poll(AioContext *ctx)
/* remove deleted bhs */
if (!ctx->walking_bh) {
qemu_mutex_lock(&ctx->bh_lock);
bhp = &ctx->first_bh;
while (*bhp) {
bh = *bhp;
@ -85,6 +97,7 @@ int aio_bh_poll(AioContext *ctx)
bhp = &bh->next;
}
}
qemu_mutex_unlock(&ctx->bh_lock);
}
return ret;
@ -94,24 +107,38 @@ void qemu_bh_schedule_idle(QEMUBH *bh)
{
if (bh->scheduled)
return;
bh->scheduled = 1;
bh->idle = 1;
/* Make sure that idle & any writes needed by the callback are done
* before the locations are read in the aio_bh_poll.
*/
smp_wmb();
bh->scheduled = 1;
}
void qemu_bh_schedule(QEMUBH *bh)
{
if (bh->scheduled)
return;
bh->scheduled = 1;
bh->idle = 0;
/* Make sure that idle & any writes needed by the callback are done
* before the locations are read in the aio_bh_poll.
*/
smp_wmb();
bh->scheduled = 1;
aio_notify(bh->ctx);
}
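/* How the barriers pair up with aio_bh_poll(): the scheduling side writes
 * bh->idle (plus any data the callback needs), issues smp_wmb(), and only
 * then sets bh->scheduled.  aio_bh_poll() tests bh->scheduled, issues
 * smp_rmb(), and only then reads bh->idle and runs the callback, so a
 * thread that observes scheduled == 1 also observes the earlier writes. */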
/* This func is async.
*/
void qemu_bh_cancel(QEMUBH *bh)
{
bh->scheduled = 0;
}
/* This func is async. The bottom half will do the delete action at the final
* end.
*/
void qemu_bh_delete(QEMUBH *bh)
{
bh->scheduled = 0;
@ -176,6 +203,7 @@ aio_ctx_finalize(GSource *source)
thread_pool_free(ctx->thread_pool);
aio_set_event_notifier(ctx, &ctx->notifier, NULL, NULL);
event_notifier_cleanup(&ctx->notifier);
qemu_mutex_destroy(&ctx->bh_lock);
g_array_free(ctx->pollfds, TRUE);
}
@ -211,6 +239,7 @@ AioContext *aio_context_new(void)
ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext));
ctx->pollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD));
ctx->thread_pool = NULL;
qemu_mutex_init(&ctx->bh_lock);
event_notifier_init(&ctx->notifier, false);
aio_set_event_notifier(ctx, &ctx->notifier,
(EventNotifierHandler *)

audio/audio_int.h

@ -243,38 +243,13 @@ static inline int audio_ring_dist (int dst, int src, int len)
return (dst >= src) ? (dst - src) : (len - src + dst);
}
static void GCC_ATTR dolog (const char *fmt, ...)
{
va_list ap;
va_start (ap, fmt);
AUD_vlog (AUDIO_CAP, fmt, ap);
va_end (ap);
}
#define dolog(fmt, ...) AUD_log(AUDIO_CAP, fmt, ## __VA_ARGS__)
#ifdef DEBUG
static void GCC_ATTR ldebug (const char *fmt, ...)
{
va_list ap;
va_start (ap, fmt);
AUD_vlog (AUDIO_CAP, fmt, ap);
va_end (ap);
}
#define ldebug(fmt, ...) AUD_log(AUDIO_CAP, fmt, ## __VA_ARGS__)
#else
#if defined NDEBUG && defined __GNUC__
#define ldebug(...)
#elif defined NDEBUG && defined _MSC_VER
#define ldebug __noop
#else
static void GCC_ATTR ldebug (const char *fmt, ...)
{
(void) fmt;
}
#define ldebug(fmt, ...) (void)0
#endif
#endif
#undef GCC_ATTR
#define AUDIO_STRINGIFY_(n) #n
#define AUDIO_STRINGIFY(n) AUDIO_STRINGIFY_(n)

audio/audio_win_int.c

@ -1,7 +1,6 @@
/* public domain */
#include "qemu-common.h"
#include "audio.h"
#define AUDIO_CAP "win-int"
#include <windows.h>

audio/ossaudio.c

@ -25,11 +25,7 @@
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#ifdef __OpenBSD__
#include <soundcard.h>
#else
#include <sys/soundcard.h>
#endif
#include "qemu-common.h"
#include "qemu/main-loop.h"
#include "qemu/host-utils.h"

backends/Makefile.objs

@ -4,3 +4,5 @@ common-obj-$(CONFIG_POSIX) += rng-random.o
common-obj-y += msmouse.o
common-obj-$(CONFIG_BRLAPI) += baum.o
$(obj)/baum.o: QEMU_CFLAGS += $(SDL_CFLAGS)
common-obj-$(CONFIG_TPM) += tpm.o

View File

@ -22,7 +22,7 @@
* THE SOFTWARE.
*/
#include "qemu-common.h"
#include "char/char.h"
#include "sysemu/char.h"
#include "qemu/timer.h"
#include "hw/usb.h"
#include <brlapi.h>
@ -611,8 +611,6 @@ CharDriverState *chr_baum_init(void)
qemu_set_fd_handler(baum->brlapi_fd, baum_chr_read, NULL, baum);
qemu_chr_generic_open(chr);
return chr;
fail:

View File

@ -23,7 +23,7 @@
*/
#include <stdlib.h>
#include "qemu-common.h"
#include "char/char.h"
#include "sysemu/char.h"
#include "ui/console.h"
#define MSMOUSE_LO6(n) ((n) & 0x3f)
@ -70,6 +70,7 @@ CharDriverState *qemu_chr_open_msmouse(void)
chr = g_malloc0(sizeof(CharDriverState));
chr->chr_write = msmouse_chr_write;
chr->chr_close = msmouse_chr_close;
chr->explicit_be_open = true;
qemu_add_mouse_event_handler(msmouse_event, chr, 0, "QEMU Microsoft Mouse");

View File

@ -10,8 +10,8 @@
* See the COPYING file in the top-level directory.
*/
#include "qemu/rng.h"
#include "char/char.h"
#include "sysemu/rng.h"
#include "sysemu/char.h"
#include "qapi/qmp/qerror.h"
#include "hw/qdev.h" /* just for DEFINE_PROP_CHR */
@ -149,6 +149,11 @@ static void rng_egd_opened(RngBackend *b, Error **errp)
return;
}
if (qemu_chr_fe_claim(s->chr) != 0) {
error_set(errp, QERR_DEVICE_IN_USE, s->chr_name);
return;
}
/* FIXME we should resubmit pending requests when the CDS reconnects. */
qemu_chr_add_handlers(s->chr, rng_egd_chr_can_read, rng_egd_chr_read,
NULL, s);
@ -191,6 +196,7 @@ static void rng_egd_finalize(Object *obj)
if (s->chr) {
qemu_chr_add_handlers(s->chr, NULL, NULL, NULL, NULL);
qemu_chr_fe_release(s->chr);
}
g_free(s->chr_name);

View File

@ -10,8 +10,8 @@
* See the COPYING file in the top-level directory.
*/
#include "qemu/rng-random.h"
#include "qemu/rng.h"
#include "sysemu/rng-random.h"
#include "sysemu/rng.h"
#include "qapi/qmp/qerror.h"
#include "qemu/main-loop.h"
@ -41,6 +41,9 @@ static void entropy_available(void *opaque)
ssize_t len;
len = read(s->fd, buffer, s->size);
if (len < 0 && errno == EAGAIN) {
return;
}
g_assert(len != -1);
s->receive_func(s->opaque, buffer, len);
@ -75,9 +78,8 @@ static void rng_random_opened(RngBackend *b, Error **errp)
"filename", "a valid filename");
} else {
s->fd = qemu_open(s->filename, O_RDONLY | O_NONBLOCK);
if (s->fd == -1) {
error_set(errp, QERR_OPEN_FILE_FAILED, s->filename);
error_setg_file_open(errp, errno, s->filename);
}
}
}
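A sketch of the non-blocking entropy read the hunk above adds: the file is opened with O_NONBLOCK and an EAGAIN result is treated as "nothing available yet" rather than a failure. The device path and buffer size are arbitrary choices for illustration.

#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <stdio.h>

int main(void)
{
    unsigned char buf[64];
    int fd = open("/dev/random", O_RDONLY | O_NONBLOCK);
    if (fd == -1) {
        perror("open");
        return 1;
    }
    ssize_t len = read(fd, buf, sizeof(buf));
    if (len < 0 && errno == EAGAIN) {
        /* Nothing available right now; a real backend would wait for
         * the fd to become readable and retry. */
        printf("no entropy available yet\n");
    } else if (len < 0) {
        perror("read");
    } else {
        printf("got %zd bytes\n", len);
    }
    close(fd);
    return 0;
}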

View File

@ -10,7 +10,7 @@
* See the COPYING file in the top-level directory.
*/
#include "qemu/rng.h"
#include "sysemu/rng.h"
#include "qapi/qmp/qerror.h"
void rng_backend_request_entropy(RngBackend *s, size_t size,

backends/tpm.c (new file, 190 lines)
View File

@ -0,0 +1,190 @@
/*
* QEMU TPM Backend
*
* Copyright IBM, Corp. 2013
*
* Authors:
* Stefan Berger <stefanb@us.ibm.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*
* Based on backends/rng.c by Anthony Liguori
*/
#include "sysemu/tpm_backend.h"
#include "qapi/qmp/qerror.h"
#include "sysemu/tpm.h"
#include "qemu/thread.h"
#include "sysemu/tpm_backend_int.h"
enum TpmType tpm_backend_get_type(TPMBackend *s)
{
TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
return k->ops->type;
}
const char *tpm_backend_get_desc(TPMBackend *s)
{
TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
return k->ops->desc();
}
void tpm_backend_destroy(TPMBackend *s)
{
TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
return k->ops->destroy(s);
}
int tpm_backend_init(TPMBackend *s, TPMState *state,
TPMRecvDataCB *datacb)
{
TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
return k->ops->init(s, state, datacb);
}
int tpm_backend_startup_tpm(TPMBackend *s)
{
TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
return k->ops->startup_tpm(s);
}
bool tpm_backend_had_startup_error(TPMBackend *s)
{
TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
return k->ops->had_startup_error(s);
}
size_t tpm_backend_realloc_buffer(TPMBackend *s, TPMSizedBuffer *sb)
{
TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
return k->ops->realloc_buffer(sb);
}
void tpm_backend_deliver_request(TPMBackend *s)
{
TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
k->ops->deliver_request(s);
}
void tpm_backend_reset(TPMBackend *s)
{
TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
k->ops->reset(s);
}
void tpm_backend_cancel_cmd(TPMBackend *s)
{
TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
k->ops->cancel_cmd(s);
}
bool tpm_backend_get_tpm_established_flag(TPMBackend *s)
{
TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
return k->ops->get_tpm_established_flag(s);
}
static bool tpm_backend_prop_get_opened(Object *obj, Error **errp)
{
TPMBackend *s = TPM_BACKEND(obj);
return s->opened;
}
void tpm_backend_open(TPMBackend *s, Error **errp)
{
object_property_set_bool(OBJECT(s), true, "opened", errp);
}
static void tpm_backend_prop_set_opened(Object *obj, bool value, Error **errp)
{
TPMBackend *s = TPM_BACKEND(obj);
TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
if (value == s->opened) {
return;
}
if (!value && s->opened) {
error_set(errp, QERR_PERMISSION_DENIED);
return;
}
if (k->opened) {
k->opened(s, errp);
}
if (!error_is_set(errp)) {
s->opened = value;
}
}
static void tpm_backend_instance_init(Object *obj)
{
object_property_add_bool(obj, "opened",
tpm_backend_prop_get_opened,
tpm_backend_prop_set_opened,
NULL);
}
void tpm_backend_thread_deliver_request(TPMBackendThread *tbt)
{
g_thread_pool_push(tbt->pool, (gpointer)TPM_BACKEND_CMD_PROCESS_CMD, NULL);
}
void tpm_backend_thread_create(TPMBackendThread *tbt,
GFunc func, gpointer user_data)
{
if (!tbt->pool) {
tbt->pool = g_thread_pool_new(func, user_data, 1, TRUE, NULL);
g_thread_pool_push(tbt->pool, (gpointer)TPM_BACKEND_CMD_INIT, NULL);
}
}
void tpm_backend_thread_end(TPMBackendThread *tbt)
{
if (tbt->pool) {
g_thread_pool_push(tbt->pool, (gpointer)TPM_BACKEND_CMD_END, NULL);
g_thread_pool_free(tbt->pool, FALSE, TRUE);
tbt->pool = NULL;
}
}
void tpm_backend_thread_tpm_reset(TPMBackendThread *tbt,
GFunc func, gpointer user_data)
{
if (!tbt->pool) {
tpm_backend_thread_create(tbt, func, user_data);
} else {
g_thread_pool_push(tbt->pool, (gpointer)TPM_BACKEND_CMD_TPM_RESET,
NULL);
}
}
static const TypeInfo tpm_backend_info = {
.name = TYPE_TPM_BACKEND,
.parent = TYPE_OBJECT,
.instance_size = sizeof(TPMBackend),
.instance_init = tpm_backend_instance_init,
.class_size = sizeof(TPMBackendClass),
.abstract = true,
};
static void register_types(void)
{
type_register_static(&tpm_backend_info);
}
type_init(register_types);
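backends/tpm.c is essentially a dispatch layer: each tpm_backend_*() wrapper fetches the class and calls through its ops table, so concrete backends only fill in function pointers. A stripped-down sketch of that pattern in plain C (the names below are invented; QEMU's QOM class machinery is not reproduced):

#include <stdio.h>

/* Ops table: one function pointer per backend operation. */
struct backend_ops {
    const char *(*desc)(void);
    int (*startup)(void *opaque);
    void (*cancel_cmd)(void *opaque);
};

struct backend {
    const struct backend_ops *ops;
    void *opaque;
};

/* Public wrappers only dispatch; no backend-specific logic lives here. */
static const char *backend_get_desc(struct backend *b) { return b->ops->desc(); }
static int backend_startup(struct backend *b) { return b->ops->startup(b->opaque); }

/* One concrete implementation. */
static const char *null_desc(void) { return "null backend"; }
static int null_startup(void *opaque) { (void)opaque; return 0; }
static void null_cancel(void *opaque) { (void)opaque; }

static const struct backend_ops null_ops = {
    .desc = null_desc,
    .startup = null_startup,
    .cancel_cmd = null_cancel,
};

int main(void)
{
    struct backend b = { .ops = &null_ops, .opaque = NULL };
    printf("%s, startup=%d\n", backend_get_desc(&b), backend_startup(&b));
    return 0;
}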

View File

@ -29,6 +29,7 @@
#define BLK_MIG_FLAG_DEVICE_BLOCK 0x01
#define BLK_MIG_FLAG_EOS 0x02
#define BLK_MIG_FLAG_PROGRESS 0x04
#define BLK_MIG_FLAG_ZERO_BLOCK 0x08
#define MAX_IS_ALLOCATED_SEARCH 65536
@ -80,6 +81,7 @@ typedef struct BlkMigState {
int shared_base;
QSIMPLEQ_HEAD(bmds_list, BlkMigDevState) bmds_list;
int64_t total_sector_sum;
bool zero_blocks;
/* Protected by lock. */
QSIMPLEQ_HEAD(blk_list, BlkMigBlock) blk_list;
@ -114,16 +116,30 @@ static void blk_mig_unlock(void)
static void blk_send(QEMUFile *f, BlkMigBlock * blk)
{
int len;
uint64_t flags = BLK_MIG_FLAG_DEVICE_BLOCK;
if (block_mig_state.zero_blocks &&
buffer_is_zero(blk->buf, BLOCK_SIZE)) {
flags |= BLK_MIG_FLAG_ZERO_BLOCK;
}
/* sector number and flags */
qemu_put_be64(f, (blk->sector << BDRV_SECTOR_BITS)
| BLK_MIG_FLAG_DEVICE_BLOCK);
| flags);
/* device name */
len = strlen(blk->bmds->bs->device_name);
qemu_put_byte(f, len);
qemu_put_buffer(f, (uint8_t *)blk->bmds->bs->device_name, len);
/* If a block is zero we need to flush here, since the network
* bandwidth is now a lot higher than the storage device bandwidth;
* thus if we queue zero blocks we slow down the migration. */
if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
qemu_fflush(f);
return;
}
qemu_put_buffer(f, blk->buf, BLOCK_SIZE);
}
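A sketch of the zero-block shortcut in blk_send: detect an all-zero block, send only the header with the ZERO flag set, and flush instead of queueing a full payload. buffer_is_zero_naive() is a plain byte loop standing in for QEMU's optimized buffer_is_zero(), and the send/flush calls just print what would go on the wire.

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define BLOCK_SIZE        (1 << 20)
#define FLAG_DEVICE_BLOCK 0x01
#define FLAG_ZERO_BLOCK   0x08

static bool buffer_is_zero_naive(const uint8_t *buf, size_t len)
{
    for (size_t i = 0; i < len; i++) {
        if (buf[i]) {
            return false;
        }
    }
    return true;
}

static void send_block(uint64_t sector, const uint8_t *buf)
{
    uint64_t flags = FLAG_DEVICE_BLOCK;

    if (buffer_is_zero_naive(buf, BLOCK_SIZE)) {
        flags |= FLAG_ZERO_BLOCK;
    }
    printf("header: sector=%llu flags=0x%llx\n",
           (unsigned long long)sector, (unsigned long long)flags);

    if (flags & FLAG_ZERO_BLOCK) {
        /* No payload follows; flush so the small record is not queued
         * behind a megabyte of buffered data. */
        printf("flush (zero block, no payload)\n");
        return;
    }
    printf("payload: %d bytes\n", BLOCK_SIZE);
}

int main(void)
{
    static uint8_t zeroes[BLOCK_SIZE];
    static uint8_t data[BLOCK_SIZE];
    data[123] = 0xab;

    send_block(0, zeroes);   /* header + flush only */
    send_block(1, data);     /* header + full payload */
    return 0;
}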
@ -344,6 +360,7 @@ static void init_blk_migration(QEMUFile *f)
block_mig_state.total_sector_sum = 0;
block_mig_state.prev_progress = -1;
block_mig_state.bulk_completed = 0;
block_mig_state.zero_blocks = migrate_zero_blocks();
bdrv_iterate(init_blk_migration_it, NULL);
}
@ -762,12 +779,15 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
}
buf = g_malloc(BLOCK_SIZE);
if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
ret = bdrv_write_zeroes(bs, addr, nr_sectors);
} else {
buf = g_malloc(BLOCK_SIZE);
qemu_get_buffer(f, buf, BLOCK_SIZE);
ret = bdrv_write(bs, addr, buf, nr_sectors);
g_free(buf);
}
qemu_get_buffer(f, buf, BLOCK_SIZE);
ret = bdrv_write(bs, addr, buf, nr_sectors);
g_free(buf);
if (ret < 0) {
return ret;
}

block.c (850 changed lines): diff suppressed because it is too large.

View File

@ -2,7 +2,9 @@ block-obj-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat
block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
block-obj-y += qed-check.o
block-obj-y += vhdx.o
block-obj-y += parallels.o blkdebug.o blkverify.o
block-obj-y += snapshot.o qapi.o
block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
block-obj-$(CONFIG_POSIX) += raw-posix.o
block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
@ -13,11 +15,13 @@ block-obj-$(CONFIG_LIBISCSI) += iscsi.o
block-obj-$(CONFIG_CURL) += curl.o
block-obj-$(CONFIG_RBD) += rbd.o
block-obj-$(CONFIG_GLUSTERFS) += gluster.o
block-obj-$(CONFIG_LIBSSH2) += ssh.o
endif
common-obj-y += stream.o
common-obj-y += commit.o
common-obj-y += mirror.o
common-obj-y += backup.o
common-obj-y += blkmemory.o
$(obj)/curl.o: QEMU_CFLAGS+=$(CURL_CFLAGS)

block/backup.c (new file, 386 lines)
View File

@ -0,0 +1,386 @@
/*
* QEMU backup
*
* Copyright (C) 2013 Proxmox Server Solutions
*
* Authors:
* Dietmar Maurer (dietmar@proxmox.com)
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*
*/
#include <stdio.h>
#include <errno.h>
#include <unistd.h>
#include "trace.h"
#include "block/block.h"
#include "block/block_int.h"
#include "block/blockjob.h"
#include "qemu/ratelimit.h"
#define BACKUP_CLUSTER_BITS 16
#define BACKUP_CLUSTER_SIZE (1 << BACKUP_CLUSTER_BITS)
#define BACKUP_SECTORS_PER_CLUSTER (BACKUP_CLUSTER_SIZE / BDRV_SECTOR_SIZE)
#define SLICE_TIME 100000000ULL /* ns */
typedef struct CowRequest {
int64_t start;
int64_t end;
QLIST_ENTRY(CowRequest) list;
CoQueue wait_queue; /* coroutines blocked on this request */
} CowRequest;
typedef struct BackupBlockJob {
BlockJob common;
BlockDriverState *target;
MirrorSyncMode sync_mode;
RateLimit limit;
BlockdevOnError on_source_error;
BlockdevOnError on_target_error;
CoRwlock flush_rwlock;
uint64_t sectors_read;
HBitmap *bitmap;
QLIST_HEAD(, CowRequest) inflight_reqs;
} BackupBlockJob;
/* See if in-flight requests overlap and wait for them to complete */
static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job,
int64_t start,
int64_t end)
{
CowRequest *req;
bool retry;
do {
retry = false;
QLIST_FOREACH(req, &job->inflight_reqs, list) {
if (end > req->start && start < req->end) {
qemu_co_queue_wait(&req->wait_queue);
retry = true;
break;
}
}
} while (retry);
}
/* Keep track of an in-flight request */
static void cow_request_begin(CowRequest *req, BackupBlockJob *job,
int64_t start, int64_t end)
{
req->start = start;
req->end = end;
qemu_co_queue_init(&req->wait_queue);
QLIST_INSERT_HEAD(&job->inflight_reqs, req, list);
}
/* Forget about a completed request */
static void cow_request_end(CowRequest *req)
{
QLIST_REMOVE(req, list);
qemu_co_queue_restart_all(&req->wait_queue);
}
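The CowRequest list above serializes writers that touch the same cluster range: a request is the half-open interval [start, end), and two requests conflict when the intervals intersect. A standalone sketch of the overlap test, with the coroutine wait queue reduced to a boolean answer:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

struct cow_request {
    int64_t start, end;            /* [start, end) in clusters */
    struct cow_request *next;
};

static struct cow_request *inflight;

static bool overlaps_inflight(int64_t start, int64_t end)
{
    for (struct cow_request *req = inflight; req; req = req->next) {
        if (end > req->start && start < req->end) {
            return true;           /* caller would wait on req->wait_queue */
        }
    }
    return false;
}

static void request_begin(struct cow_request *req, int64_t start, int64_t end)
{
    req->start = start;
    req->end = end;
    req->next = inflight;
    inflight = req;
}

int main(void)
{
    struct cow_request a;
    request_begin(&a, 10, 20);

    printf("[15,25) overlaps: %d\n", overlaps_inflight(15, 25)); /* 1 */
    printf("[20,30) overlaps: %d\n", overlaps_inflight(20, 30)); /* 0: end is exclusive */
    return 0;
}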
static int coroutine_fn backup_do_cow(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
bool *error_is_read)
{
BackupBlockJob *job = (BackupBlockJob *)bs->job;
CowRequest cow_request;
struct iovec iov;
QEMUIOVector bounce_qiov;
void *bounce_buffer = NULL;
int ret = 0;
int64_t start, end;
int n;
qemu_co_rwlock_rdlock(&job->flush_rwlock);
start = sector_num / BACKUP_SECTORS_PER_CLUSTER;
end = DIV_ROUND_UP(sector_num + nb_sectors, BACKUP_SECTORS_PER_CLUSTER);
trace_backup_do_cow_enter(job, start, sector_num, nb_sectors);
wait_for_overlapping_requests(job, start, end);
cow_request_begin(&cow_request, job, start, end);
for (; start < end; start++) {
if (hbitmap_get(job->bitmap, start)) {
trace_backup_do_cow_skip(job, start);
continue; /* already copied */
}
trace_backup_do_cow_process(job, start);
n = MIN(BACKUP_SECTORS_PER_CLUSTER,
job->common.len / BDRV_SECTOR_SIZE -
start * BACKUP_SECTORS_PER_CLUSTER);
if (!bounce_buffer) {
bounce_buffer = qemu_blockalign(bs, BACKUP_CLUSTER_SIZE);
}
iov.iov_base = bounce_buffer;
iov.iov_len = n * BDRV_SECTOR_SIZE;
qemu_iovec_init_external(&bounce_qiov, &iov, 1);
ret = bdrv_co_readv(bs, start * BACKUP_SECTORS_PER_CLUSTER, n,
&bounce_qiov);
if (ret < 0) {
trace_backup_do_cow_read_fail(job, start, ret);
if (error_is_read) {
*error_is_read = true;
}
goto out;
}
if (buffer_is_zero(iov.iov_base, iov.iov_len)) {
ret = bdrv_co_write_zeroes(job->target,
start * BACKUP_SECTORS_PER_CLUSTER, n);
} else {
ret = bdrv_co_writev(job->target,
start * BACKUP_SECTORS_PER_CLUSTER, n,
&bounce_qiov);
}
if (ret < 0) {
trace_backup_do_cow_write_fail(job, start, ret);
if (error_is_read) {
*error_is_read = false;
}
goto out;
}
hbitmap_set(job->bitmap, start, 1);
/* Publish progress; guest I/O counts as progress too. Note that the
* offset field is an opaque progress value; it is not a disk offset.
*/
job->sectors_read += n;
job->common.offset += n * BDRV_SECTOR_SIZE;
}
out:
if (bounce_buffer) {
qemu_vfree(bounce_buffer);
}
cow_request_end(&cow_request);
trace_backup_do_cow_return(job, sector_num, nb_sectors, ret);
qemu_co_rwlock_unlock(&job->flush_rwlock);
return ret;
}
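backup_do_cow rounds every request out to whole backup clusters and skips clusters whose bit is already set. A worked sketch of that arithmetic with the same 64 KB cluster and 512-byte sector sizes, using a plain byte array in place of the HBitmap:

#include <stdio.h>
#include <stdint.h>

#define SECTOR_SIZE          512
#define CLUSTER_SIZE         (64 * 1024)
#define SECTORS_PER_CLUSTER  (CLUSTER_SIZE / SECTOR_SIZE)   /* 128 */
#define DIV_ROUND_UP(n, d)   (((n) + (d) - 1) / (d))

static uint8_t copied[1024];   /* one byte per cluster, toy bitmap */

static void do_cow(int64_t sector_num, int nb_sectors)
{
    int64_t start = sector_num / SECTORS_PER_CLUSTER;
    int64_t end = DIV_ROUND_UP(sector_num + nb_sectors, SECTORS_PER_CLUSTER);

    for (int64_t c = start; c < end; c++) {
        if (copied[c]) {
            continue;              /* already backed up, skip */
        }
        printf("copy cluster %lld (sectors %lld..%lld)\n",
               (long long)c,
               (long long)(c * SECTORS_PER_CLUSTER),
               (long long)((c + 1) * SECTORS_PER_CLUSTER - 1));
        copied[c] = 1;
    }
}

int main(void)
{
    do_cow(100, 8);    /* falls inside cluster 0: copies cluster 0          */
    do_cow(120, 16);   /* spans clusters 0 and 1: only cluster 1 copied now */
    return 0;
}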
static int coroutine_fn backup_before_write_notify(
NotifierWithReturn *notifier,
void *opaque)
{
BdrvTrackedRequest *req = opaque;
return backup_do_cow(req->bs, req->sector_num, req->nb_sectors, NULL);
}
static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp)
{
BackupBlockJob *s = container_of(job, BackupBlockJob, common);
if (speed < 0) {
error_set(errp, QERR_INVALID_PARAMETER, "speed");
return;
}
ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
}
static void backup_iostatus_reset(BlockJob *job)
{
BackupBlockJob *s = container_of(job, BackupBlockJob, common);
bdrv_iostatus_reset(s->target);
}
static const BlockJobType backup_job_type = {
.instance_size = sizeof(BackupBlockJob),
.job_type = "backup",
.set_speed = backup_set_speed,
.iostatus_reset = backup_iostatus_reset,
};
static BlockErrorAction backup_error_action(BackupBlockJob *job,
bool read, int error)
{
if (read) {
return block_job_error_action(&job->common, job->common.bs,
job->on_source_error, true, error);
} else {
return block_job_error_action(&job->common, job->target,
job->on_target_error, false, error);
}
}
static void coroutine_fn backup_run(void *opaque)
{
BackupBlockJob *job = opaque;
BlockDriverState *bs = job->common.bs;
BlockDriverState *target = job->target;
BlockdevOnError on_target_error = job->on_target_error;
NotifierWithReturn before_write = {
.notify = backup_before_write_notify,
};
int64_t start, end;
int ret = 0;
QLIST_INIT(&job->inflight_reqs);
qemu_co_rwlock_init(&job->flush_rwlock);
start = 0;
end = DIV_ROUND_UP(job->common.len / BDRV_SECTOR_SIZE,
BACKUP_SECTORS_PER_CLUSTER);
job->bitmap = hbitmap_alloc(end, 0);
bdrv_set_enable_write_cache(target, true);
bdrv_set_on_error(target, on_target_error, on_target_error);
bdrv_iostatus_enable(target);
bdrv_add_before_write_notifier(bs, &before_write);
if (job->sync_mode == MIRROR_SYNC_MODE_NONE) {
while (!block_job_is_cancelled(&job->common)) {
/* Yield until the job is cancelled. We just let our before_write
* notify callback service CoW requests. */
job->common.busy = false;
qemu_coroutine_yield();
job->common.busy = true;
}
} else {
/* Both FULL and TOP sync modes require copying. */
for (; start < end; start++) {
bool error_is_read;
if (block_job_is_cancelled(&job->common)) {
break;
}
/* We need to yield so that qemu_aio_flush() returns.
* (Without this, the VM does not reboot.)
*/
if (job->common.speed) {
uint64_t delay_ns = ratelimit_calculate_delay(
&job->limit, job->sectors_read);
job->sectors_read = 0;
block_job_sleep_ns(&job->common, rt_clock, delay_ns);
} else {
block_job_sleep_ns(&job->common, rt_clock, 0);
}
if (block_job_is_cancelled(&job->common)) {
break;
}
if (job->sync_mode == MIRROR_SYNC_MODE_TOP) {
int i, n;
int alloced = 0;
/* Check to see if these blocks are already in the
* backing file. */
for (i = 0; i < BACKUP_SECTORS_PER_CLUSTER;) {
/* bdrv_co_is_allocated() only returns true/false based
* on the first set of sectors it comes across that
* are all in the same state.
* For that reason we must verify each sector in the
* backup cluster length. We end up copying more than
* needed but at some point that is always the case. */
alloced =
bdrv_co_is_allocated(bs,
start * BACKUP_SECTORS_PER_CLUSTER + i,
BACKUP_SECTORS_PER_CLUSTER - i, &n);
i += n;
if (alloced == 1) {
break;
}
}
/* If the above loop never found any sectors that are in
* the topmost image, skip this backup. */
if (alloced == 0) {
continue;
}
}
/* In FULL sync mode we copy the whole drive. */
ret = backup_do_cow(bs, start * BACKUP_SECTORS_PER_CLUSTER,
BACKUP_SECTORS_PER_CLUSTER, &error_is_read);
if (ret < 0) {
/* Depending on error action, fail now or retry cluster */
BlockErrorAction action =
backup_error_action(job, error_is_read, -ret);
if (action == BDRV_ACTION_REPORT) {
break;
} else {
start--;
continue;
}
}
}
}
notifier_with_return_remove(&before_write);
/* wait until pending backup_do_cow() calls have completed */
qemu_co_rwlock_wrlock(&job->flush_rwlock);
qemu_co_rwlock_unlock(&job->flush_rwlock);
hbitmap_free(job->bitmap);
bdrv_iostatus_disable(target);
bdrv_delete(target);
block_job_completed(&job->common, ret);
}
void backup_start(BlockDriverState *bs, BlockDriverState *target,
int64_t speed, MirrorSyncMode sync_mode,
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
BlockDriverCompletionFunc *cb, void *opaque,
Error **errp)
{
int64_t len;
assert(bs);
assert(target);
assert(cb);
if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
!bdrv_iostatus_is_enabled(bs)) {
error_set(errp, QERR_INVALID_PARAMETER, "on-source-error");
return;
}
len = bdrv_getlength(bs);
if (len < 0) {
error_setg_errno(errp, -len, "unable to get length for '%s'",
bdrv_get_device_name(bs));
return;
}
BackupBlockJob *job = block_job_create(&backup_job_type, bs, speed,
cb, opaque, errp);
if (!job) {
return;
}
job->on_source_error = on_source_error;
job->on_target_error = on_target_error;
job->target = target;
job->sync_mode = sync_mode;
job->common.len = len;
job->common.co = qemu_coroutine_create(backup_run);
qemu_coroutine_enter(job->common.co, job);
}

View File

@ -182,6 +182,9 @@ static const char *event_names[BLKDBG_EVENT_MAX] = {
[BLKDBG_CLUSTER_ALLOC] = "cluster_alloc",
[BLKDBG_CLUSTER_ALLOC_BYTES] = "cluster_alloc_bytes",
[BLKDBG_CLUSTER_FREE] = "cluster_free",
[BLKDBG_FLUSH_TO_OS] = "flush_to_os",
[BLKDBG_FLUSH_TO_DISK] = "flush_to_disk",
};
static int get_event_by_name(const char *name, BlkDebugEvent *event)
@ -273,11 +276,6 @@ static int read_config(BDRVBlkdebugState *s, const char *filename)
int ret;
struct add_rule_data d;
/* Allow usage without config file */
if (!*filename) {
return 0;
}
f = fopen(filename, "r");
if (f == NULL) {
return -errno;
@ -304,43 +302,98 @@ fail:
}
/* Valid blkdebug filenames look like blkdebug:path/to/config:path/to/image */
static int blkdebug_open(BlockDriverState *bs, const char *filename, int flags)
static void blkdebug_parse_filename(const char *filename, QDict *options,
Error **errp)
{
BDRVBlkdebugState *s = bs->opaque;
int ret;
char *config, *c;
const char *c;
/* Parse the blkdebug: prefix */
if (strncmp(filename, "blkdebug:", strlen("blkdebug:"))) {
return -EINVAL;
if (!strstart(filename, "blkdebug:", &filename)) {
error_setg(errp, "File name string must start with 'blkdebug:'");
return;
}
filename += strlen("blkdebug:");
/* Read rules from config file */
/* Parse config file path */
c = strchr(filename, ':');
if (c == NULL) {
return -EINVAL;
error_setg(errp, "blkdebug requires both config file and image path");
return;
}
config = g_strdup(filename);
config[c - filename] = '\0';
ret = read_config(s, config);
g_free(config);
if (ret < 0) {
return ret;
if (c != filename) {
QString *config_path;
config_path = qstring_from_substr(filename, 0, c - filename - 1);
qdict_put(options, "config", config_path);
}
/* TODO Allow multi-level nesting and set file.filename here */
filename = c + 1;
qdict_put(options, "x-image", qstring_from_str(filename));
}
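A sketch of the 'blkdebug:<config>:<image>' split performed above, with the option dictionary replaced by two plain strings; as in the original, an empty config part ('blkdebug::image') is allowed and simply leaves the config unset. The example paths are arbitrary.

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

/* Split "blkdebug:<config>:<image>" into its two parts.
 * Returns 0 on success; config comes back as NULL when empty. */
static int parse_blkdebug(const char *filename, char **config, char **image)
{
    const char *prefix = "blkdebug:";
    const char *c;

    if (strncmp(filename, prefix, strlen(prefix)) != 0) {
        return -1;                      /* must start with blkdebug: */
    }
    filename += strlen(prefix);

    c = strchr(filename, ':');
    if (c == NULL) {
        return -1;                      /* both config and image are required */
    }
    *config = (c != filename) ? strndup(filename, c - filename) : NULL;
    *image = strdup(c + 1);
    return 0;
}

int main(void)
{
    char *config, *image;

    if (parse_blkdebug("blkdebug:/etc/rules.conf:/tmp/test.qcow2",
                       &config, &image) == 0) {
        printf("config=%s image=%s\n", config ? config : "(none)", image);
        free(config);
        free(image);
    }
    return 0;
}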
static QemuOptsList runtime_opts = {
.name = "blkdebug",
.head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
.desc = {
{
.name = "config",
.type = QEMU_OPT_STRING,
.help = "Path to the configuration file",
},
{
.name = "x-image",
.type = QEMU_OPT_STRING,
.help = "[internal use only, will be removed]",
},
{ /* end of list */ }
},
};
static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVBlkdebugState *s = bs->opaque;
QemuOpts *opts;
Error *local_err = NULL;
const char *filename, *config;
int ret;
opts = qemu_opts_create_nofail(&runtime_opts);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (error_is_set(&local_err)) {
qerror_report_err(local_err);
error_free(local_err);
ret = -EINVAL;
goto fail;
}
/* Read rules from config file */
config = qemu_opt_get(opts, "config");
if (config) {
ret = read_config(s, config);
if (ret < 0) {
goto fail;
}
}
/* Set initial state */
s->state = 1;
/* Open the backing file */
ret = bdrv_file_open(&bs->file, filename, flags);
if (ret < 0) {
return ret;
filename = qemu_opt_get(opts, "x-image");
if (filename == NULL) {
ret = -EINVAL;
goto fail;
}
return 0;
ret = bdrv_file_open(&bs->file, filename, NULL, flags);
if (ret < 0) {
goto fail;
}
ret = 0;
fail:
qemu_opts_del(opts);
return ret;
}
static void error_callback_bh(void *opaque)
@ -568,17 +621,17 @@ static int64_t blkdebug_getlength(BlockDriverState *bs)
}
static BlockDriver bdrv_blkdebug = {
.format_name = "blkdebug",
.protocol_name = "blkdebug",
.format_name = "blkdebug",
.protocol_name = "blkdebug",
.instance_size = sizeof(BDRVBlkdebugState),
.instance_size = sizeof(BDRVBlkdebugState),
.bdrv_parse_filename = blkdebug_parse_filename,
.bdrv_file_open = blkdebug_open,
.bdrv_close = blkdebug_close,
.bdrv_getlength = blkdebug_getlength,
.bdrv_file_open = blkdebug_open,
.bdrv_close = blkdebug_close,
.bdrv_getlength = blkdebug_getlength,
.bdrv_aio_readv = blkdebug_aio_readv,
.bdrv_aio_writev = blkdebug_aio_writev,
.bdrv_aio_readv = blkdebug_aio_readv,
.bdrv_aio_writev = blkdebug_aio_writev,
.bdrv_debug_event = blkdebug_debug_event,
.bdrv_debug_breakpoint = blkdebug_debug_breakpoint,

View File

@ -69,43 +69,100 @@ static void GCC_FMT_ATTR(2, 3) blkverify_err(BlkverifyAIOCB *acb,
}
/* Valid blkverify filenames look like blkverify:path/to/raw_image:path/to/image */
static int blkverify_open(BlockDriverState *bs, const char *filename, int flags)
static void blkverify_parse_filename(const char *filename, QDict *options,
Error **errp)
{
BDRVBlkverifyState *s = bs->opaque;
int ret;
char *raw, *c;
const char *c;
QString *raw_path;
/* Parse the blkverify: prefix */
if (strncmp(filename, "blkverify:", strlen("blkverify:"))) {
return -EINVAL;
if (!strstart(filename, "blkverify:", &filename)) {
error_setg(errp, "File name string must start with 'blkverify:'");
return;
}
filename += strlen("blkverify:");
/* Parse the raw image filename */
c = strchr(filename, ':');
if (c == NULL) {
return -EINVAL;
error_setg(errp, "blkverify requires raw copy and original image path");
return;
}
raw = g_strdup(filename);
raw[c - filename] = '\0';
ret = bdrv_file_open(&bs->file, raw, flags);
g_free(raw);
if (ret < 0) {
return ret;
}
/* TODO Implement option pass-through and set raw.filename here */
raw_path = qstring_from_substr(filename, 0, c - filename - 1);
qdict_put(options, "x-raw", raw_path);
/* TODO Allow multi-level nesting and set file.filename here */
filename = c + 1;
qdict_put(options, "x-image", qstring_from_str(filename));
}
static QemuOptsList runtime_opts = {
.name = "blkverify",
.head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
.desc = {
{
.name = "x-raw",
.type = QEMU_OPT_STRING,
.help = "[internal use only, will be removed]",
},
{
.name = "x-image",
.type = QEMU_OPT_STRING,
.help = "[internal use only, will be removed]",
},
{ /* end of list */ }
},
};
static int blkverify_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVBlkverifyState *s = bs->opaque;
QemuOpts *opts;
Error *local_err = NULL;
const char *filename, *raw;
int ret;
opts = qemu_opts_create_nofail(&runtime_opts);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (error_is_set(&local_err)) {
qerror_report_err(local_err);
error_free(local_err);
ret = -EINVAL;
goto fail;
}
/* Parse the raw image filename */
raw = qemu_opt_get(opts, "x-raw");
if (raw == NULL) {
ret = -EINVAL;
goto fail;
}
ret = bdrv_file_open(&bs->file, raw, NULL, flags);
if (ret < 0) {
goto fail;
}
/* Open the test file */
filename = qemu_opt_get(opts, "x-image");
if (filename == NULL) {
ret = -EINVAL;
goto fail;
}
s->test_file = bdrv_new("");
ret = bdrv_open(s->test_file, filename, NULL, flags, NULL);
if (ret < 0) {
bdrv_delete(s->test_file);
s->test_file = NULL;
return ret;
goto fail;
}
return 0;
ret = 0;
fail:
return ret;
}
static void blkverify_close(BlockDriverState *bs)
@ -343,19 +400,18 @@ static BlockDriverAIOCB *blkverify_aio_flush(BlockDriverState *bs,
}
static BlockDriver bdrv_blkverify = {
.format_name = "blkverify",
.protocol_name = "blkverify",
.format_name = "blkverify",
.protocol_name = "blkverify",
.instance_size = sizeof(BDRVBlkverifyState),
.instance_size = sizeof(BDRVBlkverifyState),
.bdrv_parse_filename = blkverify_parse_filename,
.bdrv_file_open = blkverify_open,
.bdrv_close = blkverify_close,
.bdrv_getlength = blkverify_getlength,
.bdrv_getlength = blkverify_getlength,
.bdrv_file_open = blkverify_open,
.bdrv_close = blkverify_close,
.bdrv_aio_readv = blkverify_aio_readv,
.bdrv_aio_writev = blkverify_aio_writev,
.bdrv_aio_flush = blkverify_aio_flush,
.bdrv_aio_readv = blkverify_aio_readv,
.bdrv_aio_writev = blkverify_aio_writev,
.bdrv_aio_flush = blkverify_aio_flush,
};
static void bdrv_blkverify_init(void)

View File

@ -173,7 +173,7 @@ static void commit_set_speed(BlockJob *job, int64_t speed, Error **errp)
ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
}
static BlockJobType commit_job_type = {
static const BlockJobType commit_job_type = {
.instance_size = sizeof(CommitBlockJob),
.job_type = "commit",
.set_speed = commit_set_speed,

View File

@ -279,7 +279,7 @@ static int cow_create(const char *filename, QEMUOptionParameter *options)
return ret;
}
ret = bdrv_file_open(&cow_bs, filename, BDRV_O_RDWR);
ret = bdrv_file_open(&cow_bs, filename, NULL, BDRV_O_RDWR);
if (ret < 0) {
return ret;
}
@ -340,6 +340,7 @@ static BlockDriver bdrv_cow = {
.bdrv_open = cow_open,
.bdrv_close = cow_close,
.bdrv_create = cow_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_read = cow_co_read,
.bdrv_write = cow_co_write,

View File

@ -81,6 +81,7 @@ typedef struct BDRVCURLState {
CURLState states[CURL_NUM_STATES];
char *url;
size_t readahead_size;
bool accept_range;
} BDRVCURLState;
static void curl_clean_state(CURLState *s);
@ -110,14 +111,15 @@ static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action,
return 0;
}
static size_t curl_size_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
{
CURLState *s = ((CURLState*)opaque);
BDRVCURLState *s = opaque;
size_t realsize = size * nmemb;
size_t fsize;
const char *accept_line = "Accept-Ranges: bytes";
if(sscanf(ptr, "Content-Length: %zd", &fsize) == 1) {
s->s->len = fsize;
if (realsize >= strlen(accept_line)
&& strncmp((char *)ptr, accept_line, strlen(accept_line)) == 0) {
s->accept_range = true;
}
return realsize;
@ -335,11 +337,9 @@ static void curl_clean_state(CURLState *s)
s->in_use = 0;
}
static int curl_open(BlockDriverState *bs, const char *filename, int flags)
static void curl_parse_filename(const char *filename, QDict *options,
Error **errp)
{
BDRVCURLState *s = bs->opaque;
CURLState *state = NULL;
double d;
#define RA_OPTSTR ":readahead="
char *file;
@ -347,19 +347,17 @@ static int curl_open(BlockDriverState *bs, const char *filename, int flags)
const char *ra_val;
int parse_state = 0;
static int inited = 0;
file = g_strdup(filename);
s->readahead_size = READ_AHEAD_SIZE;
/* Parse a trailing ":readahead=#:" param, if present. */
ra = file + strlen(file) - 1;
while (ra >= file) {
if (parse_state == 0) {
if (*ra == ':')
if (*ra == ':') {
parse_state++;
else
} else {
break;
}
} else if (parse_state == 1) {
if (*ra > '9' || *ra < '0') {
char *opt_start = ra - strlen(RA_OPTSTR) + 1;
@ -368,46 +366,108 @@ static int curl_open(BlockDriverState *bs, const char *filename, int flags)
ra_val = ra + 1;
ra -= strlen(RA_OPTSTR) - 1;
*ra = '\0';
s->readahead_size = atoi(ra_val);
break;
} else {
break;
qdict_put(options, "readahead", qstring_from_str(ra_val));
}
break;
}
}
ra--;
}
qdict_put(options, "url", qstring_from_str(file));
g_free(file);
}
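A sketch of the trailing-option handling in curl_parse_filename: peel a ':readahead=<bytes>' suffix off the filename, keep the rest as the URL, and fall back to a default when the suffix is absent. The state-machine walk of the original is replaced by a simpler strrchr() split, and the multiple-of-512 check done later in curl_open is omitted.

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

/* Strip a trailing ":readahead=<bytes>" option from a curl-style filename.
 * Returns the readahead value, or the supplied default when absent. */
static long split_readahead(char *url, long def)
{
    const char *opt = ":readahead=";
    char *p = strrchr(url, ':');

    if (p && strncmp(p, opt, strlen(opt)) == 0) {
        const char *val = p + strlen(opt);
        char *end;
        long ra = strtol(val, &end, 10);
        if (end != val && *end == '\0') {
            *p = '\0';              /* truncate the option off the URL */
            return ra;
        }
    }
    return def;
}

int main(void)
{
    char url[] = "http://example.org/disk.img:readahead=65536";
    long ra = split_readahead(url, 256 * 1024);
    printf("url=%s readahead=%ld\n", url, ra);
    return 0;
}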
static QemuOptsList runtime_opts = {
.name = "curl",
.head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
.desc = {
{
.name = "url",
.type = QEMU_OPT_STRING,
.help = "URL to open",
},
{
.name = "readahead",
.type = QEMU_OPT_SIZE,
.help = "Readahead size",
},
{ /* end of list */ }
},
};
static int curl_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVCURLState *s = bs->opaque;
CURLState *state = NULL;
QemuOpts *opts;
Error *local_err = NULL;
const char *file;
double d;
static int inited = 0;
if (flags & BDRV_O_RDWR) {
qerror_report(ERROR_CLASS_GENERIC_ERROR,
"curl block device does not support writes");
return -EROFS;
}
opts = qemu_opts_create_nofail(&runtime_opts);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (error_is_set(&local_err)) {
qerror_report_err(local_err);
error_free(local_err);
goto out_noclean;
}
s->readahead_size = qemu_opt_get_size(opts, "readahead", READ_AHEAD_SIZE);
if ((s->readahead_size & 0x1ff) != 0) {
fprintf(stderr, "HTTP_READAHEAD_SIZE %zd is not a multiple of 512\n",
s->readahead_size);
goto out_noclean;
}
file = qemu_opt_get(opts, "url");
if (file == NULL) {
qerror_report(ERROR_CLASS_GENERIC_ERROR, "curl block driver requires "
"an 'url' option");
goto out_noclean;
}
if (!inited) {
curl_global_init(CURL_GLOBAL_ALL);
inited = 1;
}
DPRINTF("CURL: Opening %s\n", file);
s->url = file;
s->url = g_strdup(file);
state = curl_init_state(s);
if (!state)
goto out_noclean;
// Get file size
s->accept_range = false;
curl_easy_setopt(state->curl, CURLOPT_NOBODY, 1);
curl_easy_setopt(state->curl, CURLOPT_WRITEFUNCTION, (void *)curl_size_cb);
curl_easy_setopt(state->curl, CURLOPT_HEADERFUNCTION,
curl_header_cb);
curl_easy_setopt(state->curl, CURLOPT_HEADERDATA, s);
if (curl_easy_perform(state->curl))
goto out;
curl_easy_getinfo(state->curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &d);
curl_easy_setopt(state->curl, CURLOPT_WRITEFUNCTION, (void *)curl_read_cb);
curl_easy_setopt(state->curl, CURLOPT_NOBODY, 0);
if (d)
s->len = (size_t)d;
else if(!s->len)
goto out;
if ((!strncasecmp(s->url, "http://", strlen("http://"))
|| !strncasecmp(s->url, "https://", strlen("https://")))
&& !s->accept_range) {
pstrcpy(state->errmsg, CURL_ERROR_SIZE,
"Server does not support 'range' (byte ranges).");
goto out;
}
DPRINTF("CURL: Size = %zd\n", s->len);
curl_clean_state(state);
@ -418,10 +478,11 @@ static int curl_open(BlockDriverState *bs, const char *filename, int flags)
// initialize the multi interface!
s->multi = curl_multi_init();
curl_multi_setopt( s->multi, CURLMOPT_SOCKETDATA, s);
curl_multi_setopt( s->multi, CURLMOPT_SOCKETFUNCTION, curl_sock_cb );
curl_multi_setopt(s->multi, CURLMOPT_SOCKETDATA, s);
curl_multi_setopt(s->multi, CURLMOPT_SOCKETFUNCTION, curl_sock_cb);
curl_multi_do(s);
qemu_opts_del(opts);
return 0;
out:
@ -429,7 +490,8 @@ out:
curl_easy_cleanup(state->curl);
state->curl = NULL;
out_noclean:
g_free(file);
g_free(s->url);
qemu_opts_del(opts);
return -EINVAL;
}
@ -567,63 +629,68 @@ static int64_t curl_getlength(BlockDriverState *bs)
}
static BlockDriver bdrv_http = {
.format_name = "http",
.protocol_name = "http",
.format_name = "http",
.protocol_name = "http",
.instance_size = sizeof(BDRVCURLState),
.bdrv_file_open = curl_open,
.bdrv_close = curl_close,
.bdrv_getlength = curl_getlength,
.instance_size = sizeof(BDRVCURLState),
.bdrv_parse_filename = curl_parse_filename,
.bdrv_file_open = curl_open,
.bdrv_close = curl_close,
.bdrv_getlength = curl_getlength,
.bdrv_aio_readv = curl_aio_readv,
.bdrv_aio_readv = curl_aio_readv,
};
static BlockDriver bdrv_https = {
.format_name = "https",
.protocol_name = "https",
.format_name = "https",
.protocol_name = "https",
.instance_size = sizeof(BDRVCURLState),
.bdrv_file_open = curl_open,
.bdrv_close = curl_close,
.bdrv_getlength = curl_getlength,
.instance_size = sizeof(BDRVCURLState),
.bdrv_parse_filename = curl_parse_filename,
.bdrv_file_open = curl_open,
.bdrv_close = curl_close,
.bdrv_getlength = curl_getlength,
.bdrv_aio_readv = curl_aio_readv,
.bdrv_aio_readv = curl_aio_readv,
};
static BlockDriver bdrv_ftp = {
.format_name = "ftp",
.protocol_name = "ftp",
.format_name = "ftp",
.protocol_name = "ftp",
.instance_size = sizeof(BDRVCURLState),
.bdrv_file_open = curl_open,
.bdrv_close = curl_close,
.bdrv_getlength = curl_getlength,
.instance_size = sizeof(BDRVCURLState),
.bdrv_parse_filename = curl_parse_filename,
.bdrv_file_open = curl_open,
.bdrv_close = curl_close,
.bdrv_getlength = curl_getlength,
.bdrv_aio_readv = curl_aio_readv,
.bdrv_aio_readv = curl_aio_readv,
};
static BlockDriver bdrv_ftps = {
.format_name = "ftps",
.protocol_name = "ftps",
.format_name = "ftps",
.protocol_name = "ftps",
.instance_size = sizeof(BDRVCURLState),
.bdrv_file_open = curl_open,
.bdrv_close = curl_close,
.bdrv_getlength = curl_getlength,
.instance_size = sizeof(BDRVCURLState),
.bdrv_parse_filename = curl_parse_filename,
.bdrv_file_open = curl_open,
.bdrv_close = curl_close,
.bdrv_getlength = curl_getlength,
.bdrv_aio_readv = curl_aio_readv,
.bdrv_aio_readv = curl_aio_readv,
};
static BlockDriver bdrv_tftp = {
.format_name = "tftp",
.protocol_name = "tftp",
.format_name = "tftp",
.protocol_name = "tftp",
.instance_size = sizeof(BDRVCURLState),
.bdrv_file_open = curl_open,
.bdrv_close = curl_close,
.bdrv_getlength = curl_getlength,
.instance_size = sizeof(BDRVCURLState),
.bdrv_parse_filename = curl_parse_filename,
.bdrv_file_open = curl_open,
.bdrv_close = curl_close,
.bdrv_getlength = curl_getlength,
.bdrv_aio_readv = curl_aio_readv,
.bdrv_aio_readv = curl_aio_readv,
};
static void curl_block_init(void)

View File

@ -51,9 +51,16 @@ typedef struct BDRVDMGState {
static int dmg_probe(const uint8_t *buf, int buf_size, const char *filename)
{
int len=strlen(filename);
if(len>4 && !strcmp(filename+len-4,".dmg"))
return 2;
int len;
if (!filename) {
return 0;
}
len = strlen(filename);
if (len > 4 && !strcmp(filename + len - 4, ".dmg")) {
return 2;
}
return 0;
}

View File

@ -282,13 +282,42 @@ static int qemu_gluster_aio_flush_cb(void *opaque)
return (s->qemu_aio_count > 0);
}
static int qemu_gluster_open(BlockDriverState *bs, const char *filename,
int bdrv_flags)
/* TODO Convert to fine grained options */
static QemuOptsList runtime_opts = {
.name = "gluster",
.head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
.desc = {
{
.name = "filename",
.type = QEMU_OPT_STRING,
.help = "URL to the gluster image",
},
{ /* end of list */ }
},
};
static int qemu_gluster_open(BlockDriverState *bs, QDict *options,
int bdrv_flags)
{
BDRVGlusterState *s = bs->opaque;
int open_flags = O_BINARY;
int ret = 0;
GlusterConf *gconf = g_malloc0(sizeof(GlusterConf));
QemuOpts *opts;
Error *local_err = NULL;
const char *filename;
opts = qemu_opts_create_nofail(&runtime_opts);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (error_is_set(&local_err)) {
qerror_report_err(local_err);
error_free(local_err);
ret = -EINVAL;
goto out;
}
filename = qemu_opt_get(opts, "filename");
s->glfs = qemu_gluster_init(gconf, filename);
if (!s->glfs) {
@ -322,6 +351,7 @@ static int qemu_gluster_open(BlockDriverState *bs, const char *filename,
qemu_gluster_aio_event_reader, NULL, qemu_gluster_aio_flush_cb, s);
out:
qemu_opts_del(opts);
qemu_gluster_gconf_free(gconf);
if (!ret) {
return ret;
@ -463,6 +493,19 @@ out:
return NULL;
}
static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset)
{
int ret;
BDRVGlusterState *s = bs->opaque;
ret = glfs_ftruncate(s->fd, offset);
if (ret < 0) {
return -errno;
}
return 0;
}
static BlockDriverAIOCB *qemu_gluster_aio_readv(BlockDriverState *bs,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
@ -502,6 +545,39 @@ out:
return NULL;
}
#ifdef CONFIG_GLUSTERFS_DISCARD
static BlockDriverAIOCB *qemu_gluster_aio_discard(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, BlockDriverCompletionFunc *cb,
void *opaque)
{
int ret;
GlusterAIOCB *acb;
BDRVGlusterState *s = bs->opaque;
size_t size;
off_t offset;
offset = sector_num * BDRV_SECTOR_SIZE;
size = nb_sectors * BDRV_SECTOR_SIZE;
acb = qemu_aio_get(&gluster_aiocb_info, bs, cb, opaque);
acb->size = 0;
acb->ret = 0;
acb->finished = NULL;
s->qemu_aio_count++;
ret = glfs_discard_async(s->fd, offset, size, &gluster_finish_aiocb, acb);
if (ret < 0) {
goto out;
}
return &acb->common;
out:
s->qemu_aio_count--;
qemu_aio_release(acb);
return NULL;
}
#endif
static int64_t qemu_gluster_getlength(BlockDriverState *bs)
{
BDRVGlusterState *s = bs->opaque;
@ -544,6 +620,12 @@ static void qemu_gluster_close(BlockDriverState *bs)
glfs_fini(s->glfs);
}
static int qemu_gluster_has_zero_init(BlockDriverState *bs)
{
/* GlusterFS volume could be backed by a block device */
return 0;
}
static QEMUOptionParameter qemu_gluster_create_options[] = {
{
.name = BLOCK_OPT_SIZE,
@ -562,9 +644,14 @@ static BlockDriver bdrv_gluster = {
.bdrv_create = qemu_gluster_create,
.bdrv_getlength = qemu_gluster_getlength,
.bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
.bdrv_truncate = qemu_gluster_truncate,
.bdrv_aio_readv = qemu_gluster_aio_readv,
.bdrv_aio_writev = qemu_gluster_aio_writev,
.bdrv_aio_flush = qemu_gluster_aio_flush,
.bdrv_has_zero_init = qemu_gluster_has_zero_init,
#ifdef CONFIG_GLUSTERFS_DISCARD
.bdrv_aio_discard = qemu_gluster_aio_discard,
#endif
.create_options = qemu_gluster_create_options,
};
@ -577,9 +664,14 @@ static BlockDriver bdrv_gluster_tcp = {
.bdrv_create = qemu_gluster_create,
.bdrv_getlength = qemu_gluster_getlength,
.bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
.bdrv_truncate = qemu_gluster_truncate,
.bdrv_aio_readv = qemu_gluster_aio_readv,
.bdrv_aio_writev = qemu_gluster_aio_writev,
.bdrv_aio_flush = qemu_gluster_aio_flush,
.bdrv_has_zero_init = qemu_gluster_has_zero_init,
#ifdef CONFIG_GLUSTERFS_DISCARD
.bdrv_aio_discard = qemu_gluster_aio_discard,
#endif
.create_options = qemu_gluster_create_options,
};
@ -592,9 +684,14 @@ static BlockDriver bdrv_gluster_unix = {
.bdrv_create = qemu_gluster_create,
.bdrv_getlength = qemu_gluster_getlength,
.bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
.bdrv_truncate = qemu_gluster_truncate,
.bdrv_aio_readv = qemu_gluster_aio_readv,
.bdrv_aio_writev = qemu_gluster_aio_writev,
.bdrv_aio_flush = qemu_gluster_aio_flush,
.bdrv_has_zero_init = qemu_gluster_has_zero_init,
#ifdef CONFIG_GLUSTERFS_DISCARD
.bdrv_aio_discard = qemu_gluster_aio_discard,
#endif
.create_options = qemu_gluster_create_options,
};
@ -607,9 +704,14 @@ static BlockDriver bdrv_gluster_rdma = {
.bdrv_create = qemu_gluster_create,
.bdrv_getlength = qemu_gluster_getlength,
.bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
.bdrv_truncate = qemu_gluster_truncate,
.bdrv_aio_readv = qemu_gluster_aio_readv,
.bdrv_aio_writev = qemu_gluster_aio_writev,
.bdrv_aio_flush = qemu_gluster_aio_flush,
.bdrv_has_zero_init = qemu_gluster_has_zero_init,
#ifdef CONFIG_GLUSTERFS_DISCARD
.bdrv_aio_discard = qemu_gluster_aio_discard,
#endif
.create_options = qemu_gluster_create_options,
};

View File

@ -31,14 +31,15 @@
#include "qemu/error-report.h"
#include "block/block_int.h"
#include "trace.h"
#include "hw/scsi-defs.h"
#include "block/scsi.h"
#include "qemu/iov.h"
#include <iscsi/iscsi.h>
#include <iscsi/scsi-lowlevel.h>
#ifdef __linux__
#include <scsi/sg.h>
#include <hw/scsi-defs.h>
#include <block/scsi.h>
#endif
typedef struct IscsiLun {
@ -61,8 +62,6 @@ typedef struct IscsiAIOCB {
int status;
int canceled;
int retries;
size_t read_size;
size_t read_offset;
int64_t sector_num;
int nb_sectors;
#ifdef __linux__
@ -218,10 +217,8 @@ iscsi_aio_write16_cb(struct iscsi_context *iscsi, int status,
if (status == SCSI_STATUS_CHECK_CONDITION
&& acb->task->sense.key == SCSI_SENSE_UNIT_ATTENTION
&& acb->retries-- > 0) {
if (acb->task != NULL) {
scsi_free_scsi_task(acb->task);
acb->task = NULL;
}
scsi_free_scsi_task(acb->task);
acb->task = NULL;
if (iscsi_aio_writev_acb(acb) == 0) {
iscsi_set_events(acb->iscsilun);
return;
@ -235,11 +232,30 @@ iscsi_aio_write16_cb(struct iscsi_context *iscsi, int status,
iscsi_schedule_bh(acb);
}
static int64_t sector_lun2qemu(int64_t sector, IscsiLun *iscsilun)
{
return sector * iscsilun->block_size / BDRV_SECTOR_SIZE;
}
static int64_t sector_qemu2lun(int64_t sector, IscsiLun *iscsilun)
{
return sector * BDRV_SECTOR_SIZE / iscsilun->block_size;
}
static bool is_request_lun_aligned(int64_t sector_num, int nb_sectors,
IscsiLun *iscsilun)
{
if ((sector_num * BDRV_SECTOR_SIZE) % iscsilun->block_size ||
(nb_sectors * BDRV_SECTOR_SIZE) % iscsilun->block_size) {
error_report("iSCSI misaligned request: "
"iscsilun->block_size %u, sector_num %" PRIi64
", nb_sectors %d",
iscsilun->block_size, sector_num, nb_sectors);
return 0;
}
return 1;
}
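The two helpers above convert between 512-byte QEMU sectors and LUN blocks, and is_request_lun_aligned() rejects requests that do not fall on LUN-block boundaries. A worked sketch with a 4096-byte LUN block (so one LUN block is eight QEMU sectors):

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define BDRV_SECTOR_SIZE 512

struct lun {
    uint32_t block_size;    /* e.g. 4096 for a 4K-sector target */
};

static int64_t sector_lun2qemu(int64_t sector, const struct lun *l)
{
    return sector * l->block_size / BDRV_SECTOR_SIZE;
}

static int64_t sector_qemu2lun(int64_t sector, const struct lun *l)
{
    return sector * BDRV_SECTOR_SIZE / l->block_size;
}

static bool request_is_aligned(int64_t sector_num, int nb_sectors,
                               const struct lun *l)
{
    return (sector_num * BDRV_SECTOR_SIZE) % l->block_size == 0 &&
           ((int64_t)nb_sectors * BDRV_SECTOR_SIZE) % l->block_size == 0;
}

int main(void)
{
    struct lun l = { .block_size = 4096 };

    printf("lun 10 -> qemu %lld\n", (long long)sector_lun2qemu(10, &l));  /* 80 */
    printf("qemu 80 -> lun %lld\n", (long long)sector_qemu2lun(80, &l));  /* 10 */

    printf("aligned(8,8)=%d aligned(4,8)=%d\n",
           request_is_aligned(8, 8, &l), request_is_aligned(4, 8, &l));   /* 1 0 */
    return 0;
}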
static int
iscsi_aio_writev_acb(IscsiAIOCB *acb)
{
@ -287,7 +303,7 @@ iscsi_aio_writev_acb(IscsiAIOCB *acb)
lba = sector_qemu2lun(acb->sector_num, acb->iscsilun);
*(uint32_t *)&acb->task->cdb[2] = htonl(lba >> 32);
*(uint32_t *)&acb->task->cdb[6] = htonl(lba & 0xffffffff);
num_sectors = size / acb->iscsilun->block_size;
num_sectors = sector_qemu2lun(acb->nb_sectors, acb->iscsilun);
*(uint32_t *)&acb->task->cdb[10] = htonl(num_sectors);
acb->task->expxferlen = size;
@ -303,6 +319,7 @@ iscsi_aio_writev_acb(IscsiAIOCB *acb)
acb);
#endif
if (ret != 0) {
scsi_free_scsi_task(acb->task);
g_free(acb->buf);
return -1;
}
@ -323,6 +340,10 @@ iscsi_aio_writev(BlockDriverState *bs, int64_t sector_num,
IscsiLun *iscsilun = bs->opaque;
IscsiAIOCB *acb;
if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
return NULL;
}
acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
trace_iscsi_aio_writev(iscsilun->iscsi, sector_num, nb_sectors, opaque, acb);
@ -333,9 +354,6 @@ iscsi_aio_writev(BlockDriverState *bs, int64_t sector_num,
acb->retries = ISCSI_CMD_RETRIES;
if (iscsi_aio_writev_acb(acb) != 0) {
if (acb->task) {
scsi_free_scsi_task(acb->task);
}
qemu_aio_release(acb);
return NULL;
}
@ -364,10 +382,8 @@ iscsi_aio_read16_cb(struct iscsi_context *iscsi, int status,
if (status == SCSI_STATUS_CHECK_CONDITION
&& acb->task->sense.key == SCSI_SENSE_UNIT_ATTENTION
&& acb->retries-- > 0) {
if (acb->task != NULL) {
scsi_free_scsi_task(acb->task);
acb->task = NULL;
}
scsi_free_scsi_task(acb->task);
acb->task = NULL;
if (iscsi_aio_readv_acb(acb) == 0) {
iscsi_set_events(acb->iscsilun);
return;
@ -385,6 +401,7 @@ static int
iscsi_aio_readv_acb(IscsiAIOCB *acb)
{
struct iscsi_context *iscsi = acb->iscsilun->iscsi;
size_t size;
uint64_t lba;
uint32_t num_sectors;
int ret;
@ -397,20 +414,7 @@ iscsi_aio_readv_acb(IscsiAIOCB *acb)
acb->status = -EINPROGRESS;
acb->buf = NULL;
/* If LUN blocksize is bigger than BDRV_BLOCK_SIZE a read from QEMU
* may be misaligned to the LUN, so we may need to read some extra
* data.
*/
acb->read_offset = 0;
if (acb->iscsilun->block_size > BDRV_SECTOR_SIZE) {
uint64_t bdrv_offset = BDRV_SECTOR_SIZE * acb->sector_num;
acb->read_offset = bdrv_offset % acb->iscsilun->block_size;
}
num_sectors = (acb->read_size + acb->iscsilun->block_size
+ acb->read_offset - 1)
/ acb->iscsilun->block_size;
size = acb->nb_sectors * BDRV_SECTOR_SIZE;
acb->task = malloc(sizeof(struct scsi_task));
if (acb->task == NULL) {
@ -421,8 +425,9 @@ iscsi_aio_readv_acb(IscsiAIOCB *acb)
memset(acb->task, 0, sizeof(struct scsi_task));
acb->task->xfer_dir = SCSI_XFER_READ;
acb->task->expxferlen = size;
lba = sector_qemu2lun(acb->sector_num, acb->iscsilun);
acb->task->expxferlen = acb->read_size;
num_sectors = sector_qemu2lun(acb->nb_sectors, acb->iscsilun);
switch (acb->iscsilun->type) {
case TYPE_DISK:
@ -445,6 +450,7 @@ iscsi_aio_readv_acb(IscsiAIOCB *acb)
NULL,
acb);
if (ret != 0) {
scsi_free_scsi_task(acb->task);
return -1;
}
@ -469,6 +475,10 @@ iscsi_aio_readv(BlockDriverState *bs, int64_t sector_num,
IscsiLun *iscsilun = bs->opaque;
IscsiAIOCB *acb;
if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
return NULL;
}
acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
trace_iscsi_aio_readv(iscsilun->iscsi, sector_num, nb_sectors, opaque, acb);
@ -476,13 +486,9 @@ iscsi_aio_readv(BlockDriverState *bs, int64_t sector_num,
acb->sector_num = sector_num;
acb->iscsilun = iscsilun;
acb->qiov = qiov;
acb->read_size = BDRV_SECTOR_SIZE * (size_t)acb->nb_sectors;
acb->retries = ISCSI_CMD_RETRIES;
if (iscsi_aio_readv_acb(acb) != 0) {
if (acb->task) {
scsi_free_scsi_task(acb->task);
}
qemu_aio_release(acb);
return NULL;
}
@ -509,10 +515,8 @@ iscsi_synccache10_cb(struct iscsi_context *iscsi, int status,
if (status == SCSI_STATUS_CHECK_CONDITION
&& acb->task->sense.key == SCSI_SENSE_UNIT_ATTENTION
&& acb->retries-- > 0) {
if (acb->task != NULL) {
scsi_free_scsi_task(acb->task);
acb->task = NULL;
}
scsi_free_scsi_task(acb->task);
acb->task = NULL;
if (iscsi_aio_flush_acb(acb) == 0) {
iscsi_set_events(acb->iscsilun);
return;
@ -589,10 +593,8 @@ iscsi_unmap_cb(struct iscsi_context *iscsi, int status,
if (status == SCSI_STATUS_CHECK_CONDITION
&& acb->task->sense.key == SCSI_SENSE_UNIT_ATTENTION
&& acb->retries-- > 0) {
if (acb->task != NULL) {
scsi_free_scsi_task(acb->task);
acb->task = NULL;
}
scsi_free_scsi_task(acb->task);
acb->task = NULL;
if (iscsi_aio_discard_acb(acb) == 0) {
iscsi_set_events(acb->iscsilun);
return;
@ -647,9 +649,6 @@ iscsi_aio_discard(BlockDriverState *bs,
acb->retries = ISCSI_CMD_RETRIES;
if (iscsi_aio_discard_acb(acb) != 0) {
if (acb->task) {
scsi_free_scsi_task(acb->task);
}
qemu_aio_release(acb);
return NULL;
}
@ -666,6 +665,9 @@ iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
{
IscsiAIOCB *acb = opaque;
g_free(acb->buf);
acb->buf = NULL;
if (acb->canceled != 0) {
return;
}
@ -742,14 +744,30 @@ static BlockDriverAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
memcpy(&acb->task->cdb[0], acb->ioh->cmdp, acb->ioh->cmd_len);
acb->task->expxferlen = acb->ioh->dxfer_len;
data.size = 0;
if (acb->task->xfer_dir == SCSI_XFER_WRITE) {
data.data = acb->ioh->dxferp;
data.size = acb->ioh->dxfer_len;
if (acb->ioh->iovec_count == 0) {
data.data = acb->ioh->dxferp;
data.size = acb->ioh->dxfer_len;
} else {
#if defined(LIBISCSI_FEATURE_IOVECTOR)
scsi_task_set_iov_out(acb->task,
(struct scsi_iovec *) acb->ioh->dxferp,
acb->ioh->iovec_count);
#else
struct iovec *iov = (struct iovec *)acb->ioh->dxferp;
acb->buf = g_malloc(acb->ioh->dxfer_len);
data.data = acb->buf;
data.size = iov_to_buf(iov, acb->ioh->iovec_count, 0,
acb->buf, acb->ioh->dxfer_len);
#endif
}
}
if (iscsi_scsi_command_async(iscsi, iscsilun->lun, acb->task,
iscsi_aio_ioctl_cb,
(acb->task->xfer_dir == SCSI_XFER_WRITE) ?
&data : NULL,
(data.size > 0) ? &data : NULL,
acb) != 0) {
scsi_free_scsi_task(acb->task);
qemu_aio_release(acb);
@ -758,9 +776,26 @@ static BlockDriverAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
/* tell libiscsi to read straight into the buffer we got from ioctl */
if (acb->task->xfer_dir == SCSI_XFER_READ) {
scsi_task_add_data_in_buffer(acb->task,
acb->ioh->dxfer_len,
acb->ioh->dxferp);
if (acb->ioh->iovec_count == 0) {
scsi_task_add_data_in_buffer(acb->task,
acb->ioh->dxfer_len,
acb->ioh->dxferp);
} else {
#if defined(LIBISCSI_FEATURE_IOVECTOR)
scsi_task_set_iov_in(acb->task,
(struct scsi_iovec *) acb->ioh->dxferp,
acb->ioh->iovec_count);
#else
int i;
for (i = 0; i < acb->ioh->iovec_count; i++) {
struct iovec *iov = (struct iovec *)acb->ioh->dxferp;
scsi_task_add_data_in_buffer(acb->task,
iov[i].iov_len,
iov[i].iov_base);
}
#endif
}
}
iscsi_set_events(iscsilun);
@ -946,68 +981,80 @@ static int iscsi_readcapacity_sync(IscsiLun *iscsilun)
int ret = 0;
int retries = ISCSI_CMD_RETRIES;
try_again:
switch (iscsilun->type) {
case TYPE_DISK:
task = iscsi_readcapacity16_sync(iscsilun->iscsi, iscsilun->lun);
if (task == NULL || task->status != SCSI_STATUS_GOOD) {
if (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
&& task->sense.key == SCSI_SENSE_UNIT_ATTENTION
&& retries-- > 0) {
scsi_free_scsi_task(task);
goto try_again;
}
error_report("iSCSI: failed to send readcapacity16 command.");
ret = -EINVAL;
goto out;
do {
if (task != NULL) {
scsi_free_scsi_task(task);
task = NULL;
}
rc16 = scsi_datain_unmarshall(task);
if (rc16 == NULL) {
error_report("iSCSI: Failed to unmarshall readcapacity16 data.");
ret = -EINVAL;
goto out;
}
iscsilun->block_size = rc16->block_length;
iscsilun->num_blocks = rc16->returned_lba + 1;
break;
case TYPE_ROM:
task = iscsi_readcapacity10_sync(iscsilun->iscsi, iscsilun->lun, 0, 0);
if (task == NULL || task->status != SCSI_STATUS_GOOD) {
error_report("iSCSI: failed to send readcapacity10 command.");
ret = -EINVAL;
goto out;
}
rc10 = scsi_datain_unmarshall(task);
if (rc10 == NULL) {
error_report("iSCSI: Failed to unmarshall readcapacity10 data.");
ret = -EINVAL;
goto out;
}
iscsilun->block_size = rc10->block_size;
if (rc10->lba == 0) {
/* blank disk loaded */
iscsilun->num_blocks = 0;
} else {
iscsilun->num_blocks = rc10->lba + 1;
}
break;
default:
break;
}
out:
switch (iscsilun->type) {
case TYPE_DISK:
task = iscsi_readcapacity16_sync(iscsilun->iscsi, iscsilun->lun);
if (task != NULL && task->status == SCSI_STATUS_GOOD) {
rc16 = scsi_datain_unmarshall(task);
if (rc16 == NULL) {
error_report("iSCSI: Failed to unmarshall readcapacity16 data.");
ret = -EINVAL;
} else {
iscsilun->block_size = rc16->block_length;
iscsilun->num_blocks = rc16->returned_lba + 1;
}
}
break;
case TYPE_ROM:
task = iscsi_readcapacity10_sync(iscsilun->iscsi, iscsilun->lun, 0, 0);
if (task != NULL && task->status == SCSI_STATUS_GOOD) {
rc10 = scsi_datain_unmarshall(task);
if (rc10 == NULL) {
error_report("iSCSI: Failed to unmarshall readcapacity10 data.");
ret = -EINVAL;
} else {
iscsilun->block_size = rc10->block_size;
if (rc10->lba == 0) {
/* blank disk loaded */
iscsilun->num_blocks = 0;
} else {
iscsilun->num_blocks = rc10->lba + 1;
}
}
}
break;
default:
return 0;
}
} while (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
&& task->sense.key == SCSI_SENSE_UNIT_ATTENTION
&& retries-- > 0);
if (task == NULL || task->status != SCSI_STATUS_GOOD) {
error_report("iSCSI: failed to send readcapacity10 command.");
ret = -EINVAL;
}
if (task) {
scsi_free_scsi_task(task);
}
return ret;
}
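The rewritten readcapacity path is a bounded retry loop: reissue the command while the target reports CHECK CONDITION with UNIT ATTENTION and retries remain, then judge the final status once. A generic standalone sketch of that control flow (the status codes and the fake send_command() are invented for illustration):

#include <stdio.h>

enum { STATUS_GOOD, STATUS_CHECK_CONDITION };
enum { SENSE_NONE, SENSE_UNIT_ATTENTION };

#define CMD_RETRIES 5

struct result { int status; int sense_key; };

/* Fake command: reports UNIT ATTENTION twice, then succeeds. */
static struct result send_command(void)
{
    static int calls;
    if (calls++ < 2) {
        return (struct result){ STATUS_CHECK_CONDITION, SENSE_UNIT_ATTENTION };
    }
    return (struct result){ STATUS_GOOD, SENSE_NONE };
}

int main(void)
{
    int retries = CMD_RETRIES;
    struct result r;

    do {
        r = send_command();
    } while (r.status == STATUS_CHECK_CONDITION &&
             r.sense_key == SENSE_UNIT_ATTENTION &&
             retries-- > 0);

    if (r.status != STATUS_GOOD) {
        fprintf(stderr, "command failed after retries\n");
        return 1;
    }
    printf("command succeeded\n");
    return 0;
}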
/* TODO Convert to fine grained options */
static QemuOptsList runtime_opts = {
.name = "iscsi",
.head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
.desc = {
{
.name = "filename",
.type = QEMU_OPT_STRING,
.help = "URL to the iscsi image",
},
{ /* end of list */ }
},
};
/*
* We support iSCSI URLs of the form
* iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
*/
static int iscsi_open(BlockDriverState *bs, const char *filename, int flags)
static int iscsi_open(BlockDriverState *bs, QDict *options, int flags)
{
IscsiLun *iscsilun = bs->opaque;
struct iscsi_context *iscsi = NULL;
@ -1015,6 +1062,9 @@ static int iscsi_open(BlockDriverState *bs, const char *filename, int flags)
struct scsi_task *task = NULL;
struct scsi_inquiry_standard *inq = NULL;
char *initiator_name = NULL;
QemuOpts *opts;
Error *local_err = NULL;
const char *filename;
int ret;
if ((BDRV_SECTOR_SIZE % 512) != 0) {
@ -1024,6 +1074,18 @@ static int iscsi_open(BlockDriverState *bs, const char *filename, int flags)
return -EINVAL;
}
opts = qemu_opts_create_nofail(&runtime_opts);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (error_is_set(&local_err)) {
qerror_report_err(local_err);
error_free(local_err);
ret = -EINVAL;
goto out;
}
filename = qemu_opt_get(opts, "filename");
iscsi_url = iscsi_parse_full_url(iscsi, filename);
if (iscsi_url == NULL) {
error_report("Failed to parse URL : %s", filename);
@ -1106,8 +1168,7 @@ static int iscsi_open(BlockDriverState *bs, const char *filename, int flags)
if ((ret = iscsi_readcapacity_sync(iscsilun)) != 0) {
goto out;
}
bs->total_sectors = iscsilun->num_blocks *
iscsilun->block_size / BDRV_SECTOR_SIZE ;
bs->total_sectors = sector_lun2qemu(iscsilun->num_blocks, iscsilun);
/* Medium changer or tape. We don't have any emulation for this so this must
* be sg ioctl compatible. We force it to be sg, otherwise qemu will try
@ -1125,6 +1186,7 @@ static int iscsi_open(BlockDriverState *bs, const char *filename, int flags)
#endif
out:
qemu_opts_del(opts);
if (initiator_name != NULL) {
g_free(initiator_name);
}
@ -1189,6 +1251,7 @@ static int iscsi_create(const char *filename, QEMUOptionParameter *options)
int64_t total_size = 0;
BlockDriverState bs;
IscsiLun *iscsilun = NULL;
QDict *bs_options;
memset(&bs, 0, sizeof(BlockDriverState));
@ -1203,7 +1266,11 @@ static int iscsi_create(const char *filename, QEMUOptionParameter *options)
bs.opaque = g_malloc0(sizeof(struct IscsiLun));
iscsilun = bs.opaque;
ret = iscsi_open(&bs, filename, 0);
bs_options = qdict_new();
qdict_put(bs_options, "filename", qstring_from_str(filename));
ret = iscsi_open(&bs, bs_options, 0);
QDECREF(bs_options);
if (ret != 0) {
goto out;
}
@ -1217,6 +1284,7 @@ static int iscsi_create(const char *filename, QEMUOptionParameter *options)
}
if (bs.total_sectors < total_size) {
ret = -ENOSPC;
goto out;
}
ret = 0;

View File

@ -507,12 +507,12 @@ static void mirror_complete(BlockJob *job, Error **errp)
MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
int ret;
ret = bdrv_open_backing_file(s->target);
ret = bdrv_open_backing_file(s->target, NULL);
if (ret < 0) {
char backing_filename[PATH_MAX];
bdrv_get_full_backing_filename(s->target, backing_filename,
sizeof(backing_filename));
error_set(errp, QERR_OPEN_FILE_FAILED, backing_filename);
error_setg_file_open(errp, -ret, backing_filename);
return;
}
if (!s->synced) {
@ -524,7 +524,7 @@ static void mirror_complete(BlockJob *job, Error **errp)
block_job_resume(job);
}
static BlockJobType mirror_job_type = {
static const BlockJobType mirror_job_type = {
.instance_size = sizeof(MirrorBlockJob),
.job_type = "mirror",
.set_speed = mirror_set_speed,

View File

@ -32,6 +32,8 @@
#include "block/block_int.h"
#include "qemu/module.h"
#include "qemu/sockets.h"
#include "qapi/qmp/qjson.h"
#include "qapi/qmp/qint.h"
#include <sys/types.h>
#include <unistd.h>
@ -65,17 +67,19 @@ typedef struct BDRVNBDState {
Coroutine *recv_coroutine[MAX_NBD_REQUESTS];
struct nbd_reply reply;
int is_unix;
char *host_spec;
bool is_unix;
QemuOpts *socket_opts;
char *export_name; /* An NBD server may export several devices */
} BDRVNBDState;
static int nbd_parse_uri(BDRVNBDState *s, const char *filename)
static int nbd_parse_uri(const char *filename, QDict *options)
{
URI *uri;
const char *p;
QueryParams *qp = NULL;
int ret = 0;
bool is_unix;
uri = uri_parse(filename);
if (!uri) {
@ -84,11 +88,11 @@ static int nbd_parse_uri(BDRVNBDState *s, const char *filename)
/* transport */
if (!strcmp(uri->scheme, "nbd")) {
s->is_unix = false;
is_unix = false;
} else if (!strcmp(uri->scheme, "nbd+tcp")) {
s->is_unix = false;
is_unix = false;
} else if (!strcmp(uri->scheme, "nbd+unix")) {
s->is_unix = true;
is_unix = true;
} else {
ret = -EINVAL;
goto out;
@ -97,32 +101,44 @@ static int nbd_parse_uri(BDRVNBDState *s, const char *filename)
p = uri->path ? uri->path : "/";
p += strspn(p, "/");
if (p[0]) {
s->export_name = g_strdup(p);
qdict_put(options, "export", qstring_from_str(p));
}
qp = query_params_parse(uri->query);
if (qp->n > 1 || (s->is_unix && !qp->n) || (!s->is_unix && qp->n)) {
if (qp->n > 1 || (is_unix && !qp->n) || (!is_unix && qp->n)) {
ret = -EINVAL;
goto out;
}
if (s->is_unix) {
if (is_unix) {
/* nbd+unix:///export?socket=path */
if (uri->server || uri->port || strcmp(qp->p[0].name, "socket")) {
ret = -EINVAL;
goto out;
}
s->host_spec = g_strdup(qp->p[0].value);
qdict_put(options, "path", qstring_from_str(qp->p[0].value));
} else {
/* nbd[+tcp]://host:port/export */
QString *host;
/* nbd[+tcp]://host[:port]/export */
if (!uri->server) {
ret = -EINVAL;
goto out;
}
if (!uri->port) {
uri->port = NBD_DEFAULT_PORT;
/* strip braces from literal IPv6 address */
if (uri->server[0] == '[') {
host = qstring_from_substr(uri->server, 1,
strlen(uri->server) - 2);
} else {
host = qstring_from_str(uri->server);
}
qdict_put(options, "host", host);
if (uri->port) {
char* port_str = g_strdup_printf("%d", uri->port);
qdict_put(options, "port", qstring_from_str(port_str));
g_free(port_str);
}
s->host_spec = g_strdup_printf("%s:%d", uri->server, uri->port);
}
out:
@ -133,16 +149,29 @@ out:
return ret;
}
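The URI branch above now feeds the parsed pieces into the options QDict, stripping the square brackets from a literal IPv6 server address (via qstring_from_substr over characters 1..len-2) and only adding a "port" entry when the URI actually carries one. Below is a minimal standalone sketch of the bracket-stripping step using plain C string handling instead of QEMU's QString/QDict helpers; the function and variable names are illustrative only.

#include <stdio.h>
#include <string.h>

/* Copy a URI server component into out_host, dropping the enclosing
 * brackets of a literal IPv6 address such as "[::1]". */
static void strip_ipv6_brackets(const char *uri_server, char *out_host,
                                size_t out_len)
{
    size_t len = strlen(uri_server);

    if (len >= 2 && uri_server[0] == '[' && uri_server[len - 1] == ']') {
        uri_server++;   /* skip '['... */
        len -= 2;       /* ...and drop the trailing ']' */
    }
    if (len >= out_len) {
        len = out_len - 1;
    }
    memcpy(out_host, uri_server, len);
    out_host[len] = '\0';
}

int main(void)
{
    char host[64];

    strip_ipv6_brackets("[::1]", host, sizeof(host));
    printf("host=%s\n", host);       /* prints "host=::1" */

    strip_ipv6_brackets("example.org", host, sizeof(host));
    printf("host=%s\n", host);       /* prints "host=example.org" */
    return 0;
}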
static int nbd_config(BDRVNBDState *s, const char *filename)
static void nbd_parse_filename(const char *filename, QDict *options,
Error **errp)
{
char *file;
char *export_name;
const char *host_spec;
const char *unixpath;
int err = -EINVAL;
if (qdict_haskey(options, "host")
|| qdict_haskey(options, "port")
|| qdict_haskey(options, "path"))
{
error_setg(errp, "host/port/path and a file name may not be specified "
"at the same time");
return;
}
if (strstr(filename, "://")) {
return nbd_parse_uri(s, filename);
int ret = nbd_parse_uri(filename, options);
if (ret < 0) {
error_setg(errp, "No valid URL specified");
}
return;
}
file = g_strdup(filename);
@ -154,34 +183,79 @@ static int nbd_config(BDRVNBDState *s, const char *filename)
}
export_name[0] = 0; /* truncate 'file' */
export_name += strlen(EN_OPTSTR);
s->export_name = g_strdup(export_name);
qdict_put(options, "export", qstring_from_str(export_name));
}
/* extract the host_spec - fail if it's not nbd:... */
if (!strstart(file, "nbd:", &host_spec)) {
error_setg(errp, "File name string for NBD must start with 'nbd:'");
goto out;
}
if (!*host_spec) {
goto out;
}
/* are we a UNIX or TCP socket? */
if (strstart(host_spec, "unix:", &unixpath)) {
s->is_unix = true;
s->host_spec = g_strdup(unixpath);
qdict_put(options, "path", qstring_from_str(unixpath));
} else {
s->is_unix = false;
s->host_spec = g_strdup(host_spec);
}
InetSocketAddress *addr = NULL;
err = 0;
addr = inet_parse(host_spec, errp);
if (error_is_set(errp)) {
goto out;
}
qdict_put(options, "host", qstring_from_str(addr->host));
qdict_put(options, "port", qstring_from_str(addr->port));
qapi_free_InetSocketAddress(addr);
}
out:
g_free(file);
if (err != 0) {
g_free(s->export_name);
g_free(s->host_spec);
}
return err;
}
static int nbd_config(BDRVNBDState *s, QDict *options)
{
Error *local_err = NULL;
if (qdict_haskey(options, "path")) {
if (qdict_haskey(options, "host")) {
qerror_report(ERROR_CLASS_GENERIC_ERROR, "path and host may not "
"be used at the same time.");
return -EINVAL;
}
s->is_unix = true;
} else if (qdict_haskey(options, "host")) {
s->is_unix = false;
} else {
return -EINVAL;
}
s->socket_opts = qemu_opts_create_nofail(&socket_optslist);
qemu_opts_absorb_qdict(s->socket_opts, options, &local_err);
if (error_is_set(&local_err)) {
qerror_report_err(local_err);
error_free(local_err);
return -EINVAL;
}
if (!qemu_opt_get(s->socket_opts, "port")) {
qemu_opt_set_number(s->socket_opts, "port", NBD_DEFAULT_PORT);
}
s->export_name = g_strdup(qdict_get_try_str(options, "export"));
if (s->export_name) {
qdict_del(options, "export");
}
return 0;
}
static void nbd_coroutine_start(BDRVNBDState *s, struct nbd_request *request)
{
int i;
@ -269,13 +343,23 @@ static int nbd_co_send_request(BDRVNBDState *s, struct nbd_request *request,
s->send_coroutine = qemu_coroutine_self();
qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, nbd_restart_write,
nbd_have_request, s);
rc = nbd_send_request(s->sock, request);
if (rc >= 0 && qiov) {
ret = qemu_co_sendv(s->sock, qiov->iov, qiov->niov,
offset, request->len);
if (ret != request->len) {
return -EIO;
if (qiov) {
if (!s->is_unix) {
socket_set_cork(s->sock, 1);
}
rc = nbd_send_request(s->sock, request);
if (rc >= 0) {
ret = qemu_co_sendv(s->sock, qiov->iov, qiov->niov,
offset, request->len);
if (ret != request->len) {
rc = -EIO;
}
}
if (!s->is_unix) {
socket_set_cork(s->sock, 0);
}
} else {
rc = nbd_send_request(s->sock, request);
}
qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, NULL,
nbd_have_request, s);
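For TCP transports the send path above now brackets the request header and the payload iovec with a cork/uncork pair so both leave the socket in as few segments as possible; socket_set_cork() is presumably a thin wrapper over the TCP_CORK socket option on hosts that have it. A standalone sketch of the same pattern with raw POSIX calls follows; sock is assumed to be a connected TCP socket, and error/short-write handling is deliberately omitted.

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>
#include <sys/uio.h>

/* Send a fixed-size header followed by a payload as one coalesced
 * stream: cork the socket, write both parts, then uncork to flush.
 * TCP_CORK is Linux-specific; this is a simplified illustration. */
static ssize_t send_corked(int sock, const void *hdr, size_t hdr_len,
                           const struct iovec *iov, int iovcnt)
{
    int one = 1, zero = 0;
    ssize_t ret;

    setsockopt(sock, IPPROTO_TCP, TCP_CORK, &one, sizeof(one));

    ret = send(sock, hdr, hdr_len, 0);
    if (ret == (ssize_t)hdr_len) {
        ret = writev(sock, iov, iovcnt);
    }

    /* Uncorking flushes whatever has been queued so far. */
    setsockopt(sock, IPPROTO_TCP, TCP_CORK, &zero, sizeof(zero));
    return ret;
}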
@ -328,9 +412,12 @@ static int nbd_establish_connection(BlockDriverState *bs)
size_t blocksize;
if (s->is_unix) {
sock = unix_socket_outgoing(s->host_spec);
sock = unix_socket_outgoing(qemu_opt_get(s->socket_opts, "path"));
} else {
sock = tcp_socket_outgoing_spec(s->host_spec);
sock = tcp_socket_outgoing_opts(s->socket_opts);
if (sock >= 0) {
socket_set_nodelay(sock);
}
}
/* Failed to establish connection */
@ -350,7 +437,7 @@ static int nbd_establish_connection(BlockDriverState *bs)
/* Now that we're connected, set the socket to be non-blocking and
* kick the reply mechanism. */
socket_set_nonblock(sock);
qemu_set_nonblock(sock);
qemu_aio_set_fd_handler(sock, nbd_reply_ready, NULL,
nbd_have_request, s);
@ -376,7 +463,7 @@ static void nbd_teardown_connection(BlockDriverState *bs)
closesocket(s->sock);
}
static int nbd_open(BlockDriverState *bs, const char* filename, int flags)
static int nbd_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVNBDState *s = bs->opaque;
int result;
@ -385,7 +472,7 @@ static int nbd_open(BlockDriverState *bs, const char* filename, int flags)
qemu_co_mutex_init(&s->free_sema);
/* Pop the config into our state object. Exit if invalid. */
result = nbd_config(s, filename);
result = nbd_config(s, options);
if (result != 0) {
return result;
}
@ -531,7 +618,7 @@ static int nbd_co_discard(BlockDriverState *bs, int64_t sector_num,
return 0;
}
request.type = NBD_CMD_TRIM;
request.from = sector_num * 512;;
request.from = sector_num * 512;
request.len = nb_sectors * 512;
nbd_coroutine_start(s, &request);
@ -549,7 +636,7 @@ static void nbd_close(BlockDriverState *bs)
{
BDRVNBDState *s = bs->opaque;
g_free(s->export_name);
g_free(s->host_spec);
qemu_opts_del(s->socket_opts);
nbd_teardown_connection(bs);
}
@ -565,6 +652,7 @@ static BlockDriver bdrv_nbd = {
.format_name = "nbd",
.protocol_name = "nbd",
.instance_size = sizeof(BDRVNBDState),
.bdrv_parse_filename = nbd_parse_filename,
.bdrv_file_open = nbd_open,
.bdrv_co_readv = nbd_co_readv,
.bdrv_co_writev = nbd_co_writev,
@ -578,6 +666,7 @@ static BlockDriver bdrv_nbd_tcp = {
.format_name = "nbd",
.protocol_name = "nbd+tcp",
.instance_size = sizeof(BDRVNBDState),
.bdrv_parse_filename = nbd_parse_filename,
.bdrv_file_open = nbd_open,
.bdrv_co_readv = nbd_co_readv,
.bdrv_co_writev = nbd_co_writev,
@ -591,6 +680,7 @@ static BlockDriver bdrv_nbd_unix = {
.format_name = "nbd",
.protocol_name = "nbd+unix",
.instance_size = sizeof(BDRVNBDState),
.bdrv_parse_filename = nbd_parse_filename,
.bdrv_file_open = nbd_open,
.bdrv_co_readv = nbd_co_readv,
.bdrv_co_writev = nbd_co_writev,

470
block/qapi.c Normal file
View File

@ -0,0 +1,470 @@
/*
* Block layer qmp and info dump related functions
*
* Copyright (c) 2003-2008 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "block/qapi.h"
#include "block/block_int.h"
#include "qmp-commands.h"
/*
* Returns 0 on success, with *p_list either set to describe snapshot
* information, or NULL because there are no snapshots. Returns -errno on
* error, with *p_list untouched.
*/
int bdrv_query_snapshot_info_list(BlockDriverState *bs,
SnapshotInfoList **p_list,
Error **errp)
{
int i, sn_count;
QEMUSnapshotInfo *sn_tab = NULL;
SnapshotInfoList *info_list, *cur_item = NULL, *head = NULL;
SnapshotInfo *info;
sn_count = bdrv_snapshot_list(bs, &sn_tab);
if (sn_count < 0) {
const char *dev = bdrv_get_device_name(bs);
switch (sn_count) {
case -ENOMEDIUM:
error_setg(errp, "Device '%s' is not inserted", dev);
break;
case -ENOTSUP:
error_setg(errp,
"Device '%s' does not support internal snapshots",
dev);
break;
default:
error_setg_errno(errp, -sn_count,
"Can't list snapshots of device '%s'", dev);
break;
}
return sn_count;
}
for (i = 0; i < sn_count; i++) {
info = g_new0(SnapshotInfo, 1);
info->id = g_strdup(sn_tab[i].id_str);
info->name = g_strdup(sn_tab[i].name);
info->vm_state_size = sn_tab[i].vm_state_size;
info->date_sec = sn_tab[i].date_sec;
info->date_nsec = sn_tab[i].date_nsec;
info->vm_clock_sec = sn_tab[i].vm_clock_nsec / 1000000000;
info->vm_clock_nsec = sn_tab[i].vm_clock_nsec % 1000000000;
info_list = g_new0(SnapshotInfoList, 1);
info_list->value = info;
/* XXX: waiting for the qapi to support qemu-queue.h types */
if (!cur_item) {
head = cur_item = info_list;
} else {
cur_item->next = info_list;
cur_item = info_list;
}
}
g_free(sn_tab);
*p_list = head;
return 0;
}
/**
* bdrv_query_image_info:
* @bs: block device to examine
* @p_info: location to store image information
* @errp: location to store error information
*
* Store "flat" image information in @p_info.
*
* "Flat" means it does *not* query backing image information,
* i.e. (*p_info)->has_backing_image will be set to false and
* (*p_info)->backing_image to NULL even when the image does in fact have
* a backing image.
*
* @p_info will be set only on success. On error, store error in @errp.
*/
void bdrv_query_image_info(BlockDriverState *bs,
ImageInfo **p_info,
Error **errp)
{
uint64_t total_sectors;
const char *backing_filename;
char backing_filename2[1024];
BlockDriverInfo bdi;
int ret;
Error *err = NULL;
ImageInfo *info = g_new0(ImageInfo, 1);
bdrv_get_geometry(bs, &total_sectors);
info->filename = g_strdup(bs->filename);
info->format = g_strdup(bdrv_get_format_name(bs));
info->virtual_size = total_sectors * 512;
info->actual_size = bdrv_get_allocated_file_size(bs);
info->has_actual_size = info->actual_size >= 0;
if (bdrv_is_encrypted(bs)) {
info->encrypted = true;
info->has_encrypted = true;
}
if (bdrv_get_info(bs, &bdi) >= 0) {
if (bdi.cluster_size != 0) {
info->cluster_size = bdi.cluster_size;
info->has_cluster_size = true;
}
info->dirty_flag = bdi.is_dirty;
info->has_dirty_flag = true;
}
backing_filename = bs->backing_file;
if (backing_filename[0] != '\0') {
info->backing_filename = g_strdup(backing_filename);
info->has_backing_filename = true;
bdrv_get_full_backing_filename(bs, backing_filename2,
sizeof(backing_filename2));
if (strcmp(backing_filename, backing_filename2) != 0) {
info->full_backing_filename =
g_strdup(backing_filename2);
info->has_full_backing_filename = true;
}
if (bs->backing_format[0]) {
info->backing_filename_format = g_strdup(bs->backing_format);
info->has_backing_filename_format = true;
}
}
ret = bdrv_query_snapshot_info_list(bs, &info->snapshots, &err);
switch (ret) {
case 0:
if (info->snapshots) {
info->has_snapshots = true;
}
break;
/* recoverable error */
case -ENOMEDIUM:
case -ENOTSUP:
error_free(err);
break;
default:
error_propagate(errp, err);
qapi_free_ImageInfo(info);
return;
}
*p_info = info;
}
/* @p_info will be set only on success. */
void bdrv_query_info(BlockDriverState *bs,
BlockInfo **p_info,
Error **errp)
{
BlockInfo *info = g_malloc0(sizeof(*info));
BlockDriverState *bs0;
ImageInfo **p_image_info;
Error *local_err = NULL;
info->device = g_strdup(bs->device_name);
info->type = g_strdup("unknown");
info->locked = bdrv_dev_is_medium_locked(bs);
info->removable = bdrv_dev_has_removable_media(bs);
if (bdrv_dev_has_removable_media(bs)) {
info->has_tray_open = true;
info->tray_open = bdrv_dev_is_tray_open(bs);
}
if (bdrv_iostatus_is_enabled(bs)) {
info->has_io_status = true;
info->io_status = bs->iostatus;
}
if (bs->dirty_bitmap) {
info->has_dirty = true;
info->dirty = g_malloc0(sizeof(*info->dirty));
info->dirty->count = bdrv_get_dirty_count(bs) * BDRV_SECTOR_SIZE;
info->dirty->granularity =
((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bs->dirty_bitmap));
}
if (bs->drv) {
info->has_inserted = true;
info->inserted = g_malloc0(sizeof(*info->inserted));
info->inserted->file = g_strdup(bs->filename);
info->inserted->ro = bs->read_only;
info->inserted->drv = g_strdup(bs->drv->format_name);
info->inserted->encrypted = bs->encrypted;
info->inserted->encryption_key_missing = bdrv_key_required(bs);
if (bs->backing_file[0]) {
info->inserted->has_backing_file = true;
info->inserted->backing_file = g_strdup(bs->backing_file);
}
info->inserted->backing_file_depth = bdrv_get_backing_file_depth(bs);
if (bs->io_limits_enabled) {
info->inserted->bps =
bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
info->inserted->bps_rd =
bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
info->inserted->bps_wr =
bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
info->inserted->iops =
bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
info->inserted->iops_rd =
bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
info->inserted->iops_wr =
bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
}
bs0 = bs;
p_image_info = &info->inserted->image;
while (1) {
bdrv_query_image_info(bs0, p_image_info, &local_err);
if (error_is_set(&local_err)) {
error_propagate(errp, local_err);
goto err;
}
if (bs0->drv && bs0->backing_hd) {
bs0 = bs0->backing_hd;
(*p_image_info)->has_backing_image = true;
p_image_info = &((*p_image_info)->backing_image);
} else {
break;
}
}
}
*p_info = info;
return;
err:
qapi_free_BlockInfo(info);
}
BlockStats *bdrv_query_stats(const BlockDriverState *bs)
{
BlockStats *s;
s = g_malloc0(sizeof(*s));
if (bs->device_name[0]) {
s->has_device = true;
s->device = g_strdup(bs->device_name);
}
s->stats = g_malloc0(sizeof(*s->stats));
s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];
if (bs->file) {
s->has_parent = true;
s->parent = bdrv_query_stats(bs->file);
}
return s;
}
BlockInfoList *qmp_query_block(Error **errp)
{
BlockInfoList *head = NULL, **p_next = &head;
BlockDriverState *bs = NULL;
Error *local_err = NULL;
while ((bs = bdrv_next(bs))) {
BlockInfoList *info = g_malloc0(sizeof(*info));
bdrv_query_info(bs, &info->value, &local_err);
if (error_is_set(&local_err)) {
error_propagate(errp, local_err);
goto err;
}
*p_next = info;
p_next = &info->next;
}
return head;
err:
qapi_free_BlockInfoList(head);
return NULL;
}
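Both qmp_query_block() above and qmp_query_blockstats() below build their result list with a pointer-to-pointer tail (p_next), which appends each element in O(1) without a special case for the empty list. A self-contained illustration of the idiom with a trivial integer list (the Node type is made up for the example):

#include <stdio.h>
#include <stdlib.h>

typedef struct Node {
    int value;
    struct Node *next;
} Node;

int main(void)
{
    Node *head = NULL, **p_next = &head;
    int i;

    for (i = 0; i < 5; i++) {
        Node *n = calloc(1, sizeof(*n));
        n->value = i * 10;
        *p_next = n;         /* link the new tail element... */
        p_next = &n->next;   /* ...and remember where the next one goes */
    }

    for (Node *n = head; n; n = n->next) {
        printf("%d\n", n->value);
    }
    /* (freeing omitted for brevity) */
    return 0;
}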
BlockStatsList *qmp_query_blockstats(Error **errp)
{
BlockStatsList *head = NULL, **p_next = &head;
BlockDriverState *bs = NULL;
while ((bs = bdrv_next(bs))) {
BlockStatsList *info = g_malloc0(sizeof(*info));
info->value = bdrv_query_stats(bs);
*p_next = info;
p_next = &info->next;
}
return head;
}
#define NB_SUFFIXES 4
static char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
static const char suffixes[NB_SUFFIXES] = "KMGT";
int64_t base;
int i;
if (size <= 999) {
snprintf(buf, buf_size, "%" PRId64, size);
} else {
base = 1024;
for (i = 0; i < NB_SUFFIXES; i++) {
if (size < (10 * base)) {
snprintf(buf, buf_size, "%0.1f%c",
(double)size / base,
suffixes[i]);
break;
} else if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) {
snprintf(buf, buf_size, "%" PRId64 "%c",
((size + (base >> 1)) / base),
suffixes[i]);
break;
}
base = base * 1024;
}
}
return buf;
}
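Assuming the helper above is visible in the same file, a few sample calls show its two output forms: one decimal place while the value is below ten units of the current suffix, and a rounded integer otherwise; values up to 999 bytes are printed bare. This is only a usage fragment, not a complete program.

char buf[16];

get_human_readable_size(buf, sizeof(buf), 512);            /* "512"  */
get_human_readable_size(buf, sizeof(buf), 1536);           /* "1.5K" */
get_human_readable_size(buf, sizeof(buf), 10 * 1048576);   /* "10M"  */
get_human_readable_size(buf, sizeof(buf), 3LL << 30);      /* "3.0G" */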
void bdrv_snapshot_dump(fprintf_function func_fprintf, void *f,
QEMUSnapshotInfo *sn)
{
char buf1[128], date_buf[128], clock_buf[128];
struct tm tm;
time_t ti;
int64_t secs;
if (!sn) {
func_fprintf(f,
"%-10s%-20s%7s%20s%15s",
"ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
} else {
ti = sn->date_sec;
localtime_r(&ti, &tm);
strftime(date_buf, sizeof(date_buf),
"%Y-%m-%d %H:%M:%S", &tm);
secs = sn->vm_clock_nsec / 1000000000;
snprintf(clock_buf, sizeof(clock_buf),
"%02d:%02d:%02d.%03d",
(int)(secs / 3600),
(int)((secs / 60) % 60),
(int)(secs % 60),
(int)((sn->vm_clock_nsec / 1000000) % 1000));
func_fprintf(f,
"%-10s%-20s%7s%20s%15s",
sn->id_str, sn->name,
get_human_readable_size(buf1, sizeof(buf1),
sn->vm_state_size),
date_buf,
clock_buf);
}
}
void bdrv_image_info_dump(fprintf_function func_fprintf, void *f,
ImageInfo *info)
{
char size_buf[128], dsize_buf[128];
if (!info->has_actual_size) {
snprintf(dsize_buf, sizeof(dsize_buf), "unavailable");
} else {
get_human_readable_size(dsize_buf, sizeof(dsize_buf),
info->actual_size);
}
get_human_readable_size(size_buf, sizeof(size_buf), info->virtual_size);
func_fprintf(f,
"image: %s\n"
"file format: %s\n"
"virtual size: %s (%" PRId64 " bytes)\n"
"disk size: %s\n",
info->filename, info->format, size_buf,
info->virtual_size,
dsize_buf);
if (info->has_encrypted && info->encrypted) {
func_fprintf(f, "encrypted: yes\n");
}
if (info->has_cluster_size) {
func_fprintf(f, "cluster_size: %" PRId64 "\n",
info->cluster_size);
}
if (info->has_dirty_flag && info->dirty_flag) {
func_fprintf(f, "cleanly shut down: no\n");
}
if (info->has_backing_filename) {
func_fprintf(f, "backing file: %s", info->backing_filename);
if (info->has_full_backing_filename) {
func_fprintf(f, " (actual path: %s)", info->full_backing_filename);
}
func_fprintf(f, "\n");
if (info->has_backing_filename_format) {
func_fprintf(f, "backing file format: %s\n",
info->backing_filename_format);
}
}
if (info->has_snapshots) {
SnapshotInfoList *elem;
func_fprintf(f, "Snapshot list:\n");
bdrv_snapshot_dump(func_fprintf, f, NULL);
func_fprintf(f, "\n");
/* Ideally bdrv_snapshot_dump() would operate on SnapshotInfoList but
* we convert to the block layer's native QEMUSnapshotInfo for now.
*/
for (elem = info->snapshots; elem; elem = elem->next) {
QEMUSnapshotInfo sn = {
.vm_state_size = elem->value->vm_state_size,
.date_sec = elem->value->date_sec,
.date_nsec = elem->value->date_nsec,
.vm_clock_nsec = elem->value->vm_clock_sec * 1000000000ULL +
elem->value->vm_clock_nsec,
};
pstrcpy(sn.id_str, sizeof(sn.id_str), elem->value->id);
pstrcpy(sn.name, sizeof(sn.name), elem->value->name);
bdrv_snapshot_dump(func_fprintf, f, &sn);
func_fprintf(f, "\n");
}
}
}

View File

@ -25,7 +25,7 @@
#include "block/block_int.h"
#include "qemu/module.h"
#include <zlib.h>
#include "block/aes.h"
#include "qemu/aes.h"
#include "migration/migration.h"
/**************************************************************/
@ -679,7 +679,7 @@ static int qcow_create(const char *filename, QEMUOptionParameter *options)
return ret;
}
ret = bdrv_file_open(&qcow_bs, filename, BDRV_O_RDWR);
ret = bdrv_file_open(&qcow_bs, filename, NULL, BDRV_O_RDWR);
if (ret < 0) {
return ret;
}
@ -787,8 +787,21 @@ static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
uint8_t *out_buf;
uint64_t cluster_offset;
if (nb_sectors != s->cluster_sectors)
return -EINVAL;
if (nb_sectors != s->cluster_sectors) {
ret = -EINVAL;
/* Zero-pad last write if image size is not cluster aligned */
if (sector_num + nb_sectors == bs->total_sectors &&
nb_sectors < s->cluster_sectors) {
uint8_t *pad_buf = qemu_blockalign(bs, s->cluster_size);
memset(pad_buf, 0, s->cluster_size);
memcpy(pad_buf, buf, nb_sectors * BDRV_SECTOR_SIZE);
ret = qcow_write_compressed(bs, sector_num,
pad_buf, s->cluster_sectors);
qemu_vfree(pad_buf);
}
return ret;
}
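The new branch recurses once with a zero-padded copy of the final short cluster instead of rejecting it, so images whose size is not a multiple of the cluster size can still be written compressed to the end. The padding step itself is allocate, clear, copy; here is a standalone sketch with plain malloc in place of qemu_blockalign (cluster and sector sizes are example values).

#include <stdlib.h>
#include <string.h>

#define SECTOR_SIZE   512
#define CLUSTER_SIZE  65536

/* Return a CLUSTER_SIZE buffer holding nb_sectors of data followed by
 * zero padding, or NULL on allocation failure. The caller frees it. */
static unsigned char *pad_to_cluster(const unsigned char *buf, int nb_sectors)
{
    unsigned char *pad_buf = malloc(CLUSTER_SIZE);

    if (!pad_buf) {
        return NULL;
    }
    memset(pad_buf, 0, CLUSTER_SIZE);
    memcpy(pad_buf, buf, (size_t)nb_sectors * SECTOR_SIZE);
    return pad_buf;
}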
out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
@ -879,6 +892,7 @@ static BlockDriver bdrv_qcow = {
.bdrv_close = qcow_close,
.bdrv_reopen_prepare = qcow_reopen_prepare,
.bdrv_create = qcow_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_co_readv = qcow_co_readv,
.bdrv_co_writev = qcow_co_writev,

View File

@ -29,12 +29,13 @@
#include "block/qcow2.h"
#include "trace.h"
int qcow2_grow_l1_table(BlockDriverState *bs, int min_size, bool exact_size)
int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
bool exact_size)
{
BDRVQcowState *s = bs->opaque;
int new_l1_size, new_l1_size2, ret, i;
int new_l1_size2, ret, i;
uint64_t *new_l1_table;
int64_t new_l1_table_offset;
int64_t new_l1_table_offset, new_l1_size;
uint8_t data[12];
if (min_size <= s->l1_size)
@ -53,8 +54,13 @@ int qcow2_grow_l1_table(BlockDriverState *bs, int min_size, bool exact_size)
}
}
if (new_l1_size > INT_MAX) {
return -EFBIG;
}
#ifdef DEBUG_ALLOC2
fprintf(stderr, "grow l1_table from %d to %d\n", s->l1_size, new_l1_size);
fprintf(stderr, "grow l1_table from %d to %" PRId64 "\n",
s->l1_size, new_l1_size);
#endif
new_l1_size2 = sizeof(uint64_t) * new_l1_size;
@ -92,14 +98,16 @@ int qcow2_grow_l1_table(BlockDriverState *bs, int min_size, bool exact_size)
goto fail;
}
g_free(s->l1_table);
qcow2_free_clusters(bs, s->l1_table_offset, s->l1_size * sizeof(uint64_t));
qcow2_free_clusters(bs, s->l1_table_offset, s->l1_size * sizeof(uint64_t),
QCOW2_DISCARD_OTHER);
s->l1_table_offset = new_l1_table_offset;
s->l1_table = new_l1_table;
s->l1_size = new_l1_size;
return 0;
fail:
g_free(new_l1_table);
qcow2_free_clusters(bs, new_l1_table_offset, new_l1_size2);
qcow2_free_clusters(bs, new_l1_table_offset, new_l1_size2,
QCOW2_DISCARD_OTHER);
return ret;
}
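Widening new_l1_size to int64_t and rejecting anything above INT_MAX keeps the later sizeof(uint64_t) * new_l1_size byte count from overflowing an int. A standalone sketch of a growth-and-check helper follows; the doubling policy is only an example, the driver's actual growth factor may differ.

#include <errno.h>
#include <limits.h>
#include <stdint.h>

/* Grow an L1 entry count until it covers min_entries, refusing any
 * result that no longer fits in an int so the byte-size computation
 * sizeof(uint64_t) * entries cannot overflow. */
static int64_t grow_l1_entries(int64_t cur_entries, uint64_t min_entries)
{
    int64_t new_entries = cur_entries ? cur_entries : 1;

    while ((uint64_t)new_entries < min_entries) {
        new_entries *= 2;
        if (new_entries > INT_MAX) {
            return -EFBIG;
        }
    }
    return new_entries > INT_MAX ? -EFBIG : new_entries;
}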
@ -391,8 +399,8 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
int *num, uint64_t *cluster_offset)
{
BDRVQcowState *s = bs->opaque;
unsigned int l1_index, l2_index;
uint64_t l2_offset, *l2_table;
unsigned int l2_index;
uint64_t l1_index, l2_offset, *l2_table;
int l1_bits, c;
unsigned int index_in_cluster, nb_clusters;
uint64_t nb_available, nb_needed;
@ -507,8 +515,8 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
int *new_l2_index)
{
BDRVQcowState *s = bs->opaque;
unsigned int l1_index, l2_index;
uint64_t l2_offset;
unsigned int l2_index;
uint64_t l1_index, l2_offset;
uint64_t *l2_table = NULL;
int ret;
@ -522,6 +530,7 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
}
}
assert(l1_index < s->l1_size);
l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
/* seek the l2 table of the given l2 offset */
@ -541,7 +550,8 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
/* Then decrease the refcount of the old table */
if (l2_offset) {
qcow2_free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t));
qcow2_free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t),
QCOW2_DISCARD_OTHER);
}
}
@ -708,10 +718,14 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
/*
* If this was a COW, we need to decrease the refcount of the old cluster.
* Also flush bs->file to get the right order for L2 and refcount update.
*
* Don't discard clusters that reach a refcount of 0 (e.g. compressed
* clusters); the next write will reuse them anyway.
*/
if (j != 0) {
for (i = 0; i < j; i++) {
qcow2_free_any_clusters(bs, be64_to_cpu(old_cluster[i]), 1);
qcow2_free_any_clusters(bs, be64_to_cpu(old_cluster[i]), 1,
QCOW2_DISCARD_NEVER);
}
}
@ -759,31 +773,50 @@ out:
* Check if there already is an AIO write request in flight which allocates
* the same cluster. In this case we need to wait until the previous
* request has completed and updated the L2 table accordingly.
*
* Returns:
* 0 if there was no dependency. *cur_bytes indicates the number of
* bytes from guest_offset that can be read before the next
* dependency must be processed (or the request is complete)
*
* -EAGAIN if we had to wait for another request, previously gathered
* information on cluster allocation may be invalid now. The caller
* must start over anyway, so consider *cur_bytes undefined.
*/
static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset,
unsigned int *nb_clusters)
uint64_t *cur_bytes, QCowL2Meta **m)
{
BDRVQcowState *s = bs->opaque;
QCowL2Meta *old_alloc;
uint64_t bytes = *cur_bytes;
QLIST_FOREACH(old_alloc, &s->cluster_allocs, next_in_flight) {
uint64_t start = guest_offset >> s->cluster_bits;
uint64_t end = start + *nb_clusters;
uint64_t old_start = old_alloc->offset >> s->cluster_bits;
uint64_t old_end = old_start + old_alloc->nb_clusters;
uint64_t start = guest_offset;
uint64_t end = start + bytes;
uint64_t old_start = l2meta_cow_start(old_alloc);
uint64_t old_end = l2meta_cow_end(old_alloc);
if (end < old_start || start > old_end) {
if (end <= old_start || start >= old_end) {
/* No intersection */
} else {
if (start < old_start) {
/* Stop at the start of a running allocation */
*nb_clusters = old_start - start;
bytes = old_start - start;
} else {
*nb_clusters = 0;
bytes = 0;
}
if (*nb_clusters == 0) {
/* Stop if an l2meta already exists. After yielding, it wouldn't
* be valid any more, so we'd have to clean up the old L2Metas
* and deal with requests depending on them before starting to
* gather new ones. Not worth the trouble. */
if (bytes == 0 && *m) {
*cur_bytes = 0;
return 0;
}
if (bytes == 0) {
/* Wait for the dependency to complete. We need to recheck
* the free/allocated clusters when we continue. */
qemu_co_mutex_unlock(&s->lock);
@ -794,13 +827,117 @@ static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset,
}
}
if (!*nb_clusters) {
abort();
}
/* Make sure that existing clusters and new allocations are only used up to
* the next dependency if we shortened the request above */
*cur_bytes = bytes;
return 0;
}
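handle_dependencies() reduces every question about overlapping in-flight allocations to byte ranges: either the ranges are disjoint, or the current request is shortened so it stops where the running allocation starts (and a shortened length of 0 means the caller has to wait). A standalone sketch of that interval check on plain uint64_t ranges, without QCowL2Meta:

#include <inttypes.h>
#include <stdio.h>

/* Shorten the request [start, start + *bytes) so it does not run into
 * the in-flight range [old_start, old_end). Returns 1 if the caller
 * has to wait, i.e. the request begins inside the in-flight range. */
static int clip_against_inflight(uint64_t start, uint64_t *bytes,
                                 uint64_t old_start, uint64_t old_end)
{
    uint64_t end = start + *bytes;

    if (end <= old_start || start >= old_end) {
        return 0;                    /* no intersection, nothing to do */
    }
    if (start < old_start) {
        *bytes = old_start - start;  /* stop right before the old request */
        return 0;
    }
    *bytes = 0;                      /* fully blocked: caller must wait */
    return 1;
}

int main(void)
{
    uint64_t bytes = 0x10000;
    int wait = clip_against_inflight(0x8000, &bytes, 0xc000, 0x20000);

    /* prints "wait=0 bytes=0x4000" */
    printf("wait=%d bytes=0x%" PRIx64 "\n", wait, bytes);
    return 0;
}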
/*
* Checks how many already-allocated clusters not requiring copy-on-write
* there are at the given guest_offset (up to *bytes). If *host_offset is
* not zero, only physically contiguous clusters beginning at this host
* offset are counted.
*
* Note that guest_offset may not be cluster aligned. In this case, the
* returned *host_offset points to the exact byte referenced by guest_offset
* and therefore isn't cluster aligned either.
*
* Returns:
* 0: if no allocated clusters are available at the given offset.
* *bytes is normally unchanged. It is set to 0 if the cluster
* is allocated and doesn't need COW, but doesn't have the right
* physical offset.
*
* 1: if allocated clusters that don't require a COW are available at
* the requested offset. *bytes may have decreased and describes
* the length of the area that can be written to.
*
* -errno: in error cases
*/
static int handle_copied(BlockDriverState *bs, uint64_t guest_offset,
uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m)
{
BDRVQcowState *s = bs->opaque;
int l2_index;
uint64_t cluster_offset;
uint64_t *l2_table;
unsigned int nb_clusters;
unsigned int keep_clusters;
int ret, pret;
trace_qcow2_handle_copied(qemu_coroutine_self(), guest_offset, *host_offset,
*bytes);
assert(*host_offset == 0 || offset_into_cluster(s, guest_offset)
== offset_into_cluster(s, *host_offset));
/*
* Calculate the number of clusters to look for. We stop at L2 table
* boundaries to keep things simple.
*/
nb_clusters =
size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes);
l2_index = offset_to_l2_index(s, guest_offset);
nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
/* Find L2 entry for the first involved cluster */
ret = get_cluster_table(bs, guest_offset, &l2_table, &l2_index);
if (ret < 0) {
return ret;
}
cluster_offset = be64_to_cpu(l2_table[l2_index]);
/* Check how many clusters are already allocated and don't need COW */
if (qcow2_get_cluster_type(cluster_offset) == QCOW2_CLUSTER_NORMAL
&& (cluster_offset & QCOW_OFLAG_COPIED))
{
/* If a specific host_offset is required, check it */
bool offset_matches =
(cluster_offset & L2E_OFFSET_MASK) == *host_offset;
if (*host_offset != 0 && !offset_matches) {
*bytes = 0;
ret = 0;
goto out;
}
/* We keep all QCOW_OFLAG_COPIED clusters */
keep_clusters =
count_contiguous_clusters(nb_clusters, s->cluster_size,
&l2_table[l2_index], 0,
QCOW_OFLAG_COPIED | QCOW_OFLAG_ZERO);
assert(keep_clusters <= nb_clusters);
*bytes = MIN(*bytes,
keep_clusters * s->cluster_size
- offset_into_cluster(s, guest_offset));
ret = 1;
} else {
ret = 0;
}
/* Cleanup */
out:
pret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
if (pret < 0) {
return pret;
}
/* Only return a host offset if we actually made progress. Otherwise we
* would make requirements for handle_alloc() that it can't fulfill */
if (ret) {
*host_offset = (cluster_offset & L2E_OFFSET_MASK)
+ offset_into_cluster(s, guest_offset);
}
return ret;
}
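handle_copied() keeps a run of clusters only while consecutive L2 entries stay physically contiguous and keep the same flag bits (QCOW_OFLAG_COPIED and QCOW_OFLAG_ZERO act as the stop flags in the call above). The sketch below is a simplified stand-in for count_contiguous_clusters(), not the driver's exact function; the mask value and sizes are illustrative.

#include <stdint.h>

#define CLUSTER_SIZE 65536
#define OFFSET_MASK  0x00fffffffffffe00ULL   /* example host-offset mask */

/* Count entries whose host offsets advance cluster by cluster and whose
 * selected flag bits match the first entry. */
static int count_contiguous(const uint64_t *entries, int nb,
                            uint64_t flag_mask)
{
    uint64_t first = entries[0] & (OFFSET_MASK | flag_mask);
    int i;

    for (i = 0; i < nb; i++) {
        uint64_t expect = first + (uint64_t)i * CLUSTER_SIZE;

        if ((entries[i] & (OFFSET_MASK | flag_mask)) != expect) {
            break;
        }
    }
    return i;
}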
/*
* Allocates new clusters for the given guest_offset.
*
@ -824,16 +961,10 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
uint64_t *host_offset, unsigned int *nb_clusters)
{
BDRVQcowState *s = bs->opaque;
int ret;
trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset,
*host_offset, *nb_clusters);
ret = handle_dependencies(bs, guest_offset, nb_clusters);
if (ret < 0) {
return ret;
}
/* Allocate new clusters */
trace_qcow2_cluster_alloc_phys(qemu_coroutine_self());
if (*host_offset == 0) {
@ -845,7 +976,7 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
*host_offset = cluster_offset;
return 0;
} else {
ret = qcow2_alloc_clusters_at(bs, *host_offset, *nb_clusters);
int ret = qcow2_alloc_clusters_at(bs, *host_offset, *nb_clusters);
if (ret < 0) {
return ret;
}
@ -854,6 +985,151 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
}
}
/*
* Allocates new clusters for an area that either is yet unallocated or needs a
* copy on write. If *host_offset is non-zero, clusters are only allocated if
* the new allocation can match the specified host offset.
*
* Note that guest_offset may not be cluster aligned. In this case, the
* returned *host_offset points to the exact byte referenced by guest_offset
* and therefore isn't cluster aligned either.
*
* Returns:
* 0: if no clusters could be allocated. *bytes is set to 0,
* *host_offset is left unchanged.
*
* 1: if new clusters were allocated. *bytes may be decreased if the
* new allocation doesn't cover all of the requested area.
* *host_offset is updated to contain the host offset of the first
* newly allocated cluster.
*
* -errno: in error cases
*/
static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m)
{
BDRVQcowState *s = bs->opaque;
int l2_index;
uint64_t *l2_table;
uint64_t entry;
unsigned int nb_clusters;
int ret;
uint64_t alloc_cluster_offset;
trace_qcow2_handle_alloc(qemu_coroutine_self(), guest_offset, *host_offset,
*bytes);
assert(*bytes > 0);
/*
* Calculate the number of clusters to look for. We stop at L2 table
* boundaries to keep things simple.
*/
nb_clusters =
size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes);
l2_index = offset_to_l2_index(s, guest_offset);
nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
/* Find L2 entry for the first involved cluster */
ret = get_cluster_table(bs, guest_offset, &l2_table, &l2_index);
if (ret < 0) {
return ret;
}
entry = be64_to_cpu(l2_table[l2_index]);
/* For the moment, overwrite compressed clusters one by one */
if (entry & QCOW_OFLAG_COMPRESSED) {
nb_clusters = 1;
} else {
nb_clusters = count_cow_clusters(s, nb_clusters, l2_table, l2_index);
}
/* This function is only called when there were no non-COW clusters, so if
* we can't find any unallocated or COW clusters either, something is
* wrong with our code. */
assert(nb_clusters > 0);
ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
if (ret < 0) {
return ret;
}
/* Allocate, if necessary at a given offset in the image file */
alloc_cluster_offset = start_of_cluster(s, *host_offset);
ret = do_alloc_cluster_offset(bs, guest_offset, &alloc_cluster_offset,
&nb_clusters);
if (ret < 0) {
goto fail;
}
/* Can't extend contiguous allocation */
if (nb_clusters == 0) {
*bytes = 0;
return 0;
}
/*
* Save info needed for meta data update.
*
* requested_sectors: Number of sectors from the start of the first
* newly allocated cluster to the end of the (possibly shortened
* before) write request.
*
* avail_sectors: Number of sectors from the start of the first
* newly allocated to the end of the last newly allocated cluster.
*
* nb_sectors: The number of sectors from the start of the first
* newly allocated cluster to the end of the area that the write
* request actually writes to (excluding COW at the end)
*/
int requested_sectors =
(*bytes + offset_into_cluster(s, guest_offset))
>> BDRV_SECTOR_BITS;
int avail_sectors = nb_clusters
<< (s->cluster_bits - BDRV_SECTOR_BITS);
int alloc_n_start = offset_into_cluster(s, guest_offset)
>> BDRV_SECTOR_BITS;
int nb_sectors = MIN(requested_sectors, avail_sectors);
QCowL2Meta *old_m = *m;
*m = g_malloc0(sizeof(**m));
**m = (QCowL2Meta) {
.next = old_m,
.alloc_offset = alloc_cluster_offset,
.offset = start_of_cluster(s, guest_offset),
.nb_clusters = nb_clusters,
.nb_available = nb_sectors,
.cow_start = {
.offset = 0,
.nb_sectors = alloc_n_start,
},
.cow_end = {
.offset = nb_sectors * BDRV_SECTOR_SIZE,
.nb_sectors = avail_sectors - nb_sectors,
},
};
qemu_co_queue_init(&(*m)->dependent_requests);
QLIST_INSERT_HEAD(&s->cluster_allocs, *m, next_in_flight);
*host_offset = alloc_cluster_offset + offset_into_cluster(s, guest_offset);
*bytes = MIN(*bytes, (nb_sectors * BDRV_SECTOR_SIZE)
- offset_into_cluster(s, guest_offset));
assert(*bytes != 0);
return 1;
fail:
if (*m && (*m)->nb_clusters > 0) {
QLIST_REMOVE(*m, next_in_flight);
}
return ret;
}
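The QCowL2Meta filled in above records two copy-on-write regions: the head of the first newly allocated cluster that the guest write does not cover, and the tail after the end of the write. The sector arithmetic is compact; here is a standalone version that just computes and prints the two regions for one example request (cluster size and request values are made up, and all offsets are kept in sectors for simplicity).

#include <stdint.h>
#include <stdio.h>

#define SECTOR_BITS   9
#define CLUSTER_BITS  16                       /* 64 KiB clusters */
#define CLUSTER_SIZE  (1 << CLUSTER_BITS)

int main(void)
{
    uint64_t guest_offset = 0x12345600;        /* not cluster aligned */
    uint64_t bytes        = 0x30000;           /* length of the write */
    int nb_clusters       = 4;                 /* newly allocated clusters */

    int requested_sectors = (bytes + (guest_offset & (CLUSTER_SIZE - 1)))
                            >> SECTOR_BITS;
    int avail_sectors     = nb_clusters << (CLUSTER_BITS - SECTOR_BITS);
    int alloc_n_start     = (guest_offset & (CLUSTER_SIZE - 1)) >> SECTOR_BITS;
    int nb_sectors        = requested_sectors < avail_sectors
                            ? requested_sectors : avail_sectors;

    /* COW before the write and COW after it, in sectors */
    printf("cow_start: offset 0, %d sectors\n", alloc_n_start);
    printf("cow_end:   offset %d, %d sectors\n",
           nb_sectors, avail_sectors - nb_sectors);
    return 0;
}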
/*
* alloc_cluster_offset
*
@ -877,161 +1153,110 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta **m)
{
BDRVQcowState *s = bs->opaque;
int l2_index, ret, sectors;
uint64_t *l2_table;
unsigned int nb_clusters, keep_clusters;
uint64_t start, remaining;
uint64_t cluster_offset;
uint64_t cur_bytes;
int ret;
trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset,
n_start, n_end);
/* Find L2 entry for the first involved cluster */
assert(n_start * BDRV_SECTOR_SIZE == offset_into_cluster(s, offset));
offset = start_of_cluster(s, offset);
again:
ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
if (ret < 0) {
return ret;
}
start = offset + (n_start << BDRV_SECTOR_BITS);
remaining = (n_end - n_start) << BDRV_SECTOR_BITS;
cluster_offset = 0;
*host_offset = 0;
cur_bytes = 0;
*m = NULL;
/*
* Calculate the number of clusters to look for. We stop at L2 table
* boundaries to keep things simple.
*/
nb_clusters = MIN(size_to_clusters(s, n_end << BDRV_SECTOR_BITS),
s->l2_size - l2_index);
while (true) {
cluster_offset = be64_to_cpu(l2_table[l2_index]);
/*
* Check how many clusters are already allocated and don't need COW, and how
* many need a new allocation.
*/
if (qcow2_get_cluster_type(cluster_offset) == QCOW2_CLUSTER_NORMAL
&& (cluster_offset & QCOW_OFLAG_COPIED))
{
/* We keep all QCOW_OFLAG_COPIED clusters */
keep_clusters =
count_contiguous_clusters(nb_clusters, s->cluster_size,
&l2_table[l2_index], 0,
QCOW_OFLAG_COPIED | QCOW_OFLAG_ZERO);
assert(keep_clusters <= nb_clusters);
nb_clusters -= keep_clusters;
} else {
keep_clusters = 0;
cluster_offset = 0;
}
if (nb_clusters > 0) {
/* For the moment, overwrite compressed clusters one by one */
uint64_t entry = be64_to_cpu(l2_table[l2_index + keep_clusters]);
if (entry & QCOW_OFLAG_COMPRESSED) {
nb_clusters = 1;
} else {
nb_clusters = count_cow_clusters(s, nb_clusters, l2_table,
l2_index + keep_clusters);
}
}
cluster_offset &= L2E_OFFSET_MASK;
/*
* The L2 table isn't used any more after this. As long as the cache works
* synchronously, it's important to release it before calling
* do_alloc_cluster_offset, which may yield if we need to wait for another
* request to complete. If we still had the reference, we could use up the
* whole cache with sleeping requests.
*/
ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
if (ret < 0) {
return ret;
}
/* If there is something left to allocate, do that now */
if (nb_clusters > 0) {
uint64_t alloc_offset;
uint64_t alloc_cluster_offset;
uint64_t keep_bytes = keep_clusters * s->cluster_size;
/* Calculate start and size of allocation */
alloc_offset = offset + keep_bytes;
if (keep_clusters == 0) {
alloc_cluster_offset = 0;
} else {
alloc_cluster_offset = cluster_offset + keep_bytes;
if (!*host_offset) {
*host_offset = start_of_cluster(s, cluster_offset);
}
/* Allocate, if necessary at a given offset in the image file */
ret = do_alloc_cluster_offset(bs, alloc_offset, &alloc_cluster_offset,
&nb_clusters);
assert(remaining >= cur_bytes);
start += cur_bytes;
remaining -= cur_bytes;
cluster_offset += cur_bytes;
if (remaining == 0) {
break;
}
cur_bytes = remaining;
/*
* Now start gathering as many contiguous clusters as possible:
*
* 1. Check for overlaps with in-flight allocations
*
* a) Overlap not in the first cluster -> shorten this request and
* let the caller handle the rest in its next loop iteration.
*
* b) Real overlaps of two requests. Yield and restart the search
* for contiguous clusters (the situation could have changed
* while we were sleeping)
*
* c) TODO: Request starts in the same cluster as the in-flight
* allocation ends. Shorten the COW of the in-flight allocation,
* set cluster_offset to write to the same cluster and set up
* the right synchronisation between the in-flight request and
* the new one.
*/
ret = handle_dependencies(bs, start, &cur_bytes, m);
if (ret == -EAGAIN) {
/* Currently handle_dependencies() doesn't yield if we already had
* an allocation. If it did, we would have to clean up the L2Meta
* structs before starting over. */
assert(*m == NULL);
goto again;
} else if (ret < 0) {
goto fail;
return ret;
} else if (cur_bytes == 0) {
break;
} else {
/* handle_dependencies() may have decreased cur_bytes (shortened
* the allocations below) so that the next dependency is processed
* correctly during the next loop iteration. */
}
/* save info needed for meta data update */
if (nb_clusters > 0) {
/*
* requested_sectors: Number of sectors from the start of the first
* newly allocated cluster to the end of the (possibly shortened
* before) write request.
*
* avail_sectors: Number of sectors from the start of the first
* newly allocated to the end of the last newly allocated cluster.
*
* nb_sectors: The number of sectors from the start of the first
* newly allocated cluster to the end of the aread that the write
* request actually writes to (excluding COW at the end)
*/
int requested_sectors = n_end - keep_clusters * s->cluster_sectors;
int avail_sectors = nb_clusters
<< (s->cluster_bits - BDRV_SECTOR_BITS);
int alloc_n_start = keep_clusters == 0 ? n_start : 0;
int nb_sectors = MIN(requested_sectors, avail_sectors);
/*
* 2. Count contiguous COPIED clusters.
*/
ret = handle_copied(bs, start, &cluster_offset, &cur_bytes, m);
if (ret < 0) {
return ret;
} else if (ret) {
continue;
} else if (cur_bytes == 0) {
break;
}
if (keep_clusters == 0) {
cluster_offset = alloc_cluster_offset;
}
*m = g_malloc0(sizeof(**m));
**m = (QCowL2Meta) {
.alloc_offset = alloc_cluster_offset,
.offset = alloc_offset & ~(s->cluster_size - 1),
.nb_clusters = nb_clusters,
.nb_available = nb_sectors,
.cow_start = {
.offset = 0,
.nb_sectors = alloc_n_start,
},
.cow_end = {
.offset = nb_sectors * BDRV_SECTOR_SIZE,
.nb_sectors = avail_sectors - nb_sectors,
},
};
qemu_co_queue_init(&(*m)->dependent_requests);
QLIST_INSERT_HEAD(&s->cluster_allocs, *m, next_in_flight);
/*
* 3. If the request still hasn't completed, allocate new clusters,
* considering any cluster_offset of steps 1c or 2.
*/
ret = handle_alloc(bs, start, &cluster_offset, &cur_bytes, m);
if (ret < 0) {
return ret;
} else if (ret) {
continue;
} else {
assert(cur_bytes == 0);
break;
}
}
/* Some cleanup work */
sectors = (keep_clusters + nb_clusters) << (s->cluster_bits - 9);
if (sectors > n_end) {
sectors = n_end;
}
assert(sectors > n_start);
*num = sectors - n_start;
*host_offset = cluster_offset;
*num = (n_end - n_start) - (remaining >> BDRV_SECTOR_BITS);
assert(*num > 0);
assert(*host_offset != 0);
return 0;
fail:
if (*m && (*m)->nb_clusters > 0) {
QLIST_REMOVE(*m, next_in_flight);
}
return ret;
}
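After the rewrite, qcow2_alloc_cluster_offset() is a small driver loop: clip the request against in-flight allocations, then consume already-COPIED clusters, then allocate, and repeat until the byte count is exhausted. Below is a compact standalone skeleton of that control flow; the three step functions are stubs whose behaviour is invented purely so the loop terminates, not the driver's logic.

#include <inttypes.h>
#include <stdio.h>

/* Stubs standing in for handle_dependencies/handle_copied/handle_alloc.
 * Each consumes at most one 0x10000-byte "cluster" per call. */
static int step_dependencies(uint64_t start, uint64_t *bytes)
{
    (void)start; (void)bytes;
    return 0;
}
static int step_copied(uint64_t start, uint64_t *bytes)
{
    if (start < 0x20000) {                  /* pretend the first two    */
        *bytes = *bytes < 0x10000 ? *bytes : 0x10000;  /* clusters exist */
        return 1;
    }
    return 0;
}
static int step_alloc(uint64_t start, uint64_t *bytes)
{
    (void)start;
    *bytes = *bytes < 0x10000 ? *bytes : 0x10000;
    return 1;
}

int main(void)
{
    uint64_t start = 0, remaining = 0x48000, cur_bytes = 0;

    while (remaining > 0) {
        start += cur_bytes;
        remaining -= cur_bytes;
        cur_bytes = remaining;

        if (step_dependencies(start, &cur_bytes) || cur_bytes == 0) {
            break;                /* the driver would wait or restart here */
        }
        if (step_copied(start, &cur_bytes)) {
            printf("reuse 0x%" PRIx64 " bytes at 0x%" PRIx64 "\n",
                   cur_bytes, start);
            continue;
        }
        if (step_alloc(start, &cur_bytes)) {
            printf("alloc 0x%" PRIx64 " bytes at 0x%" PRIx64 "\n",
                   cur_bytes, start);
        }
    }
    return 0;
}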
static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
@ -1121,7 +1346,7 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
l2_table[l2_index + i] = cpu_to_be64(0);
/* Then decrease the refcount */
qcow2_free_any_clusters(bs, old_offset, 1);
qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST);
}
ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
@ -1152,18 +1377,25 @@ int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
nb_clusters = size_to_clusters(s, end_offset - offset);
s->cache_discards = true;
/* Each L2 table is handled by its own loop iteration */
while (nb_clusters > 0) {
ret = discard_single_l2(bs, offset, nb_clusters);
if (ret < 0) {
return ret;
goto fail;
}
nb_clusters -= ret;
offset += (ret * s->cluster_size);
}
return 0;
ret = 0;
fail:
s->cache_discards = false;
qcow2_process_discards(bs, ret);
return ret;
}
/*
@ -1197,7 +1429,7 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset,
qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
if (old_offset & QCOW_OFLAG_COMPRESSED) {
l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
qcow2_free_any_clusters(bs, old_offset, 1);
qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST);
} else {
l2_table[l2_index + i] |= cpu_to_be64(QCOW_OFLAG_ZERO);
}
@ -1225,15 +1457,22 @@ int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors)
/* Each L2 table is handled by its own loop iteration */
nb_clusters = size_to_clusters(s, nb_sectors << BDRV_SECTOR_BITS);
s->cache_discards = true;
while (nb_clusters > 0) {
ret = zero_single_l2(bs, offset, nb_clusters);
if (ret < 0) {
return ret;
goto fail;
}
nb_clusters -= ret;
offset += (ret * s->cluster_size);
}
return 0;
ret = 0;
fail:
s->cache_discards = false;
qcow2_process_discards(bs, ret);
return ret;
}

View File

@ -29,7 +29,7 @@
static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size);
static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
int64_t offset, int64_t length,
int addend);
int addend, enum qcow2_discard_type type);
/*********************************************************/
@ -235,7 +235,8 @@ static int alloc_refcount_block(BlockDriverState *bs,
} else {
/* Described somewhere else. This can recurse at most twice before we
* arrive at a block that describes itself. */
ret = update_refcount(bs, new_block, s->cluster_size, 1);
ret = update_refcount(bs, new_block, s->cluster_size, 1,
QCOW2_DISCARD_NEVER);
if (ret < 0) {
goto fail_block;
}
@ -399,7 +400,8 @@ static int alloc_refcount_block(BlockDriverState *bs,
/* Free old table. Remember, we must not change free_cluster_index */
uint64_t old_free_cluster_index = s->free_cluster_index;
qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t));
qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t),
QCOW2_DISCARD_OTHER);
s->free_cluster_index = old_free_cluster_index;
ret = load_refcount_block(bs, new_block, (void**) refcount_block);
@ -418,9 +420,77 @@ fail_block:
return ret;
}
void qcow2_process_discards(BlockDriverState *bs, int ret)
{
BDRVQcowState *s = bs->opaque;
Qcow2DiscardRegion *d, *next;
QTAILQ_FOREACH_SAFE(d, &s->discards, next, next) {
QTAILQ_REMOVE(&s->discards, d, next);
/* Discard is optional, ignore the return value */
if (ret >= 0) {
bdrv_discard(bs->file,
d->offset >> BDRV_SECTOR_BITS,
d->bytes >> BDRV_SECTOR_BITS);
}
g_free(d);
}
}
static void update_refcount_discard(BlockDriverState *bs,
uint64_t offset, uint64_t length)
{
BDRVQcowState *s = bs->opaque;
Qcow2DiscardRegion *d, *p, *next;
QTAILQ_FOREACH(d, &s->discards, next) {
uint64_t new_start = MIN(offset, d->offset);
uint64_t new_end = MAX(offset + length, d->offset + d->bytes);
if (new_end - new_start <= length + d->bytes) {
/* There can't be any overlap; areas ending up here have no
* references any more and therefore shouldn't get freed another
* time. */
assert(d->bytes + length == new_end - new_start);
d->offset = new_start;
d->bytes = new_end - new_start;
goto found;
}
}
d = g_malloc(sizeof(*d));
*d = (Qcow2DiscardRegion) {
.bs = bs,
.offset = offset,
.bytes = length,
};
QTAILQ_INSERT_TAIL(&s->discards, d, next);
found:
/* Merge discard requests if they are adjacent now */
QTAILQ_FOREACH_SAFE(p, &s->discards, next, next) {
if (p == d
|| p->offset > d->offset + d->bytes
|| d->offset > p->offset + p->bytes)
{
continue;
}
/* Still no overlap possible */
assert(p->offset == d->offset + d->bytes
|| d->offset == p->offset + p->bytes);
QTAILQ_REMOVE(&s->discards, p, next);
d->offset = MIN(d->offset, p->offset);
d->bytes += p->bytes;
}
}
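update_refcount_discard() keeps the pending-discard list short by growing an existing region whenever the new range touches it, and then folding in any neighbour that has become adjacent. A standalone sketch of the same merge test on plain [offset, offset + bytes) pairs, with a single Region in place of the QTAILQ:

#include <inttypes.h>
#include <stdio.h>

typedef struct {
    uint64_t offset;
    uint64_t bytes;
} Region;

/* Try to merge [offset, offset + bytes) into an existing region that it
 * touches; mirrors the "new_end - new_start <= length + d->bytes" test. */
static int merge_region(Region *r, uint64_t offset, uint64_t bytes)
{
    uint64_t new_start = offset < r->offset ? offset : r->offset;
    uint64_t end_a     = offset + bytes;
    uint64_t end_b     = r->offset + r->bytes;
    uint64_t new_end   = end_a > end_b ? end_a : end_b;

    if (new_end - new_start <= bytes + r->bytes) {
        /* the ranges touch or are adjacent: no gap is covered twice */
        r->offset = new_start;
        r->bytes  = new_end - new_start;
        return 1;
    }
    return 0;
}

int main(void)
{
    Region r = { 0x10000, 0x10000 };                   /* [0x10000, 0x20000) */

    printf("merged=%d\n", merge_region(&r, 0x20000, 0x8000));  /* adjacent: 1 */
    printf("region=[0x%" PRIx64 ", 0x%" PRIx64 ")\n",
           r.offset, r.offset + r.bytes);              /* [0x10000, 0x28000) */
    printf("merged=%d\n", merge_region(&r, 0x40000, 0x1000));  /* gap: 0 */
    return 0;
}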
/* XXX: cache several refcount block clusters ? */
static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
int64_t offset, int64_t length, int addend)
int64_t offset, int64_t length, int addend, enum qcow2_discard_type type)
{
BDRVQcowState *s = bs->opaque;
int64_t start, last, cluster_offset;
@ -486,10 +556,18 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
s->free_cluster_index = cluster_index;
}
refcount_block[block_index] = cpu_to_be16(refcount);
if (refcount == 0 && s->discard_passthrough[type]) {
update_refcount_discard(bs, cluster_offset, s->cluster_size);
}
}
ret = 0;
fail:
if (!s->cache_discards) {
qcow2_process_discards(bs, ret);
}
/* Write last changed block to disk */
if (refcount_block) {
int wret;
@ -506,7 +584,8 @@ fail:
*/
if (ret < 0) {
int dummy;
dummy = update_refcount(bs, offset, cluster_offset - offset, -addend);
dummy = update_refcount(bs, offset, cluster_offset - offset, -addend,
QCOW2_DISCARD_NEVER);
(void)dummy;
}
@ -522,12 +601,14 @@ fail:
*/
static int update_cluster_refcount(BlockDriverState *bs,
int64_t cluster_index,
int addend)
int addend,
enum qcow2_discard_type type)
{
BDRVQcowState *s = bs->opaque;
int ret;
ret = update_refcount(bs, cluster_index << s->cluster_bits, 1, addend);
ret = update_refcount(bs, cluster_index << s->cluster_bits, 1, addend,
type);
if (ret < 0) {
return ret;
}
@ -579,7 +660,7 @@ int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size)
return offset;
}
ret = update_refcount(bs, offset, size, 1);
ret = update_refcount(bs, offset, size, 1, QCOW2_DISCARD_NEVER);
if (ret < 0) {
return ret;
}
@ -611,7 +692,8 @@ int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
old_free_cluster_index = s->free_cluster_index;
s->free_cluster_index = cluster_index + i;
ret = update_refcount(bs, offset, i << s->cluster_bits, 1);
ret = update_refcount(bs, offset, i << s->cluster_bits, 1,
QCOW2_DISCARD_NEVER);
if (ret < 0) {
return ret;
}
@ -649,7 +731,8 @@ int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
if (free_in_cluster == 0)
s->free_byte_offset = 0;
if ((offset & (s->cluster_size - 1)) != 0)
update_cluster_refcount(bs, offset >> s->cluster_bits, 1);
update_cluster_refcount(bs, offset >> s->cluster_bits, 1,
QCOW2_DISCARD_NEVER);
} else {
offset = qcow2_alloc_clusters(bs, s->cluster_size);
if (offset < 0) {
@ -659,7 +742,8 @@ int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
if ((cluster_offset + s->cluster_size) == offset) {
/* we are lucky: contiguous data */
offset = s->free_byte_offset;
update_cluster_refcount(bs, offset >> s->cluster_bits, 1);
update_cluster_refcount(bs, offset >> s->cluster_bits, 1,
QCOW2_DISCARD_NEVER);
s->free_byte_offset += size;
} else {
s->free_byte_offset = offset;
@ -676,12 +760,13 @@ int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
}
void qcow2_free_clusters(BlockDriverState *bs,
int64_t offset, int64_t size)
int64_t offset, int64_t size,
enum qcow2_discard_type type)
{
int ret;
BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_FREE);
ret = update_refcount(bs, offset, size, -1);
ret = update_refcount(bs, offset, size, -1, type);
if (ret < 0) {
fprintf(stderr, "qcow2_free_clusters failed: %s\n", strerror(-ret));
/* TODO Remember the clusters to free them later and avoid leaking */
@ -692,8 +777,8 @@ void qcow2_free_clusters(BlockDriverState *bs,
* Free a cluster using its L2 entry (handles clusters of all types, e.g.
* normal cluster, compressed cluster, etc.)
*/
void qcow2_free_any_clusters(BlockDriverState *bs,
uint64_t l2_entry, int nb_clusters)
void qcow2_free_any_clusters(BlockDriverState *bs, uint64_t l2_entry,
int nb_clusters, enum qcow2_discard_type type)
{
BDRVQcowState *s = bs->opaque;
@ -705,12 +790,12 @@ void qcow2_free_any_clusters(BlockDriverState *bs,
s->csize_mask) + 1;
qcow2_free_clusters(bs,
(l2_entry & s->cluster_offset_mask) & ~511,
nb_csectors * 512);
nb_csectors * 512, type);
}
break;
case QCOW2_CLUSTER_NORMAL:
qcow2_free_clusters(bs, l2_entry & L2E_OFFSET_MASK,
nb_clusters << s->cluster_bits);
nb_clusters << s->cluster_bits, type);
break;
case QCOW2_CLUSTER_UNALLOCATED:
case QCOW2_CLUSTER_ZERO:
@ -741,16 +826,17 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
l1_table = NULL;
l1_size2 = l1_size * sizeof(uint64_t);
s->cache_discards = true;
/* WARNING: qcow2_snapshot_goto relies on this function not using the
* l1_table_offset when it is the current s->l1_table_offset! Be careful
* when changing this! */
if (l1_table_offset != s->l1_table_offset) {
l1_table = g_malloc0(align_offset(l1_size2, 512));
l1_allocated = 1;
if (bdrv_pread(bs->file, l1_table_offset,
l1_table, l1_size2) != l1_size2)
{
ret = -EIO;
ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2);
if (ret < 0) {
goto fail;
}
@ -786,7 +872,8 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
int ret;
ret = update_refcount(bs,
(offset & s->cluster_offset_mask) & ~511,
nb_csectors * 512, addend);
nb_csectors * 512, addend,
QCOW2_DISCARD_SNAPSHOT);
if (ret < 0) {
goto fail;
}
@ -796,13 +883,14 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
} else {
uint64_t cluster_index = (offset & L2E_OFFSET_MASK) >> s->cluster_bits;
if (addend != 0) {
refcount = update_cluster_refcount(bs, cluster_index, addend);
refcount = update_cluster_refcount(bs, cluster_index, addend,
QCOW2_DISCARD_SNAPSHOT);
} else {
refcount = get_refcount(bs, cluster_index);
}
if (refcount < 0) {
ret = -EIO;
ret = refcount;
goto fail;
}
}
@ -828,12 +916,13 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
if (addend != 0) {
refcount = update_cluster_refcount(bs, l2_offset >> s->cluster_bits, addend);
refcount = update_cluster_refcount(bs, l2_offset >> s->cluster_bits, addend,
QCOW2_DISCARD_SNAPSHOT);
} else {
refcount = get_refcount(bs, l2_offset >> s->cluster_bits);
}
if (refcount < 0) {
ret = -EIO;
ret = refcount;
goto fail;
} else if (refcount == 1) {
l2_offset |= QCOW_OFLAG_COPIED;
@ -851,15 +940,20 @@ fail:
qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
}
s->cache_discards = false;
qcow2_process_discards(bs, ret);
/* Update L1 only if it isn't deleted anyway (addend = -1) */
if (addend >= 0 && l1_modified) {
for(i = 0; i < l1_size; i++)
if (ret == 0 && addend >= 0 && l1_modified) {
for (i = 0; i < l1_size; i++) {
cpu_to_be64s(&l1_table[i]);
if (bdrv_pwrite_sync(bs->file, l1_table_offset, l1_table,
l1_size2) < 0)
goto fail;
for(i = 0; i < l1_size; i++)
}
ret = bdrv_pwrite_sync(bs->file, l1_table_offset, l1_table, l1_size2);
for (i = 0; i < l1_size; i++) {
be64_to_cpus(&l1_table[i]);
}
}
if (l1_allocated)
g_free(l1_table);
@ -1152,9 +1246,11 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
size = bdrv_getlength(bs->file);
nb_clusters = size_to_clusters(s, size);
res->bfi.total_clusters = nb_clusters;
refcount_table = g_malloc0(nb_clusters * sizeof(uint16_t));
res->bfi.total_clusters =
size_to_clusters(s, bs->total_sectors * BDRV_SECTOR_SIZE);
/* header */
inc_refcounts(bs, res, refcount_table, nb_clusters,
0, s->cluster_size);
@ -1250,7 +1346,8 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
if (num_fixed) {
ret = update_refcount(bs, i << s->cluster_bits, 1,
refcount2 - refcount1);
refcount2 - refcount1,
QCOW2_DISCARD_ALWAYS);
if (ret >= 0) {
(*num_fixed)++;
continue;

View File

@ -262,7 +262,8 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
}
/* free the old snapshot table */
qcow2_free_clusters(bs, s->snapshots_offset, s->snapshots_size);
qcow2_free_clusters(bs, s->snapshots_offset, s->snapshots_size,
QCOW2_DISCARD_SNAPSHOT);
s->snapshots_offset = snapshots_offset;
s->snapshots_size = snapshots_size;
return 0;
@ -569,7 +570,8 @@ int qcow2_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
if (ret < 0) {
return ret;
}
qcow2_free_clusters(bs, sn.l1_table_offset, sn.l1_size * sizeof(uint64_t));
qcow2_free_clusters(bs, sn.l1_table_offset, sn.l1_size * sizeof(uint64_t),
QCOW2_DISCARD_SNAPSHOT);
/* must update the copied flag on the current cluster offsets */
ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);

View File

@ -25,10 +25,11 @@
#include "block/block_int.h"
#include "qemu/module.h"
#include <zlib.h>
#include "block/aes.h"
#include "qemu/aes.h"
#include "block/qcow2.h"
#include "qemu/error-report.h"
#include "qapi/qmp/qerror.h"
#include "qapi/qmp/qbool.h"
#include "trace.h"
/*
@ -290,10 +291,26 @@ static QemuOptsList qcow2_runtime_opts = {
.head = QTAILQ_HEAD_INITIALIZER(qcow2_runtime_opts.head),
.desc = {
{
.name = "lazy_refcounts",
.name = QCOW2_OPT_LAZY_REFCOUNTS,
.type = QEMU_OPT_BOOL,
.help = "Postpone refcount updates",
},
{
.name = QCOW2_OPT_DISCARD_REQUEST,
.type = QEMU_OPT_BOOL,
.help = "Pass guest discard requests to the layer below",
},
{
.name = QCOW2_OPT_DISCARD_SNAPSHOT,
.type = QEMU_OPT_BOOL,
.help = "Generate discard requests when snapshot related space "
"is freed",
},
{
.name = QCOW2_OPT_DISCARD_OTHER,
.type = QEMU_OPT_BOOL,
.help = "Generate discard requests when other clusters are freed",
},
{ /* end of list */ }
},
};
@ -306,6 +323,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags)
QemuOpts *opts;
Error *local_err = NULL;
uint64_t ext_end;
uint64_t l1_vm_state_index;
ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
if (ret < 0) {
@ -423,7 +441,14 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags)
/* read the level 1 table */
s->l1_size = header.l1_size;
s->l1_vm_state_index = size_to_l1(s, header.size);
l1_vm_state_index = size_to_l1(s, header.size);
if (l1_vm_state_index > INT_MAX) {
ret = -EFBIG;
goto fail;
}
s->l1_vm_state_index = l1_vm_state_index;
/* the L1 table must contain at least enough entries to put
header.size bytes */
if (s->l1_size < s->l1_vm_state_index) {
@ -461,6 +486,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags)
}
QLIST_INIT(&s->cluster_allocs);
QTAILQ_INIT(&s->discards);
/* read qcow2 extensions */
if (qcow2_read_extensions(bs, header.header_length, ext_end, NULL)) {
@ -520,9 +546,19 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags)
goto fail;
}
s->use_lazy_refcounts = qemu_opt_get_bool(opts, "lazy_refcounts",
s->use_lazy_refcounts = qemu_opt_get_bool(opts, QCOW2_OPT_LAZY_REFCOUNTS,
(s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS));
s->discard_passthrough[QCOW2_DISCARD_NEVER] = false;
s->discard_passthrough[QCOW2_DISCARD_ALWAYS] = true;
s->discard_passthrough[QCOW2_DISCARD_REQUEST] =
qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_REQUEST,
flags & BDRV_O_UNMAP);
s->discard_passthrough[QCOW2_DISCARD_SNAPSHOT] =
qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_SNAPSHOT, true);
s->discard_passthrough[QCOW2_DISCARD_OTHER] =
qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false);
qemu_opts_del(opts);
if (s->use_lazy_refcounts && s->qcow_version < 3) {
@ -857,7 +893,9 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
goto fail;
}
if (l2meta != NULL) {
while (l2meta != NULL) {
QCowL2Meta *next;
ret = qcow2_alloc_cluster_link_l2(bs, l2meta);
if (ret < 0) {
goto fail;
@ -868,12 +906,11 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
QLIST_REMOVE(l2meta, next_in_flight);
}
qemu_co_mutex_unlock(&s->lock);
qemu_co_queue_restart_all(&l2meta->dependent_requests);
qemu_co_mutex_lock(&s->lock);
next = l2meta->next;
g_free(l2meta);
l2meta = NULL;
l2meta = next;
}
remaining_sectors -= cur_nr_sectors;
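Because handle_alloc() can now queue several allocations for a single write (each new QCowL2Meta is linked in front of the previous one via .next), the write path walks that chain, commits each element, wakes its dependent requests and frees it. A minimal standalone sketch of walking and freeing such a chain; commit_one() stands in for qcow2_alloc_cluster_link_l2() plus the queue restart.

#include <stdio.h>
#include <stdlib.h>

typedef struct Meta {
    int id;
    struct Meta *next;
} Meta;

static int commit_one(Meta *m)
{
    printf("committing meta %d\n", m->id);
    return 0;
}

int main(void)
{
    /* Build a short chain the way handle_alloc() does: newest in front. */
    Meta *chain = NULL;
    for (int i = 0; i < 3; i++) {
        Meta *m = calloc(1, sizeof(*m));
        m->id = i;
        m->next = chain;
        chain = m;
    }

    while (chain != NULL) {
        Meta *next;

        if (commit_one(chain) < 0) {
            break;               /* the driver jumps to its fail path here */
        }
        next = chain->next;
        free(chain);
        chain = next;
    }
    return 0;
}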
@ -886,12 +923,17 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
fail:
qemu_co_mutex_unlock(&s->lock);
if (l2meta != NULL) {
while (l2meta != NULL) {
QCowL2Meta *next;
if (l2meta->nb_clusters != 0) {
QLIST_REMOVE(l2meta, next_in_flight);
}
qemu_co_queue_restart_all(&l2meta->dependent_requests);
next = l2meta->next;
g_free(l2meta);
l2meta = next;
}
qemu_iovec_destroy(&hd_qiov);
@ -930,6 +972,7 @@ static void qcow2_invalidate_cache(BlockDriverState *bs)
AES_KEY aes_encrypt_key;
AES_KEY aes_decrypt_key;
uint32_t crypt_method = 0;
QDict *options;
/*
* Backing files are read-only which makes all of their metadata immutable,
@ -944,8 +987,14 @@ static void qcow2_invalidate_cache(BlockDriverState *bs)
qcow2_close(bs);
options = qdict_new();
qdict_put(options, QCOW2_OPT_LAZY_REFCOUNTS,
qbool_from_int(s->use_lazy_refcounts));
memset(s, 0, sizeof(BDRVQcowState));
qcow2_open(bs, NULL, flags);
qcow2_open(bs, options, flags);
QDECREF(options);
if (crypt_method) {
s->crypt_method = crypt_method;
@ -1174,7 +1223,8 @@ static int preallocate(BlockDriverState *bs)
ret = qcow2_alloc_cluster_link_l2(bs, meta);
if (ret < 0) {
qcow2_free_any_clusters(bs, meta->alloc_offset, meta->nb_clusters);
qcow2_free_any_clusters(bs, meta->alloc_offset, meta->nb_clusters,
QCOW2_DISCARD_NEVER);
return ret;
}
@ -1246,7 +1296,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
return ret;
}
ret = bdrv_file_open(&bs, filename, BDRV_O_RDWR);
ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR);
if (ret < 0) {
return ret;
}
@ -1466,7 +1516,8 @@ static coroutine_fn int qcow2_co_discard(BlockDriverState *bs,
static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
{
BDRVQcowState *s = bs->opaque;
int ret, new_l1_size;
int64_t new_l1_size;
int ret;
if (offset & 511) {
error_report("The new size must be a multiple of 512");
@ -1523,8 +1574,21 @@ static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
return 0;
}
if (nb_sectors != s->cluster_sectors)
return -EINVAL;
if (nb_sectors != s->cluster_sectors) {
ret = -EINVAL;
/* Zero-pad last write if image size is not cluster aligned */
if (sector_num + nb_sectors == bs->total_sectors &&
nb_sectors < s->cluster_sectors) {
uint8_t *pad_buf = qemu_blockalign(bs, s->cluster_size);
memset(pad_buf, 0, s->cluster_size);
memcpy(pad_buf, buf, nb_sectors * BDRV_SECTOR_SIZE);
ret = qcow2_write_compressed(bs, sector_num,
pad_buf, s->cluster_sectors);
qemu_vfree(pad_buf);
}
return ret;
}
out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
@ -1638,8 +1702,8 @@ static void dump_refcounts(BlockDriverState *bs)
}
#endif
static int qcow2_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
int64_t pos, int size)
static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
int64_t pos)
{
BDRVQcowState *s = bs->opaque;
int growable = bs->growable;
@ -1647,7 +1711,7 @@ static int qcow2_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE);
bs->growable = 1;
ret = bdrv_pwrite(bs, qcow2_vm_state_offset(s) + pos, buf, size);
ret = bdrv_pwritev(bs, qcow2_vm_state_offset(s) + pos, qiov);
bs->growable = growable;
return ret;
@ -1721,6 +1785,7 @@ static BlockDriver bdrv_qcow2 = {
.bdrv_close = qcow2_close,
.bdrv_reopen_prepare = qcow2_reopen_prepare,
.bdrv_create = qcow2_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_co_is_allocated = qcow2_co_is_allocated,
.bdrv_set_key = qcow2_set_key,
.bdrv_make_empty = qcow2_make_empty,

View File

@ -25,7 +25,7 @@
#ifndef BLOCK_QCOW2_H
#define BLOCK_QCOW2_H
#include "block/aes.h"
#include "qemu/aes.h"
#include "block/coroutine.h"
//#define DEBUG_ALLOC
@ -58,6 +58,12 @@
#define DEFAULT_CLUSTER_SIZE 65536
#define QCOW2_OPT_LAZY_REFCOUNTS "lazy-refcounts"
#define QCOW2_OPT_DISCARD_REQUEST "pass-discard-request"
#define QCOW2_OPT_DISCARD_SNAPSHOT "pass-discard-snapshot"
#define QCOW2_OPT_DISCARD_OTHER "pass-discard-other"
typedef struct QCowHeader {
uint32_t magic;
uint32_t version;
@ -126,12 +132,28 @@ enum {
QCOW2_COMPAT_FEAT_MASK = QCOW2_COMPAT_LAZY_REFCOUNTS,
};
enum qcow2_discard_type {
QCOW2_DISCARD_NEVER = 0,
QCOW2_DISCARD_ALWAYS,
QCOW2_DISCARD_REQUEST,
QCOW2_DISCARD_SNAPSHOT,
QCOW2_DISCARD_OTHER,
QCOW2_DISCARD_MAX
};
typedef struct Qcow2Feature {
uint8_t type;
uint8_t bit;
char name[46];
} QEMU_PACKED Qcow2Feature;
typedef struct Qcow2DiscardRegion {
BlockDriverState *bs;
uint64_t offset;
uint64_t bytes;
QTAILQ_ENTRY(Qcow2DiscardRegion) next;
} Qcow2DiscardRegion;
typedef struct BDRVQcowState {
int cluster_bits;
int cluster_size;
@ -175,6 +197,8 @@ typedef struct BDRVQcowState {
int qcow_version;
bool use_lazy_refcounts;
bool discard_passthrough[QCOW2_DISCARD_MAX];
uint64_t incompatible_features;
uint64_t compatible_features;
uint64_t autoclear_features;
@ -182,6 +206,8 @@ typedef struct BDRVQcowState {
size_t unknown_header_fields_size;
void* unknown_header_fields;
QLIST_HEAD(, Qcow2UnknownHeaderExtension) unknown_header_ext;
QTAILQ_HEAD (, Qcow2DiscardRegion) discards;
bool cache_discards;
} BDRVQcowState;
/* XXX: use std qcow open function ? */
@ -247,6 +273,9 @@ typedef struct QCowL2Meta
*/
Qcow2COWRegion cow_end;
/** Pointer to next L2Meta of the same write request */
struct QCowL2Meta *next;
QLIST_ENTRY(QCowL2Meta) next_in_flight;
} QCowL2Meta;
@ -263,17 +292,32 @@ enum {
#define REFT_OFFSET_MASK 0xffffffffffffff00ULL
static inline int64_t start_of_cluster(BDRVQcowState *s, int64_t offset)
{
return offset & ~(s->cluster_size - 1);
}
static inline int64_t offset_into_cluster(BDRVQcowState *s, int64_t offset)
{
return offset & (s->cluster_size - 1);
}
static inline int size_to_clusters(BDRVQcowState *s, int64_t size)
{
return (size + (s->cluster_size - 1)) >> s->cluster_bits;
}
static inline int size_to_l1(BDRVQcowState *s, int64_t size)
static inline int64_t size_to_l1(BDRVQcowState *s, int64_t size)
{
int shift = s->cluster_bits + s->l2_bits;
return (size + (1ULL << shift) - 1) >> shift;
}
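For a sense of the scale behind this helper: with the default 64 KiB clusters (DEFAULT_CLUSTER_SIZE above) the shift is cluster_bits + l2_bits; assuming the usual qcow2 value of l2_bits = cluster_bits - 3 (8-byte L2 entries), that is 29, so each L1 entry covers 512 MiB of virtual disk. A minimal standalone sketch of the same calculation follows; the l2_bits assumption is the sketch's, not something stated in this diff.
#include <stdint.h>
#include <stdio.h>
int main(void)
{
    int cluster_bits = 16;                     /* 64 KiB clusters */
    int l2_bits = cluster_bits - 3;            /* assumed: 8-byte L2 entries */
    int shift = cluster_bits + l2_bits;        /* 29, i.e. 512 MiB per L1 entry */
    int64_t size = 10LL * 1024 * 1024 * 1024;  /* 10 GiB image */
    int64_t l1_entries = (size + (1LL << shift) - 1) >> shift;
    printf("%lld L1 entries\n", (long long)l1_entries);   /* prints "20 L1 entries" */
    return 0;
}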
static inline int offset_to_l2_index(BDRVQcowState *s, int64_t offset)
{
return (offset >> s->cluster_bits) & (s->l2_size - 1);
}
static inline int64_t align_offset(int64_t offset, int n)
{
offset = (offset + n - 1) & ~(n - 1);
@ -299,6 +343,17 @@ static inline bool qcow2_need_accurate_refcounts(BDRVQcowState *s)
return !(s->incompatible_features & QCOW2_INCOMPAT_DIRTY);
}
static inline uint64_t l2meta_cow_start(QCowL2Meta *m)
{
return m->offset + m->cow_start.offset;
}
static inline uint64_t l2meta_cow_end(QCowL2Meta *m)
{
return m->offset + m->cow_end.offset
+ (m->cow_end.nb_sectors << BDRV_SECTOR_BITS);
}
// FIXME Need qcow2_ prefix to global functions
/* qcow2.c functions */
@ -317,9 +372,10 @@ int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
int nb_clusters);
int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size);
void qcow2_free_clusters(BlockDriverState *bs,
int64_t offset, int64_t size);
void qcow2_free_any_clusters(BlockDriverState *bs,
uint64_t cluster_offset, int nb_clusters);
int64_t offset, int64_t size,
enum qcow2_discard_type type);
void qcow2_free_any_clusters(BlockDriverState *bs, uint64_t l2_entry,
int nb_clusters, enum qcow2_discard_type type);
int qcow2_update_snapshot_refcount(BlockDriverState *bs,
int64_t l1_table_offset, int l1_size, int addend);
@ -327,8 +383,11 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
BdrvCheckMode fix);
void qcow2_process_discards(BlockDriverState *bs, int ret);
/* qcow2-cluster.c functions */
int qcow2_grow_l1_table(BlockDriverState *bs, int min_size, bool exact_size);
int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
bool exact_size);
void qcow2_l2_cache_reset(BlockDriverState *bs);
int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,

View File

@ -558,7 +558,7 @@ static int qed_create(const char *filename, uint32_t cluster_size,
return ret;
}
ret = bdrv_file_open(&bs, filename, BDRV_O_RDWR | BDRV_O_CACHE_WB);
ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR | BDRV_O_CACHE_WB);
if (ret < 0) {
return ret;
}
@ -1574,6 +1574,7 @@ static BlockDriver bdrv_qed = {
.bdrv_close = bdrv_qed_close,
.bdrv_reopen_prepare = bdrv_qed_reopen_prepare,
.bdrv_create = bdrv_qed_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_co_is_allocated = bdrv_qed_co_is_allocated,
.bdrv_make_empty = bdrv_qed_make_empty,
.bdrv_aio_readv = bdrv_qed_aio_readv,

View File

@ -262,15 +262,42 @@ error:
}
#endif
static int raw_open_common(BlockDriverState *bs, const char *filename,
static QemuOptsList raw_runtime_opts = {
.name = "raw",
.head = QTAILQ_HEAD_INITIALIZER(raw_runtime_opts.head),
.desc = {
{
.name = "filename",
.type = QEMU_OPT_STRING,
.help = "File name of the image",
},
{ /* end of list */ }
},
};
static int raw_open_common(BlockDriverState *bs, QDict *options,
int bdrv_flags, int open_flags)
{
BDRVRawState *s = bs->opaque;
QemuOpts *opts;
Error *local_err = NULL;
const char *filename;
int fd, ret;
opts = qemu_opts_create_nofail(&raw_runtime_opts);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (error_is_set(&local_err)) {
qerror_report_err(local_err);
error_free(local_err);
ret = -EINVAL;
goto fail;
}
filename = qemu_opt_get(opts, "filename");
ret = raw_normalize_devicepath(&filename);
if (ret != 0) {
return ret;
goto fail;
}
s->open_flags = open_flags;
@ -280,16 +307,18 @@ static int raw_open_common(BlockDriverState *bs, const char *filename,
fd = qemu_open(filename, s->open_flags, 0644);
if (fd < 0) {
ret = -errno;
if (ret == -EROFS)
if (ret == -EROFS) {
ret = -EACCES;
return ret;
}
goto fail;
}
s->fd = fd;
#ifdef CONFIG_LINUX_AIO
if (raw_set_aio(&s->aio_ctx, &s->use_aio, bdrv_flags)) {
qemu_close(fd);
return -errno;
ret = -errno;
goto fail;
}
#endif
@ -300,15 +329,18 @@ static int raw_open_common(BlockDriverState *bs, const char *filename,
}
#endif
return 0;
ret = 0;
fail:
qemu_opts_del(opts);
return ret;
}
static int raw_open(BlockDriverState *bs, const char *filename, int flags)
static int raw_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVRawState *s = bs->opaque;
s->type = FTYPE_FILE;
return raw_open_common(bs, filename, flags, 0);
return raw_open_common(bs, options, flags, 0);
}
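The conversion above is one instance of a pattern repeated for the other drivers converted below (the Win32 raw driver, rbd and sheepdog): declare a QemuOptsList for the runtime options, absorb the caller's QDict into a QemuOpts, read the values, and release the opts on every exit path. A condensed sketch of that shape; the driver name and the details of what is opened are illustrative, not any real driver.
static QemuOptsList example_runtime_opts = {
    .name = "example",
    .head = QTAILQ_HEAD_INITIALIZER(example_runtime_opts.head),
    .desc = {
        {
            .name = "filename",
            .type = QEMU_OPT_STRING,
            .help = "File name of the image",
        },
        { /* end of list */ }
    },
};
static int example_open(BlockDriverState *bs, QDict *options, int flags)
{
    QemuOpts *opts = qemu_opts_create_nofail(&example_runtime_opts);
    Error *local_err = NULL;
    const char *filename;
    int ret;
    /* move the keys this driver understands out of the caller's QDict */
    qemu_opts_absorb_qdict(opts, options, &local_err);
    if (error_is_set(&local_err)) {
        qerror_report_err(local_err);
        error_free(local_err);
        ret = -EINVAL;
        goto fail;
    }
    filename = qemu_opt_get(opts, "filename");
    if (filename == NULL) {
        ret = -EINVAL;
        goto fail;
    }
    /* ... a real driver would open 'filename' here ... */
    ret = 0;
fail:
    qemu_opts_del(opts);    /* released on both success and failure */
    return ret;
}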
static int raw_reopen_prepare(BDRVReopenState *state,
@ -1167,6 +1199,7 @@ static BlockDriver bdrv_file = {
.bdrv_reopen_abort = raw_reopen_abort,
.bdrv_close = raw_close,
.bdrv_create = raw_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_co_is_allocated = raw_co_is_allocated,
.bdrv_aio_readv = raw_aio_readv,
@ -1292,10 +1325,11 @@ static int check_hdev_writable(BDRVRawState *s)
return 0;
}
static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
static int hdev_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVRawState *s = bs->opaque;
int ret;
const char *filename = qdict_get_str(options, "filename");
#if defined(__APPLE__) && defined(__MACH__)
if (strstart(filename, "/dev/cdrom", NULL)) {
@ -1317,6 +1351,7 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
qemu_close(fd);
}
filename = bsdPath;
qdict_put(options, "filename", qstring_from_str(filename));
}
if ( mediaIterator )
@ -1336,7 +1371,7 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
}
#endif
ret = raw_open_common(bs, filename, flags, 0);
ret = raw_open_common(bs, options, flags, 0);
if (ret < 0) {
return ret;
}
@ -1493,11 +1528,6 @@ static int hdev_create(const char *filename, QEMUOptionParameter *options)
return ret;
}
static int hdev_has_zero_init(BlockDriverState *bs)
{
return 0;
}
static BlockDriver bdrv_host_device = {
.format_name = "host_device",
.protocol_name = "host_device",
@ -1510,7 +1540,6 @@ static BlockDriver bdrv_host_device = {
.bdrv_reopen_abort = raw_reopen_abort,
.bdrv_create = hdev_create,
.create_options = raw_create_options,
.bdrv_has_zero_init = hdev_has_zero_init,
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
@ -1530,7 +1559,7 @@ static BlockDriver bdrv_host_device = {
};
#ifdef __linux__
static int floppy_open(BlockDriverState *bs, const char *filename, int flags)
static int floppy_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVRawState *s = bs->opaque;
int ret;
@ -1538,7 +1567,7 @@ static int floppy_open(BlockDriverState *bs, const char *filename, int flags)
s->type = FTYPE_FD;
/* open will not fail even if no floppy is inserted, so add O_NONBLOCK */
ret = raw_open_common(bs, filename, flags, O_NONBLOCK);
ret = raw_open_common(bs, options, flags, O_NONBLOCK);
if (ret)
return ret;
@ -1635,7 +1664,6 @@ static BlockDriver bdrv_host_floppy = {
.bdrv_reopen_abort = raw_reopen_abort,
.bdrv_create = hdev_create,
.create_options = raw_create_options,
.bdrv_has_zero_init = hdev_has_zero_init,
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
@ -1652,14 +1680,14 @@ static BlockDriver bdrv_host_floppy = {
.bdrv_eject = floppy_eject,
};
static int cdrom_open(BlockDriverState *bs, const char *filename, int flags)
static int cdrom_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVRawState *s = bs->opaque;
s->type = FTYPE_CD;
/* open will not fail even if no CD is inserted, so add O_NONBLOCK */
return raw_open_common(bs, filename, flags, O_NONBLOCK);
return raw_open_common(bs, options, flags, O_NONBLOCK);
}
static int cdrom_probe_device(const char *filename)
@ -1737,7 +1765,6 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_reopen_abort = raw_reopen_abort,
.bdrv_create = hdev_create,
.create_options = raw_create_options,
.bdrv_has_zero_init = hdev_has_zero_init,
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
@ -1760,14 +1787,14 @@ static BlockDriver bdrv_host_cdrom = {
#endif /* __linux__ */
#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
static int cdrom_open(BlockDriverState *bs, const char *filename, int flags)
static int cdrom_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVRawState *s = bs->opaque;
int ret;
s->type = FTYPE_CD;
ret = raw_open_common(bs, filename, flags, 0);
ret = raw_open_common(bs, options, flags, 0);
if (ret)
return ret;
@ -1859,7 +1886,6 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_reopen_abort = raw_reopen_abort,
.bdrv_create = hdev_create,
.create_options = raw_create_options,
.bdrv_has_zero_init = hdev_has_zero_init,
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,

View File

@ -221,20 +221,49 @@ static void raw_parse_flags(int flags, int *access_flags, DWORD *overlapped)
}
}
static int raw_open(BlockDriverState *bs, const char *filename, int flags)
static QemuOptsList raw_runtime_opts = {
.name = "raw",
.head = QTAILQ_HEAD_INITIALIZER(raw_runtime_opts.head),
.desc = {
{
.name = "filename",
.type = QEMU_OPT_STRING,
.help = "File name of the image",
},
{ /* end of list */ }
},
};
static int raw_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVRawState *s = bs->opaque;
int access_flags;
DWORD overlapped;
QemuOpts *opts;
Error *local_err = NULL;
const char *filename;
int ret;
s->type = FTYPE_FILE;
opts = qemu_opts_create_nofail(&raw_runtime_opts);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (error_is_set(&local_err)) {
qerror_report_err(local_err);
error_free(local_err);
ret = -EINVAL;
goto fail;
}
filename = qemu_opt_get(opts, "filename");
raw_parse_flags(flags, &access_flags, &overlapped);
if ((flags & BDRV_O_NATIVE_AIO) && aio == NULL) {
aio = win32_aio_init();
if (aio == NULL) {
return -EINVAL;
ret = -EINVAL;
goto fail;
}
}
@ -244,20 +273,27 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
if (s->hfile == INVALID_HANDLE_VALUE) {
int err = GetLastError();
if (err == ERROR_ACCESS_DENIED)
return -EACCES;
return -EINVAL;
if (err == ERROR_ACCESS_DENIED) {
ret = -EACCES;
} else {
ret = -EINVAL;
}
goto fail;
}
if (flags & BDRV_O_NATIVE_AIO) {
int ret = win32_aio_attach(aio, s->hfile);
ret = win32_aio_attach(aio, s->hfile);
if (ret < 0) {
CloseHandle(s->hfile);
return ret;
goto fail;
}
s->aio = aio;
}
return 0;
ret = 0;
fail:
qemu_opts_del(opts);
return ret;
}
static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs,
@ -423,6 +459,7 @@ static BlockDriver bdrv_file = {
.bdrv_file_open = raw_open,
.bdrv_close = raw_close,
.bdrv_create = raw_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
@ -494,12 +531,13 @@ static int hdev_probe_device(const char *filename)
return 0;
}
static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
static int hdev_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVRawState *s = bs->opaque;
int access_flags, create_flags;
DWORD overlapped;
char device_name[64];
const char *filename = qdict_get_str(options, "filename");
if (strstart(filename, "/dev/cdrom", NULL)) {
if (find_cdrom(device_name, sizeof(device_name)) < 0)
@ -533,11 +571,6 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
return 0;
}
static int hdev_has_zero_init(BlockDriverState *bs)
{
return 0;
}
static BlockDriver bdrv_host_device = {
.format_name = "host_device",
.protocol_name = "host_device",
@ -545,7 +578,6 @@ static BlockDriver bdrv_host_device = {
.bdrv_probe_device = hdev_probe_device,
.bdrv_file_open = hdev_open,
.bdrv_close = raw_close,
.bdrv_has_zero_init = hdev_has_zero_init,
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,

View File

@ -1,3 +1,26 @@
/*
* Block driver for RAW format
*
* Copyright (c) 2006 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu-common.h"
#include "block/block_int.h"
@ -42,6 +65,13 @@ static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs,
return bdrv_co_is_allocated(bs->file, sector_num, nb_sectors, pnum);
}
static int coroutine_fn raw_co_write_zeroes(BlockDriverState *bs,
int64_t sector_num,
int nb_sectors)
{
return bdrv_co_write_zeroes(bs->file, sector_num, nb_sectors);
}
static int64_t raw_getlength(BlockDriverState *bs)
{
return bdrv_getlength(bs->file);
@ -114,6 +144,11 @@ static int raw_has_zero_init(BlockDriverState *bs)
return bdrv_has_zero_init(bs->file);
}
static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
return bdrv_get_info(bs->file, bdi);
}
static BlockDriver bdrv_raw = {
.format_name = "raw",
@ -128,10 +163,12 @@ static BlockDriver bdrv_raw = {
.bdrv_co_readv = raw_co_readv,
.bdrv_co_writev = raw_co_writev,
.bdrv_co_is_allocated = raw_co_is_allocated,
.bdrv_co_write_zeroes = raw_co_write_zeroes,
.bdrv_co_discard = raw_co_discard,
.bdrv_probe = raw_probe,
.bdrv_getlength = raw_getlength,
.bdrv_get_info = raw_get_info,
.bdrv_truncate = raw_truncate,
.bdrv_is_inserted = raw_is_inserted,

View File

@ -63,7 +63,8 @@
typedef enum {
RBD_AIO_READ,
RBD_AIO_WRITE,
RBD_AIO_DISCARD
RBD_AIO_DISCARD,
RBD_AIO_FLUSH
} RBDAIOCmd;
typedef struct RBDAIOCB {
@ -379,8 +380,7 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
r = rcb->ret;
if (acb->cmd == RBD_AIO_WRITE ||
acb->cmd == RBD_AIO_DISCARD) {
if (acb->cmd != RBD_AIO_READ) {
if (r < 0) {
acb->ret = r;
acb->error = 1;
@ -441,7 +441,21 @@ static int qemu_rbd_aio_flush_cb(void *opaque)
return (s->qemu_aio_count > 0);
}
static int qemu_rbd_open(BlockDriverState *bs, const char *filename, int flags)
/* TODO Convert to fine grained options */
static QemuOptsList runtime_opts = {
.name = "rbd",
.head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
.desc = {
{
.name = "filename",
.type = QEMU_OPT_STRING,
.help = "Specification of the rbd image",
},
{ /* end of list */ }
},
};
static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVRBDState *s = bs->opaque;
char pool[RBD_MAX_POOL_NAME_SIZE];
@ -449,20 +463,35 @@ static int qemu_rbd_open(BlockDriverState *bs, const char *filename, int flags)
char conf[RBD_MAX_CONF_SIZE];
char clientname_buf[RBD_MAX_CONF_SIZE];
char *clientname;
QemuOpts *opts;
Error *local_err = NULL;
const char *filename;
int r;
opts = qemu_opts_create_nofail(&runtime_opts);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (error_is_set(&local_err)) {
qerror_report_err(local_err);
error_free(local_err);
qemu_opts_del(opts);
return -EINVAL;
}
filename = qemu_opt_get(opts, "filename");
if (qemu_rbd_parsename(filename, pool, sizeof(pool),
snap_buf, sizeof(snap_buf),
s->name, sizeof(s->name),
conf, sizeof(conf)) < 0) {
return -EINVAL;
r = -EINVAL;
goto failed_opts;
}
clientname = qemu_rbd_parse_clientname(conf, clientname_buf);
r = rados_create(&s->cluster, clientname);
if (r < 0) {
error_report("error initializing");
return r;
goto failed_opts;
}
s->snap = NULL;
@ -528,6 +557,7 @@ static int qemu_rbd_open(BlockDriverState *bs, const char *filename, int flags)
NULL, qemu_rbd_aio_flush_cb, s);
qemu_opts_del(opts);
return 0;
failed:
@ -537,6 +567,8 @@ failed_open:
failed_shutdown:
rados_shutdown(s->cluster);
g_free(s->snap);
failed_opts:
qemu_opts_del(opts);
return r;
}
@ -658,6 +690,16 @@ static int rbd_aio_discard_wrapper(rbd_image_t image,
#endif
}
static int rbd_aio_flush_wrapper(rbd_image_t image,
rbd_completion_t comp)
{
#ifdef LIBRBD_SUPPORTS_AIO_FLUSH
return rbd_aio_flush(image, comp);
#else
return -ENOTSUP;
#endif
}
static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs,
int64_t sector_num,
QEMUIOVector *qiov,
@ -678,7 +720,7 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs,
acb = qemu_aio_get(&rbd_aiocb_info, bs, cb, opaque);
acb->cmd = cmd;
acb->qiov = qiov;
if (cmd == RBD_AIO_DISCARD) {
if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
acb->bounce = NULL;
} else {
acb->bounce = qemu_blockalign(bs, qiov->size);
@ -722,6 +764,9 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs,
case RBD_AIO_DISCARD:
r = rbd_aio_discard_wrapper(s->image, off, size, c);
break;
case RBD_AIO_FLUSH:
r = rbd_aio_flush_wrapper(s->image, c);
break;
default:
r = -EINVAL;
}
@ -761,6 +806,16 @@ static BlockDriverAIOCB *qemu_rbd_aio_writev(BlockDriverState *bs,
RBD_AIO_WRITE);
}
#ifdef LIBRBD_SUPPORTS_AIO_FLUSH
static BlockDriverAIOCB *qemu_rbd_aio_flush(BlockDriverState *bs,
BlockDriverCompletionFunc *cb,
void *opaque)
{
return rbd_start_aio(bs, 0, NULL, 0, cb, opaque, RBD_AIO_FLUSH);
}
#else
static int qemu_rbd_co_flush(BlockDriverState *bs)
{
#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 1)
@ -771,6 +826,7 @@ static int qemu_rbd_co_flush(BlockDriverState *bs)
return 0;
#endif
}
#endif
static int qemu_rbd_getinfo(BlockDriverState *bs, BlockDriverInfo *bdi)
{
@ -940,6 +996,7 @@ static BlockDriver bdrv_rbd = {
.bdrv_file_open = qemu_rbd_open,
.bdrv_close = qemu_rbd_close,
.bdrv_create = qemu_rbd_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_get_info = qemu_rbd_getinfo,
.create_options = qemu_rbd_create_options,
.bdrv_getlength = qemu_rbd_getlength,
@ -948,7 +1005,12 @@ static BlockDriver bdrv_rbd = {
.bdrv_aio_readv = qemu_rbd_aio_readv,
.bdrv_aio_writev = qemu_rbd_aio_writev,
#ifdef LIBRBD_SUPPORTS_AIO_FLUSH
.bdrv_aio_flush = qemu_rbd_aio_flush,
#else
.bdrv_co_flush_to_disk = qemu_rbd_co_flush,
#endif
#ifdef LIBRBD_SUPPORTS_DISCARD
.bdrv_aio_discard = qemu_rbd_aio_discard,

View File

@ -27,6 +27,8 @@
#define SD_OP_CREATE_AND_WRITE_OBJ 0x01
#define SD_OP_READ_OBJ 0x02
#define SD_OP_WRITE_OBJ 0x03
/* 0x04 is used internally by Sheepdog */
#define SD_OP_DISCARD_OBJ 0x05
#define SD_OP_NEW_VDI 0x11
#define SD_OP_LOCK_VDI 0x12
@ -34,6 +36,7 @@
#define SD_OP_GET_VDI_INFO 0x14
#define SD_OP_READ_VDIS 0x15
#define SD_OP_FLUSH_VDI 0x16
#define SD_OP_DEL_VDI 0x17
#define SD_FLAG_CMD_WRITE 0x01
#define SD_FLAG_CMD_COW 0x02
@ -65,6 +68,8 @@
#define SD_RES_WAIT_FOR_FORMAT 0x16 /* Waiting for a format operation */
#define SD_RES_WAIT_FOR_JOIN 0x17 /* Waiting for other nodes joining */
#define SD_RES_JOIN_FAILED 0x18 /* Target node had failed to join sheepdog */
#define SD_RES_HALT 0x19 /* Sheepdog is stopped serving IO request */
#define SD_RES_READONLY 0x1A /* Object is read-only */
/*
* Object ID rules
@ -86,7 +91,6 @@
#define SD_NR_VDIS (1U << 24)
#define SD_DATA_OBJ_SIZE (UINT64_C(1) << 22)
#define SD_MAX_VDI_SIZE (SD_DATA_OBJ_SIZE * MAX_DATA_OBJS)
#define SECTOR_SIZE 512
#define SD_INODE_SIZE (sizeof(SheepdogInode))
#define CURRENT_VDI_ID 0
@ -238,14 +242,14 @@ static inline bool is_snapshot(struct SheepdogInode *inode)
return !!inode->snap_ctime;
}
#undef dprintf
#undef DPRINTF
#ifdef DEBUG_SDOG
#define dprintf(fmt, args...) \
#define DPRINTF(fmt, args...) \
do { \
fprintf(stdout, "%s %d: " fmt, __func__, __LINE__, ##args); \
} while (0)
#else
#define dprintf(fmt, args...)
#define DPRINTF(fmt, args...)
#endif
typedef struct SheepdogAIOCB SheepdogAIOCB;
@ -268,6 +272,7 @@ enum AIOCBState {
AIOCB_WRITE_UDATA,
AIOCB_READ_UDATA,
AIOCB_FLUSH_CACHE,
AIOCB_DISCARD_OBJ,
};
struct SheepdogAIOCB {
@ -297,6 +302,7 @@ typedef struct BDRVSheepdogState {
char name[SD_MAX_VDI_LEN];
bool is_snapshot;
uint32_t cache_flags;
bool discard_supported;
char *host_spec;
bool is_unix;
@ -344,6 +350,8 @@ static const char * sd_strerror(int err)
{SD_RES_WAIT_FOR_FORMAT, "Sheepdog is waiting for a format operation"},
{SD_RES_WAIT_FOR_JOIN, "Sheepdog is waiting for other nodes joining"},
{SD_RES_JOIN_FAILED, "Target node had failed to join sheepdog"},
{SD_RES_HALT, "Sheepdog is stopped serving IO request"},
{SD_RES_READONLY, "Object is read-only"},
};
for (i = 0; i < ARRAY_SIZE(errors); ++i) {
@ -469,7 +477,7 @@ static int connect_to_sdog(BDRVSheepdogState *s)
qerror_report_err(err);
error_free(err);
} else {
socket_set_nonblock(fd);
qemu_set_nonblock(fd);
}
return fd;
@ -598,6 +606,7 @@ static int do_req(int sockfd, SheepdogReq *hdr, void *data,
static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
struct iovec *iov, int niov, bool create,
enum AIOCBState aiocb_type);
static int coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req);
static AIOReq *find_pending_req(BDRVSheepdogState *s, uint64_t oid)
@ -654,7 +663,7 @@ static void coroutine_fn aio_read_response(void *opaque)
int ret;
AIOReq *aio_req = NULL;
SheepdogAIOCB *acb;
unsigned long idx;
uint64_t idx;
if (QLIST_EMPTY(&s->inflight_aio_head)) {
goto out;
@ -720,16 +729,41 @@ static void coroutine_fn aio_read_response(void *opaque)
break;
case AIOCB_FLUSH_CACHE:
if (rsp.result == SD_RES_INVALID_PARMS) {
dprintf("disable cache since the server doesn't support it\n");
DPRINTF("disable cache since the server doesn't support it\n");
s->cache_flags = SD_FLAG_CMD_DIRECT;
rsp.result = SD_RES_SUCCESS;
}
break;
case AIOCB_DISCARD_OBJ:
switch (rsp.result) {
case SD_RES_INVALID_PARMS:
error_report("sheep(%s) doesn't support discard command",
s->host_spec);
rsp.result = SD_RES_SUCCESS;
s->discard_supported = false;
break;
case SD_RES_SUCCESS:
idx = data_oid_to_idx(aio_req->oid);
s->inode.data_vdi_id[idx] = 0;
break;
default:
break;
}
}
if (rsp.result != SD_RES_SUCCESS) {
switch (rsp.result) {
case SD_RES_SUCCESS:
break;
case SD_RES_READONLY:
ret = resend_aioreq(s, aio_req);
if (ret == SD_RES_SUCCESS) {
goto out;
}
/* fall through */
default:
acb->ret = -EIO;
error_report("%s", sd_strerror(rsp.result));
break;
}
free_aio_req(s, aio_req);
@ -921,8 +955,9 @@ static int parse_vdiname(BDRVSheepdogState *s, const char *filename,
return ret;
}
static int find_vdi_name(BDRVSheepdogState *s, char *filename, uint32_t snapid,
char *tag, uint32_t *vid, int for_snapshot)
static int find_vdi_name(BDRVSheepdogState *s, const char *filename,
uint32_t snapid, const char *tag, uint32_t *vid,
bool lock)
{
int ret, fd;
SheepdogVdiReq hdr;
@ -943,10 +978,10 @@ static int find_vdi_name(BDRVSheepdogState *s, char *filename, uint32_t snapid,
strncpy(buf + SD_MAX_VDI_LEN, tag, SD_MAX_VDI_TAG_LEN);
memset(&hdr, 0, sizeof(hdr));
if (for_snapshot) {
hdr.opcode = SD_OP_GET_VDI_INFO;
} else {
if (lock) {
hdr.opcode = SD_OP_LOCK_VDI;
} else {
hdr.opcode = SD_OP_GET_VDI_INFO;
}
wlen = SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN;
hdr.proto_ver = SD_PROTO_VER;
@ -1014,6 +1049,9 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
wlen = datalen;
hdr.flags = SD_FLAG_CMD_WRITE | flags;
break;
case AIOCB_DISCARD_OBJ:
hdr.opcode = SD_OP_DISCARD_OBJ;
break;
}
if (s->cache_flags) {
@ -1124,7 +1162,104 @@ static int write_object(int fd, char *buf, uint64_t oid, int copies,
create, cache_flags);
}
static int sd_open(BlockDriverState *bs, const char *filename, int flags)
/* update inode with the latest state */
static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag)
{
SheepdogInode *inode;
int ret = 0, fd;
uint32_t vid = 0;
fd = connect_to_sdog(s);
if (fd < 0) {
return -EIO;
}
inode = g_malloc(sizeof(s->inode));
ret = find_vdi_name(s, s->name, snapid, tag, &vid, false);
if (ret) {
goto out;
}
ret = read_object(fd, (char *)inode, vid_to_vdi_oid(vid),
s->inode.nr_copies, sizeof(*inode), 0, s->cache_flags);
if (ret < 0) {
goto out;
}
if (inode->vdi_id != s->inode.vdi_id) {
memcpy(&s->inode, inode, sizeof(s->inode));
}
out:
g_free(inode);
closesocket(fd);
return ret;
}
static int coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req)
{
SheepdogAIOCB *acb = aio_req->aiocb;
bool create = false;
int ret;
ret = reload_inode(s, 0, "");
if (ret < 0) {
return ret;
}
aio_req->oid = vid_to_data_oid(s->inode.vdi_id,
data_oid_to_idx(aio_req->oid));
/* check whether this request becomes a CoW one */
if (acb->aiocb_type == AIOCB_WRITE_UDATA) {
int idx = data_oid_to_idx(aio_req->oid);
AIOReq *areq;
if (s->inode.data_vdi_id[idx] == 0) {
create = true;
goto out;
}
if (is_data_obj_writable(&s->inode, idx)) {
goto out;
}
/* link to the pending list if there is another CoW request to
* the same object */
QLIST_FOREACH(areq, &s->inflight_aio_head, aio_siblings) {
if (areq != aio_req && areq->oid == aio_req->oid) {
DPRINTF("simultaneous CoW to %" PRIx64 "\n", aio_req->oid);
QLIST_REMOVE(aio_req, aio_siblings);
QLIST_INSERT_HEAD(&s->pending_aio_head, aio_req, aio_siblings);
return SD_RES_SUCCESS;
}
}
aio_req->base_oid = vid_to_data_oid(s->inode.data_vdi_id[idx], idx);
aio_req->flags |= SD_FLAG_CMD_COW;
create = true;
}
out:
return add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov,
create, acb->aiocb_type);
}
/* TODO Convert to fine grained options */
static QemuOptsList runtime_opts = {
.name = "sheepdog",
.head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
.desc = {
{
.name = "filename",
.type = QEMU_OPT_STRING,
.help = "URL to the sheepdog image",
},
{ /* end of list */ }
},
};
static int sd_open(BlockDriverState *bs, QDict *options, int flags)
{
int ret, fd;
uint32_t vid = 0;
@ -1132,6 +1267,20 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
char vdi[SD_MAX_VDI_LEN], tag[SD_MAX_VDI_TAG_LEN];
uint32_t snapid;
char *buf = NULL;
QemuOpts *opts;
Error *local_err = NULL;
const char *filename;
opts = qemu_opts_create_nofail(&runtime_opts);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (error_is_set(&local_err)) {
qerror_report_err(local_err);
error_free(local_err);
ret = -EINVAL;
goto out;
}
filename = qemu_opt_get(opts, "filename");
QLIST_INIT(&s->inflight_aio_head);
QLIST_INIT(&s->pending_aio_head);
@ -1154,7 +1303,7 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
goto out;
}
ret = find_vdi_name(s, vdi, snapid, tag, &vid, 0);
ret = find_vdi_name(s, vdi, snapid, tag, &vid, true);
if (ret) {
goto out;
}
@ -1167,9 +1316,10 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
if (flags & BDRV_O_NOCACHE) {
s->cache_flags = SD_FLAG_CMD_DIRECT;
}
s->discard_supported = true;
if (snapid || tag[0] != '\0') {
dprintf("%" PRIx32 " snapshot inode was open.\n", vid);
DPRINTF("%" PRIx32 " snapshot inode was open.\n", vid);
s->is_snapshot = true;
}
@ -1193,9 +1343,10 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
s->min_dirty_data_idx = UINT32_MAX;
s->max_dirty_data_idx = 0;
bs->total_sectors = s->inode.vdi_size / SECTOR_SIZE;
bs->total_sectors = s->inode.vdi_size / BDRV_SECTOR_SIZE;
pstrcpy(s->name, sizeof(s->name), vdi);
qemu_co_mutex_init(&s->lock);
qemu_opts_del(opts);
g_free(buf);
return 0;
out:
@ -1203,6 +1354,7 @@ out:
if (s->fd >= 0) {
closesocket(s->fd);
}
qemu_opts_del(opts);
g_free(buf);
return ret;
}
@ -1267,7 +1419,7 @@ static int sd_prealloc(const char *filename)
void *buf = g_malloc0(SD_DATA_OBJ_SIZE);
int ret;
ret = bdrv_file_open(&bs, filename, BDRV_O_RDWR);
ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR);
if (ret < 0) {
goto out;
}
@ -1358,14 +1510,14 @@ static int sd_create(const char *filename, QEMUOptionParameter *options)
BlockDriver *drv;
/* Currently, only Sheepdog backing image is supported. */
drv = bdrv_find_protocol(backing_file);
drv = bdrv_find_protocol(backing_file, true);
if (!drv || strcmp(drv->protocol_name, "sheepdog") != 0) {
error_report("backing_file must be a sheepdog image");
ret = -EINVAL;
goto out;
}
ret = bdrv_file_open(&bs, backing_file, 0);
ret = bdrv_file_open(&bs, backing_file, NULL, 0);
if (ret < 0) {
goto out;
}
@ -1402,7 +1554,7 @@ static void sd_close(BlockDriverState *bs)
unsigned int wlen, rlen = 0;
int fd, ret;
dprintf("%s\n", s->name);
DPRINTF("%s\n", s->name);
fd = connect_to_sdog(s);
if (fd < 0) {
@ -1515,6 +1667,43 @@ out:
sd_finish_aiocb(acb);
}
/* Delete current working VDI on the snapshot chain */
static bool sd_delete(BDRVSheepdogState *s)
{
unsigned int wlen = SD_MAX_VDI_LEN, rlen = 0;
SheepdogVdiReq hdr = {
.opcode = SD_OP_DEL_VDI,
.vdi_id = s->inode.vdi_id,
.data_length = wlen,
.flags = SD_FLAG_CMD_WRITE,
};
SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr;
int fd, ret;
fd = connect_to_sdog(s);
if (fd < 0) {
return false;
}
ret = do_req(fd, (SheepdogReq *)&hdr, s->name, &wlen, &rlen);
closesocket(fd);
if (ret) {
return false;
}
switch (rsp->result) {
case SD_RES_NO_VDI:
error_report("%s was already deleted", s->name);
/* fall through */
case SD_RES_SUCCESS:
break;
default:
error_report("%s, %s", sd_strerror(rsp->result), s->name);
return false;
}
return true;
}
/*
* Create a writable VDI from a snapshot
*/
@ -1523,17 +1712,25 @@ static int sd_create_branch(BDRVSheepdogState *s)
int ret, fd;
uint32_t vid;
char *buf;
bool deleted;
dprintf("%" PRIx32 " is snapshot.\n", s->inode.vdi_id);
DPRINTF("%" PRIx32 " is snapshot.\n", s->inode.vdi_id);
buf = g_malloc(SD_INODE_SIZE);
ret = do_sd_create(s, s->name, s->inode.vdi_size, s->inode.vdi_id, &vid, 1);
/*
* Even if deletion fails, we will just create an extra snapshot based on
* the working VDI which was supposed to be deleted. So there is no need
* to bail out on failure here.
*/
deleted = sd_delete(s);
ret = do_sd_create(s, s->name, s->inode.vdi_size, s->inode.vdi_id, &vid,
!deleted);
if (ret) {
goto out;
}
dprintf("%" PRIx32 " is created.\n", vid);
DPRINTF("%" PRIx32 " is created.\n", vid);
fd = connect_to_sdog(s);
if (fd < 0) {
@ -1554,7 +1751,7 @@ static int sd_create_branch(BDRVSheepdogState *s)
s->is_snapshot = false;
ret = 0;
dprintf("%" PRIx32 " was newly created.\n", s->inode.vdi_id);
DPRINTF("%" PRIx32 " was newly created.\n", s->inode.vdi_id);
out:
g_free(buf);
@ -1578,10 +1775,10 @@ static int coroutine_fn sd_co_rw_vector(void *p)
{
SheepdogAIOCB *acb = p;
int ret = 0;
unsigned long len, done = 0, total = acb->nb_sectors * SECTOR_SIZE;
unsigned long idx = acb->sector_num * SECTOR_SIZE / SD_DATA_OBJ_SIZE;
unsigned long len, done = 0, total = acb->nb_sectors * BDRV_SECTOR_SIZE;
unsigned long idx = acb->sector_num * BDRV_SECTOR_SIZE / SD_DATA_OBJ_SIZE;
uint64_t oid;
uint64_t offset = (acb->sector_num * SECTOR_SIZE) % SD_DATA_OBJ_SIZE;
uint64_t offset = (acb->sector_num * BDRV_SECTOR_SIZE) % SD_DATA_OBJ_SIZE;
BDRVSheepdogState *s = acb->common.bs->opaque;
SheepdogInode *inode = &s->inode;
AIOReq *aio_req;
@ -1630,16 +1827,25 @@ static int coroutine_fn sd_co_rw_vector(void *p)
flags = SD_FLAG_CMD_COW;
}
break;
case AIOCB_DISCARD_OBJ:
/*
* We discard the object only when the whole object is
* 1) allocated and 2) trimmed. Otherwise, simply skip it.
*/
if (len != SD_DATA_OBJ_SIZE || inode->data_vdi_id[idx] == 0) {
goto done;
}
break;
default:
break;
}
if (create) {
dprintf("update ino (%" PRIu32 ") %" PRIu64 " %" PRIu64 " %ld\n",
DPRINTF("update ino (%" PRIu32 ") %" PRIu64 " %" PRIu64 " %ld\n",
inode->vdi_id, oid,
vid_to_data_oid(inode->data_vdi_id[idx], idx), idx);
oid = vid_to_data_oid(inode->vdi_id, idx);
dprintf("new oid %" PRIx64 "\n", oid);
DPRINTF("new oid %" PRIx64 "\n", oid);
}
aio_req = alloc_aio_req(s, acb, oid, len, offset, flags, old_oid, done);
@ -1691,7 +1897,7 @@ static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
int ret;
if (bs->growable && sector_num + nb_sectors > bs->total_sectors) {
ret = sd_truncate(bs, (sector_num + nb_sectors) * SECTOR_SIZE);
ret = sd_truncate(bs, (sector_num + nb_sectors) * BDRV_SECTOR_SIZE);
if (ret < 0) {
return ret;
}
@ -1772,7 +1978,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
SheepdogInode *inode;
unsigned int datalen;
dprintf("sn_info: name %s id_str %s s: name %s vm_state_size %" PRId64 " "
DPRINTF("sn_info: name %s id_str %s s: name %s vm_state_size %" PRId64 " "
"is_snapshot %d\n", sn_info->name, sn_info->id_str,
s->name, sn_info->vm_state_size, s->is_snapshot);
@ -1783,7 +1989,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
return -EINVAL;
}
dprintf("%s %s\n", sn_info->name, sn_info->id_str);
DPRINTF("%s %s\n", sn_info->name, sn_info->id_str);
s->inode.vm_state_size = sn_info->vm_state_size;
s->inode.vm_clock_nsec = sn_info->vm_clock_nsec;
@ -1827,7 +2033,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
}
memcpy(&s->inode, inode, datalen);
dprintf("s->inode: name %s snap_id %x oid %x\n",
DPRINTF("s->inode: name %s snap_id %x oid %x\n",
s->inode.name, s->inode.snap_id, s->inode.vdi_id);
cleanup:
@ -1835,69 +2041,47 @@ cleanup:
return ret;
}
/*
* We implement the rollback (loadvm) operation to the specified snapshot by
* 1) switching to the snapshot,
* 2) relying on sd_create_branch to delete the working VDI, and
* 3) creating a new working VDI based on the specified snapshot
*/
static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
{
BDRVSheepdogState *s = bs->opaque;
BDRVSheepdogState *old_s;
char vdi[SD_MAX_VDI_LEN], tag[SD_MAX_VDI_TAG_LEN];
char *buf = NULL;
uint32_t vid;
char tag[SD_MAX_VDI_TAG_LEN];
uint32_t snapid = 0;
int ret = 0, fd;
int ret = 0;
old_s = g_malloc(sizeof(BDRVSheepdogState));
memcpy(old_s, s, sizeof(BDRVSheepdogState));
pstrcpy(vdi, sizeof(vdi), s->name);
snapid = strtoul(snapshot_id, NULL, 10);
if (snapid) {
tag[0] = 0;
} else {
pstrcpy(tag, sizeof(tag), s->name);
pstrcpy(tag, sizeof(tag), snapshot_id);
}
ret = find_vdi_name(s, vdi, snapid, tag, &vid, 1);
if (ret) {
error_report("Failed to find_vdi_name");
goto out;
}
fd = connect_to_sdog(s);
if (fd < 0) {
ret = fd;
goto out;
}
buf = g_malloc(SD_INODE_SIZE);
ret = read_object(fd, buf, vid_to_vdi_oid(vid), s->inode.nr_copies,
SD_INODE_SIZE, 0, s->cache_flags);
closesocket(fd);
ret = reload_inode(s, snapid, tag);
if (ret) {
goto out;
}
memcpy(&s->inode, buf, sizeof(s->inode));
if (!s->inode.vm_state_size) {
error_report("Invalid snapshot");
ret = -ENOENT;
ret = sd_create_branch(s);
if (ret) {
goto out;
}
s->is_snapshot = true;
g_free(buf);
g_free(old_s);
return 0;
out:
/* recover bdrv_sd_state */
memcpy(s, old_s, sizeof(BDRVSheepdogState));
g_free(buf);
g_free(old_s);
error_report("failed to open. recover old bdrv_sd_state.");
@ -2009,8 +2193,9 @@ static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data,
int fd, ret = 0, remaining = size;
unsigned int data_len;
uint64_t vmstate_oid;
uint32_t vdi_index;
uint64_t offset;
uint32_t vdi_index;
uint32_t vdi_id = load ? s->inode.parent_vdi_id : s->inode.vdi_id;
fd = connect_to_sdog(s);
if (fd < 0) {
@ -2023,7 +2208,7 @@ static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data,
data_len = MIN(remaining, SD_DATA_OBJ_SIZE - offset);
vmstate_oid = vid_to_vmstate_oid(s->inode.vdi_id, vdi_index);
vmstate_oid = vid_to_vmstate_oid(vdi_id, vdi_index);
create = (offset == 0);
if (load) {
@ -2051,12 +2236,19 @@ cleanup:
return ret;
}
static int sd_save_vmstate(BlockDriverState *bs, const uint8_t *data,
int64_t pos, int size)
static int sd_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
int64_t pos)
{
BDRVSheepdogState *s = bs->opaque;
void *buf;
int ret;
return do_load_save_vmstate(s, (uint8_t *)data, pos, size, 0);
buf = qemu_blockalign(bs, qiov->size);
qemu_iovec_to_buf(qiov, 0, buf, qiov->size);
ret = do_load_save_vmstate(s, (uint8_t *) buf, pos, qiov->size, 0);
qemu_vfree(buf);
return ret;
}
static int sd_load_vmstate(BlockDriverState *bs, uint8_t *data,
@ -2068,6 +2260,67 @@ static int sd_load_vmstate(BlockDriverState *bs, uint8_t *data,
}
static coroutine_fn int sd_co_discard(BlockDriverState *bs, int64_t sector_num,
int nb_sectors)
{
SheepdogAIOCB *acb;
QEMUIOVector dummy;
BDRVSheepdogState *s = bs->opaque;
int ret;
if (!s->discard_supported) {
return 0;
}
acb = sd_aio_setup(bs, &dummy, sector_num, nb_sectors);
acb->aiocb_type = AIOCB_DISCARD_OBJ;
acb->aio_done_func = sd_finish_aiocb;
ret = sd_co_rw_vector(acb);
if (ret <= 0) {
qemu_aio_release(acb);
return ret;
}
qemu_coroutine_yield();
return acb->ret;
}
static coroutine_fn int
sd_co_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
int *pnum)
{
BDRVSheepdogState *s = bs->opaque;
SheepdogInode *inode = &s->inode;
unsigned long start = sector_num * BDRV_SECTOR_SIZE / SD_DATA_OBJ_SIZE,
end = DIV_ROUND_UP((sector_num + nb_sectors) *
BDRV_SECTOR_SIZE, SD_DATA_OBJ_SIZE);
unsigned long idx;
int ret = 1;
for (idx = start; idx < end; idx++) {
if (inode->data_vdi_id[idx] == 0) {
break;
}
}
if (idx == start) {
/* Get the longest length of unallocated sectors */
ret = 0;
for (idx = start + 1; idx < end; idx++) {
if (inode->data_vdi_id[idx] != 0) {
break;
}
}
}
*pnum = (idx - start) * SD_DATA_OBJ_SIZE / BDRV_SECTOR_SIZE;
if (*pnum > nb_sectors) {
*pnum = nb_sectors;
}
return ret;
}
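The index arithmetic in sd_co_is_allocated() (and in sd_co_rw_vector() earlier) maps a sector range onto Sheepdog's fixed-size data objects: SD_DATA_OBJ_SIZE is 1 << 22 bytes (4 MiB) and BDRV_SECTOR_SIZE is 512. Below is a small self-contained sketch of the same calculation with made-up request values; DIV_ROUND_UP is written out inline.
#include <stdint.h>
#include <stdio.h>
#define BDRV_SECTOR_SIZE 512ULL
#define SD_DATA_OBJ_SIZE (1ULL << 22)          /* 4 MiB per data object */
int main(void)
{
    uint64_t sector_num = 8000;                /* example request offset */
    uint64_t nb_sectors = 4096;                /* 2 MiB worth of sectors */
    /* first object index touched by the request */
    uint64_t start = sector_num * BDRV_SECTOR_SIZE / SD_DATA_OBJ_SIZE;
    /* one past the last object index touched (DIV_ROUND_UP inlined) */
    uint64_t end = ((sector_num + nb_sectors) * BDRV_SECTOR_SIZE
                    + SD_DATA_OBJ_SIZE - 1) / SD_DATA_OBJ_SIZE;
    /* prints "request spans objects 0 through 1" for these values */
    printf("request spans objects %llu through %llu\n",
           (unsigned long long)start, (unsigned long long)(end - 1));
    return 0;
}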
static QEMUOptionParameter sd_create_options[] = {
{
.name = BLOCK_OPT_SIZE,
@ -2094,12 +2347,15 @@ static BlockDriver bdrv_sheepdog = {
.bdrv_file_open = sd_open,
.bdrv_close = sd_close,
.bdrv_create = sd_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_getlength = sd_getlength,
.bdrv_truncate = sd_truncate,
.bdrv_co_readv = sd_co_readv,
.bdrv_co_writev = sd_co_writev,
.bdrv_co_flush_to_disk = sd_co_flush_to_disk,
.bdrv_co_discard = sd_co_discard,
.bdrv_co_is_allocated = sd_co_is_allocated,
.bdrv_snapshot_create = sd_snapshot_create,
.bdrv_snapshot_goto = sd_snapshot_goto,
@ -2119,12 +2375,15 @@ static BlockDriver bdrv_sheepdog_tcp = {
.bdrv_file_open = sd_open,
.bdrv_close = sd_close,
.bdrv_create = sd_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_getlength = sd_getlength,
.bdrv_truncate = sd_truncate,
.bdrv_co_readv = sd_co_readv,
.bdrv_co_writev = sd_co_writev,
.bdrv_co_flush_to_disk = sd_co_flush_to_disk,
.bdrv_co_discard = sd_co_discard,
.bdrv_co_is_allocated = sd_co_is_allocated,
.bdrv_snapshot_create = sd_snapshot_create,
.bdrv_snapshot_goto = sd_snapshot_goto,
@ -2144,12 +2403,15 @@ static BlockDriver bdrv_sheepdog_unix = {
.bdrv_file_open = sd_open,
.bdrv_close = sd_close,
.bdrv_create = sd_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_getlength = sd_getlength,
.bdrv_truncate = sd_truncate,
.bdrv_co_readv = sd_co_readv,
.bdrv_co_writev = sd_co_writev,
.bdrv_co_flush_to_disk = sd_co_flush_to_disk,
.bdrv_co_discard = sd_co_discard,
.bdrv_co_is_allocated = sd_co_is_allocated,
.bdrv_snapshot_create = sd_snapshot_create,
.bdrv_snapshot_goto = sd_snapshot_goto,

157
block/snapshot.c Normal file
View File

@ -0,0 +1,157 @@
/*
* Block layer snapshot related functions
*
* Copyright (c) 2003-2008 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "block/snapshot.h"
#include "block/block_int.h"
int bdrv_snapshot_find(BlockDriverState *bs, QEMUSnapshotInfo *sn_info,
const char *name)
{
QEMUSnapshotInfo *sn_tab, *sn;
int nb_sns, i, ret;
ret = -ENOENT;
nb_sns = bdrv_snapshot_list(bs, &sn_tab);
if (nb_sns < 0) {
return ret;
}
for (i = 0; i < nb_sns; i++) {
sn = &sn_tab[i];
if (!strcmp(sn->id_str, name) || !strcmp(sn->name, name)) {
*sn_info = *sn;
ret = 0;
break;
}
}
g_free(sn_tab);
return ret;
}
int bdrv_can_snapshot(BlockDriverState *bs)
{
BlockDriver *drv = bs->drv;
if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
return 0;
}
if (!drv->bdrv_snapshot_create) {
if (bs->file != NULL) {
return bdrv_can_snapshot(bs->file);
}
return 0;
}
return 1;
}
int bdrv_snapshot_create(BlockDriverState *bs,
QEMUSnapshotInfo *sn_info)
{
BlockDriver *drv = bs->drv;
if (!drv) {
return -ENOMEDIUM;
}
if (drv->bdrv_snapshot_create) {
return drv->bdrv_snapshot_create(bs, sn_info);
}
if (bs->file) {
return bdrv_snapshot_create(bs->file, sn_info);
}
return -ENOTSUP;
}
int bdrv_snapshot_goto(BlockDriverState *bs,
const char *snapshot_id)
{
BlockDriver *drv = bs->drv;
int ret, open_ret;
if (!drv) {
return -ENOMEDIUM;
}
if (drv->bdrv_snapshot_goto) {
return drv->bdrv_snapshot_goto(bs, snapshot_id);
}
if (bs->file) {
drv->bdrv_close(bs);
ret = bdrv_snapshot_goto(bs->file, snapshot_id);
open_ret = drv->bdrv_open(bs, NULL, bs->open_flags);
if (open_ret < 0) {
bdrv_delete(bs->file);
bs->drv = NULL;
return open_ret;
}
return ret;
}
return -ENOTSUP;
}
int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
{
BlockDriver *drv = bs->drv;
if (!drv) {
return -ENOMEDIUM;
}
if (drv->bdrv_snapshot_delete) {
return drv->bdrv_snapshot_delete(bs, snapshot_id);
}
if (bs->file) {
return bdrv_snapshot_delete(bs->file, snapshot_id);
}
return -ENOTSUP;
}
int bdrv_snapshot_list(BlockDriverState *bs,
QEMUSnapshotInfo **psn_info)
{
BlockDriver *drv = bs->drv;
if (!drv) {
return -ENOMEDIUM;
}
if (drv->bdrv_snapshot_list) {
return drv->bdrv_snapshot_list(bs, psn_info);
}
if (bs->file) {
return bdrv_snapshot_list(bs->file, psn_info);
}
return -ENOTSUP;
}
int bdrv_snapshot_load_tmp(BlockDriverState *bs,
const char *snapshot_name)
{
BlockDriver *drv = bs->drv;
if (!drv) {
return -ENOMEDIUM;
}
if (!bs->read_only) {
return -EINVAL;
}
if (drv->bdrv_snapshot_load_tmp) {
return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
}
return -ENOTSUP;
}
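Each helper above follows the same delegation rule: use the driver's own callback when it exists, otherwise recurse into bs->file, otherwise return -ENOTSUP. A hedged sketch of how a caller might combine bdrv_snapshot_find() and bdrv_snapshot_goto(); the wrapper function itself is illustrative and not part of this file.
#include <errno.h>
#include "block/snapshot.h"
/* Illustrative caller, not part of this file: look a snapshot up by name
 * or id and revert to it. */
static int revert_to_snapshot(BlockDriverState *bs, const char *name)
{
    QEMUSnapshotInfo sn;
    int ret;
    if (!bdrv_can_snapshot(bs)) {
        return -ENOTSUP;
    }
    /* matches either sn.id_str or sn.name, returns -ENOENT otherwise */
    ret = bdrv_snapshot_find(bs, &sn, name);
    if (ret < 0) {
        return ret;
    }
    return bdrv_snapshot_goto(bs, sn.id_str);
}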

1076
block/ssh.c Normal file

File diff suppressed because it is too large

View File

@ -198,7 +198,7 @@ static void stream_set_speed(BlockJob *job, int64_t speed, Error **errp)
ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
}
static BlockJobType stream_job_type = {
static const BlockJobType stream_job_type = {
.instance_size = sizeof(StreamBlockJob),
.job_type = "stream",
.set_speed = stream_set_speed,

View File

@ -779,6 +779,7 @@ static BlockDriver bdrv_vdi = {
.bdrv_close = vdi_close,
.bdrv_reopen_prepare = vdi_reopen_prepare,
.bdrv_create = vdi_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_co_is_allocated = vdi_co_is_allocated,
.bdrv_make_empty = vdi_make_empty,

972
block/vhdx.c Normal file
View File

@ -0,0 +1,972 @@
/*
* Block driver for Hyper-V VHDX Images
*
* Copyright (c) 2013 Red Hat, Inc.,
*
* Authors:
* Jeff Cody <jcody@redhat.com>
*
* This is based on the "VHDX Format Specification v0.95", published 4/12/2012
* by Microsoft:
* https://www.microsoft.com/en-us/download/details.aspx?id=29681
*
* This work is licensed under the terms of the GNU LGPL, version 2 or later.
* See the COPYING.LIB file in the top-level directory.
*
*/
#include "qemu-common.h"
#include "block/block_int.h"
#include "qemu/module.h"
#include "qemu/crc32c.h"
#include "block/vhdx.h"
/* Several metadata and region table data entries are identified by
* guids in an MS-specific GUID format. */
/* ------- Known Region Table GUIDs ---------------------- */
static const MSGUID bat_guid = { .data1 = 0x2dc27766,
.data2 = 0xf623,
.data3 = 0x4200,
.data4 = { 0x9d, 0x64, 0x11, 0x5e,
0x9b, 0xfd, 0x4a, 0x08} };
static const MSGUID metadata_guid = { .data1 = 0x8b7ca206,
.data2 = 0x4790,
.data3 = 0x4b9a,
.data4 = { 0xb8, 0xfe, 0x57, 0x5f,
0x05, 0x0f, 0x88, 0x6e} };
/* ------- Known Metadata Entry GUIDs ---------------------- */
static const MSGUID file_param_guid = { .data1 = 0xcaa16737,
.data2 = 0xfa36,
.data3 = 0x4d43,
.data4 = { 0xb3, 0xb6, 0x33, 0xf0,
0xaa, 0x44, 0xe7, 0x6b} };
static const MSGUID virtual_size_guid = { .data1 = 0x2FA54224,
.data2 = 0xcd1b,
.data3 = 0x4876,
.data4 = { 0xb2, 0x11, 0x5d, 0xbe,
0xd8, 0x3b, 0xf4, 0xb8} };
static const MSGUID page83_guid = { .data1 = 0xbeca12ab,
.data2 = 0xb2e6,
.data3 = 0x4523,
.data4 = { 0x93, 0xef, 0xc3, 0x09,
0xe0, 0x00, 0xc7, 0x46} };
static const MSGUID phys_sector_guid = { .data1 = 0xcda348c7,
.data2 = 0x445d,
.data3 = 0x4471,
.data4 = { 0x9c, 0xc9, 0xe9, 0x88,
0x52, 0x51, 0xc5, 0x56} };
static const MSGUID parent_locator_guid = { .data1 = 0xa8d35f2d,
.data2 = 0xb30b,
.data3 = 0x454d,
.data4 = { 0xab, 0xf7, 0xd3,
0xd8, 0x48, 0x34,
0xab, 0x0c} };
static const MSGUID logical_sector_guid = { .data1 = 0x8141bf1d,
.data2 = 0xa96f,
.data3 = 0x4709,
.data4 = { 0xba, 0x47, 0xf2,
0x33, 0xa8, 0xfa,
0xab, 0x5f} };
/* Each parent type must have a valid GUID; this is for parent images
* of type 'VHDX'. If we were to allow e.g. a QCOW2 parent, we would
* need to make up our own QCOW2 GUID type */
static const MSGUID parent_vhdx_guid = { .data1 = 0xb04aefb7,
.data2 = 0xd19e,
.data3 = 0x4a81,
.data4 = { 0xb7, 0x89, 0x25, 0xb8,
0xe9, 0x44, 0x59, 0x13} };
#define META_FILE_PARAMETER_PRESENT 0x01
#define META_VIRTUAL_DISK_SIZE_PRESENT 0x02
#define META_PAGE_83_PRESENT 0x04
#define META_LOGICAL_SECTOR_SIZE_PRESENT 0x08
#define META_PHYS_SECTOR_SIZE_PRESENT 0x10
#define META_PARENT_LOCATOR_PRESENT 0x20
#define META_ALL_PRESENT \
(META_FILE_PARAMETER_PRESENT | META_VIRTUAL_DISK_SIZE_PRESENT | \
META_PAGE_83_PRESENT | META_LOGICAL_SECTOR_SIZE_PRESENT | \
META_PHYS_SECTOR_SIZE_PRESENT)
typedef struct VHDXMetadataEntries {
VHDXMetadataTableEntry file_parameters_entry;
VHDXMetadataTableEntry virtual_disk_size_entry;
VHDXMetadataTableEntry page83_data_entry;
VHDXMetadataTableEntry logical_sector_size_entry;
VHDXMetadataTableEntry phys_sector_size_entry;
VHDXMetadataTableEntry parent_locator_entry;
uint16_t present;
} VHDXMetadataEntries;
typedef struct VHDXSectorInfo {
uint32_t bat_idx; /* BAT entry index */
uint32_t sectors_avail; /* sectors available in payload block */
uint32_t bytes_left; /* bytes left in the block after data to r/w */
uint32_t bytes_avail; /* bytes available in payload block */
uint64_t file_offset; /* absolute offset in bytes, in file */
uint64_t block_offset; /* block offset, in bytes */
} VHDXSectorInfo;
typedef struct BDRVVHDXState {
CoMutex lock;
int curr_header;
VHDXHeader *headers[2];
VHDXRegionTableHeader rt;
VHDXRegionTableEntry bat_rt; /* region table for the BAT */
VHDXRegionTableEntry metadata_rt; /* region table for the metadata */
VHDXMetadataTableHeader metadata_hdr;
VHDXMetadataEntries metadata_entries;
VHDXFileParameters params;
uint32_t block_size;
uint32_t block_size_bits;
uint32_t sectors_per_block;
uint32_t sectors_per_block_bits;
uint64_t virtual_disk_size;
uint32_t logical_sector_size;
uint32_t physical_sector_size;
uint64_t chunk_ratio;
uint32_t chunk_ratio_bits;
uint32_t logical_sector_size_bits;
uint32_t bat_entries;
VHDXBatEntry *bat;
uint64_t bat_offset;
VHDXParentLocatorHeader parent_header;
VHDXParentLocatorEntry *parent_entries;
} BDRVVHDXState;
uint32_t vhdx_checksum_calc(uint32_t crc, uint8_t *buf, size_t size,
int crc_offset)
{
uint32_t crc_new;
uint32_t crc_orig;
assert(buf != NULL);
if (crc_offset > 0) {
memcpy(&crc_orig, buf + crc_offset, sizeof(crc_orig));
memset(buf + crc_offset, 0, sizeof(crc_orig));
}
crc_new = crc32c(crc, buf, size);
if (crc_offset > 0) {
memcpy(buf + crc_offset, &crc_orig, sizeof(crc_orig));
}
return crc_new;
}
/* Validates the checksum of the buffer, with an in-place CRC.
*
* Zero is substituted during crc calculation for the original crc field,
* and the crc field is restored afterwards. But the buffer will be modified
* during the calculation, so this may not be suitable for multi-threaded
* use.
*
* crc_offset: byte offset in buf of the buffer crc
* buf: buffer pointer
* size: size of buffer (must be > crc_offset+4)
*
* returns true if checksum is valid, false otherwise
*/
bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset)
{
uint32_t crc_orig;
uint32_t crc;
assert(buf != NULL);
assert(size > (crc_offset + 4));
memcpy(&crc_orig, buf + crc_offset, sizeof(crc_orig));
crc_orig = le32_to_cpu(crc_orig);
crc = vhdx_checksum_calc(0xffffffff, buf, size, crc_offset);
return crc == crc_orig;
}
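The block comment above describes the save/zero/restore trick this function uses: copy the stored CRC aside, zero that field, checksum the whole buffer, then put the original bytes back before comparing. Below is a minimal standalone sketch of the same pattern; toy_checksum() is a stand-in chosen for illustration only (the driver itself uses crc32c() with a 0xffffffff seed and le32_to_cpu() on the stored value), and the offset of 8 is arbitrary.
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
/* stand-in for crc32c(); any checksum works for demonstrating the pattern */
static uint32_t toy_checksum(const uint8_t *buf, size_t size)
{
    uint32_t sum = 0;
    size_t i;
    for (i = 0; i < size; i++) {
        sum = sum * 31 + buf[i];
    }
    return sum;
}
static bool checksum_is_valid(uint8_t *buf, size_t size, int crc_offset)
{
    uint32_t crc_orig, crc;
    /* save the stored checksum and zero the field for the calculation */
    memcpy(&crc_orig, buf + crc_offset, sizeof(crc_orig));
    memset(buf + crc_offset, 0, sizeof(crc_orig));
    crc = toy_checksum(buf, size);
    /* restore the buffer so the caller sees it unchanged */
    memcpy(buf + crc_offset, &crc_orig, sizeof(crc_orig));
    return crc == crc_orig;
}
int main(void)
{
    uint8_t block[64] = "example header block";
    uint32_t crc;
    /* producer side: checksum with the field zeroed, then store the result */
    memset(block + 8, 0, sizeof(crc));
    crc = toy_checksum(block, sizeof(block));
    memcpy(block + 8, &crc, sizeof(crc));
    printf("valid: %d\n", checksum_is_valid(block, sizeof(block), 8));
    return 0;
}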
/*
* Per the MS VHDX Specification, for every VHDX file:
* - The header section is fixed size - 1 MB
* - The header section is always the first "object"
* - The first 64KB of the header is the File Identifier
* - The first uint64 (8 bytes) is the VHDX Signature ("vhdxfile")
* The following 512 bytes constitute a UTF-16 string identifying the
* software that created the file, and is optional and diagnostic only.
*
* Therefore, we probe by looking for the vhdxfile signature "vhdxfile"
*/
static int vhdx_probe(const uint8_t *buf, int buf_size, const char *filename)
{
if (buf_size >= 8 && !memcmp(buf, "vhdxfile", 8)) {
return 100;
}
return 0;
}
/* All VHDX structures on disk are little endian */
static void vhdx_header_le_import(VHDXHeader *h)
{
assert(h != NULL);
le32_to_cpus(&h->signature);
le32_to_cpus(&h->checksum);
le64_to_cpus(&h->sequence_number);
leguid_to_cpus(&h->file_write_guid);
leguid_to_cpus(&h->data_write_guid);
leguid_to_cpus(&h->log_guid);
le16_to_cpus(&h->log_version);
le16_to_cpus(&h->version);
le32_to_cpus(&h->log_length);
le64_to_cpus(&h->log_offset);
}
/* opens the specified header block from the VHDX file header section */
static int vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s)
{
int ret = 0;
VHDXHeader *header1;
VHDXHeader *header2;
bool h1_valid = false;
bool h2_valid = false;
uint64_t h1_seq = 0;
uint64_t h2_seq = 0;
uint8_t *buffer;
header1 = qemu_blockalign(bs, sizeof(VHDXHeader));
header2 = qemu_blockalign(bs, sizeof(VHDXHeader));
buffer = qemu_blockalign(bs, VHDX_HEADER_SIZE);
s->headers[0] = header1;
s->headers[1] = header2;
/* We have to read the whole VHDX_HEADER_SIZE instead of
* sizeof(VHDXHeader), because the checksum is over the whole
* region */
ret = bdrv_pread(bs->file, VHDX_HEADER1_OFFSET, buffer, VHDX_HEADER_SIZE);
if (ret < 0) {
goto fail;
}
/* copy over just the relevant portion that we need */
memcpy(header1, buffer, sizeof(VHDXHeader));
vhdx_header_le_import(header1);
if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4) &&
!memcmp(&header1->signature, "head", 4) &&
header1->version == 1) {
h1_seq = header1->sequence_number;
h1_valid = true;
}
ret = bdrv_pread(bs->file, VHDX_HEADER2_OFFSET, buffer, VHDX_HEADER_SIZE);
if (ret < 0) {
goto fail;
}
/* copy over just the relevant portion that we need */
memcpy(header2, buffer, sizeof(VHDXHeader));
vhdx_header_le_import(header2);
if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4) &&
!memcmp(&header2->signature, "head", 4) &&
header2->version == 1) {
h2_seq = header2->sequence_number;
h2_valid = true;
}
/* If there is only 1 valid header (or no valid headers), we
* don't care what the sequence numbers are */
if (h1_valid && !h2_valid) {
s->curr_header = 0;
} else if (!h1_valid && h2_valid) {
s->curr_header = 1;
} else if (!h1_valid && !h2_valid) {
ret = -EINVAL;
goto fail;
} else {
/* If both headers are valid, then we choose the active one by the
* highest sequence number. If the sequence numbers are equal, that is
* invalid */
if (h1_seq > h2_seq) {
s->curr_header = 0;
} else if (h2_seq > h1_seq) {
s->curr_header = 1;
} else {
ret = -EINVAL;
goto fail;
}
}
ret = 0;
goto exit;
fail:
qerror_report(ERROR_CLASS_GENERIC_ERROR, "No valid VHDX header found");
qemu_vfree(header1);
qemu_vfree(header2);
s->headers[0] = NULL;
s->headers[1] = NULL;
exit:
qemu_vfree(buffer);
return ret;
}
static int vhdx_open_region_tables(BlockDriverState *bs, BDRVVHDXState *s)
{
int ret = 0;
uint8_t *buffer;
int offset = 0;
VHDXRegionTableEntry rt_entry;
uint32_t i;
bool bat_rt_found = false;
bool metadata_rt_found = false;
/* We have to read the whole 64KB block, because the crc32 is over the
* whole block */
buffer = qemu_blockalign(bs, VHDX_HEADER_BLOCK_SIZE);
ret = bdrv_pread(bs->file, VHDX_REGION_TABLE_OFFSET, buffer,
VHDX_HEADER_BLOCK_SIZE);
if (ret < 0) {
goto fail;
}
memcpy(&s->rt, buffer, sizeof(s->rt));
le32_to_cpus(&s->rt.signature);
le32_to_cpus(&s->rt.checksum);
le32_to_cpus(&s->rt.entry_count);
le32_to_cpus(&s->rt.reserved);
offset += sizeof(s->rt);
if (!vhdx_checksum_is_valid(buffer, VHDX_HEADER_BLOCK_SIZE, 4) ||
memcmp(&s->rt.signature, "regi", 4)) {
ret = -EINVAL;
goto fail;
}
/* Per spec, maximum region table entry count is 2047 */
if (s->rt.entry_count > 2047) {
ret = -EINVAL;
goto fail;
}
for (i = 0; i < s->rt.entry_count; i++) {
memcpy(&rt_entry, buffer + offset, sizeof(rt_entry));
offset += sizeof(rt_entry);
leguid_to_cpus(&rt_entry.guid);
le64_to_cpus(&rt_entry.file_offset);
le32_to_cpus(&rt_entry.length);
le32_to_cpus(&rt_entry.data_bits);
/* see if we recognize the entry */
if (guid_eq(rt_entry.guid, bat_guid)) {
/* must be unique; if we have already found it this is invalid */
if (bat_rt_found) {
ret = -EINVAL;
goto fail;
}
bat_rt_found = true;
s->bat_rt = rt_entry;
continue;
}
if (guid_eq(rt_entry.guid, metadata_guid)) {
/* must be unique; if we have already found it this is invalid */
if (metadata_rt_found) {
ret = -EINVAL;
goto fail;
}
metadata_rt_found = true;
s->metadata_rt = rt_entry;
continue;
}
if (rt_entry.data_bits & VHDX_REGION_ENTRY_REQUIRED) {
/* cannot read vhdx file - required region table entry that
* we do not understand. per spec, we must fail to open */
ret = -ENOTSUP;
goto fail;
}
}
ret = 0;
fail:
qemu_vfree(buffer);
return ret;
}
/* Metadata initial parser
*
* This loads all the metadata entry fields. This may cause additional
* fields to be processed (e.g. parent locator, etc..).
*
* There are 5 Metadata items that are always required:
* - File Parameters (block size, has a parent)
* - Virtual Disk Size (size, in bytes, of the virtual drive)
* - Page 83 Data (scsi page 83 guid)
* - Logical Sector Size (logical sector size in bytes, either 512 or
* 4096. We only support 512 currently)
* - Physical Sector Size (512 or 4096)
*
* Also, if the File Parameters indicate this is a differencing file,
* we must also look for the Parent Locator metadata item.
*/
static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
{
int ret = 0;
uint8_t *buffer;
int offset = 0;
uint32_t i = 0;
VHDXMetadataTableEntry md_entry;
buffer = qemu_blockalign(bs, VHDX_METADATA_TABLE_MAX_SIZE);
ret = bdrv_pread(bs->file, s->metadata_rt.file_offset, buffer,
VHDX_METADATA_TABLE_MAX_SIZE);
if (ret < 0) {
goto exit;
}
memcpy(&s->metadata_hdr, buffer, sizeof(s->metadata_hdr));
offset += sizeof(s->metadata_hdr);
le64_to_cpus(&s->metadata_hdr.signature);
le16_to_cpus(&s->metadata_hdr.reserved);
le16_to_cpus(&s->metadata_hdr.entry_count);
if (memcmp(&s->metadata_hdr.signature, "metadata", 8)) {
ret = -EINVAL;
goto exit;
}
s->metadata_entries.present = 0;
if ((s->metadata_hdr.entry_count * sizeof(md_entry)) >
(VHDX_METADATA_TABLE_MAX_SIZE - offset)) {
ret = -EINVAL;
goto exit;
}
for (i = 0; i < s->metadata_hdr.entry_count; i++) {
memcpy(&md_entry, buffer + offset, sizeof(md_entry));
offset += sizeof(md_entry);
leguid_to_cpus(&md_entry.item_id);
le32_to_cpus(&md_entry.offset);
le32_to_cpus(&md_entry.length);
le32_to_cpus(&md_entry.data_bits);
le32_to_cpus(&md_entry.reserved2);
if (guid_eq(md_entry.item_id, file_param_guid)) {
if (s->metadata_entries.present & META_FILE_PARAMETER_PRESENT) {
ret = -EINVAL;
goto exit;
}
s->metadata_entries.file_parameters_entry = md_entry;
s->metadata_entries.present |= META_FILE_PARAMETER_PRESENT;
continue;
}
if (guid_eq(md_entry.item_id, virtual_size_guid)) {
if (s->metadata_entries.present & META_VIRTUAL_DISK_SIZE_PRESENT) {
ret = -EINVAL;
goto exit;
}
s->metadata_entries.virtual_disk_size_entry = md_entry;
s->metadata_entries.present |= META_VIRTUAL_DISK_SIZE_PRESENT;
continue;
}
if (guid_eq(md_entry.item_id, page83_guid)) {
if (s->metadata_entries.present & META_PAGE_83_PRESENT) {
ret = -EINVAL;
goto exit;
}
s->metadata_entries.page83_data_entry = md_entry;
s->metadata_entries.present |= META_PAGE_83_PRESENT;
continue;
}
if (guid_eq(md_entry.item_id, logical_sector_guid)) {
if (s->metadata_entries.present &
META_LOGICAL_SECTOR_SIZE_PRESENT) {
ret = -EINVAL;
goto exit;
}
s->metadata_entries.logical_sector_size_entry = md_entry;
s->metadata_entries.present |= META_LOGICAL_SECTOR_SIZE_PRESENT;
continue;
}
if (guid_eq(md_entry.item_id, phys_sector_guid)) {
if (s->metadata_entries.present & META_PHYS_SECTOR_SIZE_PRESENT) {
ret = -EINVAL;
goto exit;
}
s->metadata_entries.phys_sector_size_entry = md_entry;
s->metadata_entries.present |= META_PHYS_SECTOR_SIZE_PRESENT;
continue;
}
if (guid_eq(md_entry.item_id, parent_locator_guid)) {
if (s->metadata_entries.present & META_PARENT_LOCATOR_PRESENT) {
ret = -EINVAL;
goto exit;
}
s->metadata_entries.parent_locator_entry = md_entry;
s->metadata_entries.present |= META_PARENT_LOCATOR_PRESENT;
continue;
}
if (md_entry.data_bits & VHDX_META_FLAGS_IS_REQUIRED) {
            /* cannot read vhdx file - required metadata table entry that
* we do not understand. per spec, we must fail to open */
ret = -ENOTSUP;
goto exit;
}
}
if (s->metadata_entries.present != META_ALL_PRESENT) {
ret = -ENOTSUP;
goto exit;
}
ret = bdrv_pread(bs->file,
s->metadata_entries.file_parameters_entry.offset
+ s->metadata_rt.file_offset,
&s->params,
sizeof(s->params));
if (ret < 0) {
goto exit;
}
le32_to_cpus(&s->params.block_size);
le32_to_cpus(&s->params.data_bits);
/* We now have the file parameters, so we can tell if this is a
     * differencing file (i.e. has_parent), is dynamic or fixed
* sized (leave_blocks_allocated), and the block size */
    /* The parent locator is required iff the file parameters has_parent
     * bit is set */
if (s->params.data_bits & VHDX_PARAMS_HAS_PARENT) {
if (s->metadata_entries.present & META_PARENT_LOCATOR_PRESENT) {
/* TODO: parse parent locator fields */
ret = -ENOTSUP; /* temp, until differencing files are supported */
goto exit;
} else {
            /* if has_parent is set, but there is no parent locator present,
* then that is an invalid combination */
ret = -EINVAL;
goto exit;
}
}
/* determine virtual disk size, logical sector size,
* and phys sector size */
ret = bdrv_pread(bs->file,
s->metadata_entries.virtual_disk_size_entry.offset
+ s->metadata_rt.file_offset,
&s->virtual_disk_size,
sizeof(uint64_t));
if (ret < 0) {
goto exit;
}
ret = bdrv_pread(bs->file,
s->metadata_entries.logical_sector_size_entry.offset
+ s->metadata_rt.file_offset,
&s->logical_sector_size,
sizeof(uint32_t));
if (ret < 0) {
goto exit;
}
ret = bdrv_pread(bs->file,
s->metadata_entries.phys_sector_size_entry.offset
+ s->metadata_rt.file_offset,
&s->physical_sector_size,
sizeof(uint32_t));
if (ret < 0) {
goto exit;
}
le64_to_cpus(&s->virtual_disk_size);
le32_to_cpus(&s->logical_sector_size);
le32_to_cpus(&s->physical_sector_size);
if (s->logical_sector_size == 0 || s->params.block_size == 0) {
ret = -EINVAL;
goto exit;
}
/* both block_size and sector_size are guaranteed powers of 2 */
s->sectors_per_block = s->params.block_size / s->logical_sector_size;
s->chunk_ratio = (VHDX_MAX_SECTORS_PER_BLOCK) *
(uint64_t)s->logical_sector_size /
(uint64_t)s->params.block_size;
/* These values are ones we will want to use for division / multiplication
* later on, and they are all guaranteed (per the spec) to be powers of 2,
* so we can take advantage of that for shift operations during
* reads/writes */
if (s->logical_sector_size & (s->logical_sector_size - 1)) {
ret = -EINVAL;
goto exit;
}
if (s->sectors_per_block & (s->sectors_per_block - 1)) {
ret = -EINVAL;
goto exit;
}
if (s->chunk_ratio & (s->chunk_ratio - 1)) {
ret = -EINVAL;
goto exit;
}
s->block_size = s->params.block_size;
if (s->block_size & (s->block_size - 1)) {
ret = -EINVAL;
goto exit;
}
s->logical_sector_size_bits = 31 - clz32(s->logical_sector_size);
s->sectors_per_block_bits = 31 - clz32(s->sectors_per_block);
s->chunk_ratio_bits = 63 - clz64(s->chunk_ratio);
s->block_size_bits = 31 - clz32(s->block_size);
ret = 0;
exit:
qemu_vfree(buffer);
return ret;
}
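/* Worked example of the shift derivation above (illustrative numbers): with
 * 512 byte logical sectors and a 32 MB block size,
 *   logical_sector_size_bits = 31 - clz32(512)                  = 9
 *   sectors_per_block        = 32 MB / 512           = 65536, bits = 16
 *   chunk_ratio              = ((1 << 23) * 512) / 32 MB = 128, bits = 7
 *   block_size_bits          = 31 - clz32(32 MB)                = 25
 * so later reads/writes can use shifts instead of divisions. */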
/* Parse the replay log. Per the VHDX spec, if the log is present
* it must be replayed prior to opening the file, even read-only.
*
* If read-only, we must replay the log in RAM (or refuse to open
 * a dirty VHDX file read-only) */
static int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s)
{
int ret = 0;
int i;
VHDXHeader *hdr;
hdr = s->headers[s->curr_header];
    /* A replay log is present only if the log guid is non-zero
     * and the log length is non-zero */
for (i = 0; i < sizeof(hdr->log_guid.data4); i++) {
ret |= hdr->log_guid.data4[i];
}
if (hdr->log_guid.data1 == 0 &&
hdr->log_guid.data2 == 0 &&
hdr->log_guid.data3 == 0 &&
ret == 0) {
goto exit;
}
/* per spec, only log version of 0 is supported */
if (hdr->log_version != 0) {
ret = -EINVAL;
goto exit;
}
if (hdr->log_length == 0) {
goto exit;
}
/* We currently do not support images with logs to replay */
ret = -ENOTSUP;
exit:
return ret;
}
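/* Illustrative alternative (not in the original file): the "is the log guid
 * zero" test above can also be written with the guid_eq() helper from
 * block/vhdx.h, by comparing against a zero-initialized MSGUID. */
static bool example_log_guid_is_zero(const VHDXHeader *hdr)
{
    static const MSGUID zero_guid = { 0 };
    return guid_eq(hdr->log_guid, zero_guid);
}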
static int vhdx_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVVHDXState *s = bs->opaque;
int ret = 0;
uint32_t i;
uint64_t signature;
uint32_t data_blocks_cnt, bitmap_blocks_cnt;
s->bat = NULL;
qemu_co_mutex_init(&s->lock);
/* validate the file signature */
ret = bdrv_pread(bs->file, 0, &signature, sizeof(uint64_t));
if (ret < 0) {
goto fail;
}
if (memcmp(&signature, "vhdxfile", 8)) {
ret = -EINVAL;
goto fail;
}
ret = vhdx_parse_header(bs, s);
if (ret) {
goto fail;
}
ret = vhdx_parse_log(bs, s);
if (ret) {
goto fail;
}
ret = vhdx_open_region_tables(bs, s);
if (ret) {
goto fail;
}
ret = vhdx_parse_metadata(bs, s);
if (ret) {
goto fail;
}
s->block_size = s->params.block_size;
/* the VHDX spec dictates that virtual_disk_size is always a multiple of
* logical_sector_size */
bs->total_sectors = s->virtual_disk_size >> s->logical_sector_size_bits;
data_blocks_cnt = s->virtual_disk_size >> s->block_size_bits;
if (s->virtual_disk_size - (data_blocks_cnt << s->block_size_bits)) {
data_blocks_cnt++;
}
bitmap_blocks_cnt = data_blocks_cnt >> s->chunk_ratio_bits;
if (data_blocks_cnt - (bitmap_blocks_cnt << s->chunk_ratio_bits)) {
bitmap_blocks_cnt++;
}
if (s->parent_entries) {
s->bat_entries = bitmap_blocks_cnt * (s->chunk_ratio + 1);
} else {
s->bat_entries = data_blocks_cnt +
((data_blocks_cnt - 1) >> s->chunk_ratio_bits);
}
s->bat_offset = s->bat_rt.file_offset;
if (s->bat_entries > s->bat_rt.length / sizeof(VHDXBatEntry)) {
/* BAT allocation is not large enough for all entries */
ret = -EINVAL;
goto fail;
}
s->bat = qemu_blockalign(bs, s->bat_rt.length);
ret = bdrv_pread(bs->file, s->bat_offset, s->bat, s->bat_rt.length);
if (ret < 0) {
goto fail;
}
for (i = 0; i < s->bat_entries; i++) {
le64_to_cpus(&s->bat[i]);
}
if (flags & BDRV_O_RDWR) {
ret = -ENOTSUP;
goto fail;
}
/* TODO: differencing files, write */
return 0;
fail:
qemu_vfree(s->headers[0]);
qemu_vfree(s->headers[1]);
qemu_vfree(s->bat);
qemu_vfree(s->parent_entries);
return ret;
}
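/* Worked example of the BAT sizing above (illustrative numbers): a 10 GB
 * non-differencing dynamic image with a 32 MB block size and 512 byte
 * logical sectors gives
 *   data_blocks_cnt   = ceil(10 GB / 32 MB)    = 320
 *   chunk_ratio       = 128   (see vhdx_parse_metadata)
 *   bitmap_blocks_cnt = ceil(320 / 128)        = 3
 *   bat_entries       = 320 + ((320 - 1) >> 7) = 322
 * i.e. one sector-bitmap entry is interleaved after every 128 payload-block
 * entries in the BAT. */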
static int vhdx_reopen_prepare(BDRVReopenState *state,
BlockReopenQueue *queue, Error **errp)
{
return 0;
}
/*
* Perform sector to block offset translations, to get various
* sector and file offsets into the image. See VHDXSectorInfo
*/
static void vhdx_block_translate(BDRVVHDXState *s, int64_t sector_num,
int nb_sectors, VHDXSectorInfo *sinfo)
{
uint32_t block_offset;
sinfo->bat_idx = sector_num >> s->sectors_per_block_bits;
/* effectively a modulo - this gives us the offset into the block
* (in sector sizes) for our sector number */
block_offset = sector_num - (sinfo->bat_idx << s->sectors_per_block_bits);
/* the chunk ratio gives us the interleaving of the sector
* bitmaps, so we need to advance our page block index by the
* sector bitmaps entry number */
sinfo->bat_idx += sinfo->bat_idx >> s->chunk_ratio_bits;
/* the number of sectors we can read/write in this cycle */
sinfo->sectors_avail = s->sectors_per_block - block_offset;
sinfo->bytes_left = sinfo->sectors_avail << s->logical_sector_size_bits;
if (sinfo->sectors_avail > nb_sectors) {
sinfo->sectors_avail = nb_sectors;
}
sinfo->bytes_avail = sinfo->sectors_avail << s->logical_sector_size_bits;
sinfo->file_offset = s->bat[sinfo->bat_idx] >> VHDX_BAT_FILE_OFF_BITS;
sinfo->block_offset = block_offset << s->logical_sector_size_bits;
/* The file offset must be past the header section, so must be > 0 */
if (sinfo->file_offset == 0) {
return;
}
/* block offset is the offset in vhdx logical sectors, in
* the payload data block. Convert that to a byte offset
* in the block, and add in the payload data block offset
* in the file, in bytes, to get the final read address */
sinfo->file_offset <<= 20; /* now in bytes, rather than 1MB units */
sinfo->file_offset += sinfo->block_offset;
}
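/* Worked example of the translation above (illustrative, using the same
 * 32 MB block / 512 byte sector geometry as the earlier examples): for
 * sector_num = 70000,
 *   bat_idx      = 70000 >> 16             = 1
 *   block_offset = 70000 - (1 << 16)       = 4464 sectors
 *   bat_idx     += 1 >> 7, i.e. stays 1    (no sector-bitmap entries crossed)
 * and if that BAT entry's file-offset field is 4 (in 1 MB units),
 *   file_offset  = (4 << 20) + (4464 << 9) = 4 MB + 2285568 bytes. */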
static coroutine_fn int vhdx_co_readv(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
{
BDRVVHDXState *s = bs->opaque;
int ret = 0;
VHDXSectorInfo sinfo;
uint64_t bytes_done = 0;
QEMUIOVector hd_qiov;
qemu_iovec_init(&hd_qiov, qiov->niov);
qemu_co_mutex_lock(&s->lock);
while (nb_sectors > 0) {
        /* If this is a differencing file, we need to inspect the sector
         * bitmap to see if we have the data or not */
if (s->params.data_bits & VHDX_PARAMS_HAS_PARENT) {
/* not supported yet */
ret = -ENOTSUP;
goto exit;
} else {
vhdx_block_translate(s, sector_num, nb_sectors, &sinfo);
qemu_iovec_reset(&hd_qiov);
qemu_iovec_concat(&hd_qiov, qiov, bytes_done, sinfo.bytes_avail);
/* check the payload block state */
switch (s->bat[sinfo.bat_idx] & VHDX_BAT_STATE_BIT_MASK) {
case PAYLOAD_BLOCK_NOT_PRESENT: /* fall through */
case PAYLOAD_BLOCK_UNDEFINED: /* fall through */
case PAYLOAD_BLOCK_UNMAPPED: /* fall through */
case PAYLOAD_BLOCK_ZERO:
/* return zero */
qemu_iovec_memset(&hd_qiov, 0, 0, sinfo.bytes_avail);
break;
case PAYLOAD_BLOCK_FULL_PRESENT:
qemu_co_mutex_unlock(&s->lock);
ret = bdrv_co_readv(bs->file,
sinfo.file_offset >> BDRV_SECTOR_BITS,
sinfo.sectors_avail, &hd_qiov);
qemu_co_mutex_lock(&s->lock);
if (ret < 0) {
goto exit;
}
break;
case PAYLOAD_BLOCK_PARTIALLY_PRESENT:
                /* we don't yet support differencing files, fall through
* to error */
default:
ret = -EIO;
goto exit;
break;
}
nb_sectors -= sinfo.sectors_avail;
sector_num += sinfo.sectors_avail;
bytes_done += sinfo.bytes_avail;
}
}
ret = 0;
exit:
qemu_co_mutex_unlock(&s->lock);
qemu_iovec_destroy(&hd_qiov);
return ret;
}
static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
{
return -ENOTSUP;
}
static void vhdx_close(BlockDriverState *bs)
{
BDRVVHDXState *s = bs->opaque;
qemu_vfree(s->headers[0]);
qemu_vfree(s->headers[1]);
qemu_vfree(s->bat);
qemu_vfree(s->parent_entries);
}
static BlockDriver bdrv_vhdx = {
.format_name = "vhdx",
.instance_size = sizeof(BDRVVHDXState),
.bdrv_probe = vhdx_probe,
.bdrv_open = vhdx_open,
.bdrv_close = vhdx_close,
.bdrv_reopen_prepare = vhdx_reopen_prepare,
.bdrv_co_readv = vhdx_co_readv,
.bdrv_co_writev = vhdx_co_writev,
};
static void bdrv_vhdx_init(void)
{
bdrv_register(&bdrv_vhdx);
}
block_init(bdrv_vhdx_init);

block/vhdx.h Normal file

@ -0,0 +1,325 @@
/*
* Block driver for Hyper-V VHDX Images
*
* Copyright (c) 2013 Red Hat, Inc.,
*
* Authors:
* Jeff Cody <jcody@redhat.com>
*
* This is based on the "VHDX Format Specification v0.95", published 4/12/2012
* by Microsoft:
* https://www.microsoft.com/en-us/download/details.aspx?id=29681
*
* This work is licensed under the terms of the GNU LGPL, version 2 or later.
* See the COPYING.LIB file in the top-level directory.
*
*/
#ifndef BLOCK_VHDX_H
#define BLOCK_VHDX_H
/* Structures and fields present in the VHDX file */
/* The header section has the following blocks,
* each block is 64KB:
*
* _____________________________________________________________________________
* | File Id. | Header 1 | Header 2 | Region Table | Reserved (768KB) |
* |----------|---------------|------------|--------------|--------------------|
* | | | | | |
* 0.........64KB...........128KB........192KB..........256KB................1MB
*/
#define VHDX_HEADER_BLOCK_SIZE (64*1024)
#define VHDX_FILE_ID_OFFSET 0
#define VHDX_HEADER1_OFFSET (VHDX_HEADER_BLOCK_SIZE*1)
#define VHDX_HEADER2_OFFSET (VHDX_HEADER_BLOCK_SIZE*2)
#define VHDX_REGION_TABLE_OFFSET (VHDX_HEADER_BLOCK_SIZE*3)
/*
* A note on the use of MS-GUID fields. For more details on the GUID,
* please see: https://en.wikipedia.org/wiki/Globally_unique_identifier.
*
* The VHDX specification only states that these are MS GUIDs, and which
* bytes are data1-data4. It makes no mention of what algorithm should be used
* to generate the GUID, nor what standard. However, looking at the specified
* known GUID fields, it appears the GUIDs are:
* Standard/DCE GUID type (noted by 10b in the MSB of byte 0 of .data4)
* Random algorithm (noted by 0x4XXX for .data3)
*/
/* ---- HEADER SECTION STRUCTURES ---- */
/* These structures are ones that are defined in the VHDX specification
* document */
typedef struct VHDXFileIdentifier {
uint64_t signature; /* "vhdxfile" in ASCII */
uint16_t creator[256]; /* optional; utf-16 string to identify
                                   the vhdx file creator. Diagnostic
only */
} VHDXFileIdentifier;
/* The GUID is a 16 byte unique ID.  Microsoft defines it not as a plain
 * 16 byte array but as a structure, so we follow that definition here so
 * that endianness does not trip us up */
typedef struct MSGUID {
uint32_t data1;
uint16_t data2;
uint16_t data3;
uint8_t data4[8];
} MSGUID;
#define guid_eq(a, b) \
(memcmp(&(a), &(b), sizeof(MSGUID)) == 0)
#define VHDX_HEADER_SIZE (4*1024)   /* although the vhdx_header struct on disk
is only 582 bytes, for purposes of crc
the header is the first 4KB of the 64KB
block */
/* The full header is 4KB, although the actual header data is much smaller.
* But for the checksum calculation, it is over the entire 4KB structure,
* not just the defined portion of it */
typedef struct QEMU_PACKED VHDXHeader {
uint32_t signature; /* "head" in ASCII */
uint32_t checksum; /* CRC-32C hash of the whole header */
uint64_t sequence_number; /* Seq number of this header. Each
VHDX file has 2 of these headers,
and only the header with the highest
sequence number is valid */
MSGUID file_write_guid; /* 128 bit unique identifier. Must be
updated to new, unique value before
the first modification is made to
file */
MSGUID data_write_guid; /* 128 bit unique identifier. Must be
updated to new, unique value before
the first modification is made to
                                   visible data. Visible data is
defined as:
- system & user metadata
- raw block data
- disk size
- any change that will
cause the virtual disk
sector read to differ
This does not need to change if
blocks are re-arranged */
MSGUID log_guid; /* 128 bit unique identifier. If zero,
there is no valid log. If non-zero,
log entries with this guid are
valid. */
uint16_t log_version; /* version of the log format. Mustn't be
zero, unless log_guid is also zero */
    uint16_t version;              /* version of the vhdx file. Currently,
                                      the only supported version is "1" */
uint32_t log_length; /* length of the log. Must be multiple
of 1MB */
uint64_t log_offset; /* byte offset in the file of the log.
Must also be a multiple of 1MB */
} VHDXHeader;
/* Header for the region table block */
typedef struct QEMU_PACKED VHDXRegionTableHeader {
uint32_t signature; /* "regi" in ASCII */
uint32_t checksum; /* CRC-32C hash of the 64KB table */
uint32_t entry_count; /* number of valid entries */
uint32_t reserved;
} VHDXRegionTableHeader;
/* Individual region table entry. There may be a maximum of 2047 of these
*
* There are two known region table properties. Both are required.
* BAT (block allocation table): 2DC27766F62342009D64115E9BFD4A08
* Metadata: 8B7CA20647904B9AB8FE575F050F886E
*/
#define VHDX_REGION_ENTRY_REQUIRED 0x01 /* if set, parser must understand
this entry in order to open
file */
typedef struct QEMU_PACKED VHDXRegionTableEntry {
MSGUID guid; /* 128-bit unique identifier */
uint64_t file_offset; /* offset of the object in the file.
Must be multiple of 1MB */
uint32_t length; /* length, in bytes, of the object */
uint32_t data_bits;
} VHDXRegionTableEntry;
/* ---- LOG ENTRY STRUCTURES ---- */
#define VHDX_LOG_HDR_SIZE 64
typedef struct QEMU_PACKED VHDXLogEntryHeader {
uint32_t signature; /* "loge" in ASCII */
uint32_t checksum; /* CRC-32C hash of the 64KB table */
uint32_t entry_length; /* length in bytes, multiple of 1MB */
uint32_t tail; /* byte offset of first log entry of a
seq, where this entry is the last
entry */
uint64_t sequence_number; /* incremented with each log entry.
May not be zero. */
uint32_t descriptor_count; /* number of descriptors in this log
entry, must be >= 0 */
uint32_t reserved;
MSGUID log_guid; /* value of the log_guid from
vhdx_header. If not found in
vhdx_header, it is invalid */
uint64_t flushed_file_offset; /* see spec for full details - this
should be vhdx file size in bytes */
uint64_t last_file_offset; /* size in bytes that all allocated
file structures fit into */
} VHDXLogEntryHeader;
#define VHDX_LOG_DESC_SIZE 32
typedef struct QEMU_PACKED VHDXLogDescriptor {
uint32_t signature; /* "zero" or "desc" in ASCII */
union {
uint32_t reserved; /* zero desc */
uint32_t trailing_bytes; /* data desc: bytes 4092-4096 of the
data sector */
};
union {
uint64_t zero_length; /* zero desc: length of the section to
zero */
uint64_t leading_bytes; /* data desc: bytes 0-7 of the data
sector */
};
uint64_t file_offset; /* file offset to write zeros - multiple
of 4kB */
uint64_t sequence_number; /* must match same field in
vhdx_log_entry_header */
} VHDXLogDescriptor;
typedef struct QEMU_PACKED VHDXLogDataSector {
uint32_t data_signature; /* "data" in ASCII */
uint32_t sequence_high; /* 4 MSB of 8 byte sequence_number */
uint8_t data[4084]; /* raw data, bytes 8-4091 (inclusive).
see the data descriptor field for the
                                   other missing bytes */
uint32_t sequence_low; /* 4 LSB of 8 byte sequence_number */
} VHDXLogDataSector;
/* block states - different state values depending on whether it is a
* payload block, or a sector block. */
#define PAYLOAD_BLOCK_NOT_PRESENT 0
#define PAYLOAD_BLOCK_UNDEFINED 1
#define PAYLOAD_BLOCK_ZERO 2
#define PAYLOAD_BLOCK_UNMAPPED 5
#define PAYLOAD_BLOCK_FULL_PRESENT 6
#define PAYLOAD_BLOCK_PARTIALLY_PRESENT 7
#define SB_BLOCK_NOT_PRESENT 0
#define SB_BLOCK_PRESENT 6
/* per the spec */
#define VHDX_MAX_SECTORS_PER_BLOCK (1<<23)
/* upper 44 bits are the file offset in 1MB units, lower 3 bits are the state,
   other bits are reserved */
#define VHDX_BAT_STATE_BIT_MASK 0x07
#define VHDX_BAT_FILE_OFF_BITS (64-44)
typedef uint64_t VHDXBatEntry;
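/* Illustrative sketch (not part of the original header): unpacking a BAT
 * entry with the definitions above -- the payload file offset lives in the
 * upper bits in 1 MB units, the block state in the low three bits. */
static inline uint64_t example_bat_entry_file_offset(VHDXBatEntry e)
{
    return (e >> VHDX_BAT_FILE_OFF_BITS) << 20;    /* back to a byte offset */
}
static inline int example_bat_entry_state(VHDXBatEntry e)
{
    return e & VHDX_BAT_STATE_BIT_MASK;            /* e.g. PAYLOAD_BLOCK_ZERO */
}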
/* ---- METADATA REGION STRUCTURES ---- */
#define VHDX_METADATA_ENTRY_SIZE 32
#define VHDX_METADATA_MAX_ENTRIES 2047 /* not including the header */
#define VHDX_METADATA_TABLE_MAX_SIZE \
(VHDX_METADATA_ENTRY_SIZE * (VHDX_METADATA_MAX_ENTRIES+1))
typedef struct QEMU_PACKED VHDXMetadataTableHeader {
uint64_t signature; /* "metadata" in ASCII */
uint16_t reserved;
    uint16_t entry_count;          /* number of table entries. <= 2047 */
uint32_t reserved2[5];
} VHDXMetadataTableHeader;
#define VHDX_META_FLAGS_IS_USER 0x01 /* max 1024 entries */
#define VHDX_META_FLAGS_IS_VIRTUAL_DISK 0x02 /* virtual disk metadata if set,
                                              otherwise file metadata */
#define VHDX_META_FLAGS_IS_REQUIRED 0x04 /* parser must understand this
entry to open the file */
typedef struct QEMU_PACKED VHDXMetadataTableEntry {
MSGUID item_id; /* 128-bit identifier for metadata */
uint32_t offset; /* byte offset of the metadata. At
least 64kB. Relative to start of
metadata region */
/* note: if length = 0, so is offset */
uint32_t length; /* length of metadata. <= 1MB. */
uint32_t data_bits; /* least-significant 3 bits are flags, the
rest are reserved (see above) */
uint32_t reserved2;
} VHDXMetadataTableEntry;
#define VHDX_PARAMS_LEAVE_BLOCKS_ALLOCED 0x01 /* Do not change any blocks to
be BLOCK_NOT_PRESENT.
                                                 If set, indicates a fixed
size VHDX file */
#define VHDX_PARAMS_HAS_PARENT 0x02 /* has parent / backing file */
typedef struct QEMU_PACKED VHDXFileParameters {
uint32_t block_size; /* size of each payload block, always
power of 2, <= 256MB and >= 1MB. */
uint32_t data_bits; /* least-significant 2 bits are flags, the rest
are reserved (see above) */
} VHDXFileParameters;
typedef struct QEMU_PACKED VHDXVirtualDiskSize {
uint64_t virtual_disk_size; /* Size of the virtual disk, in bytes.
Must be multiple of the sector size,
max of 64TB */
} VHDXVirtualDiskSize;
typedef struct QEMU_PACKED VHDXPage83Data {
MSGUID page_83_data[16]; /* unique id for scsi devices that
support page 0x83 */
} VHDXPage83Data;
typedef struct QEMU_PACKED VHDXVirtualDiskLogicalSectorSize {
uint32_t logical_sector_size; /* virtual disk sector size (in bytes).
Can only be 512 or 4096 bytes */
} VHDXVirtualDiskLogicalSectorSize;
typedef struct QEMU_PACKED VHDXVirtualDiskPhysicalSectorSize {
uint32_t physical_sector_size; /* physical sector size (in bytes).
Can only be 512 or 4096 bytes */
} VHDXVirtualDiskPhysicalSectorSize;
typedef struct QEMU_PACKED VHDXParentLocatorHeader {
MSGUID locator_type[16]; /* type of the parent virtual disk. */
uint16_t reserved;
uint16_t key_value_count; /* number of key/value pairs for this
locator */
} VHDXParentLocatorHeader;
/* key and value strings are UNICODE strings, UTF-16 LE encoding, no NULs */
typedef struct QEMU_PACKED VHDXParentLocatorEntry {
uint32_t key_offset; /* offset in metadata for key, > 0 */
uint32_t value_offset; /* offset in metadata for value, >0 */
uint16_t key_length; /* length of entry key, > 0 */
uint16_t value_length; /* length of entry value, > 0 */
} VHDXParentLocatorEntry;
/* ----- END VHDX SPECIFICATION STRUCTURES ---- */
uint32_t vhdx_checksum_calc(uint32_t crc, uint8_t *buf, size_t size,
int crc_offset);
bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset);
static void leguid_to_cpus(MSGUID *guid)
{
le32_to_cpus(&guid->data1);
le16_to_cpus(&guid->data2);
le16_to_cpus(&guid->data3);
}
#endif

block/vmdk.c

@ -32,11 +32,25 @@
#define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D')
#define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V')
#define VMDK4_COMPRESSION_DEFLATE 1
#define VMDK4_FLAG_NL_DETECT (1 << 0)
#define VMDK4_FLAG_RGD (1 << 1)
/* Zeroed-grain enable bit */
#define VMDK4_FLAG_ZERO_GRAIN (1 << 2)
#define VMDK4_FLAG_COMPRESS (1 << 16)
#define VMDK4_FLAG_MARKER (1 << 17)
#define VMDK4_GD_AT_END 0xffffffffffffffffULL
#define VMDK_GTE_ZEROED 0x1
/* VMDK internal error codes */
#define VMDK_OK 0
#define VMDK_ERROR (-1)
/* Cluster not allocated */
#define VMDK_UNALLOC (-2)
#define VMDK_ZEROED (-3)
#define BLOCK_OPT_ZEROED_GRAIN "zeroed_grain"
typedef struct {
uint32_t version;
uint32_t flags;
@ -48,19 +62,20 @@ typedef struct {
uint32_t cylinders;
uint32_t heads;
uint32_t sectors_per_track;
} VMDK3Header;
} QEMU_PACKED VMDK3Header;
typedef struct {
uint32_t version;
uint32_t flags;
int64_t capacity;
int64_t granularity;
int64_t desc_offset;
int64_t desc_size;
int32_t num_gtes_per_gte;
int64_t rgd_offset;
int64_t gd_offset;
int64_t grain_offset;
uint64_t capacity;
uint64_t granularity;
uint64_t desc_offset;
uint64_t desc_size;
/* Number of GrainTableEntries per GrainTable */
uint32_t num_gtes_per_gt;
uint64_t rgd_offset;
uint64_t gd_offset;
uint64_t grain_offset;
char filler[1];
char check_bytes[4];
uint16_t compressAlgorithm;
@ -73,6 +88,8 @@ typedef struct VmdkExtent {
bool flat;
bool compressed;
bool has_marker;
bool has_zero_grain;
int version;
int64_t sectors;
int64_t end_sector;
int64_t flat_start_offset;
@ -93,7 +110,7 @@ typedef struct VmdkExtent {
typedef struct BDRVVmdkState {
CoMutex lock;
int desc_offset;
uint64_t desc_offset;
bool cid_updated;
uint32_t parent_cid;
int num_extents;
@ -108,13 +125,14 @@ typedef struct VmdkMetaData {
unsigned int l2_index;
unsigned int l2_offset;
int valid;
uint32_t *l2_cache_entry;
} VmdkMetaData;
typedef struct VmdkGrainMarker {
uint64_t lba;
uint32_t size;
uint8_t data[0];
} VmdkGrainMarker;
} QEMU_PACKED VmdkGrainMarker;
enum {
MARKER_END_OF_STREAM = 0,
@ -368,15 +386,22 @@ static int vmdk_parent_open(BlockDriverState *bs)
/* Create and append extent to the extent array. Return the added VmdkExtent
* address. return NULL if allocation failed. */
static VmdkExtent *vmdk_add_extent(BlockDriverState *bs,
static int vmdk_add_extent(BlockDriverState *bs,
BlockDriverState *file, bool flat, int64_t sectors,
int64_t l1_offset, int64_t l1_backup_offset,
uint32_t l1_size,
int l2_size, unsigned int cluster_sectors)
int l2_size, uint64_t cluster_sectors,
VmdkExtent **new_extent)
{
VmdkExtent *extent;
BDRVVmdkState *s = bs->opaque;
if (cluster_sectors > 0x200000) {
/* 0x200000 * 512Bytes = 1GB for one cluster is unrealistic */
error_report("invalid granularity, image may be corrupt");
return -EINVAL;
}
s->extents = g_realloc(s->extents,
(s->num_extents + 1) * sizeof(VmdkExtent));
extent = &s->extents[s->num_extents];
@ -399,7 +424,10 @@ static VmdkExtent *vmdk_add_extent(BlockDriverState *bs,
extent->end_sector = extent->sectors;
}
bs->total_sectors = extent->end_sector;
return extent;
if (new_extent) {
*new_extent = extent;
}
return 0;
}
static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent)
@ -458,12 +486,17 @@ static int vmdk_open_vmdk3(BlockDriverState *bs,
if (ret < 0) {
return ret;
}
extent = vmdk_add_extent(bs,
ret = vmdk_add_extent(bs,
bs->file, false,
le32_to_cpu(header.disk_sectors),
le32_to_cpu(header.l1dir_offset) << 9,
0, 1 << 6, 1 << 9,
le32_to_cpu(header.granularity));
le32_to_cpu(header.granularity),
&extent);
if (ret < 0) {
return ret;
}
ret = vmdk_init_tables(bs, extent);
if (ret) {
/* free extent allocated by vmdk_add_extent */
@ -473,7 +506,7 @@ static int vmdk_open_vmdk3(BlockDriverState *bs,
}
static int vmdk_open_desc_file(BlockDriverState *bs, int flags,
int64_t desc_offset);
uint64_t desc_offset);
static int vmdk_open_vmdk4(BlockDriverState *bs,
BlockDriverState *file,
@ -490,8 +523,11 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
if (ret < 0) {
return ret;
}
if (header.capacity == 0 && header.desc_offset) {
return vmdk_open_desc_file(bs, flags, header.desc_offset << 9);
if (header.capacity == 0) {
uint64_t desc_offset = le64_to_cpu(header.desc_offset);
if (desc_offset) {
return vmdk_open_desc_file(bs, flags, desc_offset << 9);
}
}
if (le64_to_cpu(header.gd_offset) == VMDK4_GD_AT_END) {
@ -541,26 +577,54 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
header = footer.header;
}
l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gte)
if (le32_to_cpu(header.version) >= 3) {
char buf[64];
snprintf(buf, sizeof(buf), "VMDK version %d",
le32_to_cpu(header.version));
qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
bs->device_name, "vmdk", buf);
return -ENOTSUP;
}
if (le32_to_cpu(header.num_gtes_per_gt) > 512) {
error_report("L2 table size too big");
return -EINVAL;
}
l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gt)
* le64_to_cpu(header.granularity);
if (l1_entry_sectors == 0) {
return -EINVAL;
}
l1_size = (le64_to_cpu(header.capacity) + l1_entry_sectors - 1)
/ l1_entry_sectors;
if (l1_size > 512 * 1024 * 1024) {
/* although with big capacity and small l1_entry_sectors, we can get a
* big l1_size, we don't want unbounded value to allocate the table.
* Limit it to 512M, which is 16PB for default cluster and L2 table
* size */
error_report("L1 size too big");
return -EFBIG;
}
if (le32_to_cpu(header.flags) & VMDK4_FLAG_RGD) {
l1_backup_offset = le64_to_cpu(header.rgd_offset) << 9;
}
extent = vmdk_add_extent(bs, file, false,
ret = vmdk_add_extent(bs, file, false,
le64_to_cpu(header.capacity),
le64_to_cpu(header.gd_offset) << 9,
l1_backup_offset,
l1_size,
le32_to_cpu(header.num_gtes_per_gte),
le64_to_cpu(header.granularity));
le32_to_cpu(header.num_gtes_per_gt),
le64_to_cpu(header.granularity),
&extent);
if (ret < 0) {
return ret;
}
extent->compressed =
le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE;
extent->has_marker = le32_to_cpu(header.flags) & VMDK4_FLAG_MARKER;
extent->version = le32_to_cpu(header.version);
extent->has_zero_grain = le32_to_cpu(header.flags) & VMDK4_FLAG_ZERO_GRAIN;
ret = vmdk_init_tables(bs, extent);
if (ret) {
/* free extent allocated by vmdk_add_extent */
@ -578,22 +642,22 @@ static int vmdk_parse_description(const char *desc, const char *opt_name,
opt_pos = strstr(desc, opt_name);
if (!opt_pos) {
return -1;
return VMDK_ERROR;
}
/* Skip "=\"" following opt_name */
opt_pos += strlen(opt_name) + 2;
if (opt_pos >= end) {
return -1;
return VMDK_ERROR;
}
opt_end = opt_pos;
while (opt_end < end && *opt_end != '"') {
opt_end++;
}
if (opt_end == end || buf_size < opt_end - opt_pos + 1) {
return -1;
return VMDK_ERROR;
}
pstrcpy(buf, opt_end - opt_pos + 1, opt_pos);
return 0;
return VMDK_OK;
}
/* Open an extent file and append to bs array */
@ -661,7 +725,7 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
path_combine(extent_path, sizeof(extent_path),
desc_file_path, fname);
ret = bdrv_file_open(&extent_file, extent_path, bs->open_flags);
ret = bdrv_file_open(&extent_file, extent_path, NULL, bs->open_flags);
if (ret) {
return ret;
}
@ -671,8 +735,11 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
/* FLAT extent */
VmdkExtent *extent;
extent = vmdk_add_extent(bs, extent_file, true, sectors,
0, 0, 0, 0, sectors);
ret = vmdk_add_extent(bs, extent_file, true, sectors,
0, 0, 0, 0, sectors, &extent);
if (ret < 0) {
return ret;
}
extent->flat_start_offset = flat_offset << 9;
} else if (!strcmp(type, "SPARSE")) {
/* SPARSE extent */
@ -697,30 +764,43 @@ next_line:
}
static int vmdk_open_desc_file(BlockDriverState *bs, int flags,
int64_t desc_offset)
uint64_t desc_offset)
{
int ret;
char buf[2048];
char *buf = NULL;
char ct[128];
BDRVVmdkState *s = bs->opaque;
int64_t size;
ret = bdrv_pread(bs->file, desc_offset, buf, sizeof(buf));
if (ret < 0) {
return ret;
size = bdrv_getlength(bs->file);
if (size < 0) {
return -EINVAL;
}
size = MIN(size, 1 << 20); /* avoid unbounded allocation */
buf = g_malloc0(size + 1);
ret = bdrv_pread(bs->file, desc_offset, buf, size);
if (ret < 0) {
goto exit;
}
buf[2047] = '\0';
if (vmdk_parse_description(buf, "createType", ct, sizeof(ct))) {
return -EMEDIUMTYPE;
ret = -EMEDIUMTYPE;
goto exit;
}
if (strcmp(ct, "monolithicFlat") &&
strcmp(ct, "twoGbMaxExtentSparse") &&
strcmp(ct, "twoGbMaxExtentFlat")) {
fprintf(stderr,
"VMDK: Not supported image type \"%s\""".\n", ct);
return -ENOTSUP;
ret = -ENOTSUP;
goto exit;
}
s->desc_offset = 0;
return vmdk_parse_extents(buf, bs, bs->file->filename);
ret = vmdk_parse_extents(buf, bs, bs->file->filename);
exit:
g_free(buf);
return ret;
}
static int vmdk_open(BlockDriverState *bs, QDict *options, int flags)
@ -763,16 +843,17 @@ static int get_whole_cluster(BlockDriverState *bs,
uint64_t offset,
bool allocate)
{
/* 128 sectors * 512 bytes each = grain size 64KB */
uint8_t whole_grain[extent->cluster_sectors * 512];
int ret = VMDK_OK;
uint8_t *whole_grain = NULL;
    /* We get here on the first write to a non-existent grain (cluster).
     * Try to read it from the parent image, if one exists */
if (bs->backing_hd) {
int ret;
whole_grain =
qemu_blockalign(bs, extent->cluster_sectors << BDRV_SECTOR_BITS);
if (!vmdk_is_cid_valid(bs)) {
return -1;
ret = VMDK_ERROR;
goto exit;
}
/* floor offset to cluster */
@ -780,30 +861,35 @@ static int get_whole_cluster(BlockDriverState *bs,
ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain,
extent->cluster_sectors);
if (ret < 0) {
return -1;
ret = VMDK_ERROR;
goto exit;
}
/* Write grain only into the active image */
ret = bdrv_write(extent->file, cluster_offset, whole_grain,
extent->cluster_sectors);
if (ret < 0) {
return -1;
ret = VMDK_ERROR;
goto exit;
}
}
return 0;
exit:
qemu_vfree(whole_grain);
return ret;
}
static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
{
uint32_t offset;
QEMU_BUILD_BUG_ON(sizeof(offset) != sizeof(m_data->offset));
offset = cpu_to_le32(m_data->offset);
/* update L2 table */
if (bdrv_pwrite_sync(
extent->file,
((int64_t)m_data->l2_offset * 512)
+ (m_data->l2_index * sizeof(m_data->offset)),
&(m_data->offset),
sizeof(m_data->offset)
) < 0) {
return -1;
&offset, sizeof(offset)) < 0) {
return VMDK_ERROR;
}
/* update backup L2 table */
if (extent->l1_backup_table_offset != 0) {
@ -812,13 +898,15 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
extent->file,
((int64_t)m_data->l2_offset * 512)
+ (m_data->l2_index * sizeof(m_data->offset)),
&(m_data->offset), sizeof(m_data->offset)
) < 0) {
return -1;
&offset, sizeof(offset)) < 0) {
return VMDK_ERROR;
}
}
if (m_data->l2_cache_entry) {
*m_data->l2_cache_entry = offset;
}
return 0;
return VMDK_OK;
}
static int get_cluster_offset(BlockDriverState *bs,
@ -830,24 +918,25 @@ static int get_cluster_offset(BlockDriverState *bs,
{
unsigned int l1_index, l2_offset, l2_index;
int min_index, i, j;
uint32_t min_count, *l2_table, tmp = 0;
uint32_t min_count, *l2_table;
bool zeroed = false;
if (m_data) {
m_data->valid = 0;
}
if (extent->flat) {
*cluster_offset = extent->flat_start_offset;
return 0;
return VMDK_OK;
}
offset -= (extent->end_sector - extent->sectors) * SECTOR_SIZE;
l1_index = (offset >> 9) / extent->l1_entry_sectors;
if (l1_index >= extent->l1_size) {
return -1;
return VMDK_ERROR;
}
l2_offset = extent->l1_table[l1_index];
if (!l2_offset) {
return -1;
return VMDK_UNALLOC;
}
for (i = 0; i < L2_CACHE_SIZE; i++) {
if (l2_offset == extent->l2_cache_offsets[i]) {
@ -877,7 +966,7 @@ static int get_cluster_offset(BlockDriverState *bs,
l2_table,
extent->l2_size * sizeof(uint32_t)
) != extent->l2_size * sizeof(uint32_t)) {
return -1;
return VMDK_ERROR;
}
extent->l2_cache_offsets[min_index] = l2_offset;
@ -886,9 +975,21 @@ static int get_cluster_offset(BlockDriverState *bs,
l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
*cluster_offset = le32_to_cpu(l2_table[l2_index]);
if (!*cluster_offset) {
if (m_data) {
m_data->valid = 1;
m_data->l1_index = l1_index;
m_data->l2_index = l2_index;
m_data->offset = *cluster_offset;
m_data->l2_offset = l2_offset;
m_data->l2_cache_entry = &l2_table[l2_index];
}
if (extent->has_zero_grain && *cluster_offset == VMDK_GTE_ZEROED) {
zeroed = true;
}
if (!*cluster_offset || zeroed) {
if (!allocate) {
return -1;
return zeroed ? VMDK_ZEROED : VMDK_UNALLOC;
}
/* Avoid the L2 tables update for the images that have snapshots. */
@ -901,8 +1002,7 @@ static int get_cluster_offset(BlockDriverState *bs,
}
*cluster_offset >>= 9;
tmp = cpu_to_le32(*cluster_offset);
l2_table[l2_index] = tmp;
l2_table[l2_index] = cpu_to_le32(*cluster_offset);
        /* First of all we write the grain itself, to avoid a race condition
         * that may corrupt the image.
@ -911,19 +1011,15 @@ static int get_cluster_offset(BlockDriverState *bs,
*/
if (get_whole_cluster(
bs, extent, *cluster_offset, offset, allocate) == -1) {
return -1;
return VMDK_ERROR;
}
if (m_data) {
m_data->offset = tmp;
m_data->l1_index = l1_index;
m_data->l2_index = l2_index;
m_data->l2_offset = l2_offset;
m_data->valid = 1;
m_data->offset = *cluster_offset;
}
}
*cluster_offset <<= 9;
return 0;
return VMDK_OK;
}
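/* Illustrative sketch (not part of the patch): how a (host-endian) grain
 * table entry maps to the VMDK_* result codes used above, given the extent's
 * has_zero_grain flag.  get_cluster_offset() does this inline; this is just
 * the decision table pulled out on its own. */
static int example_classify_gte(uint32_t gte, bool has_zero_grain)
{
    if (gte == 0) {
        return VMDK_UNALLOC;               /* grain was never written */
    }
    if (has_zero_grain && gte == VMDK_GTE_ZEROED) {
        return VMDK_ZEROED;                /* grain reads back as zeros */
    }
    return VMDK_OK;                        /* gte is the grain offset in sectors */
}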
static VmdkExtent *find_extent(BDRVVmdkState *s,
@ -959,8 +1055,8 @@ static int coroutine_fn vmdk_co_is_allocated(BlockDriverState *bs,
ret = get_cluster_offset(bs, extent, NULL,
sector_num * 512, 0, &offset);
qemu_co_mutex_unlock(&s->lock);
/* get_cluster_offset returning 0 means success */
ret = !ret;
ret = (ret == VMDK_OK || ret == VMDK_ZEROED);
index_in_cluster = sector_num % extent->cluster_sectors;
n = extent->cluster_sectors - index_in_cluster;
@ -1103,9 +1199,9 @@ static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
if (n > nb_sectors) {
n = nb_sectors;
}
if (ret) {
if (ret != VMDK_OK) {
/* if not allocated, try to read from parent image, if exist */
if (bs->backing_hd) {
if (bs->backing_hd && ret != VMDK_ZEROED) {
if (!vmdk_is_cid_valid(bs)) {
return -EINVAL;
}
@ -1142,8 +1238,19 @@ static coroutine_fn int vmdk_co_read(BlockDriverState *bs, int64_t sector_num,
return ret;
}
/**
* vmdk_write:
* @zeroed: buf is ignored (data is zero), use zeroed_grain GTE feature
* if possible, otherwise return -ENOTSUP.
* @zero_dry_run: used for zeroed == true only, don't update L2 table, just try
* with each cluster. By dry run we can find if the zero write
* is possible without modifying image data.
*
* Returns: error code with 0 for success.
*/
static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
const uint8_t *buf, int nb_sectors)
const uint8_t *buf, int nb_sectors,
bool zeroed, bool zero_dry_run)
{
BDRVVmdkState *s = bs->opaque;
VmdkExtent *extent = NULL;
@ -1173,7 +1280,7 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
sector_num << 9, !extent->compressed,
&cluster_offset);
if (extent->compressed) {
if (ret == 0) {
if (ret == VMDK_OK) {
/* Refuse write to allocated cluster for streamOptimized */
fprintf(stderr,
"VMDK: can't write to allocated cluster"
@ -1189,7 +1296,7 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
&cluster_offset);
}
}
if (ret) {
if (ret == VMDK_ERROR) {
return -EINVAL;
}
extent_begin_sector = extent->end_sector - extent->sectors;
@ -1199,17 +1306,34 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
if (n > nb_sectors) {
n = nb_sectors;
}
ret = vmdk_write_extent(extent,
cluster_offset, index_in_cluster * 512,
buf, n, sector_num);
if (ret) {
return ret;
}
if (m_data.valid) {
/* update L2 tables */
if (vmdk_L2update(extent, &m_data) == -1) {
return -EIO;
if (zeroed) {
/* Do zeroed write, buf is ignored */
if (extent->has_zero_grain &&
index_in_cluster == 0 &&
n >= extent->cluster_sectors) {
n = extent->cluster_sectors;
if (!zero_dry_run) {
m_data.offset = VMDK_GTE_ZEROED;
/* update L2 tables */
if (vmdk_L2update(extent, &m_data) != VMDK_OK) {
return -EIO;
}
}
} else {
return -ENOTSUP;
}
} else {
ret = vmdk_write_extent(extent,
cluster_offset, index_in_cluster * 512,
buf, n, sector_num);
if (ret) {
return ret;
}
if (m_data.valid) {
/* update L2 tables */
if (vmdk_L2update(extent, &m_data) != VMDK_OK) {
return -EIO;
}
}
}
nb_sectors -= n;
@ -1235,14 +1359,31 @@ static coroutine_fn int vmdk_co_write(BlockDriverState *bs, int64_t sector_num,
int ret;
BDRVVmdkState *s = bs->opaque;
qemu_co_mutex_lock(&s->lock);
ret = vmdk_write(bs, sector_num, buf, nb_sectors);
ret = vmdk_write(bs, sector_num, buf, nb_sectors, false, false);
qemu_co_mutex_unlock(&s->lock);
return ret;
}
static int coroutine_fn vmdk_co_write_zeroes(BlockDriverState *bs,
int64_t sector_num,
int nb_sectors)
{
int ret;
BDRVVmdkState *s = bs->opaque;
qemu_co_mutex_lock(&s->lock);
/* write zeroes could fail if sectors not aligned to cluster, test it with
* dry_run == true before really updating image */
ret = vmdk_write(bs, sector_num, NULL, nb_sectors, true, true);
if (!ret) {
ret = vmdk_write(bs, sector_num, NULL, nb_sectors, true, false);
}
qemu_co_mutex_unlock(&s->lock);
return ret;
}
static int vmdk_create_extent(const char *filename, int64_t filesize,
bool flat, bool compress)
bool flat, bool compress, bool zeroed_grain)
{
int ret, i;
int fd = 0;
@ -1264,18 +1405,19 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
}
magic = cpu_to_be32(VMDK4_MAGIC);
memset(&header, 0, sizeof(header));
header.version = 1;
header.flags =
3 | (compress ? VMDK4_FLAG_COMPRESS | VMDK4_FLAG_MARKER : 0);
header.version = zeroed_grain ? 2 : 1;
header.flags = VMDK4_FLAG_RGD | VMDK4_FLAG_NL_DETECT
| (compress ? VMDK4_FLAG_COMPRESS | VMDK4_FLAG_MARKER : 0)
| (zeroed_grain ? VMDK4_FLAG_ZERO_GRAIN : 0);
header.compressAlgorithm = compress ? VMDK4_COMPRESSION_DEFLATE : 0;
header.capacity = filesize / 512;
header.granularity = 128;
header.num_gtes_per_gte = 512;
header.num_gtes_per_gt = 512;
grains = (filesize / 512 + header.granularity - 1) / header.granularity;
gt_size = ((header.num_gtes_per_gte * sizeof(uint32_t)) + 511) >> 9;
gt_size = ((header.num_gtes_per_gt * sizeof(uint32_t)) + 511) >> 9;
gt_count =
(grains + header.num_gtes_per_gte - 1) / header.num_gtes_per_gte;
(grains + header.num_gtes_per_gt - 1) / header.num_gtes_per_gt;
gd_size = (gt_count * sizeof(uint32_t) + 511) >> 9;
header.desc_offset = 1;
@ -1291,7 +1433,7 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
header.flags = cpu_to_le32(header.flags);
header.capacity = cpu_to_le64(header.capacity);
header.granularity = cpu_to_le64(header.granularity);
header.num_gtes_per_gte = cpu_to_le32(header.num_gtes_per_gte);
header.num_gtes_per_gt = cpu_to_le32(header.num_gtes_per_gt);
header.desc_offset = cpu_to_le64(header.desc_offset);
header.desc_size = cpu_to_le64(header.desc_size);
header.rgd_offset = cpu_to_le64(header.rgd_offset);
@ -1357,7 +1499,7 @@ static int filename_decompose(const char *filename, char *path, char *prefix,
if (filename == NULL || !strlen(filename)) {
fprintf(stderr, "Vmdk: no filename provided.\n");
return -1;
return VMDK_ERROR;
}
p = strrchr(filename, '/');
if (p == NULL) {
@ -1369,7 +1511,7 @@ static int filename_decompose(const char *filename, char *path, char *prefix,
if (p != NULL) {
p++;
if (p - filename >= buf_len) {
return -1;
return VMDK_ERROR;
}
pstrcpy(path, p - filename + 1, filename);
} else {
@ -1382,51 +1524,12 @@ static int filename_decompose(const char *filename, char *path, char *prefix,
postfix[0] = '\0';
} else {
if (q - p >= buf_len) {
return -1;
return VMDK_ERROR;
}
pstrcpy(prefix, q - p + 1, p);
pstrcpy(postfix, buf_len, q);
}
return 0;
}
static int relative_path(char *dest, int dest_size,
const char *base, const char *target)
{
int i = 0;
int n = 0;
const char *p, *q;
#ifdef _WIN32
const char *sep = "\\";
#else
const char *sep = "/";
#endif
if (!(dest && base && target)) {
return -1;
}
if (path_is_absolute(target)) {
pstrcpy(dest, dest_size, target);
return 0;
}
while (base[i] == target[i]) {
i++;
}
p = &base[i];
q = &target[i];
while (*p) {
if (*p == *sep) {
n++;
}
p++;
}
dest[0] = '\0';
for (; n; n--) {
pstrcat(dest, dest_size, "..");
pstrcat(dest, dest_size, sep);
}
pstrcat(dest, dest_size, q);
return 0;
return VMDK_OK;
}
static int vmdk_create(const char *filename, QEMUOptionParameter *options)
@ -1447,6 +1550,7 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options)
char parent_desc_line[BUF_SIZE] = "";
uint32_t parent_cid = 0xffffffff;
uint32_t number_heads = 16;
bool zeroed_grain = false;
const char desc_template[] =
"# Disk DescriptorFile\n"
"version=1\n"
@ -1482,6 +1586,8 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options)
flags |= options->value.n ? BLOCK_FLAG_COMPAT6 : 0;
} else if (!strcmp(options->name, BLOCK_OPT_SUBFMT)) {
fmt = options->value.s;
} else if (!strcmp(options->name, BLOCK_OPT_ZEROED_GRAIN)) {
zeroed_grain |= options->value.n;
}
options++;
}
@ -1525,7 +1631,6 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options)
return -ENOTSUP;
}
if (backing_file) {
char parent_filename[PATH_MAX];
BlockDriverState *bs = bdrv_new("");
ret = bdrv_open(bs, backing_file, NULL, 0, NULL);
if (ret != 0) {
@ -1538,10 +1643,8 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options)
}
parent_cid = vmdk_read_cid(bs, 0);
bdrv_delete(bs);
relative_path(parent_filename, sizeof(parent_filename),
filename, backing_file);
snprintf(parent_desc_line, sizeof(parent_desc_line),
"parentFileNameHint=\"%s\"", parent_filename);
"parentFileNameHint=\"%s\"", backing_file);
}
/* Create extents */
@ -1568,7 +1671,8 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options)
snprintf(ext_filename, sizeof(ext_filename), "%s%s",
path, desc_filename);
if (vmdk_create_extent(ext_filename, size, flat, compress)) {
if (vmdk_create_extent(ext_filename, size,
flat, compress, zeroed_grain)) {
return -EINVAL;
}
filesize -= size;
@ -1665,6 +1769,23 @@ static int64_t vmdk_get_allocated_file_size(BlockDriverState *bs)
return ret;
}
static int vmdk_has_zero_init(BlockDriverState *bs)
{
int i;
BDRVVmdkState *s = bs->opaque;
/* If has a flat extent and its underlying storage doesn't have zero init,
* return 0. */
for (i = 0; i < s->num_extents; i++) {
if (s->extents[i].flat) {
if (!bdrv_has_zero_init(s->extents[i].file)) {
return 0;
}
}
}
return 1;
}
static QEMUOptionParameter vmdk_create_options[] = {
{
.name = BLOCK_OPT_SIZE,
@ -1694,24 +1815,31 @@ static QEMUOptionParameter vmdk_create_options[] = {
"VMDK flat extent format, can be one of "
"{monolithicSparse (default) | monolithicFlat | twoGbMaxExtentSparse | twoGbMaxExtentFlat | streamOptimized} "
},
{
.name = BLOCK_OPT_ZEROED_GRAIN,
.type = OPT_FLAG,
.help = "Enable efficient zero writes using the zeroed-grain GTE feature"
},
{ NULL }
};
static BlockDriver bdrv_vmdk = {
.format_name = "vmdk",
.instance_size = sizeof(BDRVVmdkState),
.bdrv_probe = vmdk_probe,
.bdrv_open = vmdk_open,
.bdrv_reopen_prepare = vmdk_reopen_prepare,
.bdrv_read = vmdk_co_read,
.bdrv_write = vmdk_co_write,
.bdrv_close = vmdk_close,
.bdrv_create = vmdk_create,
.bdrv_co_flush_to_disk = vmdk_co_flush,
.bdrv_co_is_allocated = vmdk_co_is_allocated,
.bdrv_get_allocated_file_size = vmdk_get_allocated_file_size,
.format_name = "vmdk",
.instance_size = sizeof(BDRVVmdkState),
.bdrv_probe = vmdk_probe,
.bdrv_open = vmdk_open,
.bdrv_reopen_prepare = vmdk_reopen_prepare,
.bdrv_read = vmdk_co_read,
.bdrv_write = vmdk_co_write,
.bdrv_co_write_zeroes = vmdk_co_write_zeroes,
.bdrv_close = vmdk_close,
.bdrv_create = vmdk_create,
.bdrv_co_flush_to_disk = vmdk_co_flush,
.bdrv_co_is_allocated = vmdk_co_is_allocated,
.bdrv_get_allocated_file_size = vmdk_get_allocated_file_size,
.bdrv_has_zero_init = vmdk_has_zero_init,
.create_options = vmdk_create_options,
.create_options = vmdk_create_options,
};
static void bdrv_vmdk_init(void)

block/vpc.c

@ -786,6 +786,18 @@ static int vpc_create(const char *filename, QEMUOptionParameter *options)
return ret;
}
static int vpc_has_zero_init(BlockDriverState *bs)
{
BDRVVPCState *s = bs->opaque;
struct vhd_footer *footer = (struct vhd_footer *) s->footer_buf;
if (cpu_to_be32(footer->type) == VHD_FIXED) {
return bdrv_has_zero_init(bs->file);
} else {
return 1;
}
}
static void vpc_close(BlockDriverState *bs)
{
BDRVVPCState *s = bs->opaque;
@ -818,16 +830,17 @@ static BlockDriver bdrv_vpc = {
.format_name = "vpc",
.instance_size = sizeof(BDRVVPCState),
.bdrv_probe = vpc_probe,
.bdrv_open = vpc_open,
.bdrv_close = vpc_close,
.bdrv_reopen_prepare = vpc_reopen_prepare,
.bdrv_create = vpc_create,
.bdrv_probe = vpc_probe,
.bdrv_open = vpc_open,
.bdrv_close = vpc_close,
.bdrv_reopen_prepare = vpc_reopen_prepare,
.bdrv_create = vpc_create,
.bdrv_read = vpc_co_read,
.bdrv_write = vpc_co_write,
.create_options = vpc_create_options,
.create_options = vpc_create_options,
.bdrv_has_zero_init = vpc_has_zero_init,
};
static void bdrv_vpc_init(void)

block/vvfat.c

@ -1,4 +1,4 @@
/* vim:set shiftwidth=4 ts=8: */
/* vim:set shiftwidth=4 ts=4: */
/*
* QEMU Block driver for virtual VFAT (shadows a local directory)
*
@ -28,6 +28,8 @@
#include "block/block_int.h"
#include "qemu/module.h"
#include "migration/migration.h"
#include "qapi/qmp/qint.h"
#include "qapi/qmp/qbool.h"
#ifndef S_IWGRP
#define S_IWGRP 0
@ -988,10 +990,90 @@ static void vvfat_rebind(BlockDriverState *bs)
s->bs = bs;
}
static int vvfat_open(BlockDriverState *bs, const char* dirname, int flags)
static QemuOptsList runtime_opts = {
.name = "vvfat",
.head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
.desc = {
{
.name = "dir",
.type = QEMU_OPT_STRING,
.help = "Host directory to map to the vvfat device",
},
{
.name = "fat-type",
.type = QEMU_OPT_NUMBER,
.help = "FAT type (12, 16 or 32)",
},
{
.name = "floppy",
.type = QEMU_OPT_BOOL,
.help = "Create a floppy rather than a hard disk image",
},
{
.name = "rw",
.type = QEMU_OPT_BOOL,
.help = "Make the image writable",
},
{ /* end of list */ }
},
};
static void vvfat_parse_filename(const char *filename, QDict *options,
Error **errp)
{
int fat_type = 0;
bool floppy = false;
bool rw = false;
int i;
if (!strstart(filename, "fat:", NULL)) {
error_setg(errp, "File name string must start with 'fat:'");
return;
}
/* Parse options */
if (strstr(filename, ":32:")) {
fat_type = 32;
} else if (strstr(filename, ":16:")) {
fat_type = 16;
} else if (strstr(filename, ":12:")) {
fat_type = 12;
}
if (strstr(filename, ":floppy:")) {
floppy = true;
}
if (strstr(filename, ":rw:")) {
rw = true;
}
/* Get the directory name without options */
i = strrchr(filename, ':') - filename;
assert(i >= 3);
if (filename[i - 2] == ':' && qemu_isalpha(filename[i - 1])) {
/* workaround for DOS drive names */
filename += i - 1;
} else {
filename += i + 1;
}
/* Fill in the options QDict */
qdict_put(options, "dir", qstring_from_str(filename));
qdict_put(options, "fat-type", qint_from_int(fat_type));
qdict_put(options, "floppy", qbool_from_int(floppy));
qdict_put(options, "rw", qbool_from_int(rw));
}
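/* Example (illustrative): a legacy filename such as
 *     fat:floppy:rw:/tmp/dir
 * is parsed by the function above into the options QDict as
 *     dir = "/tmp/dir", fat-type = 0, floppy = true, rw = true
 * (fat-type 0 is later defaulted to 12 by vvfat_open() because the floppy
 * flag is set). */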
static int vvfat_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVVVFATState *s = bs->opaque;
int i, cyls, heads, secs;
int cyls, heads, secs;
bool floppy;
const char *dirname;
QemuOpts *opts;
Error *local_err = NULL;
int ret;
#ifdef DEBUG
vvv = s;
@ -1002,6 +1084,65 @@ DLOG(if (stderr == NULL) {
setbuf(stderr, NULL);
})
opts = qemu_opts_create_nofail(&runtime_opts);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (error_is_set(&local_err)) {
qerror_report_err(local_err);
error_free(local_err);
ret = -EINVAL;
goto fail;
}
dirname = qemu_opt_get(opts, "dir");
if (!dirname) {
qerror_report(ERROR_CLASS_GENERIC_ERROR, "vvfat block driver requires "
"a 'dir' option");
ret = -EINVAL;
goto fail;
}
s->fat_type = qemu_opt_get_number(opts, "fat-type", 0);
floppy = qemu_opt_get_bool(opts, "floppy", false);
if (floppy) {
/* 1.44MB or 2.88MB floppy. 2.88MB can be FAT12 (default) or FAT16. */
if (!s->fat_type) {
s->fat_type = 12;
secs = 36;
s->sectors_per_cluster = 2;
} else {
secs = s->fat_type == 12 ? 18 : 36;
s->sectors_per_cluster = 1;
}
s->first_sectors_number = 1;
cyls = 80;
heads = 2;
} else {
/* 32MB or 504MB disk*/
if (!s->fat_type) {
s->fat_type = 16;
}
cyls = s->fat_type == 12 ? 64 : 1024;
heads = 16;
secs = 63;
}
switch (s->fat_type) {
case 32:
fprintf(stderr, "Big fat greek warning: FAT32 has not been tested. "
"You are welcome to do so!\n");
break;
case 16:
case 12:
break;
default:
qerror_report(ERROR_CLASS_GENERIC_ERROR, "Valid FAT types are only "
"12, 16 and 32");
ret = -EINVAL;
goto fail;
}
s->bs = bs;
/* LATER TODO: if FAT32, adjust */
@ -1017,63 +1158,24 @@ DLOG(if (stderr == NULL) {
s->fat2 = NULL;
s->downcase_short_names = 1;
if (!strstart(dirname, "fat:", NULL))
return -1;
if (strstr(dirname, ":32:")) {
fprintf(stderr, "Big fat greek warning: FAT32 has not been tested. You are welcome to do so!\n");
s->fat_type = 32;
} else if (strstr(dirname, ":16:")) {
s->fat_type = 16;
} else if (strstr(dirname, ":12:")) {
s->fat_type = 12;
}
if (strstr(dirname, ":floppy:")) {
/* 1.44MB or 2.88MB floppy. 2.88MB can be FAT12 (default) or FAT16. */
if (!s->fat_type) {
s->fat_type = 12;
secs = 36;
s->sectors_per_cluster=2;
} else {
secs = s->fat_type == 12 ? 18 : 36;
s->sectors_per_cluster=1;
}
s->first_sectors_number = 1;
cyls = 80;
heads = 2;
} else {
/* 32MB or 504MB disk*/
if (!s->fat_type) {
s->fat_type = 16;
}
cyls = s->fat_type == 12 ? 64 : 1024;
heads = 16;
secs = 63;
}
fprintf(stderr, "vvfat %s chs %d,%d,%d\n",
dirname, cyls, heads, secs);
s->sector_count = cyls * heads * secs - (s->first_sectors_number - 1);
if (strstr(dirname, ":rw:")) {
if (enable_write_target(s))
return -1;
bs->read_only = 0;
if (qemu_opt_get_bool(opts, "rw", false)) {
ret = enable_write_target(s);
if (ret < 0) {
goto fail;
}
bs->read_only = 0;
}
i = strrchr(dirname, ':') - dirname;
assert(i >= 3);
if (dirname[i-2] == ':' && qemu_isalpha(dirname[i-1]))
/* workaround for DOS drive names */
dirname += i-1;
else
dirname += i+1;
bs->total_sectors = cyls * heads * secs;
if (init_directories(s, dirname, heads, secs)) {
return -1;
ret = -EIO;
goto fail;
}
s->sector_count = s->faked_sectors + s->sectors_per_cluster*s->cluster_count;
@ -1093,7 +1195,10 @@ DLOG(if (stderr == NULL) {
migrate_add_blocker(s->migration_blocker);
}
return 0;
ret = 0;
fail:
qemu_opts_del(opts);
return ret;
}
static inline void vvfat_close_current_file(BDRVVVFATState *s)
@ -2812,9 +2917,7 @@ static int enable_write_target(BDRVVVFATState *s)
s->qcow_filename = g_malloc(1024);
ret = get_tmp_filename(s->qcow_filename, 1024);
if (ret < 0) {
g_free(s->qcow_filename);
s->qcow_filename = NULL;
return ret;
goto err;
}
bdrv_qcow = bdrv_find_format("qcow");
@ -2822,18 +2925,18 @@ static int enable_write_target(BDRVVVFATState *s)
set_option_parameter_int(options, BLOCK_OPT_SIZE, s->sector_count * 512);
set_option_parameter(options, BLOCK_OPT_BACKING_FILE, "fat:");
if (bdrv_create(bdrv_qcow, s->qcow_filename, options) < 0)
return -1;
ret = bdrv_create(bdrv_qcow, s->qcow_filename, options);
if (ret < 0) {
goto err;
}
s->qcow = bdrv_new("");
if (s->qcow == NULL) {
return -1;
}
ret = bdrv_open(s->qcow, s->qcow_filename, NULL,
BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, bdrv_qcow);
if (ret < 0) {
return ret;
bdrv_delete(s->qcow);
goto err;
}
#ifndef _WIN32
@ -2846,6 +2949,11 @@ static int enable_write_target(BDRVVVFATState *s)
*(void**)s->bs->backing_hd->opaque = s;
return 0;
err:
g_free(s->qcow_filename);
s->qcow_filename = NULL;
return ret;
}
static void vvfat_close(BlockDriverState *bs)
@ -2865,15 +2973,18 @@ static void vvfat_close(BlockDriverState *bs)
}
static BlockDriver bdrv_vvfat = {
.format_name = "vvfat",
.instance_size = sizeof(BDRVVVFATState),
.bdrv_file_open = vvfat_open,
.bdrv_rebind = vvfat_rebind,
.bdrv_read = vvfat_co_read,
.bdrv_write = vvfat_co_write,
.bdrv_close = vvfat_close,
.bdrv_co_is_allocated = vvfat_co_is_allocated,
.protocol_name = "fat",
.format_name = "vvfat",
.protocol_name = "fat",
.instance_size = sizeof(BDRVVVFATState),
.bdrv_parse_filename = vvfat_parse_filename,
.bdrv_file_open = vvfat_open,
.bdrv_close = vvfat_close,
.bdrv_rebind = vvfat_rebind,
.bdrv_read = vvfat_co_read,
.bdrv_write = vvfat_co_write,
.bdrv_co_is_allocated = vvfat_co_is_allocated,
};
static void bdrv_vvfat_init(void)


@ -25,7 +25,6 @@
#include "qemu/timer.h"
#include "block/block_int.h"
#include "qemu/module.h"
#include "qemu-common.h"
#include "block/aio.h"
#include "raw-aio.h"
#include "qemu/event_notifier.h"


@ -10,7 +10,7 @@
*/
#include "sysemu/blockdev.h"
#include "hw/block-common.h"
#include "hw/block/block.h"
#include "monitor/monitor.h"
#include "qapi/qmp/qerror.h"
#include "sysemu/sysemu.h"

File diff suppressed because it is too large.


@ -98,7 +98,7 @@ enum {
static const char *get_elf_platform(void)
{
static char elf_platform[] = "i386";
int family = (thread_env->cpuid_version >> 8) & 0xff;
int family = object_property_get_int(OBJECT(thread_cpu), "family", NULL);
if (family > 6)
family = 6;
if (family >= 3)
@ -110,7 +110,9 @@ static const char *get_elf_platform(void)
static uint32_t get_elf_hwcap(void)
{
return thread_env->cpuid_features;
X86CPU *cpu = X86_CPU(thread_cpu);
return cpu->env.features[FEAT_1_EDX];
}
#ifdef TARGET_X86_64


@ -92,7 +92,7 @@ void fork_start(void)
void fork_end(int child)
{
if (child) {
gdbserver_fork(thread_env);
gdbserver_fork((CPUArchState *)thread_cpu->env_ptr);
}
}
@ -511,6 +511,7 @@ static void flush_windows(CPUSPARCState *env)
void cpu_loop(CPUSPARCState *env)
{
CPUState *cs = CPU(sparc_env_get_cpu(env));
int trapnr, ret, syscall_nr;
//target_siginfo_t info;
@ -642,7 +643,7 @@ void cpu_loop(CPUSPARCState *env)
{
int sig;
sig = gdb_handlesig (env, TARGET_SIGTRAP);
sig = gdb_handlesig(cs, TARGET_SIGTRAP);
#if 0
if (sig)
{
@ -659,7 +660,7 @@ void cpu_loop(CPUSPARCState *env)
badtrap:
#endif
printf ("Unhandled trap: 0x%x\n", trapnr);
cpu_dump_state(env, stderr, fprintf, 0);
cpu_dump_state(cs, stderr, fprintf, 0);
exit (1);
}
process_pending_signals (env);
@ -670,8 +671,8 @@ void cpu_loop(CPUSPARCState *env)
static void usage(void)
{
printf("qemu-" TARGET_ARCH " version " QEMU_VERSION ", Copyright (c) 2003-2008 Fabrice Bellard\n"
"usage: qemu-" TARGET_ARCH " [options] program [arguments...]\n"
printf("qemu-" TARGET_NAME " version " QEMU_VERSION ", Copyright (c) 2003-2008 Fabrice Bellard\n"
"usage: qemu-" TARGET_NAME " [options] program [arguments...]\n"
"BSD CPU emulator (compiled for %s emulation)\n"
"\n"
"Standard options:\n"
@ -706,13 +707,13 @@ static void usage(void)
"Note that if you provide several changes to single variable\n"
"last change will stay in effect.\n"
,
TARGET_ARCH,
TARGET_NAME,
interp_prefix,
x86_stack_size);
exit(1);
}
THREAD CPUArchState *thread_env;
THREAD CPUState *thread_cpu;
/* Assumes contents are already zeroed. */
void init_task_state(TaskState *ts)
@ -737,6 +738,7 @@ int main(int argc, char **argv)
struct image_info info1, *info = &info1;
TaskState ts1, *ts = &ts1;
CPUArchState *env;
CPUState *cpu;
int optind;
const char *r;
int gdbstub_port = 0;
@ -911,10 +913,11 @@ int main(int argc, char **argv)
fprintf(stderr, "Unable to find CPU definition\n");
exit(1);
}
cpu = ENV_GET_CPU(env);
#if defined(TARGET_SPARC) || defined(TARGET_PPC)
cpu_reset(ENV_GET_CPU(env));
cpu_reset(cpu);
#endif
thread_env = env;
thread_cpu = cpu;
if (getenv("QEMU_STRACE")) {
do_strace = 1;
@ -1004,13 +1007,13 @@ int main(int argc, char **argv)
env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK;
env->hflags |= HF_PE_MASK;
if (env->cpuid_features & CPUID_SSE) {
if (env->features[FEAT_1_EDX] & CPUID_SSE) {
env->cr[4] |= CR4_OSFXSR_MASK;
env->hflags |= HF_OSFXSR_MASK;
}
#ifndef TARGET_ABI32
/* enable 64 bit mode if possible */
if (!(env->cpuid_ext2_features & CPUID_EXT2_LM)) {
if (!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM)) {
fprintf(stderr, "The selected x86 CPU does not support 64 bit mode\n");
exit(1);
}
@ -1133,7 +1136,7 @@ int main(int argc, char **argv)
if (gdbstub_port) {
gdbserver_start (gdbstub_port);
gdb_handlesig(env, 0);
gdb_handlesig(cpu, 0);
}
cpu_loop(env);
/* never exits */


@ -139,7 +139,7 @@ abi_long do_openbsd_syscall(void *cpu_env, int num, abi_long arg1,
abi_long arg2, abi_long arg3, abi_long arg4,
abi_long arg5, abi_long arg6);
void gemu_log(const char *fmt, ...) GCC_FMT_ATTR(1, 2);
extern THREAD CPUArchState *thread_env;
extern THREAD CPUState *thread_cpu;
void cpu_loop(CPUArchState *env);
char *target_strerror(int err);
int get_osversion(void);


@ -211,7 +211,12 @@ static int sysctl_oldcvt(void *holdp, size_t holdlen, uint32_t kind)
*(uint64_t *)holdp = tswap64(*(unsigned long *)holdp);
break;
#endif
#ifdef CTLTYPE_U64
case CTLTYPE_S64:
case CTLTYPE_U64:
#else
case CTLTYPE_QUAD:
#endif
*(uint64_t *)holdp = tswap64(*(uint64_t *)holdp);
break;
case CTLTYPE_STRING:


@ -18,7 +18,7 @@
*/
#include "qemu-common.h"
#include "bt/bt.h"
#include "sysemu/bt.h"
#include "qemu/main-loop.h"
#ifndef _WIN32
@ -171,7 +171,7 @@ struct HCIInfo *bt_host_hci(const char *id)
hci_filter_all_ptypes(&flt);
hci_filter_all_events(&flt);
if (setsockopt(fd, SOL_HCI, HCI_FILTER, &flt, sizeof(flt)) < 0) {
if (qemu_setsockopt(fd, SOL_HCI, HCI_FILTER, &flt, sizeof(flt)) < 0) {
fprintf(stderr, "qemu: Can't set HCI filter on socket (%i)\n", errno);
return 0;
}


@ -18,7 +18,7 @@
*/
#include "qemu-common.h"
#include "bt/bt.h"
#include "sysemu/bt.h"
#include "hw/bt.h"
#include "qemu/main-loop.h"

cmd.c (612 lines deleted)

@ -1,612 +0,0 @@
/*
* Copyright (c) 2003-2005 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
#include <sys/time.h>
#include <getopt.h>
#include "cmd.h"
#include "block/aio.h"
#include "qemu/main-loop.h"
#define _(x) x /* not gettext support yet */
/* from libxcmd/command.c */
cmdinfo_t *cmdtab;
int ncmds;
static argsfunc_t args_func;
static checkfunc_t check_func;
static int ncmdline;
static char **cmdline;
static int
compare(const void *a, const void *b)
{
return strcmp(((const cmdinfo_t *)a)->name,
((const cmdinfo_t *)b)->name);
}
void add_command(const cmdinfo_t *ci)
{
cmdtab = g_realloc((void *)cmdtab, ++ncmds * sizeof(*cmdtab));
cmdtab[ncmds - 1] = *ci;
qsort(cmdtab, ncmds, sizeof(*cmdtab), compare);
}
static int
check_command(
const cmdinfo_t *ci)
{
if (check_func)
return check_func(ci);
return 1;
}
void
add_check_command(
checkfunc_t cf)
{
check_func = cf;
}
int
command_usage(
const cmdinfo_t *ci)
{
printf("%s %s -- %s\n", ci->name, ci->args, ci->oneline);
return 0;
}
int
command(
const cmdinfo_t *ct,
int argc,
char **argv)
{
char *cmd = argv[0];
if (!check_command(ct))
return 0;
if (argc-1 < ct->argmin || (ct->argmax != -1 && argc-1 > ct->argmax)) {
if (ct->argmax == -1)
fprintf(stderr,
_("bad argument count %d to %s, expected at least %d arguments\n"),
argc-1, cmd, ct->argmin);
else if (ct->argmin == ct->argmax)
fprintf(stderr,
_("bad argument count %d to %s, expected %d arguments\n"),
argc-1, cmd, ct->argmin);
else
fprintf(stderr,
_("bad argument count %d to %s, expected between %d and %d arguments\n"),
argc-1, cmd, ct->argmin, ct->argmax);
return 0;
}
optind = 0;
return ct->cfunc(argc, argv);
}
const cmdinfo_t *
find_command(
const char *cmd)
{
cmdinfo_t *ct;
for (ct = cmdtab; ct < &cmdtab[ncmds]; ct++) {
if (strcmp(ct->name, cmd) == 0 ||
(ct->altname && strcmp(ct->altname, cmd) == 0))
return (const cmdinfo_t *)ct;
}
return NULL;
}
void add_user_command(char *optarg)
{
cmdline = g_realloc(cmdline, ++ncmdline * sizeof(char *));
cmdline[ncmdline-1] = optarg;
}
static int
args_command(
int index)
{
if (args_func)
return args_func(index);
return 0;
}
void
add_args_command(
argsfunc_t af)
{
args_func = af;
}
static void prep_fetchline(void *opaque)
{
int *fetchable = opaque;
qemu_set_fd_handler(STDIN_FILENO, NULL, NULL, NULL);
*fetchable= 1;
}
static char *get_prompt(void);
void command_loop(void)
{
int c, i, j = 0, done = 0, fetchable = 0, prompted = 0;
char *input;
char **v;
const cmdinfo_t *ct;
for (i = 0; !done && i < ncmdline; i++) {
input = strdup(cmdline[i]);
if (!input) {
fprintf(stderr, _("cannot strdup command '%s': %s\n"),
cmdline[i], strerror(errno));
exit(1);
}
v = breakline(input, &c);
if (c) {
ct = find_command(v[0]);
if (ct) {
if (ct->flags & CMD_FLAG_GLOBAL) {
done = command(ct, c, v);
} else {
j = 0;
while (!done && (j = args_command(j))) {
done = command(ct, c, v);
}
}
} else {
fprintf(stderr, _("command \"%s\" not found\n"), v[0]);
}
}
doneline(input, v);
}
if (cmdline) {
g_free(cmdline);
return;
}
while (!done) {
if (!prompted) {
printf("%s", get_prompt());
fflush(stdout);
qemu_set_fd_handler(STDIN_FILENO, prep_fetchline, NULL, &fetchable);
prompted = 1;
}
main_loop_wait(false);
if (!fetchable) {
continue;
}
input = fetchline();
if (input == NULL) {
break;
}
v = breakline(input, &c);
if (c) {
ct = find_command(v[0]);
if (ct) {
done = command(ct, c, v);
} else {
fprintf(stderr, _("command \"%s\" not found\n"), v[0]);
}
}
doneline(input, v);
prompted = 0;
fetchable = 0;
}
qemu_set_fd_handler(STDIN_FILENO, NULL, NULL, NULL);
}
/* from libxcmd/input.c */
#if defined(ENABLE_READLINE)
# include <readline/history.h>
# include <readline/readline.h>
#elif defined(ENABLE_EDITLINE)
# include <histedit.h>
#endif
static char *
get_prompt(void)
{
static char prompt[FILENAME_MAX + 2 /*"> "*/ + 1 /*"\0"*/ ];
if (!prompt[0])
snprintf(prompt, sizeof(prompt), "%s> ", progname);
return prompt;
}
#if defined(ENABLE_READLINE)
char *
fetchline(void)
{
char *line;
line = readline(get_prompt());
if (line && *line)
add_history(line);
return line;
}
#elif defined(ENABLE_EDITLINE)
static char *el_get_prompt(EditLine *e) { return get_prompt(); }
char *
fetchline(void)
{
static EditLine *el;
static History *hist;
HistEvent hevent;
char *line;
int count;
if (!el) {
hist = history_init();
history(hist, &hevent, H_SETSIZE, 100);
el = el_init(progname, stdin, stdout, stderr);
el_source(el, NULL);
el_set(el, EL_SIGNAL, 1);
el_set(el, EL_PROMPT, el_get_prompt);
el_set(el, EL_HIST, history, (const char *)hist);
}
line = strdup(el_gets(el, &count));
if (line) {
if (count > 0)
line[count-1] = '\0';
if (*line)
history(hist, &hevent, H_ENTER, line);
}
return line;
}
#else
# define MAXREADLINESZ 1024
char *
fetchline(void)
{
char *p, *line = malloc(MAXREADLINESZ);
if (!line)
return NULL;
if (!fgets(line, MAXREADLINESZ, stdin)) {
free(line);
return NULL;
}
p = line + strlen(line);
if (p != line && p[-1] == '\n')
p[-1] = '\0';
return line;
}
#endif
static char *qemu_strsep(char **input, const char *delim)
{
char *result = *input;
if (result != NULL) {
char *p;
for (p = result; *p != '\0'; p++) {
if (strchr(delim, *p)) {
break;
}
}
if (*p == '\0') {
*input = NULL;
} else {
*p = '\0';
*input = p + 1;
}
}
return result;
}
char **breakline(char *input, int *count)
{
int c = 0;
char *p;
char **rval = calloc(sizeof(char *), 1);
char **tmp;
while (rval && (p = qemu_strsep(&input, " ")) != NULL) {
if (!*p) {
continue;
}
c++;
tmp = realloc(rval, sizeof(*rval) * (c + 1));
if (!tmp) {
free(rval);
rval = NULL;
c = 0;
break;
} else {
rval = tmp;
}
rval[c - 1] = p;
rval[c] = NULL;
}
*count = c;
return rval;
}
void
doneline(
char *input,
char **vec)
{
free(input);
free(vec);
}
#define EXABYTES(x) ((long long)(x) << 60)
#define PETABYTES(x) ((long long)(x) << 50)
#define TERABYTES(x) ((long long)(x) << 40)
#define GIGABYTES(x) ((long long)(x) << 30)
#define MEGABYTES(x) ((long long)(x) << 20)
#define KILOBYTES(x) ((long long)(x) << 10)
long long
cvtnum(
char *s)
{
long long i;
char *sp;
int c;
i = strtoll(s, &sp, 0);
if (i == 0 && sp == s)
return -1LL;
if (*sp == '\0')
return i;
if (sp[1] != '\0')
return -1LL;
c = qemu_tolower(*sp);
switch (c) {
default:
return i;
case 'k':
return KILOBYTES(i);
case 'm':
return MEGABYTES(i);
case 'g':
return GIGABYTES(i);
case 't':
return TERABYTES(i);
case 'p':
return PETABYTES(i);
case 'e':
return EXABYTES(i);
}
return -1LL;
}
#define TO_EXABYTES(x) ((x) / EXABYTES(1))
#define TO_PETABYTES(x) ((x) / PETABYTES(1))
#define TO_TERABYTES(x) ((x) / TERABYTES(1))
#define TO_GIGABYTES(x) ((x) / GIGABYTES(1))
#define TO_MEGABYTES(x) ((x) / MEGABYTES(1))
#define TO_KILOBYTES(x) ((x) / KILOBYTES(1))
void
cvtstr(
double value,
char *str,
size_t size)
{
char *trim;
const char *suffix;
if (value >= EXABYTES(1)) {
suffix = " EiB";
snprintf(str, size - 4, "%.3f", TO_EXABYTES(value));
} else if (value >= PETABYTES(1)) {
suffix = " PiB";
snprintf(str, size - 4, "%.3f", TO_PETABYTES(value));
} else if (value >= TERABYTES(1)) {
suffix = " TiB";
snprintf(str, size - 4, "%.3f", TO_TERABYTES(value));
} else if (value >= GIGABYTES(1)) {
suffix = " GiB";
snprintf(str, size - 4, "%.3f", TO_GIGABYTES(value));
} else if (value >= MEGABYTES(1)) {
suffix = " MiB";
snprintf(str, size - 4, "%.3f", TO_MEGABYTES(value));
} else if (value >= KILOBYTES(1)) {
suffix = " KiB";
snprintf(str, size - 4, "%.3f", TO_KILOBYTES(value));
} else {
suffix = " bytes";
snprintf(str, size - 6, "%f", value);
}
trim = strstr(str, ".000");
if (trim) {
strcpy(trim, suffix);
} else {
strcat(str, suffix);
}
}
struct timeval
tsub(struct timeval t1, struct timeval t2)
{
t1.tv_usec -= t2.tv_usec;
if (t1.tv_usec < 0) {
t1.tv_usec += 1000000;
t1.tv_sec--;
}
t1.tv_sec -= t2.tv_sec;
return t1;
}
double
tdiv(double value, struct timeval tv)
{
return value / ((double)tv.tv_sec + ((double)tv.tv_usec / 1000000.0));
}
#define HOURS(sec) ((sec) / (60 * 60))
#define MINUTES(sec) (((sec) % (60 * 60)) / 60)
#define SECONDS(sec) ((sec) % 60)
void
timestr(
struct timeval *tv,
char *ts,
size_t size,
int format)
{
double usec = (double)tv->tv_usec / 1000000.0;
if (format & TERSE_FIXED_TIME) {
if (!HOURS(tv->tv_sec)) {
snprintf(ts, size, "%u:%02u.%02u",
(unsigned int) MINUTES(tv->tv_sec),
(unsigned int) SECONDS(tv->tv_sec),
(unsigned int) (usec * 100));
return;
}
format |= VERBOSE_FIXED_TIME; /* fallback if hours needed */
}
if ((format & VERBOSE_FIXED_TIME) || tv->tv_sec) {
snprintf(ts, size, "%u:%02u:%02u.%02u",
(unsigned int) HOURS(tv->tv_sec),
(unsigned int) MINUTES(tv->tv_sec),
(unsigned int) SECONDS(tv->tv_sec),
(unsigned int) (usec * 100));
} else {
snprintf(ts, size, "0.%04u sec", (unsigned int) (usec * 10000));
}
}
/* from libxcmd/quit.c */
static cmdinfo_t quit_cmd;
/* ARGSUSED */
static int
quit_f(
int argc,
char **argv)
{
return 1;
}
void
quit_init(void)
{
quit_cmd.name = _("quit");
quit_cmd.altname = _("q");
quit_cmd.cfunc = quit_f;
quit_cmd.argmin = -1;
quit_cmd.argmax = -1;
quit_cmd.flags = CMD_FLAG_GLOBAL;
quit_cmd.oneline = _("exit the program");
add_command(&quit_cmd);
}
/* from libxcmd/help.c */
static cmdinfo_t help_cmd;
static void help_onecmd(const char *cmd, const cmdinfo_t *ct);
static void help_oneline(const char *cmd, const cmdinfo_t *ct);
static void
help_all(void)
{
const cmdinfo_t *ct;
for (ct = cmdtab; ct < &cmdtab[ncmds]; ct++)
help_oneline(ct->name, ct);
printf(_("\nUse 'help commandname' for extended help.\n"));
}
static int
help_f(
int argc,
char **argv)
{
const cmdinfo_t *ct;
if (argc == 1) {
help_all();
return 0;
}
ct = find_command(argv[1]);
if (ct == NULL) {
printf(_("command %s not found\n"), argv[1]);
return 0;
}
help_onecmd(argv[1], ct);
return 0;
}
static void
help_onecmd(
const char *cmd,
const cmdinfo_t *ct)
{
help_oneline(cmd, ct);
if (ct->help)
ct->help();
}
static void
help_oneline(
const char *cmd,
const cmdinfo_t *ct)
{
if (cmd)
printf("%s ", cmd);
else {
printf("%s ", ct->name);
if (ct->altname)
printf("(or %s) ", ct->altname);
}
if (ct->args)
printf("%s ", ct->args);
printf("-- %s\n", ct->oneline);
}
void
help_init(void)
{
help_cmd.name = _("help");
help_cmd.altname = _("?");
help_cmd.cfunc = help_f;
help_cmd.argmin = 0;
help_cmd.argmax = 1;
help_cmd.flags = CMD_FLAG_GLOBAL;
help_cmd.args = _("[command]");
help_cmd.oneline = _("help for one or all commands");
add_command(&help_cmd);
}

cmd.h (79 lines deleted)

@ -1,79 +0,0 @@
/*
* Copyright (c) 2000-2005 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __COMMAND_H__
#define __COMMAND_H__
#define CMD_FLAG_GLOBAL ((int)0x80000000) /* don't iterate "args" */
typedef int (*cfunc_t)(int argc, char **argv);
typedef void (*helpfunc_t)(void);
typedef struct cmdinfo {
const char *name;
const char *altname;
cfunc_t cfunc;
int argmin;
int argmax;
int canpush;
int flags;
const char *args;
const char *oneline;
helpfunc_t help;
} cmdinfo_t;
extern cmdinfo_t *cmdtab;
extern int ncmds;
void help_init(void);
void quit_init(void);
typedef int (*argsfunc_t)(int index);
typedef int (*checkfunc_t)(const cmdinfo_t *ci);
void add_command(const cmdinfo_t *ci);
void add_user_command(char *optarg);
void add_args_command(argsfunc_t af);
void add_check_command(checkfunc_t cf);
const cmdinfo_t *find_command(const char *cmd);
void command_loop(void);
int command_usage(const cmdinfo_t *ci);
int command(const cmdinfo_t *ci, int argc, char **argv);
/* from input.h */
char **breakline(char *input, int *count);
void doneline(char *input, char **vec);
char *fetchline(void);
long long cvtnum(char *s);
void cvtstr(double value, char *str, size_t sz);
struct timeval tsub(struct timeval t1, struct timeval t2);
double tdiv(double value, struct timeval tv);
enum {
DEFAULT_TIME = 0x0,
TERSE_FIXED_TIME = 0x1,
VERBOSE_FIXED_TIME = 0x2
};
void timestr(struct timeval *tv, char *str, size_t sz, int flags);
extern char *progname;
#endif /* __COMMAND_H__ */

configure (1131 lines changed)

File diff suppressed because it is too large.


@ -169,6 +169,7 @@ Coroutine *qemu_coroutine_new(void)
#ifdef CONFIG_VALGRIND_H
#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE
/* Work around an unused variable in the valgrind.h macro... */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
#endif
static inline void valgrind_stack_deregister(CoroutineUContext *co)
@ -176,7 +177,7 @@ static inline void valgrind_stack_deregister(CoroutineUContext *co)
VALGRIND_STACK_DEREGISTER(co->valgrind_stack_id);
}
#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE
#pragma GCC diagnostic error "-Wunused-but-set-variable"
#pragma GCC diagnostic pop
#endif
#endif


@ -23,8 +23,6 @@
#include "qemu/atomic.h"
#include "sysemu/qtest.h"
//#define CONFIG_DEBUG_EXEC
bool qemu_cpu_has_work(CPUState *cpu)
{
return cpu_has_work(cpu);
@ -61,8 +59,14 @@ static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, uint8_t *tb_ptr)
* counter hit zero); we must restore the guest PC to the address
* of the start of the TB.
*/
CPUClass *cc = CPU_GET_CLASS(cpu);
TranslationBlock *tb = (TranslationBlock *)(next_tb & ~TB_EXIT_MASK);
cpu_pc_from_tb(env, tb);
if (cc->synchronize_from_tb) {
cc->synchronize_from_tb(cpu, tb);
} else {
assert(cc->set_pc);
cc->set_pc(cpu, tb->pc);
}
}
if ((next_tb & TB_EXIT_MASK) == TB_EXIT_REQUESTED) {
/* We were asked to stop executing TBs (probably a pending
@ -215,7 +219,15 @@ int cpu_exec(CPUArchState *env)
cpu->halted = 0;
}
cpu_single_env = env;
current_cpu = cpu;
/* As long as current_cpu is null, up to the assignment just above,
* requests by other threads to exit the execution loop are expected to
* be issued using the exit_request global. We must make sure that our
* evaluation of the global value is performed past the current_cpu
* value transition point, which requires a memory barrier as well as
* an instruction scheduling constraint on modern architectures. */
smp_mb();
if (unlikely(exit_request)) {
cpu->exit_request = 1;
@ -224,7 +236,7 @@ int cpu_exec(CPUArchState *env)
#if defined(TARGET_I386)
/* put eflags in CPU temporary format */
CC_SRC = env->eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
DF = 1 - (2 * ((env->eflags >> 10) & 1));
env->df = 1 - (2 * ((env->eflags >> 10) & 1));
CC_OP = CC_OP_EFLAGS;
env->eflags &= ~(DF_MASK | CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
#elif defined(TARGET_SPARC)
@ -240,6 +252,7 @@ int cpu_exec(CPUArchState *env)
#elif defined(TARGET_LM32)
#elif defined(TARGET_MICROBLAZE)
#elif defined(TARGET_MIPS)
#elif defined(TARGET_MOXIE)
#elif defined(TARGET_OPENRISC)
#elif defined(TARGET_SH4)
#elif defined(TARGET_CRIS)
@ -284,7 +297,7 @@ int cpu_exec(CPUArchState *env)
for(;;) {
interrupt_request = cpu->interrupt_request;
if (unlikely(interrupt_request)) {
if (unlikely(env->singlestep_enabled & SSTEP_NOIRQ)) {
if (unlikely(cpu->singlestep_enabled & SSTEP_NOIRQ)) {
/* Mask out external interrupts for this step. */
interrupt_request &= ~CPU_INTERRUPT_SSTEP_MASK;
}
@ -324,7 +337,7 @@ int cpu_exec(CPUArchState *env)
cpu_svm_check_intercept_param(env, SVM_EXIT_SMI,
0);
cpu->interrupt_request &= ~CPU_INTERRUPT_SMI;
do_smm_enter(env);
do_smm_enter(x86_env_get_cpu(env));
next_tb = 0;
} else if ((interrupt_request & CPU_INTERRUPT_NMI) &&
!(env->hflags2 & HF2_NMI_MASK)) {
@ -566,25 +579,22 @@ int cpu_exec(CPUArchState *env)
env->exception_index = EXCP_INTERRUPT;
cpu_loop_exit(env);
}
#if defined(DEBUG_DISAS) || defined(CONFIG_DEBUG_EXEC)
#if defined(DEBUG_DISAS)
if (qemu_loglevel_mask(CPU_LOG_TB_CPU)) {
/* restore flags in standard format */
#if defined(TARGET_I386)
env->eflags = env->eflags | cpu_cc_compute_all(env, CC_OP)
| (DF & DF_MASK);
log_cpu_state(env, CPU_DUMP_CCOP);
env->eflags &= ~(DF_MASK | CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
log_cpu_state(cpu, CPU_DUMP_CCOP);
#elif defined(TARGET_M68K)
cpu_m68k_flush_flags(env, env->cc_op);
env->cc_op = CC_OP_FLAGS;
env->sr = (env->sr & 0xffe0)
| env->cc_dest | (env->cc_x << 4);
log_cpu_state(env, 0);
log_cpu_state(cpu, 0);
#else
log_cpu_state(env, 0);
log_cpu_state(cpu, 0);
#endif
}
#endif /* DEBUG_DISAS || CONFIG_DEBUG_EXEC */
#endif /* DEBUG_DISAS */
spin_lock(&tcg_ctx.tb_ctx.tb_lock);
tb = tb_find_fast(env);
/* Note: we do it here to avoid a gcc bug on Mac OS X when
@ -596,11 +606,10 @@ int cpu_exec(CPUArchState *env)
next_tb = 0;
tcg_ctx.tb_ctx.tb_invalidated_flag = 0;
}
#ifdef CONFIG_DEBUG_EXEC
qemu_log_mask(CPU_LOG_EXEC, "Trace %p [" TARGET_FMT_lx "] %s\n",
tb->tc_ptr, tb->pc,
lookup_symbol(tb->pc));
#endif
if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
qemu_log("Trace %p [" TARGET_FMT_lx "] %s\n",
tb->tc_ptr, tb->pc, lookup_symbol(tb->pc));
}
/* see if we can patch the calling TB. When the TB
spans two pages, we cannot safely do a direct
jump. */
@ -670,7 +679,8 @@ int cpu_exec(CPUArchState *env)
} else {
/* Reload env after longjmp - the compiler may have smashed all
* local variables as longjmp is marked 'noreturn'. */
env = cpu_single_env;
cpu = current_cpu;
env = cpu->env_ptr;
}
} /* for(;;) */
@ -678,7 +688,7 @@ int cpu_exec(CPUArchState *env)
#if defined(TARGET_I386)
/* restore flags in standard format */
env->eflags = env->eflags | cpu_cc_compute_all(env, CC_OP)
| (DF & DF_MASK);
| (env->df & DF_MASK);
#elif defined(TARGET_ARM)
/* XXX: Save/restore host fpu exception state?. */
#elif defined(TARGET_UNICORE32)
@ -692,6 +702,7 @@ int cpu_exec(CPUArchState *env)
| env->cc_dest | (env->cc_x << 4);
#elif defined(TARGET_MICROBLAZE)
#elif defined(TARGET_MIPS)
#elif defined(TARGET_MOXIE)
#elif defined(TARGET_OPENRISC)
#elif defined(TARGET_SH4)
#elif defined(TARGET_ALPHA)
@ -703,7 +714,7 @@ int cpu_exec(CPUArchState *env)
#error unsupported target CPU
#endif
/* fail safe : never use cpu_single_env outside cpu_exec() */
cpu_single_env = NULL;
/* fail safe : never use current_cpu outside cpu_exec() */
current_cpu = NULL;
return ret;
}

cpus.c (335 lines changed)

@ -60,12 +60,10 @@
#endif /* CONFIG_LINUX */
static CPUArchState *next_cpu;
static CPUState *next_cpu;
static bool cpu_thread_is_idle(CPUArchState *env)
static bool cpu_thread_is_idle(CPUState *cpu)
{
CPUState *cpu = ENV_GET_CPU(env);
if (cpu->stop || cpu->queued_work_first) {
return false;
}
@ -73,7 +71,7 @@ static bool cpu_thread_is_idle(CPUArchState *env)
return true;
}
if (!cpu->halted || qemu_cpu_has_work(cpu) ||
kvm_async_interrupts_enabled()) {
kvm_halt_in_kernel()) {
return false;
}
return true;
@ -81,10 +79,10 @@ static bool cpu_thread_is_idle(CPUArchState *env)
static bool all_cpu_threads_idle(void)
{
CPUArchState *env;
CPUState *cpu;
for (env = first_cpu; env != NULL; env = env->next_cpu) {
if (!cpu_thread_is_idle(env)) {
for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
if (!cpu_thread_is_idle(cpu)) {
return false;
}
}
@ -114,16 +112,17 @@ typedef struct TimersState {
int64_t dummy;
} TimersState;
TimersState timers_state;
static TimersState timers_state;
/* Return the virtual CPU time, based on the instruction counter. */
int64_t cpu_get_icount(void)
{
int64_t icount;
CPUArchState *env = cpu_single_env;
CPUState *cpu = current_cpu;
icount = qemu_icount;
if (env) {
if (cpu) {
CPUArchState *env = cpu->env_ptr;
if (!can_do_io(env)) {
fprintf(stderr, "Bad clock read\n");
}
@ -389,17 +388,15 @@ void configure_icount(const char *option)
void hw_error(const char *fmt, ...)
{
va_list ap;
CPUArchState *env;
CPUState *cpu;
va_start(ap, fmt);
fprintf(stderr, "qemu: hardware error: ");
vfprintf(stderr, fmt, ap);
fprintf(stderr, "\n");
for (env = first_cpu; env != NULL; env = env->next_cpu) {
cpu = ENV_GET_CPU(env);
for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU);
cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
}
va_end(ap);
abort();
@ -407,7 +404,7 @@ void hw_error(const char *fmt, ...)
void cpu_synchronize_all_states(void)
{
CPUArchState *cpu;
CPUState *cpu;
for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
cpu_synchronize_state(cpu);
@ -416,7 +413,7 @@ void cpu_synchronize_all_states(void)
void cpu_synchronize_all_post_reset(void)
{
CPUArchState *cpu;
CPUState *cpu;
for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
cpu_synchronize_post_reset(cpu);
@ -425,7 +422,7 @@ void cpu_synchronize_all_post_reset(void)
void cpu_synchronize_all_post_init(void)
{
CPUArchState *cpu;
CPUState *cpu;
for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
cpu_synchronize_post_init(cpu);
@ -437,17 +434,22 @@ bool cpu_is_stopped(CPUState *cpu)
return !runstate_is_running() || cpu->stopped;
}
static void do_vm_stop(RunState state)
static int do_vm_stop(RunState state)
{
int ret = 0;
if (runstate_is_running()) {
cpu_disable_ticks();
pause_all_vcpus();
runstate_set(state);
vm_state_notify(0, state);
bdrv_drain_all();
bdrv_flush_all();
monitor_protocol_event(QEVENT_STOP, NULL);
}
bdrv_drain_all();
ret = bdrv_flush_all();
return ret;
}
static bool cpu_can_run(CPUState *cpu)
@ -461,19 +463,17 @@ static bool cpu_can_run(CPUState *cpu)
return true;
}
static void cpu_handle_guest_debug(CPUArchState *env)
static void cpu_handle_guest_debug(CPUState *cpu)
{
CPUState *cpu = ENV_GET_CPU(env);
gdb_set_stop_cpu(env);
gdb_set_stop_cpu(cpu);
qemu_system_debug_request();
cpu->stopped = true;
}
static void cpu_signal(int sig)
{
if (cpu_single_env) {
cpu_exit(cpu_single_env);
if (current_cpu) {
cpu_exit(current_cpu);
}
exit_request = 1;
}
@ -570,7 +570,7 @@ static void dummy_signal(int sig)
{
}
static void qemu_kvm_init_cpu_signals(CPUArchState *env)
static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
int r;
sigset_t set;
@ -583,7 +583,7 @@ static void qemu_kvm_init_cpu_signals(CPUArchState *env)
pthread_sigmask(SIG_BLOCK, NULL, &set);
sigdelset(&set, SIG_IPI);
sigdelset(&set, SIGBUS);
r = kvm_set_signal_mask(env, &set);
r = kvm_set_signal_mask(cpu, &set);
if (r) {
fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
exit(1);
@ -605,7 +605,7 @@ static void qemu_tcg_init_cpu_signals(void)
}
#else /* _WIN32 */
static void qemu_kvm_init_cpu_signals(CPUArchState *env)
static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
abort();
}
@ -653,6 +653,7 @@ void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
wi.func = func;
wi.data = data;
wi.free = false;
if (cpu->queued_work_first == NULL) {
cpu->queued_work_first = &wi;
} else {
@ -664,13 +665,38 @@ void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
qemu_cpu_kick(cpu);
while (!wi.done) {
CPUArchState *self_env = cpu_single_env;
CPUState *self_cpu = current_cpu;
qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
cpu_single_env = self_env;
current_cpu = self_cpu;
}
}
void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
{
struct qemu_work_item *wi;
if (qemu_cpu_is_self(cpu)) {
func(data);
return;
}
wi = g_malloc0(sizeof(struct qemu_work_item));
wi->func = func;
wi->data = data;
wi->free = true;
if (cpu->queued_work_first == NULL) {
cpu->queued_work_first = wi;
} else {
cpu->queued_work_last->next = wi;
}
cpu->queued_work_last = wi;
wi->next = NULL;
wi->done = false;
qemu_cpu_kick(cpu);
}
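(Illustrative aside, not part of this commit.) A minimal sketch of how the new helper might be called; run_my_work(), example_async_kick() and some_cpu are hypothetical names. Unlike run_on_cpu() above, the caller does not wait: the item is queued, run later in the target vCPU's thread, and freed there because wi->free is set.
static void run_my_work(void *data)
{
    /* runs in the target vCPU thread (or immediately if called from it) */
}

static void example_async_kick(CPUState *some_cpu)
{
    /* fire-and-forget: no waiting on qemu_work_cond, unlike run_on_cpu() */
    async_run_on_cpu(some_cpu, run_my_work, NULL);
}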
static void flush_queued_work(CPUState *cpu)
{
struct qemu_work_item *wi;
@ -683,6 +709,9 @@ static void flush_queued_work(CPUState *cpu)
cpu->queued_work_first = wi->next;
wi->func(wi->data);
wi->done = true;
if (wi->free) {
g_free(wi);
}
}
cpu->queued_work_last = NULL;
qemu_cond_broadcast(&qemu_work_cond);
@ -701,7 +730,7 @@ static void qemu_wait_io_event_common(CPUState *cpu)
static void qemu_tcg_wait_io_event(void)
{
CPUArchState *env;
CPUState *cpu;
while (all_cpu_threads_idle()) {
/* Start accounting real time to the virtual clock if the CPUs
@ -714,16 +743,14 @@ static void qemu_tcg_wait_io_event(void)
qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
}
for (env = first_cpu; env != NULL; env = env->next_cpu) {
qemu_wait_io_event_common(ENV_GET_CPU(env));
for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
qemu_wait_io_event_common(cpu);
}
}
static void qemu_kvm_wait_io_event(CPUArchState *env)
static void qemu_kvm_wait_io_event(CPUState *cpu)
{
CPUState *cpu = ENV_GET_CPU(env);
while (cpu_thread_is_idle(env)) {
while (cpu_thread_is_idle(cpu)) {
qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
}
@ -733,14 +760,13 @@ static void qemu_kvm_wait_io_event(CPUArchState *env)
static void *qemu_kvm_cpu_thread_fn(void *arg)
{
CPUArchState *env = arg;
CPUState *cpu = ENV_GET_CPU(env);
CPUState *cpu = arg;
int r;
qemu_mutex_lock(&qemu_global_mutex);
qemu_thread_get_self(cpu->thread);
cpu->thread_id = qemu_get_thread_id();
cpu_single_env = env;
current_cpu = cpu;
r = kvm_init_vcpu(cpu);
if (r < 0) {
@ -748,7 +774,7 @@ static void *qemu_kvm_cpu_thread_fn(void *arg)
exit(1);
}
qemu_kvm_init_cpu_signals(env);
qemu_kvm_init_cpu_signals(cpu);
/* signal CPU creation */
cpu->created = true;
@ -756,12 +782,12 @@ static void *qemu_kvm_cpu_thread_fn(void *arg)
while (1) {
if (cpu_can_run(cpu)) {
r = kvm_cpu_exec(env);
r = kvm_cpu_exec(cpu);
if (r == EXCP_DEBUG) {
cpu_handle_guest_debug(env);
cpu_handle_guest_debug(cpu);
}
}
qemu_kvm_wait_io_event(env);
qemu_kvm_wait_io_event(cpu);
}
return NULL;
@ -773,8 +799,7 @@ static void *qemu_dummy_cpu_thread_fn(void *arg)
fprintf(stderr, "qtest is not supported under Windows\n");
exit(1);
#else
CPUArchState *env = arg;
CPUState *cpu = ENV_GET_CPU(env);
CPUState *cpu = arg;
sigset_t waitset;
int r;
@ -789,9 +814,9 @@ static void *qemu_dummy_cpu_thread_fn(void *arg)
cpu->created = true;
qemu_cond_signal(&qemu_cpu_cond);
cpu_single_env = env;
current_cpu = cpu;
while (1) {
cpu_single_env = NULL;
current_cpu = NULL;
qemu_mutex_unlock_iothread();
do {
int sig;
@ -802,7 +827,7 @@ static void *qemu_dummy_cpu_thread_fn(void *arg)
exit(1);
}
qemu_mutex_lock_iothread();
cpu_single_env = env;
current_cpu = cpu;
qemu_wait_io_event_common(cpu);
}
@ -812,30 +837,30 @@ static void *qemu_dummy_cpu_thread_fn(void *arg)
static void tcg_exec_all(void);
static void tcg_signal_cpu_creation(CPUState *cpu, void *data)
{
cpu->thread_id = qemu_get_thread_id();
cpu->created = true;
}
static void *qemu_tcg_cpu_thread_fn(void *arg)
{
CPUState *cpu = arg;
CPUArchState *env;
qemu_tcg_init_cpu_signals();
qemu_thread_get_self(cpu->thread);
/* signal CPU creation */
qemu_mutex_lock(&qemu_global_mutex);
for (env = first_cpu; env != NULL; env = env->next_cpu) {
cpu = ENV_GET_CPU(env);
cpu->thread_id = qemu_get_thread_id();
cpu->created = true;
}
qemu_for_each_cpu(tcg_signal_cpu_creation, NULL);
qemu_cond_signal(&qemu_cpu_cond);
/* wait for initial kick-off after machine start */
while (ENV_GET_CPU(first_cpu)->stopped) {
while (first_cpu->stopped) {
qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
/* process any pending work */
for (env = first_cpu; env != NULL; env = env->next_cpu) {
qemu_wait_io_event_common(ENV_GET_CPU(env));
for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
qemu_wait_io_event_common(cpu);
}
}
@ -862,9 +887,29 @@ static void qemu_cpu_kick_thread(CPUState *cpu)
}
#else /* _WIN32 */
if (!qemu_cpu_is_self(cpu)) {
SuspendThread(cpu->hThread);
CONTEXT tcgContext;
if (SuspendThread(cpu->hThread) == (DWORD)-1) {
fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
GetLastError());
exit(1);
}
/* On multi-core systems, we are not sure that the thread is actually
* suspended until we can get the context.
*/
tcgContext.ContextFlags = CONTEXT_CONTROL;
while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
continue;
}
cpu_signal(0);
ResumeThread(cpu->hThread);
if (ResumeThread(cpu->hThread) == (DWORD)-1) {
fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
GetLastError());
exit(1);
}
}
#endif
}
@ -881,12 +926,11 @@ void qemu_cpu_kick(CPUState *cpu)
void qemu_cpu_kick_self(void)
{
#ifndef _WIN32
assert(cpu_single_env);
CPUState *cpu_single_cpu = ENV_GET_CPU(cpu_single_env);
assert(current_cpu);
if (!cpu_single_cpu->thread_kicked) {
qemu_cpu_kick_thread(cpu_single_cpu);
cpu_single_cpu->thread_kicked = true;
if (!current_cpu->thread_kicked) {
qemu_cpu_kick_thread(current_cpu);
current_cpu->thread_kicked = true;
}
#else
abort();
@ -900,7 +944,7 @@ bool qemu_cpu_is_self(CPUState *cpu)
static bool qemu_in_vcpu_thread(void)
{
return cpu_single_env && qemu_cpu_is_self(ENV_GET_CPU(cpu_single_env));
return current_cpu && qemu_cpu_is_self(current_cpu);
}
void qemu_mutex_lock_iothread(void)
@ -910,7 +954,7 @@ void qemu_mutex_lock_iothread(void)
} else {
iothread_requesting_mutex = true;
if (qemu_mutex_trylock(&qemu_global_mutex)) {
qemu_cpu_kick_thread(ENV_GET_CPU(first_cpu));
qemu_cpu_kick_thread(first_cpu);
qemu_mutex_lock(&qemu_global_mutex);
}
iothread_requesting_mutex = false;
@ -925,14 +969,13 @@ void qemu_mutex_unlock_iothread(void)
static int all_vcpus_paused(void)
{
CPUArchState *penv = first_cpu;
CPUState *cpu = first_cpu;
while (penv) {
CPUState *pcpu = ENV_GET_CPU(penv);
if (!pcpu->stopped) {
while (cpu) {
if (!cpu->stopped) {
return 0;
}
penv = penv->next_cpu;
cpu = cpu->next_cpu;
}
return 1;
@ -940,24 +983,23 @@ static int all_vcpus_paused(void)
void pause_all_vcpus(void)
{
CPUArchState *penv = first_cpu;
CPUState *cpu = first_cpu;
qemu_clock_enable(vm_clock, false);
while (penv) {
CPUState *pcpu = ENV_GET_CPU(penv);
pcpu->stop = true;
qemu_cpu_kick(pcpu);
penv = penv->next_cpu;
while (cpu) {
cpu->stop = true;
qemu_cpu_kick(cpu);
cpu = cpu->next_cpu;
}
if (qemu_in_vcpu_thread()) {
cpu_stop_current();
if (!kvm_enabled()) {
while (penv) {
CPUState *pcpu = ENV_GET_CPU(penv);
pcpu->stop = 0;
pcpu->stopped = true;
penv = penv->next_cpu;
cpu = first_cpu;
while (cpu) {
cpu->stop = false;
cpu->stopped = true;
cpu = cpu->next_cpu;
}
return;
}
@ -965,25 +1007,29 @@ void pause_all_vcpus(void)
while (!all_vcpus_paused()) {
qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
penv = first_cpu;
while (penv) {
qemu_cpu_kick(ENV_GET_CPU(penv));
penv = penv->next_cpu;
cpu = first_cpu;
while (cpu) {
qemu_cpu_kick(cpu);
cpu = cpu->next_cpu;
}
}
}
void cpu_resume(CPUState *cpu)
{
cpu->stop = false;
cpu->stopped = false;
qemu_cpu_kick(cpu);
}
void resume_all_vcpus(void)
{
CPUArchState *penv = first_cpu;
CPUState *cpu = first_cpu;
qemu_clock_enable(vm_clock, true);
while (penv) {
CPUState *pcpu = ENV_GET_CPU(penv);
pcpu->stop = false;
pcpu->stopped = false;
qemu_cpu_kick(pcpu);
penv = penv->next_cpu;
while (cpu) {
cpu_resume(cpu);
cpu = cpu->next_cpu;
}
}
@ -1010,63 +1056,55 @@ static void qemu_tcg_init_vcpu(CPUState *cpu)
}
}
static void qemu_kvm_start_vcpu(CPUArchState *env)
static void qemu_kvm_start_vcpu(CPUState *cpu)
{
CPUState *cpu = ENV_GET_CPU(env);
cpu->thread = g_malloc0(sizeof(QemuThread));
cpu->halt_cond = g_malloc0(sizeof(QemuCond));
qemu_cond_init(cpu->halt_cond);
qemu_thread_create(cpu->thread, qemu_kvm_cpu_thread_fn, env,
qemu_thread_create(cpu->thread, qemu_kvm_cpu_thread_fn, cpu,
QEMU_THREAD_JOINABLE);
while (!cpu->created) {
qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
}
}
static void qemu_dummy_start_vcpu(CPUArchState *env)
static void qemu_dummy_start_vcpu(CPUState *cpu)
{
CPUState *cpu = ENV_GET_CPU(env);
cpu->thread = g_malloc0(sizeof(QemuThread));
cpu->halt_cond = g_malloc0(sizeof(QemuCond));
qemu_cond_init(cpu->halt_cond);
qemu_thread_create(cpu->thread, qemu_dummy_cpu_thread_fn, env,
qemu_thread_create(cpu->thread, qemu_dummy_cpu_thread_fn, cpu,
QEMU_THREAD_JOINABLE);
while (!cpu->created) {
qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
}
}
void qemu_init_vcpu(void *_env)
void qemu_init_vcpu(CPUState *cpu)
{
CPUArchState *env = _env;
CPUState *cpu = ENV_GET_CPU(env);
cpu->nr_cores = smp_cores;
cpu->nr_threads = smp_threads;
cpu->stopped = true;
if (kvm_enabled()) {
qemu_kvm_start_vcpu(env);
qemu_kvm_start_vcpu(cpu);
} else if (tcg_enabled()) {
qemu_tcg_init_vcpu(cpu);
} else {
qemu_dummy_start_vcpu(env);
qemu_dummy_start_vcpu(cpu);
}
}
void cpu_stop_current(void)
{
if (cpu_single_env) {
CPUState *cpu_single_cpu = ENV_GET_CPU(cpu_single_env);
cpu_single_cpu->stop = false;
cpu_single_cpu->stopped = true;
cpu_exit(cpu_single_env);
if (current_cpu) {
current_cpu->stop = false;
current_cpu->stopped = true;
cpu_exit(current_cpu);
qemu_cond_signal(&qemu_pause_cond);
}
}
void vm_stop(RunState state)
int vm_stop(RunState state)
{
if (qemu_in_vcpu_thread()) {
qemu_system_vmstop_request(state);
@ -1075,19 +1113,23 @@ void vm_stop(RunState state)
* vm_stop() has been requested.
*/
cpu_stop_current();
return;
return 0;
}
do_vm_stop(state);
return do_vm_stop(state);
}
/* does a state transition even if the VM is already stopped,
current state is forgotten forever */
void vm_stop_force_state(RunState state)
int vm_stop_force_state(RunState state)
{
if (runstate_is_running()) {
vm_stop(state);
return vm_stop(state);
} else {
runstate_set(state);
/* Make sure to return an error if the flush in a previous vm_stop()
* failed. */
return bdrv_flush_all();
}
}
@ -1140,16 +1182,16 @@ static void tcg_exec_all(void)
next_cpu = first_cpu;
}
for (; next_cpu != NULL && !exit_request; next_cpu = next_cpu->next_cpu) {
CPUArchState *env = next_cpu;
CPUState *cpu = ENV_GET_CPU(env);
CPUState *cpu = next_cpu;
CPUArchState *env = cpu->env_ptr;
qemu_clock_enable(vm_clock,
(env->singlestep_enabled & SSTEP_NOTIMER) == 0);
(cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
if (cpu_can_run(cpu)) {
r = tcg_cpu_exec(env);
if (r == EXCP_DEBUG) {
cpu_handle_guest_debug(env);
cpu_handle_guest_debug(cpu);
break;
}
} else if (cpu->stop || cpu->stopped) {
@ -1161,12 +1203,10 @@ static void tcg_exec_all(void)
void set_numa_modes(void)
{
CPUArchState *env;
CPUState *cpu;
int i;
for (env = first_cpu; env != NULL; env = env->next_cpu) {
cpu = ENV_GET_CPU(env);
for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
for (i = 0; i < nb_numa_nodes; i++) {
if (test_bit(cpu->cpu_index, node_cpumask[i])) {
cpu->numa_node = i;
@ -1186,18 +1226,30 @@ void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
CpuInfoList *qmp_query_cpus(Error **errp)
{
CpuInfoList *head = NULL, *cur_item = NULL;
CPUArchState *env;
CPUState *cpu;
for (env = first_cpu; env != NULL; env = env->next_cpu) {
CPUState *cpu = ENV_GET_CPU(env);
for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
CpuInfoList *info;
#if defined(TARGET_I386)
X86CPU *x86_cpu = X86_CPU(cpu);
CPUX86State *env = &x86_cpu->env;
#elif defined(TARGET_PPC)
PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
CPUPPCState *env = &ppc_cpu->env;
#elif defined(TARGET_SPARC)
SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
CPUSPARCState *env = &sparc_cpu->env;
#elif defined(TARGET_MIPS)
MIPSCPU *mips_cpu = MIPS_CPU(cpu);
CPUMIPSState *env = &mips_cpu->env;
#endif
cpu_synchronize_state(env);
cpu_synchronize_state(cpu);
info = g_malloc0(sizeof(*info));
info->value = g_malloc0(sizeof(*info->value));
info->value->CPU = cpu->cpu_index;
info->value->current = (env == first_cpu);
info->value->current = (cpu == first_cpu);
info->value->halted = cpu->halted;
info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
@ -1233,7 +1285,6 @@ void qmp_memsave(int64_t addr, int64_t size, const char *filename,
{
FILE *f;
uint32_t l;
CPUArchState *env;
CPUState *cpu;
uint8_t buf[1024];
@ -1247,11 +1298,10 @@ void qmp_memsave(int64_t addr, int64_t size, const char *filename,
"a CPU number");
return;
}
env = cpu->env_ptr;
f = fopen(filename, "wb");
if (!f) {
error_set(errp, QERR_OPEN_FILE_FAILED, filename);
error_setg_file_open(errp, errno, filename);
return;
}
@ -1259,7 +1309,7 @@ void qmp_memsave(int64_t addr, int64_t size, const char *filename,
l = sizeof(buf);
if (l > size)
l = size;
cpu_memory_rw_debug(env, addr, buf, l, 0);
cpu_memory_rw_debug(cpu, addr, buf, l, 0);
if (fwrite(buf, 1, l, f) != l) {
error_set(errp, QERR_IO_ERROR);
goto exit;
@ -1281,7 +1331,7 @@ void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
f = fopen(filename, "wb");
if (!f) {
error_set(errp, QERR_OPEN_FILE_FAILED, filename);
error_setg_file_open(errp, errno, filename);
return;
}
@ -1305,11 +1355,14 @@ exit:
void qmp_inject_nmi(Error **errp)
{
#if defined(TARGET_I386)
CPUArchState *env;
CPUState *cs;
for (cs = first_cpu; cs != NULL; cs = cs->next_cpu) {
X86CPU *cpu = X86_CPU(cs);
CPUX86State *env = &cpu->env;
for (env = first_cpu; env != NULL; env = env->next_cpu) {
if (!env->apic_state) {
cpu_interrupt(CPU(x86_env_get_cpu(env)), CPU_INTERRUPT_NMI);
cpu_interrupt(cs, CPU_INTERRUPT_NMI);
} else {
apic_deliver_nmi(env->apic_state);
}


@ -158,6 +158,17 @@ void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry, uintptr_t start,
}
}
static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
{
ram_addr_t ram_addr;
if (qemu_ram_addr_from_host(ptr, &ram_addr) == NULL) {
fprintf(stderr, "Bad ram pointer %p\n", ptr);
abort();
}
return ram_addr;
}
static inline void tlb_update_dirty(CPUTLBEntry *tlb_entry)
{
ram_addr_t ram_addr;
@ -175,11 +186,13 @@ static inline void tlb_update_dirty(CPUTLBEntry *tlb_entry)
void cpu_tlb_reset_dirty_all(ram_addr_t start1, ram_addr_t length)
{
CPUState *cpu;
CPUArchState *env;
for (env = first_cpu; env != NULL; env = env->next_cpu) {
for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
int mmu_idx;
env = cpu->env_ptr;
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
unsigned int i;
@ -248,36 +261,37 @@ void tlb_set_page(CPUArchState *env, target_ulong vaddr,
target_ulong code_address;
uintptr_t addend;
CPUTLBEntry *te;
hwaddr iotlb;
hwaddr iotlb, xlat, sz;
assert(size >= TARGET_PAGE_SIZE);
if (size != TARGET_PAGE_SIZE) {
tlb_add_large_page(env, vaddr, size);
}
section = phys_page_find(address_space_memory.dispatch, paddr >> TARGET_PAGE_BITS);
sz = size;
section = address_space_translate_for_iotlb(&address_space_memory, paddr,
&xlat, &sz);
assert(sz >= TARGET_PAGE_SIZE);
#if defined(DEBUG_TLB)
printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
" prot=%x idx=%d pd=0x%08lx\n",
vaddr, paddr, prot, mmu_idx, pd);
" prot=%x idx=%d\n",
vaddr, paddr, prot, mmu_idx);
#endif
address = vaddr;
if (!(memory_region_is_ram(section->mr) ||
memory_region_is_romd(section->mr))) {
/* IO memory case (romd handled later) */
if (!memory_region_is_ram(section->mr) && !memory_region_is_romd(section->mr)) {
/* IO memory case */
address |= TLB_MMIO;
}
if (memory_region_is_ram(section->mr) ||
memory_region_is_romd(section->mr)) {
addend = (uintptr_t)memory_region_get_ram_ptr(section->mr)
+ memory_region_section_addr(section, paddr);
} else {
addend = 0;
} else {
/* TLB_MMIO for rom/romd handled below */
addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;
}
code_address = address;
iotlb = memory_region_section_get_iotlb(env, section, vaddr, paddr, prot,
&address);
iotlb = memory_region_section_get_iotlb(env, section, vaddr, paddr, xlat,
prot, &address);
index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
env->iotlb[mmu_idx][index] = iotlb - vaddr;
@ -300,9 +314,7 @@ void tlb_set_page(CPUArchState *env, target_ulong vaddr,
/* Write access calls the I/O callback. */
te->addr_write = address | TLB_MMIO;
} else if (memory_region_is_ram(section->mr)
&& !cpu_physical_memory_is_dirty(
section->mr->ram_addr
+ memory_region_section_addr(section, paddr))) {
&& !cpu_physical_memory_is_dirty(section->mr->ram_addr + xlat)) {
te->addr_write = address | TLB_NOTDIRTY;
} else {
te->addr_write = address;
@ -332,12 +344,15 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
pd = env1->iotlb[mmu_idx][page_index] & ~TARGET_PAGE_MASK;
mr = iotlb_to_region(pd);
if (memory_region_is_unassigned(mr)) {
#if defined(TARGET_ALPHA) || defined(TARGET_MIPS) || defined(TARGET_SPARC)
cpu_unassigned_access(env1, addr, 0, 1, 0, 4);
#else
cpu_abort(env1, "Trying to execute code outside RAM or ROM at 0x"
TARGET_FMT_lx "\n", addr);
#endif
CPUState *cpu = ENV_GET_CPU(env1);
CPUClass *cc = CPU_GET_CLASS(cpu);
if (cc->do_unassigned_access) {
cc->do_unassigned_access(cpu, addr, false, true, 0, 4);
} else {
cpu_abort(env1, "Trying to execute code outside RAM or ROM at 0x"
TARGET_FMT_lx "\n", addr);
}
}
p = (void *)((uintptr_t)addr + env1->tlb_table[mmu_idx][page_index].addend);
return qemu_ram_addr_from_host_nofail(p);


@ -13,3 +13,5 @@ CONFIG_IDE_QDEV=y
CONFIG_VMWARE_VGA=y
CONFIG_IDE_CMD646=y
CONFIG_I8259=y
CONFIG_MC146818RTC=y
CONFIG_ISA_TESTDEV=y


@ -16,7 +16,9 @@ CONFIG_TWL92230=y
CONFIG_TSC2005=y
CONFIG_LM832X=y
CONFIG_TMP105=y
CONFIG_STELLARIS=y
CONFIG_STELLARIS_INPUT=y
CONFIG_STELLARIS_ENET=y
CONFIG_SSD0303=y
CONFIG_SSD0323=y
CONFIG_ADS7846=y
@ -29,8 +31,17 @@ CONFIG_SMC91C111=y
CONFIG_DS1338=y
CONFIG_PFLASH_CFI01=y
CONFIG_PFLASH_CFI02=y
CONFIG_MICRODRIVE=y
CONFIG_USB_MUSB=y
CONFIG_ARM9MPCORE=y
CONFIG_ARM11MPCORE=y
CONFIG_ARM15MPCORE=y
CONFIG_ARM_GIC=y
CONFIG_ARM_GIC_KVM=$(CONFIG_KVM)
CONFIG_ARM_TIMER=y
CONFIG_ARM_MPTIMER=y
CONFIG_PL011=y
CONFIG_PL022=y
CONFIG_PL031=y
@ -42,8 +53,28 @@ CONFIG_PL110=y
CONFIG_PL181=y
CONFIG_PL190=y
CONFIG_PL310=y
CONFIG_PL330=y
CONFIG_CADENCE=y
CONFIG_XGMAC=y
CONFIG_EXYNOS4=y
CONFIG_PXA2XX=y
CONFIG_BITBANG_I2C=y
CONFIG_FRAMEBUFFER=y
CONFIG_XILINX_SPIPS=y
CONFIG_A9SCU=y
CONFIG_MARVELL_88W8618=y
CONFIG_OMAP=y
CONFIG_TSC210X=y
CONFIG_BLIZZARD=y
CONFIG_ONENAND=y
CONFIG_TUSB6010=y
CONFIG_IMX=y
CONFIG_MAINSTONE=y
CONFIG_NSERIES=y
CONFIG_REALVIEW=y
CONFIG_ZAURUS=y
CONFIG_ZYNQ=y
CONFIG_VERSATILE_PCI=y
CONFIG_VERSATILE_I2C=y


@ -1,5 +1,6 @@
# Default configuration for cris-softmmu
CONFIG_ETRAXFS=y
CONFIG_NAND=y
CONFIG_PTIMER=y
CONFIG_PFLASH_CFI02=y


@ -1,8 +1,10 @@
# Default configuration for i386-softmmu
include pci.mak
include sound.mak
include usb.mak
CONFIG_VGA=y
CONFIG_QXL=$(CONFIG_SPICE)
CONFIG_VGA_PCI=y
CONFIG_VGA_ISA=y
CONFIG_VGA_CIRRUS=y
@ -16,15 +18,29 @@ CONFIG_PCKBD=y
CONFIG_FDC=y
CONFIG_ACPI=y
CONFIG_APM=y
CONFIG_DMA=y
CONFIG_I8257=y
CONFIG_IDE_ISA=y
CONFIG_IDE_PIIX=y
CONFIG_NE2000_ISA=y
CONFIG_PIIX_PCI=y
CONFIG_SOUND=y
CONFIG_HPET=y
CONFIG_APPLESMC=y
CONFIG_I8259=y
CONFIG_PFLASH_CFI01=y
CONFIG_TPM_TIS=y
CONFIG_TPM_PASSTHROUGH=y
CONFIG_TPM_TIS=$(CONFIG_TPM)
CONFIG_PCI_HOTPLUG_OLD=y
CONFIG_MC146818RTC=y
CONFIG_PAM=y
CONFIG_PCI_PIIX=y
CONFIG_WDT_IB700=y
CONFIG_XEN_I386=$(CONFIG_XEN)
CONFIG_ISA_DEBUG=y
CONFIG_ISA_TESTDEV=y
CONFIG_VMPORT=y
CONFIG_SGA=y
CONFIG_LPC_ICH9=y
CONFIG_PCI_Q35=y
CONFIG_APIC=y
CONFIG_IOAPIC=y
CONFIG_ICC_BUS=y
CONFIG_PVPANIC=y


@ -1,5 +1,9 @@
# Default configuration for lm32-softmmu
CONFIG_LM32=y
CONFIG_MILKYMIST=y
CONFIG_MILKYMIST_TMU2=$(CONFIG_OPENGL)
CONFIG_FRAMEBUFFER=y
CONFIG_PTIMER=y
CONFIG_PFLASH_CFI01=y
CONFIG_PFLASH_CFI02=y


@ -2,5 +2,6 @@
include pci.mak
include usb.mak
CONFIG_COLDFIRE=y
CONFIG_GDBSTUB_XML=y
CONFIG_PTIMER=y


@ -5,5 +5,7 @@ CONFIG_PFLASH_CFI01=y
CONFIG_SERIAL=y
CONFIG_XILINX=y
CONFIG_XILINX_AXI=y
CONFIG_XILINX_SPI=y
CONFIG_XILINX_ETHLITE=y
CONFIG_SSI=y
CONFIG_SSI_M25P80=y


@ -5,5 +5,7 @@ CONFIG_PFLASH_CFI01=y
CONFIG_SERIAL=y
CONFIG_XILINX=y
CONFIG_XILINX_AXI=y
CONFIG_XILINX_SPI=y
CONFIG_XILINX_ETHLITE=y
CONFIG_SSI=y
CONFIG_SSI_M25P80=y


@ -1,8 +1,8 @@
# Default configuration for mips-softmmu
include pci.mak
include sound.mak
include usb.mak
CONFIG_ISA_MMIO=y
CONFIG_ESP=y
CONFIG_VGA=y
CONFIG_VGA_PCI=y
@ -18,12 +18,11 @@ CONFIG_PCKBD=y
CONFIG_FDC=y
CONFIG_ACPI=y
CONFIG_APM=y
CONFIG_DMA=y
CONFIG_I8257=y
CONFIG_PIIX4=y
CONFIG_IDE_ISA=y
CONFIG_IDE_PIIX=y
CONFIG_NE2000_ISA=y
CONFIG_SOUND=y
CONFIG_RC4030=y
CONFIG_DP8393X=y
CONFIG_DS1225Y=y
@ -32,3 +31,7 @@ CONFIG_PFLASH_CFI01=y
CONFIG_G364FB=y
CONFIG_I8259=y
CONFIG_JAZZ_LED=y
CONFIG_MC146818RTC=y
CONFIG_VT82C686=y
CONFIG_ISA_TESTDEV=y
CONFIG_EMPTY_SLOT=y


@ -1,8 +1,8 @@
# Default configuration for mips64-softmmu
include pci.mak
include sound.mak
include usb.mak
CONFIG_ISA_MMIO=y
CONFIG_ESP=y
CONFIG_VGA=y
CONFIG_VGA_PCI=y
@ -18,12 +18,11 @@ CONFIG_PCKBD=y
CONFIG_FDC=y
CONFIG_ACPI=y
CONFIG_APM=y
CONFIG_DMA=y
CONFIG_I8257=y
CONFIG_PIIX4=y
CONFIG_IDE_ISA=y
CONFIG_IDE_PIIX=y
CONFIG_NE2000_ISA=y
CONFIG_SOUND=y
CONFIG_RC4030=y
CONFIG_DP8393X=y
CONFIG_DS1225Y=y
@ -32,3 +31,7 @@ CONFIG_PFLASH_CFI01=y
CONFIG_G364FB=y
CONFIG_I8259=y
CONFIG_JAZZ_LED=y
CONFIG_MC146818RTC=y
CONFIG_VT82C686=y
CONFIG_ISA_TESTDEV=y
CONFIG_EMPTY_SLOT=y


@ -1,8 +1,8 @@
# Default configuration for mips64el-softmmu
include pci.mak
include sound.mak
include usb.mak
CONFIG_ISA_MMIO=y
CONFIG_ESP=y
CONFIG_VGA=y
CONFIG_VGA_PCI=y
@ -18,13 +18,12 @@ CONFIG_PCKBD=y
CONFIG_FDC=y
CONFIG_ACPI=y
CONFIG_APM=y
CONFIG_DMA=y
CONFIG_I8257=y
CONFIG_PIIX4=y
CONFIG_IDE_ISA=y
CONFIG_IDE_PIIX=y
CONFIG_IDE_VIA=y
CONFIG_NE2000_ISA=y
CONFIG_SOUND=y
CONFIG_RC4030=y
CONFIG_DP8393X=y
CONFIG_DS1225Y=y
@ -34,3 +33,7 @@ CONFIG_FULONG=y
CONFIG_G364FB=y
CONFIG_I8259=y
CONFIG_JAZZ_LED=y
CONFIG_MC146818RTC=y
CONFIG_VT82C686=y
CONFIG_ISA_TESTDEV=y
CONFIG_EMPTY_SLOT=y


@ -1,8 +1,8 @@
# Default configuration for mipsel-softmmu
include pci.mak
include sound.mak
include usb.mak
CONFIG_ISA_MMIO=y
CONFIG_ESP=y
CONFIG_VGA=y
CONFIG_VGA_PCI=y
@ -18,12 +18,11 @@ CONFIG_PCKBD=y
CONFIG_FDC=y
CONFIG_ACPI=y
CONFIG_APM=y
CONFIG_DMA=y
CONFIG_I8257=y
CONFIG_PIIX4=y
CONFIG_IDE_ISA=y
CONFIG_IDE_PIIX=y
CONFIG_NE2000_ISA=y
CONFIG_SOUND=y
CONFIG_RC4030=y
CONFIG_DP8393X=y
CONFIG_DS1225Y=y
@ -32,3 +31,7 @@ CONFIG_PFLASH_CFI01=y
CONFIG_G364FB=y
CONFIG_I8259=y
CONFIG_JAZZ_LED=y
CONFIG_MC146818RTC=y
CONFIG_VT82C686=y
CONFIG_ISA_TESTDEV=y
CONFIG_EMPTY_SLOT=y


@ -0,0 +1,5 @@
# Default configuration for moxie-softmmu
CONFIG_MC146818RTC=y
CONFIG_SERIAL=y
CONFIG_VGA=y


@ -9,10 +9,15 @@ CONFIG_NE2000_PCI=y
CONFIG_EEPRO100_PCI=y
CONFIG_PCNET_PCI=y
CONFIG_PCNET_COMMON=y
CONFIG_AC97=y
CONFIG_HDA=y
CONFIG_ES1370=y
CONFIG_LSI_SCSI_PCI=y
CONFIG_VMW_PVSCSI_SCSI_PCI=y
CONFIG_MEGASAS_SCSI_PCI=y
CONFIG_RTL8139_PCI=y
CONFIG_E1000_PCI=y
CONFIG_VMXNET3_PCI=y
CONFIG_IDE_CORE=y
CONFIG_IDE_QDEV=y
CONFIG_IDE_PCI=y
@ -22,3 +27,6 @@ CONFIG_ESP_PCI=y
CONFIG_SERIAL=y
CONFIG_SERIAL_PCI=y
CONFIG_IPACK=y
CONFIG_WDT_IB6300ESB=y
CONFIG_PCI_TESTDEV=y
CONFIG_NVME_PCI=y


@ -1,6 +1,7 @@
# Default configuration for ppc-softmmu
include pci.mak
include sound.mak
include usb.mak
CONFIG_GDBSTUB_XML=y
CONFIG_ISA_MMIO=y
@ -13,7 +14,7 @@ CONFIG_PARALLEL=y
CONFIG_I8254=y
CONFIG_PCKBD=y
CONFIG_FDC=y
CONFIG_DMA=y
CONFIG_I8257=y
CONFIG_I82374=y
CONFIG_OPENPIC=y
CONFIG_PREP_PCI=y
@ -34,10 +35,15 @@ CONFIG_IDE_ISA=y
CONFIG_IDE_CMD646=y
CONFIG_IDE_MACIO=y
CONFIG_NE2000_ISA=y
CONFIG_SOUND=y
CONFIG_PFLASH_CFI01=y
CONFIG_PFLASH_CFI02=y
CONFIG_PTIMER=y
CONFIG_I8259=y
CONFIG_XILINX=y
CONFIG_E500=$(CONFIG_FDT)
CONFIG_XILINX_ETHLITE=y
CONFIG_OPENPIC=y
CONFIG_E500=y
CONFIG_OPENPIC_KVM=$(and $(CONFIG_E500),$(CONFIG_KVM))
# For PReP
CONFIG_MC146818RTC=y
CONFIG_ISA_TESTDEV=y


@ -1,6 +1,7 @@
# Default configuration for ppc64-softmmu
include pci.mak
include sound.mak
include usb.mak
CONFIG_GDBSTUB_XML=y
CONFIG_ISA_MMIO=y
@ -13,7 +14,7 @@ CONFIG_PARALLEL=y
CONFIG_I8254=y
CONFIG_PCKBD=y
CONFIG_FDC=y
CONFIG_DMA=y
CONFIG_I8257=y
CONFIG_I82374=y
CONFIG_OPENPIC=y
CONFIG_PREP_PCI=y
@ -34,11 +35,24 @@ CONFIG_IDE_ISA=y
CONFIG_IDE_CMD646=y
CONFIG_IDE_MACIO=y
CONFIG_NE2000_ISA=y
CONFIG_SOUND=y
CONFIG_PFLASH_CFI01=y
CONFIG_PFLASH_CFI02=y
CONFIG_PTIMER=y
CONFIG_I8259=y
CONFIG_XILINX=y
CONFIG_PSERIES=$(CONFIG_FDT)
CONFIG_E500=$(CONFIG_FDT)
CONFIG_XILINX_ETHLITE=y
CONFIG_OPENPIC=y
CONFIG_PSERIES=y
CONFIG_E500=y
CONFIG_OPENPIC_KVM=$(and $(CONFIG_E500),$(CONFIG_KVM))
# For pSeries
CONFIG_XICS=$(CONFIG_PSERIES)
# For PReP
CONFIG_I82378=y
CONFIG_I8259=y
CONFIG_I8254=y
CONFIG_PCSPK=y
CONFIG_I82374=y
CONFIG_I8257=y
CONFIG_MC146818RTC=y
CONFIG_ISA_TESTDEV=y


@ -1,6 +1,7 @@
# Default configuration for ppcemb-softmmu
include pci.mak
include sound.mak
include usb.mak
CONFIG_GDBSTUB_XML=y
CONFIG_ISA_MMIO=y
@ -12,7 +13,7 @@ CONFIG_SERIAL=y
CONFIG_I8254=y
CONFIG_PCKBD=y
CONFIG_FDC=y
CONFIG_DMA=y
CONFIG_I8257=y
CONFIG_OPENPIC=y
CONFIG_PREP_PCI=y
CONFIG_MACIO=y
@ -29,10 +30,15 @@ CONFIG_IDE_ISA=y
CONFIG_IDE_CMD646=y
CONFIG_IDE_MACIO=y
CONFIG_NE2000_ISA=y
CONFIG_SOUND=y
CONFIG_PFLASH_CFI01=y
CONFIG_PFLASH_CFI02=y
CONFIG_PTIMER=y
CONFIG_I8259=y
CONFIG_XILINX=y
CONFIG_E500=$(CONFIG_FDT)
CONFIG_XILINX_ETHLITE=y
CONFIG_OPENPIC=y
CONFIG_E500=y
CONFIG_OPENPIC_KVM=$(and $(CONFIG_E500),$(CONFIG_KVM))
# For PReP
CONFIG_MC146818RTC=y
CONFIG_ISA_TESTDEV=y


@ -1 +1,2 @@
CONFIG_VIRTIO=y
CONFIG_SCLPCONSOLE=y


@ -5,4 +5,14 @@ include usb.mak
CONFIG_SERIAL=y
CONFIG_PTIMER=y
CONFIG_PFLASH_CFI02=y
CONFIG_ISA_MMIO=y
CONFIG_SH4=y
CONFIG_IDE_MMIO=y
CONFIG_SM501=y
CONFIG_ISA_TESTDEV=y
CONFIG_I82378=y
CONFIG_I8259=y
CONFIG_I8254=y
CONFIG_PCSPK=y
CONFIG_I82374=y
CONFIG_I8257=y
CONFIG_MC146818RTC=y


@ -5,4 +5,14 @@ include usb.mak
CONFIG_SERIAL=y
CONFIG_PTIMER=y
CONFIG_PFLASH_CFI02=y
CONFIG_ISA_MMIO=y
CONFIG_SH4=y
CONFIG_IDE_MMIO=y
CONFIG_SM501=y
CONFIG_ISA_TESTDEV=y
CONFIG_I82378=y
CONFIG_I8259=y
CONFIG_I8254=y
CONFIG_PCSPK=y
CONFIG_I82374=y
CONFIG_I8257=y
CONFIG_MC146818RTC=y

View File

@ -0,0 +1,4 @@
CONFIG_SB16=y
CONFIG_ADLIB=y
CONFIG_GUS=y
CONFIG_CS4231A=y

View File

@ -8,3 +8,11 @@ CONFIG_PTIMER=y
CONFIG_FDC=y
CONFIG_EMPTY_SLOT=y
CONFIG_PCNET_COMMON=y
CONFIG_LANCE=y
CONFIG_TCX=y
CONFIG_SLAVIO=y
CONFIG_CS4231=y
CONFIG_GRLIB=y
CONFIG_STP2000=y
CONFIG_ECCMEMCTL=y
CONFIG_SUN4M=y

View File

@ -13,3 +13,6 @@ CONFIG_PCKBD=y
CONFIG_FDC=y
CONFIG_IDE_ISA=y
CONFIG_IDE_CMD646=y
CONFIG_PCI_APB=y
CONFIG_MC146818RTC=y
CONFIG_ISA_TESTDEV=y

View File

@ -1,8 +1,10 @@
# Default configuration for x86_64-softmmu
include pci.mak
include sound.mak
include usb.mak
CONFIG_VGA=y
CONFIG_QXL=$(CONFIG_SPICE)
CONFIG_VGA_PCI=y
CONFIG_VGA_ISA=y
CONFIG_VGA_CIRRUS=y
@ -16,15 +18,29 @@ CONFIG_PCKBD=y
CONFIG_FDC=y
CONFIG_ACPI=y
CONFIG_APM=y
CONFIG_DMA=y
CONFIG_I8257=y
CONFIG_IDE_ISA=y
CONFIG_IDE_PIIX=y
CONFIG_NE2000_ISA=y
CONFIG_PIIX_PCI=y
CONFIG_SOUND=y
CONFIG_HPET=y
CONFIG_APPLESMC=y
CONFIG_I8259=y
CONFIG_PFLASH_CFI01=y
CONFIG_TPM_TIS=y
CONFIG_TPM_PASSTHROUGH=y
CONFIG_TPM_TIS=$(CONFIG_TPM)
CONFIG_PCI_HOTPLUG_OLD=y
CONFIG_MC146818RTC=y
CONFIG_PAM=y
CONFIG_PCI_PIIX=y
CONFIG_WDT_IB700=y
CONFIG_XEN_I386=$(CONFIG_XEN)
CONFIG_ISA_DEBUG=y
CONFIG_ISA_TESTDEV=y
CONFIG_VMPORT=y
CONFIG_SGA=y
CONFIG_LPC_ICH9=y
CONFIG_PCI_Q35=y
CONFIG_APIC=y
CONFIG_IOAPIC=y
CONFIG_ICC_BUS=y
CONFIG_PVPANIC=y

View File

@ -21,6 +21,7 @@
#include "config.h"
#include "qemu-common.h"
#include "sysemu/device_tree.h"
#include "sysemu/sysemu.h"
#include "hw/loader.h"
#include "qemu/option.h"
#include "qemu/config-file.h"
@ -213,7 +214,7 @@ uint32_t qemu_devtree_get_phandle(void *fdt, const char *path)
uint32_t r;
r = fdt_get_phandle(fdt, findnode_nofail(fdt, path));
if (r <= 0) {
if (r == 0) {
fprintf(stderr, "%s: Couldn't get phandle for %s: %s\n", __func__,
path, fdt_strerror(r));
exit(1);
@ -239,15 +240,8 @@ uint32_t qemu_devtree_alloc_phandle(void *fdt)
* which phandle id to start allocting phandles.
*/
if (!phandle) {
QemuOpts *machine_opts;
machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
if (machine_opts) {
const char *phandle_start;
phandle_start = qemu_opt_get(machine_opts, "phandle_start");
if (phandle_start) {
phandle = strtoul(phandle_start, NULL, 0);
}
}
phandle = qemu_opt_get_number(qemu_get_machine_opts(),
"phandle_start", 0);
}
if (!phandle) {
@ -307,15 +301,43 @@ int qemu_devtree_add_subnode(void *fdt, const char *name)
void qemu_devtree_dumpdtb(void *fdt, int size)
{
QemuOpts *machine_opts;
const char *dumpdtb = qemu_opt_get(qemu_get_machine_opts(), "dumpdtb");
machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
if (machine_opts) {
const char *dumpdtb = qemu_opt_get(machine_opts, "dumpdtb");
if (dumpdtb) {
/* Dump the dtb to a file and quit */
exit(g_file_set_contents(dumpdtb, fdt, size, NULL) ? 0 : 1);
if (dumpdtb) {
/* Dump the dtb to a file and quit */
exit(g_file_set_contents(dumpdtb, fdt, size, NULL) ? 0 : 1);
}
}
int qemu_devtree_setprop_sized_cells_from_array(void *fdt,
const char *node_path,
const char *property,
int numvalues,
uint64_t *values)
{
uint32_t *propcells;
uint64_t value;
int cellnum, vnum, ncells;
uint32_t hival;
propcells = g_new0(uint32_t, numvalues * 2);
cellnum = 0;
for (vnum = 0; vnum < numvalues; vnum++) {
ncells = values[vnum * 2];
if (ncells != 1 && ncells != 2) {
return -1;
}
value = values[vnum * 2 + 1];
hival = cpu_to_be32(value >> 32);
if (ncells > 1) {
propcells[cellnum++] = hival;
} else if (hival != 0) {
return -1;
}
propcells[cellnum++] = cpu_to_be32(value);
}
return qemu_devtree_setprop(fdt, node_path, property, propcells,
cellnum * sizeof(uint32_t));
}
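
The function added at the end of this hunk packs alternating (cell-count, value) pairs into big-endian cells and hands the result to qemu_devtree_setprop(). A minimal usage sketch follows; it is not part of the merge itself, the node path and values are invented for illustration, and it assumes fdt already points at a flattened device tree containing a /memory node and that qemu-common.h (for ARRAY_SIZE) is in scope.

/* Illustrative only: encode a "reg" property as a 1-cell base address
 * followed by a 2-cell size.  The array alternates cell counts (1 or 2)
 * and the values themselves, which is what the loop above expects.
 */
uint64_t reg[] = {
    1, 0x80000000,      /* #address-cells == 1: base address */
    2, 0x100000000ULL,  /* #size-cells == 2: 4 GiB region */
};

if (qemu_devtree_setprop_sized_cells_from_array(fdt, "/memory", "reg",
                                                ARRAY_SIZE(reg) / 2,
                                                reg) < 0) {
    fprintf(stderr, "could not encode /memory reg property\n");
}

As the loop shows, the helper rejects a pair whose cell count is not 1 or 2, and a value whose upper 32 bits are non-zero when only one cell was requested, so callers can rely on the negative return to catch encoding mistakes. The same series appears to add a variadic qemu_devtree_setprop_sized_cells() convenience wrapper on the header side, though that file is not shown in this excerpt.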

Some files were not shown because too many files have changed in this diff.