Merge tag 'pull-tcg-20230105' of https://gitlab.com/rth7680/qemu into staging

Fix race conditions in new user-only vma tracking.
Add tcg backend paired register allocation.
Cleanup tcg backend function call abi.

# gpg: Signature made Fri 06 Jan 2023 03:12:17 GMT
# gpg:                using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg:                issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full]
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A  05C0 64DF 38E8 AF7E 215F

* tag 'pull-tcg-20230105' of https://gitlab.com/rth7680/qemu: (47 commits)
  tests/tcg/multiarch: add vma-pthread.c
  accel/tcg: Handle false negative lookup in page_check_range
  accel/tcg: Use g_free_rcu for user-exec interval trees
  accel/tcg: Fix tb_invalidate_phys_page_unwind
  tcg: Add TCGHelperInfo argument to tcg_out_call
  tcg/aarch64: Merge tcg_out_callr into tcg_out_call
  tcg: Move ffi_cif pointer into TCGHelperInfo
  tcg: Factor init_ffi_layouts() out of tcg_context_init()
  tcg: Convert typecode_to_ffi from array to function
  tcg: Reorg function calls
  tcg: Use output_pref wrapper function
  tcg: Vary the allocation size for TCGOp
  tcg: Pass number of arguments to tcg_emit_op() / tcg_op_insert_*()
  accel/tcg/plugin: Use copy_op in append_{udata,mem}_cb
  accel/tcg/plugin: Avoid duplicate copy in copy_call
  accel/tcg/plugin: Don't search for the function pointer index
  tcg: Use TCG_CALL_ARG_EVEN for TCI special case
  tcg: Replace TCG_TARGET_EXTEND_ARGS with TCG_TARGET_CALL_ARG_I32
  tcg: Replace TCG_TARGET_CALL_ALIGN_ARGS with TCG_TARGET_CALL_ARG_I64
  tcg: Introduce TCGCallReturnKind and TCGCallArgumentKind
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Commit aaa90fede5 by Peter Maydell, 2023-01-06 15:40:37 +00:00
50 changed files with 2635 additions and 1763 deletions

accel/tcg/cputlb.c

@@ -1356,7 +1356,6 @@ static uint64_t io_readx(CPUArchState *env, CPUTLBEntryFull *full,
     MemoryRegionSection *section;
     MemoryRegion *mr;
     uint64_t val;
-    bool locked = false;
     MemTxResult r;
     section = iotlb_to_section(cpu, full->xlat_section, full->attrs);
@@ -1367,11 +1366,11 @@ static uint64_t io_readx(CPUArchState *env, CPUTLBEntryFull *full,
         cpu_io_recompile(cpu, retaddr);
     }
-    if (!qemu_mutex_iothread_locked()) {
-        qemu_mutex_lock_iothread();
-        locked = true;
+    {
+        QEMU_IOTHREAD_LOCK_GUARD();
+        r = memory_region_dispatch_read(mr, mr_offset, &val, op, full->attrs);
     }
-    r = memory_region_dispatch_read(mr, mr_offset, &val, op, full->attrs);
     if (r != MEMTX_OK) {
         hwaddr physaddr = mr_offset +
             section->offset_within_address_space -
@@ -1380,10 +1379,6 @@ static uint64_t io_readx(CPUArchState *env, CPUTLBEntryFull *full,
         cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), access_type,
                                mmu_idx, full->attrs, r, retaddr);
     }
-    if (locked) {
-        qemu_mutex_unlock_iothread();
-    }
     return val;
 }
@@ -1410,7 +1405,6 @@ static void io_writex(CPUArchState *env, CPUTLBEntryFull *full,
     hwaddr mr_offset;
     MemoryRegionSection *section;
     MemoryRegion *mr;
-    bool locked = false;
     MemTxResult r;
     section = iotlb_to_section(cpu, full->xlat_section, full->attrs);
@@ -1427,11 +1421,11 @@ static void io_writex(CPUArchState *env, CPUTLBEntryFull *full,
      */
     save_iotlb_data(cpu, section, mr_offset);
-    if (!qemu_mutex_iothread_locked()) {
-        qemu_mutex_lock_iothread();
-        locked = true;
+    {
+        QEMU_IOTHREAD_LOCK_GUARD();
+        r = memory_region_dispatch_write(mr, mr_offset, val, op, full->attrs);
     }
-    r = memory_region_dispatch_write(mr, mr_offset, val, op, full->attrs);
     if (r != MEMTX_OK) {
         hwaddr physaddr = mr_offset +
             section->offset_within_address_space -
@@ -1441,9 +1435,6 @@ static void io_writex(CPUArchState *env, CPUTLBEntryFull *full,
                                MMU_DATA_STORE, mmu_idx, full->attrs, r,
                                retaddr);
     }
-    if (locked) {
-        qemu_mutex_unlock_iothread();
-    }
 }
static inline target_ulong tlb_read_ofs(CPUTLBEntry *entry, size_t ofs)

accel/tcg/plugin-gen.c

@@ -258,10 +258,13 @@ static TCGOp *rm_ops(TCGOp *op)
 static TCGOp *copy_op_nocheck(TCGOp **begin_op, TCGOp *op)
 {
-    *begin_op = QTAILQ_NEXT(*begin_op, link);
-    tcg_debug_assert(*begin_op);
-    op = tcg_op_insert_after(tcg_ctx, op, (*begin_op)->opc);
-    memcpy(op->args, (*begin_op)->args, sizeof(op->args));
+    TCGOp *old_op = QTAILQ_NEXT(*begin_op, link);
+    unsigned nargs = old_op->nargs;
+
+    *begin_op = old_op;
+    op = tcg_op_insert_after(tcg_ctx, op, old_op->opc, nargs);
+    memcpy(op->args, old_op->args, sizeof(op->args[0]) * nargs);
+
     return op;
 }
@@ -381,32 +384,23 @@ static TCGOp *copy_st_ptr(TCGOp **begin_op, TCGOp *op)
 static TCGOp *copy_call(TCGOp **begin_op, TCGOp *op, void *empty_func,
                         void *func, int *cb_idx)
 {
+    TCGOp *old_op;
+    int func_idx;
+
     /* copy all ops until the call */
     do {
         op = copy_op_nocheck(begin_op, op);
     } while (op->opc != INDEX_op_call);
     /* fill in the op call */
-    op->param1 = (*begin_op)->param1;
-    op->param2 = (*begin_op)->param2;
+    old_op = *begin_op;
+    TCGOP_CALLI(op) = TCGOP_CALLI(old_op);
+    TCGOP_CALLO(op) = TCGOP_CALLO(old_op);
     tcg_debug_assert(op->life == 0);
-    if (*cb_idx == -1) {
-        int i;
-
-        /*
-         * Instead of working out the position of the callback in args[], just
-         * look for @empty_func, since it should be a unique pointer.
-         */
-        for (i = 0; i < MAX_OPC_PARAM_ARGS; i++) {
-            if ((uintptr_t)(*begin_op)->args[i] == (uintptr_t)empty_func) {
-                *cb_idx = i;
-                break;
-            }
-        }
-        tcg_debug_assert(i < MAX_OPC_PARAM_ARGS);
-    }
-    op->args[*cb_idx] = (uintptr_t)func;
-    op->args[*cb_idx + 1] = (*begin_op)->args[*cb_idx + 1];
+
+    func_idx = TCGOP_CALLO(op) + TCGOP_CALLI(op);
+    *cb_idx = func_idx;
+    op->args[func_idx] = (uintptr_t)func;
     return op;
 }
@@ -424,11 +418,11 @@ static TCGOp *append_udata_cb(const struct qemu_plugin_dyn_cb *cb,
     op = copy_const_ptr(&begin_op, op, cb->userp);
     /* copy the ld_i32, but note that we only have to copy it once */
-    begin_op = QTAILQ_NEXT(begin_op, link);
-    tcg_debug_assert(begin_op && begin_op->opc == INDEX_op_ld_i32);
     if (*cb_idx == -1) {
-        op = tcg_op_insert_after(tcg_ctx, op, INDEX_op_ld_i32);
-        memcpy(op->args, begin_op->args, sizeof(op->args));
+        op = copy_op(&begin_op, op, INDEX_op_ld_i32);
+    } else {
+        begin_op = QTAILQ_NEXT(begin_op, link);
+        tcg_debug_assert(begin_op && begin_op->opc == INDEX_op_ld_i32);
     }
/* call */
@@ -471,11 +465,11 @@ static TCGOp *append_mem_cb(const struct qemu_plugin_dyn_cb *cb,
     op = copy_const_ptr(&begin_op, op, cb->userp);
     /* copy the ld_i32, but note that we only have to copy it once */
-    begin_op = QTAILQ_NEXT(begin_op, link);
-    tcg_debug_assert(begin_op && begin_op->opc == INDEX_op_ld_i32);
     if (*cb_idx == -1) {
-        op = tcg_op_insert_after(tcg_ctx, op, INDEX_op_ld_i32);
-        memcpy(op->args, begin_op->args, sizeof(op->args));
+        op = copy_op(&begin_op, op, INDEX_op_ld_i32);
+    } else {
+        begin_op = QTAILQ_NEXT(begin_op, link);
+        tcg_debug_assert(begin_op && begin_op->opc == INDEX_op_ld_i32);
     }
/* extu_tl_i64 */

accel/tcg/tb-maint.c

@@ -1024,43 +1024,51 @@ void tb_invalidate_phys_page(tb_page_addr_t addr)
  */
 bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc)
 {
-    assert(pc != 0);
-#ifdef TARGET_HAS_PRECISE_SMC
-    assert_memory_lock();
-    {
-        TranslationBlock *current_tb = tcg_tb_lookup(pc);
-        bool current_tb_modified = false;
-        TranslationBlock *tb;
-        PageForEachNext n;
+    TranslationBlock *current_tb;
+    bool current_tb_modified;
+    TranslationBlock *tb;
+    PageForEachNext n;
-        addr &= TARGET_PAGE_MASK;
-        PAGE_FOR_EACH_TB(addr, addr + TARGET_PAGE_SIZE, unused, tb, n) {
-            if (current_tb == tb &&
-                (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
-                /*
-                 * If we are modifying the current TB, we must stop its
-                 * execution. We could be more precise by checking that
-                 * the modification is after the current PC, but it would
-                 * require a specialized function to partially restore
-                 * the CPU state.
-                 */
-                current_tb_modified = true;
-                cpu_restore_state_from_tb(current_cpu, current_tb, pc);
-            }
-            tb_phys_invalidate__locked(tb);
-        }
-        if (current_tb_modified) {
-            /* Force execution of one insn next time. */
-            CPUState *cpu = current_cpu;
-            cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(current_cpu);
-            return true;
-        }
-    }
+
+    /*
+     * Without precise smc semantics, or when outside of a TB,
+     * we can skip to invalidate.
+     */
+#ifndef TARGET_HAS_PRECISE_SMC
+    pc = 0;
+#endif
+    if (!pc) {
+        tb_invalidate_phys_page(addr);
+        return false;
+    }
+
+    assert_memory_lock();
+    current_tb = tcg_tb_lookup(pc);
+
+    addr &= TARGET_PAGE_MASK;
+    current_tb_modified = false;
+
+    PAGE_FOR_EACH_TB(addr, addr + TARGET_PAGE_SIZE, unused, tb, n) {
+        if (current_tb == tb &&
+            (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
+            /*
+             * If we are modifying the current TB, we must stop its
+             * execution. We could be more precise by checking that
+             * the modification is after the current PC, but it would
+             * require a specialized function to partially restore
+             * the CPU state.
+             */
+            current_tb_modified = true;
+            cpu_restore_state_from_tb(current_cpu, current_tb, pc);
+        }
+        tb_phys_invalidate__locked(tb);
+    }
+
+    if (current_tb_modified) {
+        /* Force execution of one insn next time. */
+        CPUState *cpu = current_cpu;
+        cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(current_cpu);
+        return true;
+    }
-#else
-    tb_invalidate_phys_page(addr);
-#endif /* TARGET_HAS_PRECISE_SMC */
     return false;
 }
#else

accel/tcg/user-exec.c

@@ -22,6 +22,7 @@
#include "exec/exec-all.h"
#include "tcg/tcg.h"
#include "qemu/bitops.h"
#include "qemu/rcu.h"
#include "exec/cpu_ldst.h"
#include "exec/translate-all.h"
#include "exec/helper-proto.h"
@@ -136,6 +137,7 @@ bool handle_sigsegv_accerr_write(CPUState *cpu, sigset_t *old_set,
}
typedef struct PageFlagsNode {
struct rcu_head rcu;
IntervalTreeNode itree;
int flags;
} PageFlagsNode;
@@ -266,7 +268,7 @@ static bool pageflags_unset(target_ulong start, target_ulong last)
         }
     } else if (p_last <= last) {
         /* Range completely covers node -- remove it. */
-        g_free(p);
+        g_free_rcu(p, rcu);
     } else {
         /* Truncate the node from the start. */
         p->itree.start = last + 1;
@@ -311,7 +313,7 @@ static void pageflags_create_merge(target_ulong start, target_ulong last,
     if (prev) {
         if (next) {
             prev->itree.last = next->itree.last;
-            g_free(next);
+            g_free_rcu(next, rcu);
         } else {
             prev->itree.last = last;
         }
@@ -376,7 +378,7 @@ static bool pageflags_set_clear(target_ulong start, target_ulong last,
             p->flags = merge_flags;
         } else {
             interval_tree_remove(&p->itree, &pageflags_root);
-            g_free(p);
+            g_free_rcu(p, rcu);
         }
         goto done;
     }
@@ -421,7 +423,7 @@ static bool pageflags_set_clear(target_ulong start, target_ulong last,
             p->flags = merge_flags;
         } else {
             interval_tree_remove(&p->itree, &pageflags_root);
-            g_free(p);
+            g_free_rcu(p, rcu);
         }
         if (p_last < last) {
             start = p_last + 1;
@@ -462,7 +464,7 @@ static bool pageflags_set_clear(target_ulong start, target_ulong last,
             p->itree.start = last + 1;
             interval_tree_insert(&p->itree, &pageflags_root);
         } else {
-            g_free(p);
+            g_free_rcu(p, rcu);
             goto restart;
         }
         if (set_flags) {
if (set_flags) {
@@ -523,6 +525,8 @@ void page_set_flags(target_ulong start, target_ulong end, int flags)
 int page_check_range(target_ulong start, target_ulong len, int flags)
 {
     target_ulong last;
+    int locked;  /* tri-state: =0: unlocked, +1: global, -1: local */
+    int ret;
     if (len == 0) {
         return 0; /* trivial length */
@@ -533,42 +537,67 @@ int page_check_range(target_ulong start, target_ulong len, int flags)
         return -1; /* wrap around */
     }
+    locked = have_mmap_lock();
     while (true) {
         PageFlagsNode *p = pageflags_find(start, last);
         int missing;
         if (!p) {
-            return -1; /* entire region invalid */
+            if (!locked) {
+                /*
+                 * Lockless lookups have false negatives.
+                 * Retry with the lock held.
+                 */
+                mmap_lock();
+                locked = -1;
+                p = pageflags_find(start, last);
+            }
+            if (!p) {
+                ret = -1; /* entire region invalid */
+                break;
+            }
         }
         if (start < p->itree.start) {
-            return -1; /* initial bytes invalid */
+            ret = -1; /* initial bytes invalid */
+            break;
         }
         missing = flags & ~p->flags;
         if (missing & PAGE_READ) {
-            return -1; /* page not readable */
+            ret = -1; /* page not readable */
+            break;
         }
         if (missing & PAGE_WRITE) {
             if (!(p->flags & PAGE_WRITE_ORG)) {
-                return -1; /* page not writable */
+                ret = -1; /* page not writable */
+                break;
             }
             /* Asking about writable, but has been protected: undo. */
             if (!page_unprotect(start, 0)) {
-                return -1;
+                ret = -1;
+                break;
             }
             /* TODO: page_unprotect should take a range, not a single page. */
             if (last - start < TARGET_PAGE_SIZE) {
-                return 0; /* ok */
+                ret = 0; /* ok */
+                break;
             }
             start += TARGET_PAGE_SIZE;
             continue;
         }
         if (last <= p->itree.last) {
-            return 0; /* ok */
+            ret = 0; /* ok */
+            break;
         }
         start = p->itree.last + 1;
     }
+
+    /* Release the lock if acquired locally. */
+    if (locked < 0) {
+        mmap_unlock();
+    }
+    return ret;
 }
void page_protect(tb_page_addr_t address)
@@ -779,6 +808,7 @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
#define TBD_MASK (TARGET_PAGE_MASK * TPD_PAGES)
typedef struct TargetPageDataNode {
struct rcu_head rcu;
IntervalTreeNode itree;
char data[TPD_PAGES][TARGET_PAGE_DATA_SIZE] __attribute__((aligned));
} TargetPageDataNode;
@@ -801,11 +831,11 @@ void page_reset_target_data(target_ulong start, target_ulong end)
          n = next,
          next = next ? interval_tree_iter_next(n, start, last) : NULL) {
         target_ulong n_start, n_last, p_ofs, p_len;
-        TargetPageDataNode *t;
+        TargetPageDataNode *t = container_of(n, TargetPageDataNode, itree);
         if (n->start >= start && n->last <= last) {
             interval_tree_remove(n, &targetdata_root);
-            g_free(n);
+            g_free_rcu(t, rcu);
             continue;
         }
@@ -819,7 +849,6 @@ void page_reset_target_data(target_ulong start, target_ulong end)
         n_last = MIN(last, n->last);
         p_len = (n_last + 1 - n_start) >> TARGET_PAGE_BITS;
-        t = container_of(n, TargetPageDataNode, itree);
         memset(t->data[p_ofs], 0, p_len * TARGET_PAGE_DATA_SIZE);
     }
 }

docs/devel/atomics.rst

@@ -1,3 +1,5 @@
.. _atomics-ref:
=========================
Atomic operations in QEMU
=========================

docs/devel/index-tcg.rst

@@ -9,6 +9,7 @@ are only implementing things for HW accelerated hypervisors.
:maxdepth: 2
tcg
tcg-ops
decodetree
multi-thread-tcg
tcg-icount

docs/devel/tcg-ops.rst (new file, 941 lines)

@@ -0,0 +1,941 @@
.. _tcg-ops-ref:
*******************************
TCG Intermediate Representation
*******************************
Introduction
============
TCG (Tiny Code Generator) began as a generic backend for a C
compiler. It was simplified to be used in QEMU. It also has its roots
in the QOP code generator written by Paul Brook.
Definitions
===========
TCG receives RISC-like *TCG ops* and performs some optimizations on them,
including liveness analysis and trivial constant expression
evaluation. TCG ops are then implemented in the host CPU back end,
also known as the TCG target.
The TCG *target* is the architecture for which we generate the
code. It is of course not the same as the "target" of QEMU which is
the emulated architecture. As TCG started as a generic C backend used
for cross compiling, the design allows the TCG target to differ from
the host, although for QEMU the two are always the same.
In this document, we use *guest* to specify what architecture we are
emulating; *target* always means the TCG target, the machine on which
we are running QEMU.
A TCG *function* corresponds to a QEMU Translated Block (TB).
A TCG *temporary* is a variable only live in a basic block. Temporaries are allocated explicitly in each function.
A TCG *local temporary* is a variable only live in a function. Local temporaries are allocated explicitly in each function.
A TCG *global* is a variable which is live in all the functions
(equivalent of a C global variable). They are defined before the
functions are defined. A TCG global can be a memory location (e.g. a QEMU
CPU register), a fixed host register (e.g. the QEMU CPU state pointer)
or a memory location which is stored in a register outside QEMU TBs
(not implemented yet).
A TCG *basic block* corresponds to a list of instructions terminated
by a branch instruction.
An operation with *undefined behavior* may result in a crash.
An operation with *unspecified behavior* shall not crash. However,
the result may be one of several possibilities so may be considered
an *undefined result*.
Intermediate representation
===========================
Introduction
------------
TCG instructions operate on variables which are temporaries, local
temporaries or globals. TCG instructions and variables are strongly
typed. Two types are supported: 32 bit integers and 64 bit
integers. Pointers are defined as an alias to 32 bit or 64 bit
integers depending on the TCG target word size.
Each instruction has a fixed number of output variable operands, input
variable operands and always constant operands.
The notable exception is the call instruction which has a variable
number of outputs and inputs.
In the textual form, output operands usually come first, followed by
input operands, followed by constant operands. The output type is
included in the instruction name. Constants are prefixed with a '$'.
.. code-block:: none
add_i32 t0, t1, t2 /* (t0 <- t1 + t2) */
Assumptions
-----------
Basic blocks
^^^^^^^^^^^^
* Basic blocks end after branches (e.g. brcond_i32 instruction),
goto_tb and exit_tb instructions.
* Basic blocks start after the end of a previous basic block, or at a
set_label instruction.
After the end of a basic block, the content of temporaries is
destroyed, but local temporaries and globals are preserved.
Floating point types
^^^^^^^^^^^^^^^^^^^^
* Floating point types are not supported yet
Pointers
^^^^^^^^
* Depending on the TCG target, pointer size is 32 bit or 64
bit. The type ``TCG_TYPE_PTR`` is an alias to ``TCG_TYPE_I32`` or
``TCG_TYPE_I64``.
Helpers
^^^^^^^
* Using the tcg_gen_helper_x_y it is possible to call any function
taking i32, i64 or pointer types. By default, before calling a helper,
all globals are stored at their canonical location and it is assumed
that the function can modify them. By default, the helper is allowed to
modify the CPU state or raise an exception.
This can be overridden using the following function modifiers:
- ``TCG_CALL_NO_READ_GLOBALS`` means that the helper does not read globals,
either directly or via an exception. They will not be saved to their
canonical locations before calling the helper.
- ``TCG_CALL_NO_WRITE_GLOBALS`` means that the helper does not modify any globals.
They will only be saved to their canonical location before calling helpers,
but they won't be reloaded afterwards.
- ``TCG_CALL_NO_SIDE_EFFECTS`` means that the call to the function is removed if
the return value is not used.
Note that ``TCG_CALL_NO_READ_GLOBALS`` implies ``TCG_CALL_NO_WRITE_GLOBALS``.
On some TCG targets (e.g. x86), several calling conventions are
supported.
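
As a sketch of how a helper is declared and invoked (the helper name and
signature are invented for illustration; the ``DEF_HELPER_FLAGS_*`` macros
live in ``include/exec/helper-head.h``):

.. code-block:: c

   /* helper.h: a pure helper that neither reads nor writes globals */
   DEF_HELPER_FLAGS_2(my_op, TCG_CALL_NO_RWG_SE, i32, i32, i32)

   /* in the translator, the generated gen_helper_* wrapper is used */
   gen_helper_my_op(dst, src1, src2);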
Branches
^^^^^^^^
* Use the instruction 'br' to jump to a label.
Code Optimizations
------------------
When generating instructions, you can count on at least the following
optimizations:
- Single instructions are simplified, e.g.
.. code-block:: none
and_i32 t0, t0, $0xffffffff
is suppressed.
- A liveness analysis is done at the basic block level. The
information is used to suppress moves from a dead variable to
another one. It is also used to remove instructions which compute
dead results. The latter is especially useful for condition code
optimization in QEMU.
In the following example:
.. code-block:: none
add_i32 t0, t1, t2
add_i32 t0, t0, $1
mov_i32 t0, $1
only the last instruction is kept.
Instruction Reference
=====================
Function call
-------------
.. list-table::
* - call *<ret>* *<params>* ptr
- | call function 'ptr' (pointer type)
|
| *<ret>* optional 32 bit or 64 bit return value
| *<params>* optional 32 bit or 64 bit parameters
Jumps/Labels
------------
.. list-table::
* - set_label $label
- | Define label 'label' at the current program point.
* - br $label
- | Jump to label.
* - brcond_i32/i64 *t0*, *t1*, *cond*, *label*
- | Conditional jump if *t0* *cond* *t1* is true. *cond* can be:
|
| ``TCG_COND_EQ``
| ``TCG_COND_NE``
| ``TCG_COND_LT /* signed */``
| ``TCG_COND_GE /* signed */``
| ``TCG_COND_LE /* signed */``
| ``TCG_COND_GT /* signed */``
| ``TCG_COND_LTU /* unsigned */``
| ``TCG_COND_GEU /* unsigned */``
| ``TCG_COND_LEU /* unsigned */``
| ``TCG_COND_GTU /* unsigned */``
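
As an illustrative sequence in the textual form above (the label and
temporaries are hypothetical):

.. code-block:: none

   brcond_i32 t0, t1, TCG_COND_NE, $skip  /* if (t0 != t1) goto skip */
   add_i32 t2, t2, $1
   set_label $skip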
Arithmetic
----------
.. list-table::
* - add_i32/i64 *t0*, *t1*, *t2*
- | *t0* = *t1* + *t2*
* - sub_i32/i64 *t0*, *t1*, *t2*
- | *t0* = *t1* - *t2*
* - neg_i32/i64 *t0*, *t1*
- | *t0* = -*t1* (two's complement)
* - mul_i32/i64 *t0*, *t1*, *t2*
- | *t0* = *t1* * *t2*
* - div_i32/i64 *t0*, *t1*, *t2*
- | *t0* = *t1* / *t2* (signed)
| Undefined behavior if division by zero or overflow.
* - divu_i32/i64 *t0*, *t1*, *t2*
- | *t0* = *t1* / *t2* (unsigned)
| Undefined behavior if division by zero.
* - rem_i32/i64 *t0*, *t1*, *t2*
- | *t0* = *t1* % *t2* (signed)
| Undefined behavior if division by zero or overflow.
* - remu_i32/i64 *t0*, *t1*, *t2*
- | *t0* = *t1* % *t2* (unsigned)
| Undefined behavior if division by zero.
Logical
-------
.. list-table::
* - and_i32/i64 *t0*, *t1*, *t2*
- | *t0* = *t1* & *t2*
* - or_i32/i64 *t0*, *t1*, *t2*
- | *t0* = *t1* | *t2*
* - xor_i32/i64 *t0*, *t1*, *t2*
- | *t0* = *t1* ^ *t2*
* - not_i32/i64 *t0*, *t1*
- | *t0* = ~\ *t1*
* - andc_i32/i64 *t0*, *t1*, *t2*
- | *t0* = *t1* & ~\ *t2*
* - eqv_i32/i64 *t0*, *t1*, *t2*
- | *t0* = ~(*t1* ^ *t2*), or equivalently, *t0* = *t1* ^ ~\ *t2*
* - nand_i32/i64 *t0*, *t1*, *t2*
- | *t0* = ~(*t1* & *t2*)
* - nor_i32/i64 *t0*, *t1*, *t2*
- | *t0* = ~(*t1* | *t2*)
* - orc_i32/i64 *t0*, *t1*, *t2*
- | *t0* = *t1* | ~\ *t2*
* - clz_i32/i64 *t0*, *t1*, *t2*
- | *t0* = *t1* ? clz(*t1*) : *t2*
* - ctz_i32/i64 *t0*, *t1*, *t2*
- | *t0* = *t1* ? ctz(*t1*) : *t2*
* - ctpop_i32/i64 *t0*, *t1*
- | *t0* = number of bits set in *t1*
|
| With *ctpop* short for "count population", matching
| the function name used in ``include/qemu/host-utils.h``.
Shifts/Rotates
--------------
.. list-table::
* - shl_i32/i64 *t0*, *t1*, *t2*
- | *t0* = *t1* << *t2*
| Unspecified behavior if *t2* < 0 or *t2* >= 32 (resp 64)
* - shr_i32/i64 *t0*, *t1*, *t2*
- | *t0* = *t1* >> *t2* (unsigned)
| Unspecified behavior if *t2* < 0 or *t2* >= 32 (resp 64)
* - sar_i32/i64 *t0*, *t1*, *t2*
- | *t0* = *t1* >> *t2* (signed)
| Unspecified behavior if *t2* < 0 or *t2* >= 32 (resp 64)
* - rotl_i32/i64 *t0*, *t1*, *t2*
- | Rotation of *t2* bits to the left
| Unspecified behavior if *t2* < 0 or *t2* >= 32 (resp 64)
* - rotr_i32/i64 *t0*, *t1*, *t2*
- | Rotation of *t2* bits to the right.
| Unspecified behavior if *t2* < 0 or *t2* >= 32 (resp 64)
Misc
----
.. list-table::
* - mov_i32/i64 *t0*, *t1*
- | *t0* = *t1*
| Move *t1* to *t0* (both operands must have the same type).
* - ext8s_i32/i64 *t0*, *t1*
ext8u_i32/i64 *t0*, *t1*
ext16s_i32/i64 *t0*, *t1*
ext16u_i32/i64 *t0*, *t1*
ext32s_i64 *t0*, *t1*
ext32u_i64 *t0*, *t1*
- | 8, 16 or 32 bit sign/zero extension (both operands must have the same type)
* - bswap16_i32/i64 *t0*, *t1*, *flags*
- | 16 bit byte swap on the low bits of a 32/64 bit input.
|
| If *flags* & ``TCG_BSWAP_IZ``, then *t1* is known to be zero-extended from bit 15.
| If *flags* & ``TCG_BSWAP_OZ``, then *t0* will be zero-extended from bit 15.
| If *flags* & ``TCG_BSWAP_OS``, then *t0* will be sign-extended from bit 15.
|
| If neither ``TCG_BSWAP_OZ`` nor ``TCG_BSWAP_OS`` are set, then the bits of *t0* above bit 15 may contain any value.
* - bswap32_i64 *t0*, *t1*, *flags*
- | 32 bit byte swap on a 64-bit value. The flags are the same as for bswap16,
except they apply from bit 31 instead of bit 15.
* - bswap32_i32 *t0*, *t1*, *flags*
bswap64_i64 *t0*, *t1*, *flags*
- | 32/64 bit byte swap. The flags are ignored, but still present
for consistency with the other bswap opcodes.
* - discard_i32/i64 *t0*
- | Indicate that the value of *t0* won't be used later. It is useful to
force dead code elimination.
* - deposit_i32/i64 *dest*, *t1*, *t2*, *pos*, *len*
- | Deposit *t2* as a bitfield into *t1*, placing the result in *dest*.
|
| The bitfield is described by *pos*/*len*, which are immediate values:
|
| *len* - the length of the bitfield
| *pos* - the position of the first bit, counting from the LSB
|
| For example, "deposit_i32 dest, t1, t2, 8, 4" indicates a 4-bit field
at bit 8. This operation would be equivalent to
|
| *dest* = (*t1* & ~0x0f00) | ((*t2* << 8) & 0x0f00)
* - extract_i32/i64 *dest*, *t1*, *pos*, *len*
sextract_i32/i64 *dest*, *t1*, *pos*, *len*
- | Extract a bitfield from *t1*, placing the result in *dest*.
|
| The bitfield is described by *pos*/*len*, which are immediate values,
as above for deposit. For extract_*, the result will be extended
to the left with zeros; for sextract_*, the result will be extended
to the left with copies of the bitfield sign bit at *pos* + *len* - 1.
|
| For example, "sextract_i32 dest, t1, 8, 4" indicates a 4-bit field
at bit 8. This operation would be equivalent to
|
| *dest* = (*t1* << 20) >> 28
|
| (using an arithmetic right shift).
* - extract2_i32/i64 *dest*, *t1*, *t2*, *pos*
- | For N = {32,64}, extract an N-bit quantity from the concatenation
of *t2*:*t1*, beginning at *pos*. The tcg_gen_extract2_{i32,i64} expander
accepts 0 <= *pos* <= N as inputs. The backend code generator will
not see either 0 or N as inputs for these opcodes.
* - extrl_i64_i32 *t0*, *t1*
- | For 64-bit hosts only, extract the low 32-bits of input *t1* and place it
into 32-bit output *t0*. Depending on the host, this may be a simple move,
or may require additional canonicalization.
* - extrh_i64_i32 *t0*, *t1*
- | For 64-bit hosts only, extract the high 32-bits of input *t1* and place it
into 32-bit output *t0*. Depending on the host, this may be a simple shift,
or may require additional canonicalization.
Conditional moves
-----------------
.. list-table::
* - setcond_i32/i64 *dest*, *t1*, *t2*, *cond*
- | *dest* = (*t1* *cond* *t2*)
|
| Set *dest* to 1 if (*t1* *cond* *t2*) is true, otherwise set to 0.
* - movcond_i32/i64 *dest*, *c1*, *c2*, *v1*, *v2*, *cond*
- | *dest* = (*c1* *cond* *c2* ? *v1* : *v2*)
|
| Set *dest* to *v1* if (*c1* *cond* *c2*) is true, otherwise set to *v2*.
Type conversions
----------------
.. list-table::
* - ext_i32_i64 *t0*, *t1*
- | Convert *t1* (32 bit) to *t0* (64 bit) with sign extension
* - extu_i32_i64 *t0*, *t1*
- | Convert *t1* (32 bit) to *t0* (64 bit) with zero extension
* - trunc_i64_i32 *t0*, *t1*
- | Truncate *t1* (64 bit) to *t0* (32 bit)
* - concat_i32_i64 *t0*, *t1*, *t2*
- | Construct *t0* (64-bit) taking the low half from *t1* (32 bit) and the high half
from *t2* (32 bit).
* - concat32_i64 *t0*, *t1*, *t2*
- | Construct *t0* (64-bit) taking the low half from *t1* (64 bit) and the high half
from *t2* (64 bit).
Load/Store
----------
.. list-table::
* - ld_i32/i64 *t0*, *t1*, *offset*
ld8s_i32/i64 *t0*, *t1*, *offset*
ld8u_i32/i64 *t0*, *t1*, *offset*
ld16s_i32/i64 *t0*, *t1*, *offset*
ld16u_i32/i64 *t0*, *t1*, *offset*
ld32s_i64 t0, *t1*, *offset*
ld32u_i64 t0, *t1*, *offset*
- | *t0* = read(*t1* + *offset*)
|
| Load 8, 16, 32 or 64 bits with or without sign extension from host memory.
*offset* must be a constant.
* - st_i32/i64 *t0*, *t1*, *offset*
st8_i32/i64 *t0*, *t1*, *offset*
st16_i32/i64 *t0*, *t1*, *offset*
st32_i64 *t0*, *t1*, *offset*
- | write(*t0*, *t1* + *offset*)
|
| Write 8, 16, 32 or 64 bits to host memory.
All these opcodes assume that the addressed host memory does not correspond
to a global. Otherwise the behaviour is unpredictable.
Multiword arithmetic support
----------------------------
.. list-table::
* - add2_i32/i64 *t0_low*, *t0_high*, *t1_low*, *t1_high*, *t2_low*, *t2_high*
sub2_i32/i64 *t0_low*, *t0_high*, *t1_low*, *t1_high*, *t2_low*, *t2_high*
- | Similar to add/sub, except that the double-word inputs *t1* and *t2* are
formed from two single-word arguments, and the double-word output *t0*
is returned in two single-word outputs.
* - mulu2_i32/i64 *t0_low*, *t0_high*, *t1*, *t2*
- | Similar to mul, except two unsigned inputs *t1* and *t2* yielding the full
double-word product *t0*. The latter is returned in two single-word outputs.
* - muls2_i32/i64 *t0_low*, *t0_high*, *t1*, *t2*
- | Similar to mulu2, except the two inputs *t1* and *t2* are signed.
* - mulsh_i32/i64 *t0*, *t1*, *t2*
muluh_i32/i64 *t0*, *t1*, *t2*
- | Provide the high part of a signed or unsigned multiply, respectively.
|
| If mulu2/muls2 are not provided by the backend, the tcg-op generator
can obtain the same results by emitting a pair of opcodes, mul + muluh/mulsh.
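
For instance, when mulu2_i32 is not provided, the fallback expansion just
described is:

.. code-block:: none

   mul_i32 t0_low, t1, t2      /* low half of the product */
   muluh_i32 t0_high, t1, t2   /* high half of the product */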
Memory Barrier support
----------------------
.. list-table::
* - mb *<$arg>*
- | Generate a target memory barrier instruction to ensure memory ordering
as being enforced by a corresponding guest memory barrier instruction.
|
| The ordering enforced by the backend may be stricter than the ordering
required by the guest. It cannot be weaker. This opcode takes a constant
argument which is required to generate the appropriate barrier
instruction. The backend should take care to emit the target barrier
instruction only when necessary i.e., for SMP guests and when MTTCG is
enabled.
|
| The guest translators should generate this opcode for all guest instructions
which have ordering side effects.
|
| Please see :ref:`atomics-ref` for more information on memory barriers.
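
For instance, a guest full barrier could be generated as follows (a
sketch; ``TCG_MO_ALL`` and ``TCG_BAR_SC`` are existing TCGBar flags):

.. code-block:: c

   /* order all earlier loads/stores before all later ones */
   tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);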
64-bit guest on 32-bit host support
-----------------------------------
The following opcodes are internal to TCG. Thus they are to be implemented by
32-bit host code generators, but are not to be emitted by guest translators.
They are emitted as needed by inline functions within ``tcg-op.h``.
.. list-table::
* - brcond2_i32 *t0_low*, *t0_high*, *t1_low*, *t1_high*, *cond*, *label*
- | Similar to brcond, except that the 64-bit values *t0* and *t1*
are formed from two 32-bit arguments.
* - setcond2_i32 *dest*, *t1_low*, *t1_high*, *t2_low*, *t2_high*, *cond*
- | Similar to setcond, except that the 64-bit values *t1* and *t2* are
formed from two 32-bit arguments. The result is a 32-bit value.
QEMU specific operations
------------------------
.. list-table::
* - exit_tb *t0*
- | Exit the current TB and return the value *t0* (word type).
* - goto_tb *index*
- | Exit the current TB and jump to the TB index *index* (constant) if the
current TB was linked to this TB. Otherwise execute the next
instructions. Only indices 0 and 1 are valid and tcg_gen_goto_tb may be issued
at most once with each slot index per TB.
* - lookup_and_goto_ptr *tb_addr*
- | Look up a TB address *tb_addr* and jump to it if valid. If not valid,
jump to the TCG epilogue to go back to the exec loop.
|
| This operation is optional. If the TCG backend does not implement the
goto_ptr opcode, emitting this op is equivalent to emitting exit_tb(0).
* - qemu_ld_i32/i64 *t0*, *t1*, *flags*, *memidx*
qemu_st_i32/i64 *t0*, *t1*, *flags*, *memidx*
qemu_st8_i32 *t0*, *t1*, *flags*, *memidx*
- | Load data at the guest address *t1* into *t0*, or store data in *t0* at guest
address *t1*. The _i32/_i64 size applies to the size of the input/output
register *t0* only. The address *t1* is always sized according to the guest,
and the width of the memory operation is controlled by *flags*.
|
| Both *t0* and *t1* may be split into little-endian ordered pairs of registers
if dealing with 64-bit quantities on a 32-bit host.
|
| The *memidx* selects the qemu tlb index to use (e.g. user or kernel access).
The flags are the MemOp bits, selecting the sign, width, and endianness
of the memory access.
|
| For a 32-bit host, qemu_ld/st_i64 is guaranteed to only be used with a
64-bit memory access specified in *flags*.
|
| For i386, qemu_st8_i32 is exactly like qemu_st_i32, except the size of
the memory operation is known to be 8-bit. This allows the backend to
provide a different set of register constraints.
Host vector operations
----------------------
All of the vector ops have two parameters, ``TCGOP_VECL`` & ``TCGOP_VECE``.
The former specifies the length of the vector in log2 64-bit units; the
latter specifies the length of the element (if applicable) in log2 8-bit units.
E.g. VECL = 1 -> 64 << 1 -> v128, and VECE = 2 -> 1 << 2 -> i32.
.. list-table::
* - mov_vec *v0*, *v1*
ld_vec *v0*, *t1*
st_vec *v0*, *t1*
- | Move, load and store.
* - dup_vec *v0*, *r1*
- | Duplicate the low N bits of *r1* into VECL/VECE copies across *v0*.
* - dupi_vec *v0*, *c*
- | Similarly, for a constant.
| Smaller values will be replicated to host register size by the expanders.
* - dup2_vec *v0*, *r1*, *r2*
- | Duplicate *r2*:*r1* into VECL/64 copies across *v0*. This opcode is
only present for 32-bit hosts.
* - add_vec *v0*, *v1*, *v2*
- | *v0* = *v1* + *v2*, in elements across the vector.
* - sub_vec *v0*, *v1*, *v2*
- | Similarly, *v0* = *v1* - *v2*.
* - mul_vec *v0*, *v1*, *v2*
- | Similarly, *v0* = *v1* * *v2*.
* - neg_vec *v0*, *v1*
- | Similarly, *v0* = -*v1*.
* - abs_vec *v0*, *v1*
- | Similarly, *v0* = *v1* < 0 ? -*v1* : *v1*, in elements across the vector.
* - smin_vec *v0*, *v1*, *v2*
umin_vec *v0*, *v1*, *v2*
- | Similarly, *v0* = MIN(*v1*, *v2*), for signed and unsigned element types.
* - smax_vec *v0*, *v1*, *v2*
umax_vec *v0*, *v1*, *v2*
- | Similarly, *v0* = MAX(*v1*, *v2*), for signed and unsigned element types.
* - ssadd_vec *v0*, *v1*, *v2*
sssub_vec *v0*, *v1*, *v2*
usadd_vec *v0*, *v1*, *v2*
ussub_vec *v0*, *v1*, *v2*
- | Signed and unsigned saturating addition and subtraction.
|
| If the true result is not representable within the element type, the
element is set to the minimum or maximum value for the type.
* - and_vec *v0*, *v1*, *v2*
or_vec *v0*, *v1*, *v2*
xor_vec *v0*, *v1*, *v2*
andc_vec *v0*, *v1*, *v2*
orc_vec *v0*, *v1*, *v2*
not_vec *v0*, *v1*
- | Similarly, logical operations with and without complement.
|
| Note that VECE is unused.
* - shli_vec *v0*, *v1*, *i2*
shls_vec *v0*, *v1*, *s2*
- | Shift all elements from v1 by a scalar *i2*/*s2*. I.e.
.. code-block:: c
for (i = 0; i < VECL/VECE; ++i) {
v0[i] = v1[i] << s2;
}
* - shri_vec *v0*, *v1*, *i2*
sari_vec *v0*, *v1*, *i2*
rotli_vec *v0*, *v1*, *i2*
shrs_vec *v0*, *v1*, *s2*
sars_vec *v0*, *v1*, *s2*
- | Similarly for logical and arithmetic right shift, and left rotate.
* - shlv_vec *v0*, *v1*, *v2*
- | Shift elements from *v1* by elements from *v2*. I.e.
.. code-block:: c
for (i = 0; i < VECL/VECE; ++i) {
v0[i] = v1[i] << v2[i];
}
* - shrv_vec *v0*, *v1*, *v2*
sarv_vec *v0*, *v1*, *v2*
rotlv_vec *v0*, *v1*, *v2*
rotrv_vec *v0*, *v1*, *v2*
- | Similarly for logical and arithmetic right shift, and rotates.
* - cmp_vec *v0*, *v1*, *v2*, *cond*
- | Compare vectors by element, storing -1 for true and 0 for false.
* - bitsel_vec *v0*, *v1*, *v2*, *v3*
- | Bitwise select, *v0* = (*v2* & *v1*) | (*v3* & ~\ *v1*), across the entire vector.
* - cmpsel_vec *v0*, *c1*, *c2*, *v3*, *v4*, *cond*
- | Select elements based on comparison results:
.. code-block:: c
for (i = 0; i < n; ++i) {
v0[i] = (c1[i] cond c2[i]) ? v3[i] : v4[i];
}
**Note 1**: Some shortcuts are defined when the last operand is known to be
a constant (e.g. addi for add, movi for mov).
**Note 2**: When using TCG, the opcodes must never be generated directly
as some of them may not be available as "real" opcodes. Always use the
function tcg_gen_xxx(args).
Backend
=======
``tcg-target.h`` contains the target specific definitions. ``tcg-target.c.inc``
contains the target specific code; it is #included by ``tcg/tcg.c``, rather
than being a standalone C file.
Assumptions
-----------
The target word size (``TCG_TARGET_REG_BITS``) is expected to be 32 bit or
64 bit. It is expected that the pointer has the same size as the word.
On a 32 bit target, all 64 bit operations are converted to 32 bits. A
few specific operations must be implemented to allow it (see add2_i32,
sub2_i32, brcond2_i32).
On a 64 bit target, the values are transferred between 32 and 64-bit
registers using the following ops:
- trunc_shr_i64_i32
- ext_i32_i64
- extu_i32_i64
They ensure that the values are correctly truncated or extended when
moved from a 32-bit to a 64-bit register or vice-versa. Note that the
trunc_shr_i64_i32 is an optional op. It is not necessary to implement
it if all the following conditions are met:
- 64-bit registers can hold 32-bit values
- 32-bit values in a 64-bit register do not need to stay zero or
sign extended
- all 32-bit TCG ops ignore the high part of 64-bit registers
Floating point operations are not supported in this version. A
previous incarnation of the code generator had full support of them,
but it is better to concentrate on integer operations first.
Constraints
----------------
GCC like constraints are used to define the constraints of every
instruction. Memory constraints are not supported in this
version. Aliases are specified in the input operands as for GCC.
The same register may be used for both an input and an output, even when
they are not explicitly aliased. If an op expands to multiple target
instructions then care must be taken to avoid clobbering input values.
GCC style "early clobber" outputs are supported, with '``&``'.
A target can define specific register or constant constraints. If an
operation uses a constant input constraint which does not allow all
constants, it must also accept registers in order to have a fallback.
The constraint '``i``' is defined generically to accept any constant.
The constraint '``r``' is not defined generically, but is consistently
used by each backend to indicate all registers.
The movi_i32 and movi_i64 operations must accept any constants.
The mov_i32 and mov_i64 operations must accept any registers of the
same type.
The ld/st/sti instructions must accept signed 32 bit constant offsets.
This can be implemented by reserving a specific register in which to
compute the address if the offset is too big.
The ld/st instructions must accept any destination (ld) or source (st)
register.
The sti instruction may fail if it cannot store the given constant.
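
As an illustration, a backend might describe a three-operand integer op as
one output register plus a register input and a register-or-immediate
input; the spelling below is modeled on the in-tree
``tcg-target-con-set.h`` files:

.. code-block:: c

   /* one output in "r"; inputs in "r" and "ri" (register or immediate) */
   C_O1_I2(r, r, ri)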
Function call assumptions
-------------------------
- The only supported types for parameters and return value are: 32 and
64 bit integers and pointer.
- The stack grows downwards.
- The first N parameters are passed in registers.
- The next parameters are passed on the stack by storing them as words.
- Some registers are clobbered during the call.
- The function can return 0 or 1 value in registers. On a 32 bit
target, functions must be able to return 2 values in registers for
64 bit return type.
Recommended coding rules for best performance
=============================================
- Use globals to represent the parts of the QEMU CPU state which are
often modified, e.g. the integer registers and the condition
codes. TCG will be able to use host registers to store them.
- Avoid globals stored in fixed registers. They must be used only to
store the pointer to the CPU state and possibly to store a pointer
to a register window.
- Use temporaries. Use local temporaries only when really needed,
e.g. when you need to use a value after a jump. Local temporaries
introduce a performance hit in the current TCG implementation: their
content is saved to memory at end of each basic block.
- Free temporaries and local temporaries when they are no longer used
(tcg_temp_free). Since tcg_const_x() also creates a temporary, you
should free it after use. Freeing temporaries does not yield better
generated code, but it reduces TCG's memory usage and improves
translation speed.
- Don't hesitate to use helpers for complicated or seldom used guest
instructions. There is little performance advantage in using TCG to
implement guest instructions taking more than about twenty TCG
instructions. Note that this rule of thumb is more applicable to
helpers doing complex logic or arithmetic, where the C compiler has
scope to do a good job of optimisation; it is less relevant where
the instruction is mostly doing loads and stores, and in those cases
inline TCG may still be faster for longer sequences.
- The hard limit on the number of TCG instructions you can generate
per guest instruction is set by ``MAX_OP_PER_INSTR`` in ``exec-all.h`` --
you cannot exceed this without risking a buffer overrun.
- Use the 'discard' instruction if you know that TCG won't be able to
prove that a given global is "dead" at a given program point. The
x86 guest uses it to improve the condition codes optimisation.
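
For example, a translator might discard a condition-code global right
after an instruction that fully overwrites it (``cc_src`` is a
hypothetical global name):

.. code-block:: none

   discard_i32 cc_src   /* value is dead; do not spill it */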

docs/devel/tcg.rst

@@ -9,7 +9,7 @@ which make it relatively easily portable and simple while achieving good
 performances.
 QEMU's dynamic translation backend is called TCG, for "Tiny Code
-Generator". For more information, please take a look at ``tcg/README``.
+Generator". For more information, please take a look at :ref:`tcg-ops-ref`.
 The following sections outline some notable features and implementation
 details of QEMU's dynamic translator.

hw/core/cpu-common.c

@@ -235,6 +235,7 @@ static void cpu_common_initfn(Object *obj)
/* the default value is changed by qemu_init_vcpu() for softmmu */
cpu->nr_cores = 1;
cpu->nr_threads = 1;
cpu->cflags_next_tb = -1;
qemu_mutex_init(&cpu->work_mutex);
QSIMPLEQ_INIT(&cpu->work_list);

hw/mips/mips_int.c

@@ -32,17 +32,12 @@ static void cpu_mips_irq_request(void *opaque, int irq, int level)
     MIPSCPU *cpu = opaque;
     CPUMIPSState *env = &cpu->env;
     CPUState *cs = CPU(cpu);
-    bool locked = false;
     if (irq < 0 || irq > 7) {
         return;
     }
-    /* Make sure locking works even if BQL is already held by the caller */
-    if (!qemu_mutex_iothread_locked()) {
-        locked = true;
-        qemu_mutex_lock_iothread();
-    }
+    QEMU_IOTHREAD_LOCK_GUARD();
     if (level) {
         env->CP0_Cause |= 1 << (irq + CP0Ca_IP);
@@ -59,10 +54,6 @@ static void cpu_mips_irq_request(void *opaque, int irq, int level)
     } else {
         cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD);
     }
-    if (locked) {
-        qemu_mutex_unlock_iothread();
-    }
 }
 void cpu_mips_irq_init_cpu(MIPSCPU *cpu)

hw/ppc/ppc.c

@@ -44,13 +44,9 @@ void ppc_set_irq(PowerPCCPU *cpu, int irq, int level)
 {
     CPUPPCState *env = &cpu->env;
     unsigned int old_pending;
-    bool locked = false;
-    /* We may already have the BQL if coming from the reset path */
-    if (!qemu_mutex_iothread_locked()) {
-        locked = true;
-        qemu_mutex_lock_iothread();
-    }
+    QEMU_IOTHREAD_LOCK_GUARD();
     old_pending = env->pending_interrupts;
@@ -67,10 +63,6 @@ void ppc_set_irq(PowerPCCPU *cpu, int irq, int level)
     trace_ppc_irq_set_exit(env, irq, level, env->pending_interrupts,
                            CPU(cpu)->interrupt_request);
-    if (locked) {
-        qemu_mutex_unlock_iothread();
-    }
 }
 /* PowerPC 6xx / 7xx internal IRQ controller */

include/exec/helper-head.h

@@ -133,6 +133,6 @@
 #define DEF_HELPER_7(name, ret, t1, t2, t3, t4, t5, t6, t7) \
     DEF_HELPER_FLAGS_7(name, 0, ret, t1, t2, t3, t4, t5, t6, t7)
-/* MAX_OPC_PARAM_IARGS must be set to n if last entry is DEF_HELPER_FLAGS_n. */
+/* MAX_CALL_IARGS must be set to n if last entry is DEF_HELPER_FLAGS_n. */
 #endif /* EXEC_HELPER_HEAD_H */

include/qemu/main-loop.h

@@ -343,6 +343,35 @@ void qemu_mutex_lock_iothread_impl(const char *file, int line);
*/
void qemu_mutex_unlock_iothread(void);
/**
* QEMU_IOTHREAD_LOCK_GUARD
*
* Wrap a block of code in a conditional qemu_mutex_{lock,unlock}_iothread.
*/
typedef struct IOThreadLockAuto IOThreadLockAuto;
static inline IOThreadLockAuto *qemu_iothread_auto_lock(const char *file,
int line)
{
if (qemu_mutex_iothread_locked()) {
return NULL;
}
qemu_mutex_lock_iothread_impl(file, line);
/* Anything non-NULL causes the cleanup function to be called */
return (IOThreadLockAuto *)(uintptr_t)1;
}
static inline void qemu_iothread_auto_unlock(IOThreadLockAuto *l)
{
qemu_mutex_unlock_iothread();
}
G_DEFINE_AUTOPTR_CLEANUP_FUNC(IOThreadLockAuto, qemu_iothread_auto_unlock)
#define QEMU_IOTHREAD_LOCK_GUARD() \
g_autoptr(IOThreadLockAuto) _iothread_lock_auto __attribute__((unused)) \
= qemu_iothread_auto_lock(__FILE__, __LINE__)
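
The guard replaces the open-coded pattern converted throughout this
series; a minimal before/after sketch, with do_access() standing in for
the protected operation:

    /* before: remember whether we took the BQL ourselves */
    bool locked = false;
    if (!qemu_mutex_iothread_locked()) {
        qemu_mutex_lock_iothread();
        locked = true;
    }
    do_access();
    if (locked) {
        qemu_mutex_unlock_iothread();
    }

    /* after: lock only if not already held; unlock at scope exit */
    {
        QEMU_IOTHREAD_LOCK_GUARD();
        do_access();
    }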
/*
* qemu_cond_wait_iothread: Wait on condition for the main loop mutex
*

include/tcg/tcg-op.h

@@ -667,35 +667,12 @@ static inline void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
     tcg_gen_op3_i64(INDEX_op_mul_i64, ret, arg1, arg2);
 }
 #else /* TCG_TARGET_REG_BITS == 32 */
-static inline void tcg_gen_st8_i64(TCGv_i64 arg1, TCGv_ptr arg2,
-                                   tcg_target_long offset)
-{
-    tcg_gen_st8_i32(TCGV_LOW(arg1), arg2, offset);
-}
+void tcg_gen_st8_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset);
+void tcg_gen_st16_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset);
+void tcg_gen_st32_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset);
-static inline void tcg_gen_st16_i64(TCGv_i64 arg1, TCGv_ptr arg2,
-                                    tcg_target_long offset)
-{
-    tcg_gen_st16_i32(TCGV_LOW(arg1), arg2, offset);
-}
-
-static inline void tcg_gen_st32_i64(TCGv_i64 arg1, TCGv_ptr arg2,
-                                    tcg_target_long offset)
-{
-    tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset);
-}
-
-static inline void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
-{
-    tcg_gen_add2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1),
-                     TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2));
-}
-
-static inline void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
-{
-    tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1),
-                     TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2));
-}
+void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
 void tcg_gen_discard_i64(TCGv_i64 arg);
 void tcg_gen_mov_i64(TCGv_i64 ret, TCGv_i64 arg);
@@ -841,7 +818,7 @@ static inline void tcg_gen_plugin_cb_start(unsigned from, unsigned type,
 static inline void tcg_gen_plugin_cb_end(void)
 {
-    tcg_emit_op(INDEX_op_plugin_cb_end);
+    tcg_emit_op(INDEX_op_plugin_cb_end, 0);
 }
 #if TARGET_LONG_BITS == 32

include/tcg/tcg.h

@@ -38,19 +38,7 @@
 /* XXX: make safe guess about sizes */
 #define MAX_OP_PER_INSTR 266
-#if HOST_LONG_BITS == 32
-#define MAX_OPC_PARAM_PER_ARG 2
-#else
-#define MAX_OPC_PARAM_PER_ARG 1
-#endif
-#define MAX_OPC_PARAM_IARGS 7
-#define MAX_OPC_PARAM_OARGS 1
-#define MAX_OPC_PARAM_ARGS (MAX_OPC_PARAM_IARGS + MAX_OPC_PARAM_OARGS)
-
-/* A Call op needs up to 4 + 2N parameters on 32-bit archs,
- * and up to 4 + N parameters on 64-bit archs
- * (N = number of input arguments + output arguments). */
-#define MAX_OPC_PARAM (4 + (MAX_OPC_PARAM_PER_ARG * MAX_OPC_PARAM_ARGS))
+#define MAX_CALL_IARGS 7
 #define CPU_TEMP_BUF_NLONGS 128
 #define TCG_STATIC_FRAME_SIZE (CPU_TEMP_BUF_NLONGS * sizeof(long))
@@ -294,7 +282,8 @@ typedef enum TCGType {
     TCG_TYPE_V128,
     TCG_TYPE_V256,
-    TCG_TYPE_COUNT, /* number of different types */
+    /* Number of different types (integer not enum) */
+#define TCG_TYPE_COUNT (TCG_TYPE_V256 + 1)
     /* An alias for the size of the host register. */
 #if TCG_TARGET_REG_BITS == 32
@@ -318,6 +307,22 @@ typedef enum TCGType {
#endif
} TCGType;
/**
* tcg_type_size
* @t: type
*
* Return the size of the type in bytes.
*/
static inline int tcg_type_size(TCGType t)
{
unsigned i = t;
if (i >= TCG_TYPE_V64) {
tcg_debug_assert(i < TCG_TYPE_COUNT);
i -= TCG_TYPE_V64 - 1;
}
return 4 << i;
}
/**
* get_alignment_bits
* @memop: MemOp value
@@ -408,9 +413,6 @@ typedef TCGv_ptr TCGv_env;
 #define TCG_CALL_NO_RWG_SE (TCG_CALL_NO_RWG | TCG_CALL_NO_SE)
 #define TCG_CALL_NO_WG_SE (TCG_CALL_NO_WG | TCG_CALL_NO_SE)
-/* Used to align parameters. See the comment before tcgv_i32_temp. */
-#define TCG_CALL_DUMMY_ARG ((TCGArg)0)
/*
* Flags for the bswap opcodes.
* If IZ, the input is zero-extended, otherwise unknown.
@@ -456,6 +458,7 @@ typedef struct TCGTemp {
unsigned int mem_coherent:1;
unsigned int mem_allocated:1;
unsigned int temp_allocated:1;
unsigned int temp_subindex:1;
int64_t val;
struct TCGTemp *mem_base;
@@ -475,34 +478,34 @@ typedef struct TCGTempSet {
     unsigned long l[BITS_TO_LONGS(TCG_MAX_TEMPS)];
 } TCGTempSet;
-/* While we limit helpers to 6 arguments, for 32-bit hosts, with padding,
-   this imples a max of 6*2 (64-bit in) + 2 (64-bit out) = 14 operands.
-   There are never more than 2 outputs, which means that we can store all
-   dead + sync data within 16 bits. */
-#define DEAD_ARG 4
-#define SYNC_ARG 1
-typedef uint16_t TCGLifeData;
+/*
+ * With 1 128-bit output, a 32-bit host requires 4 output parameters,
+ * which leaves a maximum of 28 other slots.  Which is enough for 7
+ * 128-bit operands.
+ */
+#define DEAD_ARG (1 << 4)
+#define SYNC_ARG (1 << 0)
+typedef uint32_t TCGLifeData;
 /* The layout here is designed to avoid a bitfield crossing of
    a 32-bit boundary, which would cause GCC to add extra padding. */
 typedef struct TCGOp {
-    TCGOpcode opc : 8; /* 8 */
+    TCGOpcode opc : 8;
+    unsigned nargs : 8;
     /* Parameters for this opcode. See below. */
-    unsigned param1 : 4; /* 12 */
-    unsigned param2 : 4; /* 16 */
+    unsigned param1 : 8;
+    unsigned param2 : 8;
     /* Lifetime data of the operands. */
-    unsigned life : 16; /* 32 */
+    TCGLifeData life;
     /* Next and previous opcodes. */
     QTAILQ_ENTRY(TCGOp) link;
-    /* Arguments for the opcode. */
-    TCGArg args[MAX_OPC_PARAM];
-
     /* Register preferences for the output(s). */
     TCGRegSet output_pref[2];
+
+    /* Arguments for the opcode. */
+    TCGArg args[];
 } TCGOp;
#define TCGOP_CALLI(X) (X)->param1
@@ -514,6 +517,11 @@ typedef struct TCGOp {
/* Make sure operands fit in the bitfields above. */
QEMU_BUILD_BUG_ON(NB_OPS > (1 << 8));
static inline TCGRegSet output_pref(const TCGOp *op, unsigned i)
{
return i < ARRAY_SIZE(op->output_pref) ? op->output_pref[i] : 0;
}
typedef struct TCGProfile {
int64_t cpu_exec_time;
int64_t tb_count1;
@@ -737,18 +745,6 @@ static inline TCGv_vec temp_tcgv_vec(TCGTemp *t)
     return (TCGv_vec)temp_tcgv_i32(t);
 }
-#if TCG_TARGET_REG_BITS == 32
-static inline TCGv_i32 TCGV_LOW(TCGv_i64 t)
-{
-    return temp_tcgv_i32(tcgv_i64_temp(t));
-}
-
-static inline TCGv_i32 TCGV_HIGH(TCGv_i64 t)
-{
-    return temp_tcgv_i32(tcgv_i64_temp(t) + 1);
-}
-#endif
-
 static inline TCGArg tcg_get_insn_param(TCGOp *op, int arg)
{
return op->args[arg];
@@ -951,6 +947,8 @@ typedef struct TCGArgConstraint {
unsigned ct : 16;
unsigned alias_index : 4;
unsigned sort_index : 4;
unsigned pair_index : 4;
unsigned pair : 2; /* 0: none, 1: first, 2: second, 3: second alias */
bool oalias : 1;
bool ialias : 1;
bool newreg : 1;
@@ -1006,10 +1004,12 @@ bool tcg_op_supported(TCGOpcode op);
 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args);
-TCGOp *tcg_emit_op(TCGOpcode opc);
+TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs);
 void tcg_op_remove(TCGContext *s, TCGOp *op);
-TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *op, TCGOpcode opc);
-TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *op, TCGOpcode opc);
+TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *op,
+                            TCGOpcode opc, unsigned nargs);
+TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *op,
+                           TCGOpcode opc, unsigned nargs);
/**
* tcg_remove_ops_after:

meson.build

@@ -469,6 +469,7 @@ if get_option('tcg').allowed()
   endif
   if get_option('tcg_interpreter')
     tcg_arch = 'tci'
+    config_host += { 'CONFIG_TCG_INTERPRETER': 'y' }
   elif host_arch == 'x86_64'
     tcg_arch = 'i386'
   elif host_arch == 'ppc64'
@@ -2550,9 +2551,6 @@ foreach target : target_dirs
     if sym == 'CONFIG_TCG' or target in accelerator_targets.get(sym, [])
       config_target += { sym: 'y' }
       config_all += { sym: 'y' }
-      if sym == 'CONFIG_TCG' and tcg_arch == 'tci'
-        config_target += { 'CONFIG_TCG_INTERPRETER': 'y' }
-      endif
       if target in modular_tcg
         config_target += { 'CONFIG_TCG_MODULAR': 'y' }
       else

target/ppc/excp_helper.c

@@ -2163,22 +2163,13 @@ static int ppc_next_unmasked_interrupt(CPUPPCState *env)
 void ppc_maybe_interrupt(CPUPPCState *env)
 {
     CPUState *cs = env_cpu(env);
-    bool locked = false;
-    if (!qemu_mutex_iothread_locked()) {
-        locked = true;
-        qemu_mutex_lock_iothread();
-    }
+    QEMU_IOTHREAD_LOCK_GUARD();
     if (ppc_next_unmasked_interrupt(env)) {
         cpu_interrupt(cs, CPU_INTERRUPT_HARD);
     } else {
         cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD);
     }
-    if (locked) {
-        qemu_mutex_unlock_iothread();
-    }
 }
 #if defined(TARGET_PPC64)

target/ppc/helper_regs.c

@@ -22,6 +22,7 @@
 #include "qemu/main-loop.h"
 #include "exec/exec-all.h"
 #include "sysemu/kvm.h"
+#include "sysemu/tcg.h"
 #include "helper_regs.h"
 #include "power8-pmu.h"
 #include "cpu-models.h"
@@ -203,17 +204,10 @@ void cpu_interrupt_exittb(CPUState *cs)
 {
     /*
      * We don't need to worry about translation blocks
-     * when running with KVM.
+     * unless running with TCG.
      */
-    if (kvm_enabled()) {
-        return;
-    }
-
-    if (!qemu_mutex_iothread_locked()) {
-        qemu_mutex_lock_iothread();
-        cpu_interrupt(cs, CPU_INTERRUPT_EXITTB);
-        qemu_mutex_unlock_iothread();
-    } else {
+    if (tcg_enabled()) {
+        QEMU_IOTHREAD_LOCK_GUARD();
         cpu_interrupt(cs, CPU_INTERRUPT_EXITTB);
     }
 }

target/riscv/cpu_helper.c

@@ -610,7 +610,6 @@ uint64_t riscv_cpu_update_mip(RISCVCPU *cpu, uint64_t mask, uint64_t value)
     CPURISCVState *env = &cpu->env;
     CPUState *cs = CPU(cpu);
     uint64_t gein, vsgein = 0, vstip = 0, old = env->mip;
-    bool locked = false;
     if (riscv_cpu_virt_enabled(env)) {
         gein = get_field(env->hstatus, HSTATUS_VGEIN);
@@ -621,10 +620,7 @@ uint64_t riscv_cpu_update_mip(RISCVCPU *cpu, uint64_t mask, uint64_t value)
     mask = ((mask == MIP_VSTIP) && env->vstime_irq) ? 0 : mask;
     vstip = env->vstime_irq ? MIP_VSTIP : 0;
-    if (!qemu_mutex_iothread_locked()) {
-        locked = true;
-        qemu_mutex_lock_iothread();
-    }
+    QEMU_IOTHREAD_LOCK_GUARD();
     env->mip = (env->mip & ~mask) | (value & mask);
@@ -634,10 +630,6 @@ uint64_t riscv_cpu_update_mip(RISCVCPU *cpu, uint64_t mask, uint64_t value)
         cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD);
     }
-    if (locked) {
-        qemu_mutex_unlock_iothread();
-    }
     return old;
 }

target/sparc/translate.c

@@ -163,13 +163,6 @@ static inline void gen_update_fprs_dirty(DisasContext *dc, int rd)
 /* floating point registers moves */
 static TCGv_i32 gen_load_fpr_F(DisasContext *dc, unsigned int src)
 {
-#if TCG_TARGET_REG_BITS == 32
-    if (src & 1) {
-        return TCGV_LOW(cpu_fpr[src / 2]);
-    } else {
-        return TCGV_HIGH(cpu_fpr[src / 2]);
-    }
-#else
     TCGv_i32 ret = get_temp_i32(dc);
     if (src & 1) {
         tcg_gen_extrl_i64_i32(ret, cpu_fpr[src / 2]);
@@ -177,22 +170,16 @@ static TCGv_i32 gen_load_fpr_F(DisasContext *dc, unsigned int src)
         tcg_gen_extrh_i64_i32(ret, cpu_fpr[src / 2]);
     }
     return ret;
-#endif
 }
 static void gen_store_fpr_F(DisasContext *dc, unsigned int dst, TCGv_i32 v)
 {
-#if TCG_TARGET_REG_BITS == 32
-    if (dst & 1) {
-        tcg_gen_mov_i32(TCGV_LOW(cpu_fpr[dst / 2]), v);
-    } else {
-        tcg_gen_mov_i32(TCGV_HIGH(cpu_fpr[dst / 2]), v);
-    }
-#else
-    TCGv_i64 t = (TCGv_i64)v;
+    TCGv_i64 t = tcg_temp_new_i64();
+    tcg_gen_extu_i32_i64(t, v);
     tcg_gen_deposit_i64(cpu_fpr[dst / 2], cpu_fpr[dst / 2], t,
                         (dst & 1 ? 0 : 32), 32);
-#endif
+    tcg_temp_free_i64(t);
     gen_update_fprs_dirty(dc, dst);
 }

tcg/README (deleted)

@@ -1,784 +0,0 @@
Tiny Code Generator - Fabrice Bellard.
1) Introduction
TCG (Tiny Code Generator) began as a generic backend for a C
compiler. It was simplified to be used in QEMU. It also has its roots
in the QOP code generator written by Paul Brook.
2) Definitions
TCG receives RISC-like "TCG ops" and performs some optimizations on them,
including liveness analysis and trivial constant expression
evaluation. TCG ops are then implemented in the host CPU back end,
also known as the TCG "target".
The TCG "target" is the architecture for which we generate the
code. It is of course not the same as the "target" of QEMU which is
the emulated architecture. As TCG started as a generic C backend used
for cross compiling, it is assumed that the TCG target is different
from the host, although it is never the case for QEMU.
In this document, we use "guest" to specify what architecture we are
emulating; "target" always means the TCG target, the machine on which
we are running QEMU.
A TCG "function" corresponds to a QEMU Translated Block (TB).
A TCG "temporary" is a variable only live in a basic
block. Temporaries are allocated explicitly in each function.
A TCG "local temporary" is a variable only live in a function. Local
temporaries are allocated explicitly in each function.
A TCG "global" is a variable which is live in all the functions
(equivalent of a C global variable). They are defined before the
functions defined. A TCG global can be a memory location (e.g. a QEMU
CPU register), a fixed host register (e.g. the QEMU CPU state pointer)
or a memory location which is stored in a register outside QEMU TBs
(not implemented yet).
A TCG "basic block" corresponds to a list of instructions terminated
by a branch instruction.
An operation with "undefined behavior" may result in a crash.
An operation with "unspecified behavior" shall not crash. However,
the result may be one of several possibilities so may be considered
an "undefined result".
3) Intermediate representation
3.1) Introduction
TCG instructions operate on variables which are temporaries, local
temporaries or globals. TCG instructions and variables are strongly
typed. Two types are supported: 32 bit integers and 64 bit
integers. Pointers are defined as an alias to 32 bit or 64 bit
integers depending on the TCG target word size.
Each instruction has a fixed number of output variable operands, input
variable operands and always constant operands.
The notable exception is the call instruction which has a variable
number of outputs and inputs.
In the textual form, output operands usually come first, followed by
input operands, followed by constant operands. The output type is
included in the instruction name. Constants are prefixed with a '$'.
add_i32 t0, t1, t2 (t0 <- t1 + t2)
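
As a generator-side illustration (a minimal sketch, not part of the
original text; emit_example and the operand names are hypothetical),
the same op is emitted from a frontend with the tcg_gen_* API:

    #include "tcg/tcg-op.h"

    /* Emit "add_i32 t0, t1, t2", then the constant form "add_i32 t0, t0, $1". */
    static void emit_example(TCGv_i32 t1, TCGv_i32 t2)
    {
        TCGv_i32 t0 = tcg_temp_new_i32();   /* temporary: dead at block end */

        tcg_gen_add_i32(t0, t1, t2);        /* t0 <- t1 + t2 */
        tcg_gen_addi_i32(t0, t0, 1);        /* t0 <- t0 + $1 */

        tcg_temp_free_i32(t0);
    }
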
3.2) Assumptions
* Basic blocks
- Basic blocks end after branches (e.g. brcond_i32 instruction),
goto_tb and exit_tb instructions.
- Basic blocks start after the end of a previous basic block, or at a
set_label instruction.
After the end of a basic block, the content of temporaries is
destroyed, but local temporaries and globals are preserved.
* Floating point types are not supported yet
* Pointers: depending on the TCG target, pointer size is 32 bit or 64
bit. The type TCG_TYPE_PTR is an alias to TCG_TYPE_I32 or
TCG_TYPE_I64.
* Helpers:
Using the tcg_gen_helper_x_y it is possible to call any function
taking i32, i64 or pointer types. By default, before calling a helper,
all globals are stored at their canonical location and it is assumed
that the function can modify them. By default, the helper is allowed to
modify the CPU state or raise an exception.
This can be overridden using the following function modifiers:
- TCG_CALL_NO_READ_GLOBALS means that the helper does not read globals,
either directly or via an exception. They will not be saved to their
canonical locations before calling the helper.
- TCG_CALL_NO_WRITE_GLOBALS means that the helper does not modify any globals.
They will only be saved to their canonical location before calling helpers,
but they won't be reloaded afterwards.
- TCG_CALL_NO_SIDE_EFFECTS means that the call to the function is removed if
the return value is not used.
Note that TCG_CALL_NO_READ_GLOBALS implies TCG_CALL_NO_WRITE_GLOBALS.
(A helper declaration sketch follows this list of assumptions.)
On some TCG targets (e.g. x86), several calling conventions are
supported.
* Branches:
Use the instruction 'br' to jump to a label.
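
To illustrate the Helpers assumption above, a sketch of a helper
declaration and call ('sat_add' is a hypothetical helper name, not an
existing QEMU helper; the DEF_HELPER_FLAGS_* and gen_helper_* macros
are the standard machinery):

    /* In the guest's helper.h: no globals read, no side effects. */
    DEF_HELPER_FLAGS_2(sat_add, TCG_CALL_NO_RWG_SE, i32, i32, i32)

    /* In op_helper.c: the C implementation. */
    uint32_t helper_sat_add(uint32_t a, uint32_t b)
    {
        uint64_t r = (uint64_t)a + b;
        return r > UINT32_MAX ? UINT32_MAX : (uint32_t)r;
    }

    /* In translate.c: globals need not be spilled around this call,
       and the call is removed entirely if 'dest' is unused. */
    gen_helper_sat_add(dest, src1, src2);
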
3.3) Code Optimizations
When generating instructions, you can count on at least the following
optimizations:
- Single instructions are simplified, e.g.
and_i32 t0, t0, $0xffffffff
is suppressed.
- A liveness analysis is done at the basic block level. The
information is used to suppress moves from a dead variable to
another one. It is also used to remove instructions which compute
dead results. The latter is especially useful for condition code
optimization in QEMU.
In the following example:
add_i32 t0, t1, t2
add_i32 t0, t0, $1
mov_i32 t0, $1
only the last instruction is kept.
3.4) Instruction Reference
********* Function call
* call <ret> <params> ptr
call function 'ptr' (pointer type)
<ret> optional 32 bit or 64 bit return value
<params> optional 32 bit or 64 bit parameters
********* Jumps/Labels
* set_label $label
Define label 'label' at the current program point.
* br $label
Jump to label.
* brcond_i32/i64 t0, t1, cond, label
Conditional jump if t0 cond t1 is true. cond can be:
TCG_COND_EQ
TCG_COND_NE
TCG_COND_LT /* signed */
TCG_COND_GE /* signed */
TCG_COND_LE /* signed */
TCG_COND_GT /* signed */
TCG_COND_LTU /* unsigned */
TCG_COND_GEU /* unsigned */
TCG_COND_LEU /* unsigned */
TCG_COND_GTU /* unsigned */
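
Together, set_label/br/brcond map onto the label API as in this sketch
(operand names are illustrative):

    /* if (a == b) { dest = 0; } -- emitted with a forward conditional branch */
    TCGLabel *skip = gen_new_label();

    tcg_gen_brcond_i32(TCG_COND_NE, a, b, skip);  /* brcond_i32 a, b, NE, $skip */
    tcg_gen_movi_i32(dest, 0);                    /* mov_i32 dest, $0 */
    gen_set_label(skip);                          /* set_label $skip */
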
********* Arithmetic
* add_i32/i64 t0, t1, t2
t0=t1+t2
* sub_i32/i64 t0, t1, t2
t0=t1-t2
* neg_i32/i64 t0, t1
t0=-t1 (two's complement)
* mul_i32/i64 t0, t1, t2
t0=t1*t2
* div_i32/i64 t0, t1, t2
t0=t1/t2 (signed). Undefined behavior if division by zero or overflow.
* divu_i32/i64 t0, t1, t2
t0=t1/t2 (unsigned). Undefined behavior if division by zero.
* rem_i32/i64 t0, t1, t2
t0=t1%t2 (signed). Undefined behavior if division by zero or overflow.
* remu_i32/i64 t0, t1, t2
t0=t1%t2 (unsigned). Undefined behavior if division by zero.
********* Logical
* and_i32/i64 t0, t1, t2
t0=t1&t2
* or_i32/i64 t0, t1, t2
t0=t1|t2
* xor_i32/i64 t0, t1, t2
t0=t1^t2
* not_i32/i64 t0, t1
t0=~t1
* andc_i32/i64 t0, t1, t2
t0=t1&~t2
* eqv_i32/i64 t0, t1, t2
t0=~(t1^t2), or equivalently, t0=t1^~t2
* nand_i32/i64 t0, t1, t2
t0=~(t1&t2)
* nor_i32/i64 t0, t1, t2
t0=~(t1|t2)
* orc_i32/i64 t0, t1, t2
t0=t1|~t2
* clz_i32/i64 t0, t1, t2
t0 = t1 ? clz(t1) : t2
* ctz_i32/i64 t0, t1, t2
t0 = t1 ? ctz(t1) : t2
* ctpop_i32/i64 t0, t1
t0 = number of bits set in t1
With "ctpop" short for "count population", matching
the function name used in include/qemu/host-utils.h.
********* Shifts/Rotates
* shl_i32/i64 t0, t1, t2
t0=t1 << t2. Unspecified behavior if t2 < 0 or t2 >= 32 (resp 64)
* shr_i32/i64 t0, t1, t2
t0=t1 >> t2 (unsigned). Unspecified behavior if t2 < 0 or t2 >= 32 (resp 64)
* sar_i32/i64 t0, t1, t2
t0=t1 >> t2 (signed). Unspecified behavior if t2 < 0 or t2 >= 32 (resp 64)
* rotl_i32/i64 t0, t1, t2
Rotation of t2 bits to the left.
Unspecified behavior if t2 < 0 or t2 >= 32 (resp 64)
* rotr_i32/i64 t0, t1, t2
Rotation of t2 bits to the right.
Unspecified behavior if t2 < 0 or t2 >= 32 (resp 64)
********* Misc
* mov_i32/i64 t0, t1
t0 = t1
Move t1 to t0 (both operands must have the same type).
* ext8s_i32/i64 t0, t1
ext8u_i32/i64 t0, t1
ext16s_i32/i64 t0, t1
ext16u_i32/i64 t0, t1
ext32s_i64 t0, t1
ext32u_i64 t0, t1
8, 16 or 32 bit sign/zero extension (both operands must have the same type)
* bswap16_i32/i64 t0, t1, flags
16 bit byte swap on the low bits of a 32/64 bit input.
If flags & TCG_BSWAP_IZ, then t1 is known to be zero-extended from bit 15.
If flags & TCG_BSWAP_OZ, then t0 will be zero-extended from bit 15.
If flags & TCG_BSWAP_OS, then t0 will be sign-extended from bit 15.
If neither TCG_BSWAP_OZ nor TCG_BSWAP_OS are set, then the bits of
t0 above bit 15 may contain any value.
* bswap32_i64 t0, t1, flags
32 bit byte swap on a 64-bit value. The flags are the same as for bswap16,
except they apply from bit 31 instead of bit 15.
* bswap32_i32 t0, t1, flags
* bswap64_i64 t0, t1, flags
32/64 bit byte swap. The flags are ignored, but still present
for consistency with the other bswap opcodes.
* discard_i32/i64 t0
Indicate that the value of t0 won't be used later. It is useful to
force dead code elimination.
* deposit_i32/i64 dest, t1, t2, pos, len
Deposit T2 as a bitfield into T1, placing the result in DEST.
The bitfield is described by POS/LEN, which are immediate values:
LEN - the length of the bitfield
POS - the position of the first bit, counting from the LSB
For example, "deposit_i32 dest, t1, t2, 8, 4" indicates a 4-bit field
at bit 8. This operation would be equivalent to
dest = (t1 & ~0x0f00) | ((t2 << 8) & 0x0f00)
* extract_i32/i64 dest, t1, pos, len
* sextract_i32/i64 dest, t1, pos, len
Extract a bitfield from T1, placing the result in DEST.
The bitfield is described by POS/LEN, which are immediate values,
as above for deposit. For extract_*, the result will be extended
to the left with zeros; for sextract_*, the result will be extended
to the left with copies of the bitfield sign bit at pos + len - 1.
For example, "sextract_i32 dest, t1, 8, 4" indicates a 4-bit field
at bit 8. This operation would be equivalent to
dest = (t1 << 20) >> 28
(using an arithmetic right shift).
* extract2_i32/i64 dest, t1, t2, pos
For N = {32,64}, extract an N-bit quantity from the concatenation
of t2:t1, beginning at pos. The tcg_gen_extract2_{i32,i64} expander
accepts 0 <= pos <= N as inputs. The backend code generator will
not see either 0 or N as inputs for these opcodes.
* extrl_i64_i32 t0, t1
For 64-bit hosts only, extract the low 32-bits of input T1 and place it
into 32-bit output T0. Depending on the host, this may be a simple move,
or may require additional canonicalization.
* extrh_i64_i32 t0, t1
For 64-bit hosts only, extract the high 32-bits of input T1 and place it
into 32-bit output T0. Depending on the host, this may be a simple shift,
or may require additional canonicalization.
********* Conditional moves
* setcond_i32/i64 dest, t1, t2, cond
dest = (t1 cond t2)
Set DEST to 1 if (T1 cond T2) is true, otherwise set to 0.
* movcond_i32/i64 dest, c1, c2, v1, v2, cond
dest = (c1 cond c2 ? v1 : v2)
Set DEST to V1 if (C1 cond C2) is true, otherwise set to V2.
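
For instance (a sketch; operand names are placeholders), a signed
maximum and a zero test reduce directly to these ops:

    /* movcond_i32 dest, a, b, a, b, GT -- dest = (a > b ? a : b), signed */
    tcg_gen_movcond_i32(TCG_COND_GT, dest, a, b, a, b);

    /* setcond_i32 flag, a, $0, EQ -- via the immediate shortcut */
    tcg_gen_setcondi_i32(TCG_COND_EQ, flag, a, 0);
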
********* Type conversions
* ext_i32_i64 t0, t1
Convert t1 (32 bit) to t0 (64 bit) with sign extension
* extu_i32_i64 t0, t1
Convert t1 (32 bit) to t0 (64 bit) with zero extension
* trunc_i64_i32 t0, t1
Truncate t1 (64 bit) to t0 (32 bit)
* concat_i32_i64 t0, t1, t2
Construct t0 (64-bit) taking the low half from t1 (32 bit) and the high half
from t2 (32 bit).
* concat32_i64 t0, t1, t2
Construct t0 (64-bit) taking the low half from t1 (64 bit) and the high half
from t2 (64 bit).
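
Sketch of these conversions through the generator API (lo, hi and val
are placeholder names):

    TCGv_i64 val = tcg_temp_new_i64();

    tcg_gen_concat_i32_i64(val, lo, hi);   /* val = hi:lo */
    /* ... 64-bit arithmetic on val ... */
    tcg_gen_extrl_i64_i32(lo, val);        /* low 32 bits back out */
    tcg_gen_extrh_i64_i32(hi, val);        /* high 32 bits */

    tcg_temp_free_i64(val);
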
********* Load/Store
* ld_i32/i64 t0, t1, offset
ld8s_i32/i64 t0, t1, offset
ld8u_i32/i64 t0, t1, offset
ld16s_i32/i64 t0, t1, offset
ld16u_i32/i64 t0, t1, offset
ld32s_i64 t0, t1, offset
ld32u_i64 t0, t1, offset
t0 = read(t1 + offset)
Load 8, 16, 32 or 64 bits with or without sign extension from host memory.
offset must be a constant.
* st_i32/i64 t0, t1, offset
st8_i32/i64 t0, t1, offset
st16_i32/i64 t0, t1, offset
st32_i64 t0, t1, offset
write(t0, t1 + offset)
Write 8, 16, 32 or 64 bits to host memory.
All these opcodes assume that the addressed host memory doesn't correspond
to a global. If it does, the behaviour is unpredictable.
********* Multiword arithmetic support
* add2_i32/i64 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high
* sub2_i32/i64 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high
Similar to add/sub, except that the double-word inputs T1 and T2 are
formed from two single-word arguments, and the double-word output T0
is returned in two single-word outputs.
* mulu2_i32/i64 t0_low, t0_high, t1, t2
Similar to mul, except two unsigned inputs T1 and T2 yielding the full
double-word product T0. The latter is returned in two single-word outputs.
* muls2_i32/i64 t0_low, t0_high, t1, t2
Similar to mulu2, except the two inputs T1 and T2 are signed.
* mulsh_i32/i64 t0, t1, t2
* muluh_i32/i64 t0, t1, t2
Provide the high part of a signed or unsigned multiply, respectively.
If mulu2/muls2 are not provided by the backend, the tcg-op generator
can obtain the same results by emitting a pair of opcodes,
mul + muluh/mulsh.
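
Sketch of the generator-level entry points for these ops (operand names
are placeholders; the expanders fall back automatically when the
backend lacks the opcodes):

    /* add2_i32 rl, rh, al, ah, bl, bh -- 64-bit add from 32-bit halves */
    tcg_gen_add2_i32(rl, rh, al, ah, bl, bh);

    /* mulu2_i32 rl, rh, a, b -- full 32x32->64 unsigned product */
    tcg_gen_mulu2_i32(rl, rh, a, b);
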
********* Memory Barrier support
* mb <$arg>
Generate a target memory barrier instruction to ensure memory ordering at
least as strong as that enforced by a corresponding guest memory barrier
instruction. The ordering enforced by the backend may be stricter than the
ordering required by the guest. It cannot be weaker. This opcode takes a
constant argument which is required to generate the appropriate barrier
instruction. The backend should take care to emit the target barrier
instruction only when necessary, i.e. for SMP guests and when MTTCG is
enabled.
The guest translators should generate this opcode for all guest instructions
which have ordering side effects.
Please see docs/devel/atomics.rst for more information on memory barriers.
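
For example (sketch), a guest full-fence instruction would be
translated as:

    /* mb $(TCG_MO_ALL | TCG_BAR_SC): all prior loads/stores are ordered
       before all subsequent ones (constants from tcg/tcg-mo.h). */
    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
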
********* 64-bit guest on 32-bit host support
The following opcodes are internal to TCG. Thus they are to be implemented by
32-bit host code generators, but are not to be emitted by guest translators.
They are emitted as needed by inline functions within "tcg-op.h".
* brcond2_i32 t0_low, t0_high, t1_low, t1_high, cond, label
Similar to brcond, except that the 64-bit values T0 and T1
are formed from two 32-bit arguments.
* setcond2_i32 dest, t1_low, t1_high, t2_low, t2_high, cond
Similar to setcond, except that the 64-bit values T1 and T2 are
formed from two 32-bit arguments. The result is a 32-bit value.
********* QEMU specific operations
* exit_tb t0
Exit the current TB and return the value t0 (word type).
* goto_tb index
Exit the current TB and jump to the TB index 'index' (constant) if the
current TB was linked to this TB. Otherwise execute the next
instructions. Only indices 0 and 1 are valid and tcg_gen_goto_tb may be issued
at most once with each slot index per TB.
* lookup_and_goto_ptr tb_addr
Look up a TB address ('tb_addr') and jump to it if valid. If not valid,
jump to the TCG epilogue to go back to the exec loop.
This operation is optional. If the TCG backend does not implement the
goto_ptr opcode, emitting this op is equivalent to emitting exit_tb(0).
* qemu_ld_i32/i64 t0, t1, flags, memidx
* qemu_st_i32/i64 t0, t1, flags, memidx
* qemu_st8_i32 t0, t1, flags, memidx
Load data at the guest address t1 into t0, or store data in t0 at guest
address t1. The _i32/_i64 size applies to the size of the input/output
register t0 only. The address t1 is always sized according to the guest,
and the width of the memory operation is controlled by flags.
Both t0 and t1 may be split into little-endian ordered pairs of registers
if dealing with 64-bit quantities on a 32-bit host.
The memidx selects the qemu tlb index to use (e.g. user or kernel access).
The flags are the MemOp bits, selecting the sign, width, and endianness
of the memory access.
For a 32-bit host, qemu_ld/st_i64 is guaranteed to only be used with a
64-bit memory access specified in flags.
For i386, qemu_st8_i32 is exactly like qemu_st_i32, except the size of
the memory operation is known to be 8-bit. This allows the backend to
provide a different set of register constraints.
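
Sketch of the frontend entry points (val and addr are placeholders, and
dc->mem_idx stands for the guest's current mmu index; MO_TEUL/MO_TEUW
select 32-/16-bit guest-endian accesses):

    /* qemu_ld_i32 val, addr, MO_TEUL, memidx -- 32-bit guest load */
    tcg_gen_qemu_ld_i32(val, addr, dc->mem_idx, MO_TEUL);

    /* qemu_st_i32 val, addr, MO_TEUW, memidx -- 16-bit guest store */
    tcg_gen_qemu_st_i32(val, addr, dc->mem_idx, MO_TEUW);
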
********* Host vector operations
All of the vector ops have two parameters, TCGOP_VECL & TCGOP_VECE.
The former specifies the length of the vector in log2 64-bit units; the
latter specifies the length of the element (if applicable) in log2 8-bit units.
E.g. VECL=1 -> 64 << 1 -> v128, and VECE=2 -> 1 << 2 -> i32.
* mov_vec v0, v1
* ld_vec v0, t1
* st_vec v0, t1
Move, load and store.
* dup_vec v0, r1
Duplicate the low N bits of R1 into VECL/VECE copies across V0.
* dupi_vec v0, c
Similarly, for a constant.
Smaller values will be replicated to host register size by the expanders.
* dup2_vec v0, r1, r2
Duplicate r2:r1 into VECL/64 copies across V0. This opcode is
only present for 32-bit hosts.
* add_vec v0, v1, v2
v0 = v1 + v2, in elements across the vector.
* sub_vec v0, v1, v2
Similarly, v0 = v1 - v2.
* mul_vec v0, v1, v2
Similarly, v0 = v1 * v2.
* neg_vec v0, v1
Similarly, v0 = -v1.
* abs_vec v0, v1
Similarly, v0 = v1 < 0 ? -v1 : v1, in elements across the vector.
* smin_vec:
* umin_vec:
Similarly, v0 = MIN(v1, v2), for signed and unsigned element types.
* smax_vec:
* umax_vec:
Similarly, v0 = MAX(v1, v2), for signed and unsigned element types.
* ssadd_vec:
* sssub_vec:
* usadd_vec:
* ussub_vec:
Signed and unsigned saturating addition and subtraction. If the true
result is not representable within the element type, the element is
set to the minimum or maximum value for the type.
* and_vec v0, v1, v2
* or_vec v0, v1, v2
* xor_vec v0, v1, v2
* andc_vec v0, v1, v2
* orc_vec v0, v1, v2
* not_vec v0, v1
Similarly, logical operations with and without complement.
Note that VECE is unused.
* shli_vec v0, v1, i2
* shls_vec v0, v1, s2
Shift all elements from v1 by a scalar i2/s2. I.e.
for (i = 0; i < VECL/VECE; ++i) {
v0[i] = v1[i] << s2;
}
* shri_vec v0, v1, i2
* sari_vec v0, v1, i2
* rotli_vec v0, v1, i2
* shrs_vec v0, v1, s2
* sars_vec v0, v1, s2
Similarly for logical and arithmetic right shift, and left rotate.
* shlv_vec v0, v1, v2
Shift elements from v1 by elements from v2. I.e.
for (i = 0; i < VECL/VECE; ++i) {
v0[i] = v1[i] << v2[i];
}
* shrv_vec v0, v1, v2
* sarv_vec v0, v1, v2
* rotlv_vec v0, v1, v2
* rotrv_vec v0, v1, v2
Similarly for logical and arithmetic right shift, and rotates.
* cmp_vec v0, v1, v2, cond
Compare vectors by element, storing -1 for true and 0 for false.
* bitsel_vec v0, v1, v2, v3
Bitwise select, v0 = (v2 & v1) | (v3 & ~v1), across the entire vector.
* cmpsel_vec v0, c1, c2, v3, v4, cond
Select elements based on comparison results:
for (i = 0; i < n; ++i) {
v0[i] = (c1[i] cond c2[i]) ? v3[i] : v4[i].
}
*********
Note 1: Some shortcuts are defined when the last operand is known to be
a constant (e.g. addi for add, movi for mov).
Note 2: When using TCG, the opcodes must never be generated directly
as some of them may not be available as "real" opcodes. Always use the
function tcg_gen_xxx(args).
4) Backend
tcg-target.h contains the target specific definitions. tcg-target.c.inc
contains the target specific code; it is #included by tcg/tcg.c, rather
than being a standalone C file.
4.1) Assumptions
The target word size (TCG_TARGET_REG_BITS) is expected to be 32 bit or
64 bit. It is expected that the pointer has the same size as the word.
On a 32 bit target, all 64 bit operations are converted to 32 bits. A
few specific operations must be implemented to allow it (see add2_i32,
sub2_i32, brcond2_i32).
On a 64 bit target, the values are transferred between 32 and 64-bit
registers using the following ops:
- trunc_shr_i64_i32
- ext_i32_i64
- extu_i32_i64
They ensure that the values are correctly truncated or extended when
moved from a 32-bit to a 64-bit register or vice-versa. Note that the
trunc_shr_i64_i32 is an optional op. It is not necessary to implement
it if all the following conditions are met:
- 64-bit registers can hold 32-bit values
- 32-bit values in a 64-bit register do not need to stay zero or
sign extended
- all 32-bit TCG ops ignore the high part of 64-bit registers
Floating point operations are not supported in this version. A
previous incarnation of the code generator had full support of them,
but it is better to concentrate on integer operations first.
4.2) Constraints
GCC-like constraints are used to define the constraints of every
instruction. Memory constraints are not supported in this
version. Aliases are specified in the input operands as for GCC.
The same register may be used for both an input and an output, even when
they are not explicitly aliased. If an op expands to multiple target
instructions then care must be taken to avoid clobbering input values.
GCC style "early clobber" outputs are supported, with '&'.
A target can define specific register or constant constraints. If an
operation uses a constant input constraint which does not allow all
constants, it must also accept registers in order to have a fallback.
The constraint 'i' is defined generically to accept any constant.
The constraint 'r' is not defined generically, but is consistently
used by each backend to indicate all registers.
The movi_i32 and movi_i64 operations must accept any constants.
The mov_i32 and mov_i64 operations must accept any registers of the
same type.
The ld/st/sti instructions must accept signed 32 bit constant offsets.
This can be implemented by reserving a specific register in which to
compute the address if the offset is too big.
The ld/st instructions must accept any destination (ld) or source (st)
register.
The sti instruction may fail if it cannot store the given constant.
4.3) Function call assumptions
- The only supported types for parameters and return value are: 32 and
64 bit integers and pointer.
- The stack grows downwards.
- The first N parameters are passed in registers.
- The next parameters are passed on the stack by storing them as words.
- Some registers are clobbered during the call.
- The function can return 0 or 1 value in registers. On a 32 bit
target, functions must be able to return 2 values in registers for
64 bit return type.
5) Recommended coding rules for best performance
- Use globals to represent the parts of the QEMU CPU state which are
often modified, e.g. the integer registers and the condition
codes. TCG will be able to use host registers to store them.
- Avoid globals stored in fixed registers. They must be used only to
store the pointer to the CPU state and possibly to store a pointer
to a register window.
- Use temporaries. Use local temporaries only when really needed,
e.g. when you need to use a value after a jump. Local temporaries
introduce a performance hit in the current TCG implementation: their
content is saved to memory at the end of each basic block. (A short
sketch of these two rules follows this list.)
- Free temporaries and local temporaries when they are no longer used
(tcg_temp_free). Since tcg_const_x() also creates a temporary, you
should free it after it is used. Freeing temporaries does not yield
better generated code, but it reduces the memory usage of TCG and
improves the speed of translation.
- Don't hesitate to use helpers for complicated or seldom used guest
instructions. There is little performance advantage in using TCG to
implement guest instructions taking more than about twenty TCG
instructions. Note that this rule of thumb is more applicable to
helpers doing complex logic or arithmetic, where the C compiler has
scope to do a good job of optimisation; it is less relevant where
the instruction is mostly doing loads and stores, and in those cases
inline TCG may still be faster for longer sequences.
- The hard limit on the number of TCG instructions you can generate
per guest instruction is set by MAX_OP_PER_INSTR in exec-all.h --
you cannot exceed this without risking a buffer overrun.
- Use the 'discard' instruction if you know that TCG won't be able to
prove that a given global is "dead" at a given program point. The
x86 guest uses it to improve the condition codes optimisation.
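
A minimal sketch of the temporary-handling rules above, as they would
appear in a frontend (register names are illustrative):

    /* Short-lived value: plain temporary, freed promptly. */
    TCGv_i32 t = tcg_temp_new_i32();
    tcg_gen_shli_i32(t, reg_a, 2);
    tcg_gen_add_i32(reg_b, reg_b, t);
    tcg_temp_free_i32(t);

    /* Value needed across a branch: local temporary, at a cost. */
    TCGv_i32 l = tcg_temp_local_new_i32();
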


@ -1336,22 +1336,23 @@ static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target)
}
}
static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
{
tcg_out_insn(s, 3207, BLR, reg);
}
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target)
static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
{
ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
if (offset == sextract64(offset, 0, 26)) {
tcg_out_insn(s, 3206, BL, offset);
} else {
tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
tcg_out_callr(s, TCG_REG_TMP);
tcg_out_insn(s, 3207, BLR, TCG_REG_TMP);
}
}
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
const TCGHelperInfo *info)
{
tcg_out_call_int(s, target);
}
void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
uintptr_t jmp_rw, uintptr_t addr)
{
@ -1599,7 +1600,7 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
tcg_out_adr(s, TCG_REG_X3, lb->raddr);
tcg_out_call(s, qemu_ld_helpers[opc & MO_SIZE]);
tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
if (opc & MO_SIGN) {
tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
} else {
@ -1625,7 +1626,7 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
tcg_out_adr(s, TCG_REG_X4, lb->raddr);
tcg_out_call(s, qemu_st_helpers[opc & MO_SIZE]);
tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
tcg_out_goto(s, lb->raddr);
return true;
}


@ -16,7 +16,6 @@
#define TCG_TARGET_INSN_UNIT_SIZE 4
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 24
#define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB)
#undef TCG_TARGET_STACK_GROWSUP
typedef enum {
TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
@ -52,8 +51,9 @@ typedef enum {
/* used for function call generation */
#define TCG_REG_CALL_STACK TCG_REG_SP
#define TCG_TARGET_STACK_ALIGN 16
#define TCG_TARGET_CALL_ALIGN_ARGS 1
#define TCG_TARGET_CALL_STACK_OFFSET 0
#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
/* optional instructions */
#define TCG_TARGET_HAS_div_i32 1


@ -1131,7 +1131,7 @@ static void tcg_out_goto(TCGContext *s, ARMCond cond, const tcg_insn_unit *addr)
* The call case is mostly used for helpers - so it's not unreasonable
* for them to be beyond branch range.
*/
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *addr)
static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *addr)
{
intptr_t addri = (intptr_t)addr;
ptrdiff_t disp = tcg_pcrel_diff(s, addr);
@ -1150,6 +1150,12 @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *addr)
tcg_out_blx_reg(s, COND_AL, TCG_REG_TMP);
}
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *addr,
const TCGHelperInfo *info)
{
tcg_out_call_int(s, addr);
}
static void tcg_out_goto_label(TCGContext *s, ARMCond cond, TCGLabel *l)
{
if (l->has_value) {
@ -1515,7 +1521,7 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
/* Use the canonical unsigned helpers and minimize icache usage. */
tcg_out_call(s, qemu_ld_helpers[opc & MO_SIZE]);
tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
datalo = lb->datalo_reg;
datahi = lb->datahi_reg;


@ -30,7 +30,6 @@ extern int arm_arch;
#define use_armv7_instructions (__ARM_ARCH >= 7 || arm_arch >= 7)
#undef TCG_TARGET_STACK_GROWSUP
#define TCG_TARGET_INSN_UNIT_SIZE 4
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
#define MAX_CODE_GEN_BUFFER_SIZE UINT32_MAX
@ -89,8 +88,9 @@ extern bool use_neon_instructions;
/* used for function call generation */
#define TCG_TARGET_STACK_ALIGN 8
#define TCG_TARGET_CALL_ALIGN_ARGS 1
#define TCG_TARGET_CALL_STACK_OFFSET 0
#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN
/* optional instructions */
#define TCG_TARGET_HAS_ext8s_i32 1


@ -1661,7 +1661,8 @@ static void tcg_out_branch(TCGContext *s, int call, const tcg_insn_unit *dest)
}
}
static inline void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest)
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest,
const TCGHelperInfo *info)
{
tcg_out_branch(s, 1, dest);
}
@ -1885,7 +1886,7 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
(uintptr_t)l->raddr);
}
tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
tcg_out_branch(s, 1, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
data_reg = l->datalo_reg;
switch (opc & MO_SSIZE) {


@ -98,6 +98,8 @@ typedef enum {
#else
#define TCG_TARGET_CALL_STACK_OFFSET 0
#endif
#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
extern bool have_bmi1;
extern bool have_popcnt;


@ -567,7 +567,8 @@ static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail)
}
}
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg)
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg,
const TCGHelperInfo *info)
{
tcg_out_call_int(s, arg, false);
}
@ -760,7 +761,7 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A2, oi);
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A3, (tcg_target_long)l->raddr);
tcg_out_call(s, qemu_ld_helpers[size]);
tcg_out_call_int(s, qemu_ld_helpers[size], false);
switch (opc & MO_SSIZE) {
case MO_SB:
@ -821,7 +822,7 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A3, oi);
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A4, (tcg_target_long)l->raddr);
tcg_out_call(s, qemu_st_helpers[size]);
tcg_out_call_int(s, qemu_st_helpers[size], false);
return tcg_out_goto(s, l->raddr);
}


@ -92,8 +92,9 @@ typedef enum {
/* used for function call generation */
#define TCG_REG_CALL_STACK TCG_REG_SP
#define TCG_TARGET_STACK_ALIGN 16
#define TCG_TARGET_CALL_ALIGN_ARGS 1
#define TCG_TARGET_CALL_STACK_OFFSET 0
#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
/* optional instructions */
#define TCG_TARGET_HAS_movcond_i32 0


@ -1020,7 +1020,8 @@ static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail)
}
}
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg)
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg,
const TCGHelperInfo *info)
{
tcg_out_call_int(s, arg, false);
tcg_out_nop(s);


@ -83,10 +83,12 @@ typedef enum {
#define TCG_TARGET_STACK_ALIGN 16
#if _MIPS_SIM == _ABIO32
# define TCG_TARGET_CALL_STACK_OFFSET 16
# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN
#else
# define TCG_TARGET_CALL_STACK_OFFSET 0
# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
#endif
#define TCG_TARGET_CALL_ALIGN_ARGS 1
#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
/* MOVN/MOVZ instructions detection */
#if (defined(__mips_isa_rev) && (__mips_isa_rev >= 1)) || \


@ -667,9 +667,7 @@ static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
{
for (int i = 0; i < nb_args; i++) {
TCGTemp *ts = arg_temp(op->args[i]);
if (ts) {
init_ts_info(ctx, ts);
}
init_ts_info(ctx, ts);
}
}
@ -680,7 +678,7 @@ static void copy_propagate(OptContext *ctx, TCGOp *op,
for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
TCGTemp *ts = arg_temp(op->args[i]);
if (ts && ts_is_copy(ts)) {
if (ts_is_copy(ts)) {
op->args[i] = temp_arg(find_better_copy(s, ts));
}
}
@ -962,7 +960,7 @@ static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
rh = op->args[1];
/* The proper opcode is supplied by tcg_opt_gen_mov. */
op2 = tcg_op_insert_before(ctx->tcg, op, 0);
op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2);
tcg_opt_gen_movi(ctx, op, rl, al);
tcg_opt_gen_movi(ctx, op2, rh, ah);
@ -1613,7 +1611,7 @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op)
rh = op->args[1];
/* The proper opcode is supplied by tcg_opt_gen_mov. */
op2 = tcg_op_insert_before(ctx->tcg, op, 0);
op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2);
tcg_opt_gen_movi(ctx, op, rl, l);
tcg_opt_gen_movi(ctx, op2, rh, h);


@ -42,10 +42,17 @@
# else
# error "Unknown ABI"
# endif
#endif
#endif
#if TCG_TARGET_REG_BITS == 64
# define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_EXTEND
#else
# define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
#endif
#ifdef _CALL_SYSV
# define TCG_TARGET_CALL_ALIGN_ARGS 1
# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN
#else
# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
#endif
/* For some memory operations, we need a scratch that isn't R0. For the AIX
@ -1995,7 +2002,8 @@ static void tcg_out_call_int(TCGContext *s, int lk,
#endif
}
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target)
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
const TCGHelperInfo *info)
{
tcg_out_call_int(s, LK, target);
}
@ -2202,9 +2210,7 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
lo = lb->addrlo_reg;
hi = lb->addrhi_reg;
if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
arg |= 1;
#endif
arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN);
tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
} else {
@ -2216,7 +2222,7 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
tcg_out_movi(s, TCG_TYPE_I32, arg++, oi);
tcg_out32(s, MFSPR | RT(arg) | LR);
tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
tcg_out_call_int(s, LK, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
lo = lb->datalo_reg;
hi = lb->datahi_reg;
@ -2250,9 +2256,7 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
lo = lb->addrlo_reg;
hi = lb->addrhi_reg;
if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
arg |= 1;
#endif
arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN);
tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
} else {
@ -2266,9 +2270,7 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
if (TCG_TARGET_REG_BITS == 32) {
switch (s_bits) {
case MO_64:
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
arg |= 1;
#endif
arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN);
tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
/* FALLTHRU */
case MO_32:
@ -2289,7 +2291,7 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
tcg_out_movi(s, TCG_TYPE_I32, arg++, oi);
tcg_out32(s, MFSPR | RT(arg) | LR);
tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
tcg_out_call_int(s, LK, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
tcg_out_b(s, 0, lb->raddr);
return true;
@ -2324,9 +2326,8 @@ static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
TCGReg arg = TCG_REG_R4;
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
arg |= 1;
#endif
arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN);
if (l->addrlo_reg != arg) {
tcg_out_mov(s, TCG_TYPE_I32, arg, l->addrhi_reg);
tcg_out_mov(s, TCG_TYPE_I32, arg + 1, l->addrlo_reg);
@ -2525,7 +2526,6 @@ static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
/* Parameters for function call generation, used in tcg.c. */
#define TCG_TARGET_STACK_ALIGN 16
#define TCG_TARGET_EXTEND_ARGS 1
#ifdef _CALL_AIX
# define LINK_AREA_SIZE (6 * SZR)


@ -819,7 +819,8 @@ static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail)
}
}
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg)
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg,
const TCGHelperInfo *info)
{
tcg_out_call_int(s, arg, false);
}
@ -1002,7 +1003,7 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
tcg_out_movi(s, TCG_TYPE_PTR, a2, oi);
tcg_out_movi(s, TCG_TYPE_PTR, a3, (tcg_target_long)l->raddr);
tcg_out_call(s, qemu_ld_helpers[opc & MO_SSIZE]);
tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SSIZE], false);
tcg_out_mov(s, (opc & MO_SIZE) == MO_64, l->datalo_reg, a0);
tcg_out_goto(s, l->raddr);
@ -1047,7 +1048,7 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
tcg_out_movi(s, TCG_TYPE_PTR, a3, oi);
tcg_out_movi(s, TCG_TYPE_PTR, a4, (tcg_target_long)l->raddr);
tcg_out_call(s, qemu_st_helpers[opc & MO_SIZE]);
tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE], false);
tcg_out_goto(s, l->raddr);
return true;


@ -81,8 +81,13 @@ typedef enum {
/* used for function call generation */
#define TCG_REG_CALL_STACK TCG_REG_SP
#define TCG_TARGET_STACK_ALIGN 16
#define TCG_TARGET_CALL_ALIGN_ARGS 1
#define TCG_TARGET_CALL_STACK_OFFSET 0
#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
#if TCG_TARGET_REG_BITS == 32
#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN
#else
#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
#endif
/* optional instructions */
#define TCG_TARGET_HAS_movcond_i32 0


@ -802,9 +802,9 @@ static bool maybe_out_small_movi(TCGContext *s, TCGType type,
}
for (i = 0; i < 4; i++) {
tcg_target_long mask = 0xffffull << i*16;
tcg_target_long mask = 0xffffull << i * 16;
if ((uval & mask) == uval) {
tcg_out_insn_RI(s, lli_insns[i], ret, uval >> i*16);
tcg_out_insn_RI(s, lli_insns[i], ret, uval >> i * 16);
return true;
}
}
@ -1221,9 +1221,9 @@ static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
/* Try all 32-bit insns that can perform it in one go. */
for (i = 0; i < 4; i++) {
tcg_target_ulong mask = ~(0xffffull << i*16);
tcg_target_ulong mask = ~(0xffffull << i * 16);
if (((val | ~valid) & mask) == mask) {
tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16);
tcg_out_insn_RI(s, ni_insns[i], dest, val >> i * 16);
return;
}
}
@ -1231,9 +1231,9 @@ static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
/* Try all 48-bit insns that can perform it in one go. */
if (HAVE_FACILITY(EXT_IMM)) {
for (i = 0; i < 2; i++) {
tcg_target_ulong mask = ~(0xffffffffull << i*32);
tcg_target_ulong mask = ~(0xffffffffull << i * 32);
if (((val | ~valid) & mask) == mask) {
tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32);
tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i * 32);
return;
}
}
@ -1279,9 +1279,9 @@ static void tgen_ori(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
/* Try all 32-bit insns that can perform it in one go. */
for (i = 0; i < 4; i++) {
tcg_target_ulong mask = (0xffffull << i*16);
tcg_target_ulong mask = (0xffffull << i * 16);
if ((val & mask) != 0 && (val & ~mask) == 0) {
tcg_out_insn_RI(s, oi_insns[i], dest, val >> i*16);
tcg_out_insn_RI(s, oi_insns[i], dest, val >> i * 16);
return;
}
}
@ -1289,9 +1289,9 @@ static void tgen_ori(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
/* Try all 48-bit insns that can perform it in one go. */
if (HAVE_FACILITY(EXT_IMM)) {
for (i = 0; i < 2; i++) {
tcg_target_ulong mask = (0xffffffffull << i*32);
tcg_target_ulong mask = (0xffffffffull << i * 32);
if ((val & mask) != 0 && (val & ~mask) == 0) {
tcg_out_insn_RIL(s, oif_insns[i], dest, val >> i*32);
tcg_out_insn_RIL(s, oif_insns[i], dest, val >> i * 32);
return;
}
}
@ -1691,7 +1691,7 @@ static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c,
tgen_branch(s, cc, l);
}
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest)
static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *dest)
{
ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1;
if (off == (int32_t)off) {
@ -1702,6 +1702,12 @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest)
}
}
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest,
const TCGHelperInfo *info)
{
tcg_out_call_int(s, dest);
}
static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg data,
TCGReg base, TCGReg index, int disp)
{
@ -1897,7 +1903,7 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
}
tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, oi);
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R5, (uintptr_t)lb->raddr);
tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]);
tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]);
tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
@ -1938,7 +1944,7 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
}
tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, oi);
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R6, (uintptr_t)lb->raddr);
tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
return true;


@ -166,8 +166,9 @@ extern uint64_t s390_facilities[3];
/* used for function call generation */
#define TCG_TARGET_STACK_ALIGN 8
#define TCG_TARGET_CALL_STACK_OFFSET 160
#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_EXTEND
#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
#define TCG_TARGET_EXTEND_ARGS 1
#define TCG_TARGET_HAS_MEMORY_BSWAP 1
#define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)


@ -859,7 +859,8 @@ static void tcg_out_call_nodelay(TCGContext *s, const tcg_insn_unit *dest,
}
}
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest)
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest,
const TCGHelperInfo *info)
{
tcg_out_call_nodelay(s, dest, false);
tcg_out_nop(s);


@ -71,7 +71,8 @@ typedef enum {
#define TCG_TARGET_STACK_BIAS 2047
#define TCG_TARGET_STACK_ALIGN 16
#define TCG_TARGET_CALL_STACK_OFFSET (128 + 6*8 + TCG_TARGET_STACK_BIAS)
#define TCG_TARGET_EXTEND_ARGS 1
#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_EXTEND
#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
#if defined(__VIS__) && __VIS__ >= 0x300
#define use_vis3_instructions 1


@ -25,13 +25,53 @@
#ifndef TCG_INTERNAL_H
#define TCG_INTERNAL_H
#ifdef CONFIG_TCG_INTERPRETER
#include <ffi.h>
#endif
#define TCG_HIGHWATER 1024
/*
* Describe the calling convention of a given argument type.
*/
typedef enum {
TCG_CALL_RET_NORMAL, /* by registers */
} TCGCallReturnKind;
typedef enum {
TCG_CALL_ARG_NORMAL, /* by registers (continuing onto stack) */
TCG_CALL_ARG_EVEN, /* like normal, but skipping odd slots */
TCG_CALL_ARG_EXTEND, /* for i32, as a sign/zero-extended i64 */
TCG_CALL_ARG_EXTEND_U, /* ... as a zero-extended i64 */
TCG_CALL_ARG_EXTEND_S, /* ... as a sign-extended i64 */
} TCGCallArgumentKind;
typedef struct TCGCallArgumentLoc {
TCGCallArgumentKind kind : 8;
unsigned arg_slot : 8;
unsigned ref_slot : 8;
unsigned arg_idx : 4;
unsigned tmp_subindex : 2;
} TCGCallArgumentLoc;
/* Avoid "unsigned < 0 is always false" Werror, when iarg_regs is empty. */
#define REG_P(L) \
((int)(L)->arg_slot < (int)ARRAY_SIZE(tcg_target_call_iarg_regs))
typedef struct TCGHelperInfo {
void *func;
const char *name;
unsigned flags;
unsigned typemask;
#ifdef CONFIG_TCG_INTERPRETER
ffi_cif *cif;
#endif
unsigned typemask : 32;
unsigned flags : 8;
unsigned nr_in : 8;
unsigned nr_out : 8;
TCGCallReturnKind out_kind : 8;
/* Maximum physical arguments are constrained by TCG_TYPE_I128. */
TCGCallArgumentLoc in[MAX_CALL_IARGS * (128 / TCG_TARGET_REG_BITS)];
} TCGHelperInfo;
extern TCGContext tcg_init_ctx;
@ -59,4 +99,18 @@ static inline unsigned tcg_call_flags(TCGOp *op)
return tcg_call_info(op)->flags;
}
#if TCG_TARGET_REG_BITS == 32
static inline TCGv_i32 TCGV_LOW(TCGv_i64 t)
{
return temp_tcgv_i32(tcgv_i64_temp(t) + HOST_BIG_ENDIAN);
}
static inline TCGv_i32 TCGV_HIGH(TCGv_i64 t)
{
return temp_tcgv_i32(tcgv_i64_temp(t) + !HOST_BIG_ENDIAN);
}
#else
extern TCGv_i32 TCGV_LOW(TCGv_i64) QEMU_ERROR("32-bit code path is reachable");
extern TCGv_i32 TCGV_HIGH(TCGv_i64) QEMU_ERROR("32-bit code path is reachable");
#endif
#endif /* TCG_INTERNAL_H */


@ -21,6 +21,8 @@
#include "tcg/tcg.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-mo.h"
#include "tcg-internal.h"
/* Reduce the number of ifdefs below. This assumes that all uses of
TCGV_HIGH and TCGV_LOW are properly protected by a conditional that
@ -150,7 +152,7 @@ bool tcg_can_emit_vecop_list(const TCGOpcode *list,
void vec_gen_2(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a)
{
TCGOp *op = tcg_emit_op(opc);
TCGOp *op = tcg_emit_op(opc, 2);
TCGOP_VECL(op) = type - TCG_TYPE_V64;
TCGOP_VECE(op) = vece;
op->args[0] = r;
@ -160,7 +162,7 @@ void vec_gen_2(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a)
void vec_gen_3(TCGOpcode opc, TCGType type, unsigned vece,
TCGArg r, TCGArg a, TCGArg b)
{
TCGOp *op = tcg_emit_op(opc);
TCGOp *op = tcg_emit_op(opc, 3);
TCGOP_VECL(op) = type - TCG_TYPE_V64;
TCGOP_VECE(op) = vece;
op->args[0] = r;
@ -171,7 +173,7 @@ void vec_gen_3(TCGOpcode opc, TCGType type, unsigned vece,
void vec_gen_4(TCGOpcode opc, TCGType type, unsigned vece,
TCGArg r, TCGArg a, TCGArg b, TCGArg c)
{
TCGOp *op = tcg_emit_op(opc);
TCGOp *op = tcg_emit_op(opc, 4);
TCGOP_VECL(op) = type - TCG_TYPE_V64;
TCGOP_VECE(op) = vece;
op->args[0] = r;
@ -183,7 +185,7 @@ void vec_gen_4(TCGOpcode opc, TCGType type, unsigned vece,
static void vec_gen_6(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r,
TCGArg a, TCGArg b, TCGArg c, TCGArg d, TCGArg e)
{
TCGOp *op = tcg_emit_op(opc);
TCGOp *op = tcg_emit_op(opc, 6);
TCGOP_VECL(op) = type - TCG_TYPE_V64;
TCGOP_VECE(op) = vece;
op->args[0] = r;


@ -28,33 +28,25 @@
#include "tcg/tcg-op.h"
#include "tcg/tcg-mo.h"
#include "exec/plugin-gen.h"
#include "tcg-internal.h"
/* Reduce the number of ifdefs below. This assumes that all uses of
TCGV_HIGH and TCGV_LOW are properly protected by a conditional that
the compiler can eliminate. */
#if TCG_TARGET_REG_BITS == 64
extern TCGv_i32 TCGV_LOW_link_error(TCGv_i64);
extern TCGv_i32 TCGV_HIGH_link_error(TCGv_i64);
#define TCGV_LOW TCGV_LOW_link_error
#define TCGV_HIGH TCGV_HIGH_link_error
#endif
void tcg_gen_op1(TCGOpcode opc, TCGArg a1)
{
TCGOp *op = tcg_emit_op(opc);
TCGOp *op = tcg_emit_op(opc, 1);
op->args[0] = a1;
}
void tcg_gen_op2(TCGOpcode opc, TCGArg a1, TCGArg a2)
{
TCGOp *op = tcg_emit_op(opc);
TCGOp *op = tcg_emit_op(opc, 2);
op->args[0] = a1;
op->args[1] = a2;
}
void tcg_gen_op3(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3)
{
TCGOp *op = tcg_emit_op(opc);
TCGOp *op = tcg_emit_op(opc, 3);
op->args[0] = a1;
op->args[1] = a2;
op->args[2] = a3;
@ -62,7 +54,7 @@ void tcg_gen_op3(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3)
void tcg_gen_op4(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, TCGArg a4)
{
TCGOp *op = tcg_emit_op(opc);
TCGOp *op = tcg_emit_op(opc, 4);
op->args[0] = a1;
op->args[1] = a2;
op->args[2] = a3;
@ -72,7 +64,7 @@ void tcg_gen_op4(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, TCGArg a4)
void tcg_gen_op5(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3,
TCGArg a4, TCGArg a5)
{
TCGOp *op = tcg_emit_op(opc);
TCGOp *op = tcg_emit_op(opc, 5);
op->args[0] = a1;
op->args[1] = a2;
op->args[2] = a3;
@ -83,7 +75,7 @@ void tcg_gen_op5(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3,
void tcg_gen_op6(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3,
TCGArg a4, TCGArg a5, TCGArg a6)
{
TCGOp *op = tcg_emit_op(opc);
TCGOp *op = tcg_emit_op(opc, 6);
op->args[0] = a1;
op->args[1] = a2;
op->args[2] = a3;
@ -1171,6 +1163,21 @@ void tcg_gen_ld_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
#endif
}
void tcg_gen_st8_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset)
{
tcg_gen_st8_i32(TCGV_LOW(arg1), arg2, offset);
}
void tcg_gen_st16_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset)
{
tcg_gen_st16_i32(TCGV_LOW(arg1), arg2, offset);
}
void tcg_gen_st32_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset)
{
tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset);
}
void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset)
{
#if HOST_BIG_ENDIAN
@ -1182,6 +1189,18 @@ void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset)
#endif
}
void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
{
tcg_gen_add2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1),
TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2));
}
void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
{
tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1),
TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2));
}
void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
{
tcg_gen_and_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));

tcg/tcg.c: 1663 changed lines (diff suppressed because it is too large)


@ -18,7 +18,6 @@
*/
#include "qemu/osdep.h"
#include "tcg/tcg.h" /* MAX_OPC_PARAM_IARGS */
#include "exec/cpu_ldst.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-ldst.h"


@ -197,10 +197,6 @@ static const int tcg_target_reg_alloc_order[] = {
TCG_REG_R0,
};
#if MAX_OPC_PARAM_IARGS != 7
# error Fix needed, number of supported input arguments changed!
#endif
/* No call arguments via registers. All will be stored on the "stack". */
static const int tcg_target_call_iarg_regs[] = { };
@ -562,8 +558,9 @@ static void tcg_out_movi(TCGContext *s, TCGType type,
}
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *func,
ffi_cif *cif)
const TCGHelperInfo *info)
{
ffi_cif *cif = info->cif;
tcg_insn_unit insn = 0;
uint8_t which;


@ -158,6 +158,13 @@ typedef enum {
/* Used for function call generation. */
#define TCG_TARGET_CALL_STACK_OFFSET 0
#define TCG_TARGET_STACK_ALIGN 8
#if TCG_TARGET_REG_BITS == 32
# define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_EVEN
# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN
#else
# define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
#endif
#define HAVE_TCG_QEMU_TB_EXEC
#define TCG_TARGET_NEED_POOL_LABELS


@ -39,6 +39,9 @@ signals: LDFLAGS+=-lrt -lpthread
munmap-pthread: CFLAGS+=-pthread
munmap-pthread: LDFLAGS+=-pthread
vma-pthread: CFLAGS+=-pthread
vma-pthread: LDFLAGS+=-pthread
# We define the runner for test-mmap after the individual
# architectures have defined their supported pages sizes. If no
# additional page sizes are defined we only run the default test.


@ -7,21 +7,7 @@
#include <sys/mman.h>
#include <unistd.h>
static const char nop_func[] = {
#if defined(__aarch64__)
0xc0, 0x03, 0x5f, 0xd6, /* ret */
#elif defined(__alpha__)
0x01, 0x80, 0xFA, 0x6B, /* ret */
#elif defined(__arm__)
0x1e, 0xff, 0x2f, 0xe1, /* bx lr */
#elif defined(__riscv)
0x67, 0x80, 0x00, 0x00, /* ret */
#elif defined(__s390__)
0x07, 0xfe, /* br %r14 */
#elif defined(__i386__) || defined(__x86_64__)
0xc3, /* ret */
#endif
};
#include "nop_func.h"
static void *thread_mmap_munmap(void *arg)
{


@ -0,0 +1,25 @@
/*
* No-op functions that can be safely copied.
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
#ifndef NOP_FUNC_H
#define NOP_FUNC_H
static const char nop_func[] = {
#if defined(__aarch64__)
0xc0, 0x03, 0x5f, 0xd6, /* ret */
#elif defined(__alpha__)
0x01, 0x80, 0xFA, 0x6B, /* ret */
#elif defined(__arm__)
0x1e, 0xff, 0x2f, 0xe1, /* bx lr */
#elif defined(__riscv)
0x67, 0x80, 0x00, 0x00, /* ret */
#elif defined(__s390__)
0x07, 0xfe, /* br %r14 */
#elif defined(__i386__) || defined(__x86_64__)
0xc3, /* ret */
#endif
};
#endif


@ -0,0 +1,207 @@
/*
* Test that VMA updates do not race.
*
* SPDX-License-Identifier: GPL-2.0-or-later
*
* Map a contiguous chunk of RWX memory. Split it into 8 equally sized
* regions, each of which is guaranteed to have a certain combination of
* protection bits set.
*
* Reader, writer and executor threads perform the respective operations on
* pages, which are guaranteed to have the respective protection bit set.
* Two mutator threads change the non-fixed protection bits randomly.
*/
#include <assert.h>
#include <fcntl.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>
#include <time.h> /* clock_gettime(), time() */
#include "nop_func.h"
#define PAGE_IDX_BITS 10
#define PAGE_COUNT (1 << PAGE_IDX_BITS)
#define PAGE_IDX_MASK (PAGE_COUNT - 1)
#define REGION_IDX_BITS 3
#define PAGE_IDX_R_MASK (1 << 7)
#define PAGE_IDX_W_MASK (1 << 8)
#define PAGE_IDX_X_MASK (1 << 9)
#define REGION_MASK (PAGE_IDX_R_MASK | PAGE_IDX_W_MASK | PAGE_IDX_X_MASK)
#define PAGES_PER_REGION (1 << (PAGE_IDX_BITS - REGION_IDX_BITS))
struct context {
int pagesize;
char *ptr;
int dev_null_fd;
volatile int mutator_count;
};
static void *thread_read(void *arg)
{
struct context *ctx = arg;
ssize_t sret;
size_t i, j;
int ret;
for (i = 0; ctx->mutator_count; i++) {
char *p;
j = (i & PAGE_IDX_MASK) | PAGE_IDX_R_MASK;
p = &ctx->ptr[j * ctx->pagesize];
/* Read directly. */
ret = memcmp(p, nop_func, sizeof(nop_func));
if (ret != 0) {
fprintf(stderr, "fail direct read %p\n", p);
abort();
}
/* Read indirectly. */
sret = write(ctx->dev_null_fd, p, 1);
if (sret != 1) {
if (sret < 0) {
fprintf(stderr, "fail indirect read %p (%m)\n", p);
} else {
fprintf(stderr, "fail indirect read %p (%zd)\n", p, sret);
}
abort();
}
}
return NULL;
}
static void *thread_write(void *arg)
{
struct context *ctx = arg;
struct timespec *ts;
size_t i, j;
int ret;
for (i = 0; ctx->mutator_count; i++) {
j = (i & PAGE_IDX_MASK) | PAGE_IDX_W_MASK;
/* Write directly. */
memcpy(&ctx->ptr[j * ctx->pagesize], nop_func, sizeof(nop_func));
/* Write using a syscall. */
ts = (struct timespec *)(&ctx->ptr[(j + 1) * ctx->pagesize] -
sizeof(struct timespec));
ret = clock_gettime(CLOCK_REALTIME, ts);
if (ret != 0) {
fprintf(stderr, "fail indirect write %p (%m)\n", ts);
abort();
}
}
return NULL;
}
static void *thread_execute(void *arg)
{
struct context *ctx = arg;
size_t i, j;
for (i = 0; ctx->mutator_count; i++) {
j = (i & PAGE_IDX_MASK) | PAGE_IDX_X_MASK;
((void(*)(void))&ctx->ptr[j * ctx->pagesize])();
}
return NULL;
}
static void *thread_mutate(void *arg)
{
size_t i, start_idx, end_idx, page_idx, tmp;
struct context *ctx = arg;
unsigned int seed;
int prot, ret;
seed = (unsigned int)time(NULL);
for (i = 0; i < 10000; i++) {
start_idx = rand_r(&seed) & PAGE_IDX_MASK;
end_idx = rand_r(&seed) & PAGE_IDX_MASK;
if (start_idx > end_idx) {
tmp = start_idx;
start_idx = end_idx;
end_idx = tmp;
}
prot = rand_r(&seed) & (PROT_READ | PROT_WRITE | PROT_EXEC);
for (page_idx = start_idx & REGION_MASK; page_idx <= end_idx;
page_idx += PAGES_PER_REGION) {
if (page_idx & PAGE_IDX_R_MASK) {
prot |= PROT_READ;
}
if (page_idx & PAGE_IDX_W_MASK) {
/* FIXME: qemu syscalls check for both read+write. */
prot |= PROT_WRITE | PROT_READ;
}
if (page_idx & PAGE_IDX_X_MASK) {
prot |= PROT_EXEC;
}
}
ret = mprotect(&ctx->ptr[start_idx * ctx->pagesize],
(end_idx - start_idx + 1) * ctx->pagesize, prot);
assert(ret == 0);
}
__atomic_fetch_sub(&ctx->mutator_count, 1, __ATOMIC_SEQ_CST);
return NULL;
}
int main(void)
{
pthread_t threads[5];
struct context ctx;
size_t i;
int ret;
/* Without a template, nothing to test. */
if (sizeof(nop_func) == 0) {
return EXIT_SUCCESS;
}
/* Initialize memory chunk. */
ctx.pagesize = getpagesize();
ctx.ptr = mmap(NULL, PAGE_COUNT * ctx.pagesize,
PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
assert(ctx.ptr != MAP_FAILED);
for (i = 0; i < PAGE_COUNT; i++) {
memcpy(&ctx.ptr[i * ctx.pagesize], nop_func, sizeof(nop_func));
}
ctx.dev_null_fd = open("/dev/null", O_WRONLY);
assert(ctx.dev_null_fd >= 0);
ctx.mutator_count = 2;
/* Start threads. */
ret = pthread_create(&threads[0], NULL, thread_read, &ctx);
assert(ret == 0);
ret = pthread_create(&threads[1], NULL, thread_write, &ctx);
assert(ret == 0);
ret = pthread_create(&threads[2], NULL, thread_execute, &ctx);
assert(ret == 0);
for (i = 3; i <= 4; i++) {
ret = pthread_create(&threads[i], NULL, thread_mutate, &ctx);
assert(ret == 0);
}
/* Wait for threads to stop. */
for (i = 0; i < sizeof(threads) / sizeof(threads[0]); i++) {
ret = pthread_join(threads[i], NULL);
assert(ret == 0);
}
/* Destroy memory chunk. */
ret = close(ctx.dev_null_fd);
assert(ret == 0);
ret = munmap(ctx.ptr, PAGE_COUNT * ctx.pagesize);
assert(ret == 0);
return EXIT_SUCCESS;
}