util: Enhance flush_icache_range with separate data pointer

We are shortly going to have a split rw/rx jit buffer.  Depending on
the host, we need to flush the dcache at the rw data pointer and
flush the icache at the rx code pointer.  For now, the two passed
pointers are identical, so there is no effective change in behaviour.

Reviewed-by: Joelle van Dyne <j@getutm.app>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
commit 1da8de39a3 (parent df5d2b1658)
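Why the new signature takes two pointers: with a split JIT buffer the same physical pages are mapped twice, once writable (rw) and once executable (rx), so the dcache must be flushed at the address the code bytes were written through and the icache invalidated at the address they will be fetched from. A minimal sketch of such a setup (not part of this commit; Linux-only, assuming memfd_create, all error handling omitted):

    #define _GNU_SOURCE
    #include <stdint.h>
    #include <stddef.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/mman.h>

    /* Declared in include/qemu/cacheflush.h after this commit. */
    void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len);

    /* Write code through the rw mapping, execute through the rx mapping. */
    static void emit_and_run(const void *code, size_t len)
    {
        size_t size = sysconf(_SC_PAGESIZE);
        int fd = memfd_create("jit", 0);   /* Linux-specific */
        ftruncate(fd, size);

        char *rw = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        char *rx = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);

        memcpy(rw, code, len);             /* emit via the rw mapping */

        /* dcache flush at the write address, icache flush at the exec address */
        flush_idcache_range((uintptr_t)rx, (uintptr_t)rw, len);

        ((void (*)(void))rx)();            /* run via the rx mapping */
    }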
--- a/include/qemu/cacheflush.h
+++ b/include/qemu/cacheflush.h
@@ -8,16 +8,27 @@
 #ifndef QEMU_CACHEFLUSH_H
 #define QEMU_CACHEFLUSH_H
 
+/**
+ * flush_idcache_range:
+ * @rx: instruction address
+ * @rw: data address
+ * @len: length to flush
+ *
+ * Flush @len bytes of the data cache at @rw and the icache at @rx
+ * to bring them in sync.  The two addresses may be different virtual
+ * mappings of the same physical page(s).
+ */
+
 #if defined(__i386__) || defined(__x86_64__) || defined(__s390__)
 
-static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
+static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
 {
     /* icache is coherent and does not require flushing. */
 }
 
 #else
 
-void flush_icache_range(uintptr_t start, uintptr_t stop);
+void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len);
 
 #endif
 
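Every caller converted below follows the same mechanical pattern: nothing splits the buffer yet, so the same pointer is passed for both rx and rw, and the old end pointer becomes a length. Schematically (illustrative, not lines from the patch):

    /* before: a [start, stop) range on a single mapping */
    flush_icache_range(start, start + len);

    /* after: one mapping passed as both rx and rw; length replaces the end */
    flush_idcache_range(start, start, len);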
--- a/softmmu/physmem.c
+++ b/softmmu/physmem.c
@@ -2946,7 +2946,7 @@ static inline MemTxResult address_space_write_rom_internal(AddressSpace *as,
                 invalidate_and_set_dirty(mr, addr1, l);
                 break;
             case FLUSH_CACHE:
-                flush_icache_range((uintptr_t)ram_ptr, (uintptr_t)ram_ptr + l);
+                flush_idcache_range((uintptr_t)ram_ptr, (uintptr_t)ram_ptr, l);
                 break;
             }
         }
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -1363,7 +1363,7 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
     }
     pair = (uint64_t)i2 << 32 | i1;
     qatomic_set((uint64_t *)jmp_addr, pair);
-    flush_icache_range(jmp_addr, jmp_addr + 8);
+    flush_idcache_range(jmp_addr, jmp_addr, 8);
 }
 
 static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -2660,7 +2660,7 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
                               uintptr_t addr)
 {
     qatomic_set((uint32_t *)jmp_addr, deposit32(OPC_J, 0, 26, addr >> 2));
-    flush_icache_range(jmp_addr, jmp_addr + 4);
+    flush_idcache_range(jmp_addr, jmp_addr, 4);
 }
 
 typedef struct {
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -1753,12 +1753,12 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
         /* As per the enclosing if, this is ppc64.  Avoid the _Static_assert
            within qatomic_set that would fail to build a ppc32 host.  */
         qatomic_set__nocheck((uint64_t *)jmp_addr, pair);
-        flush_icache_range(jmp_addr, jmp_addr + 8);
+        flush_idcache_range(jmp_addr, jmp_addr, 8);
     } else {
         intptr_t diff = addr - jmp_addr;
         tcg_debug_assert(in_range_b(diff));
         qatomic_set((uint32_t *)jmp_addr, B | (diff & 0x3fffffc));
-        flush_icache_range(jmp_addr, jmp_addr + 4);
+        flush_idcache_range(jmp_addr, jmp_addr, 4);
     }
 }
 
--- a/tcg/sparc/tcg-target.c.inc
+++ b/tcg/sparc/tcg-target.c.inc
@@ -1836,7 +1836,7 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
     if (!USE_REG_TB) {
         qatomic_set((uint32_t *)jmp_addr,
                     deposit32(CALL, 0, 30, br_disp >> 2));
-        flush_icache_range(jmp_addr, jmp_addr + 4);
+        flush_idcache_range(jmp_addr, jmp_addr, 4);
         return;
     }
 
@@ -1860,5 +1860,5 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
     }
 
     qatomic_set((uint64_t *)jmp_addr, deposit64(i2, 32, 32, i1));
-    flush_icache_range(jmp_addr, jmp_addr + 8);
+    flush_idcache_range(jmp_addr, jmp_addr, 8);
 }
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1079,7 +1079,8 @@ void tcg_prologue_init(TCGContext *s)
 
     buf1 = s->code_ptr;
 #ifndef CONFIG_TCG_INTERPRETER
-    flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);
+    flush_idcache_range((uintptr_t)buf0, (uintptr_t)buf0,
+                        tcg_ptr_byte_diff(buf1, buf0));
 #endif
 
     /* Deduct the prologue from the buffer.  */
@@ -4328,7 +4329,8 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
 
 #ifndef CONFIG_TCG_INTERPRETER
     /* flush instruction cache */
-    flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
+    flush_idcache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_buf,
+                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
 #endif
 
     return tcg_current_code_size(s);
--- a/util/cacheflush.c
+++ b/util/cacheflush.c
@@ -21,29 +21,32 @@
 #include <sys/cachectl.h>
 #endif
 
-void flush_icache_range(uintptr_t start, uintptr_t stop)
+void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
 {
-    cacheflush((void *)start, stop - start, ICACHE);
+    if (rx != rw) {
+        cacheflush((void *)rw, len, DCACHE);
+    }
+    cacheflush((void *)rx, len, ICACHE);
 }
 
 #elif defined(__powerpc__)
 
-void flush_icache_range(uintptr_t start, uintptr_t stop)
+void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
 {
-    uintptr_t p, start1, stop1;
+    uintptr_t p, b, e;
     size_t dsize = qemu_dcache_linesize;
     size_t isize = qemu_icache_linesize;
 
-    start1 = start & ~(dsize - 1);
-    stop1 = (stop + dsize - 1) & ~(dsize - 1);
-    for (p = start1; p < stop1; p += dsize) {
+    b = rw & ~(dsize - 1);
+    e = (rw + len + dsize - 1) & ~(dsize - 1);
+    for (p = b; p < e; p += dsize) {
         asm volatile ("dcbst 0,%0" : : "r"(p) : "memory");
     }
     asm volatile ("sync" : : : "memory");
 
-    start &= start & ~(isize - 1);
-    stop1 = (stop + isize - 1) & ~(isize - 1);
-    for (p = start1; p < stop1; p += isize) {
+    b = rx & ~(isize - 1);
+    e = (rx + len + isize - 1) & ~(isize - 1);
+    for (p = b; p < e; p += isize) {
         asm volatile ("icbi 0,%0" : : "r"(p) : "memory");
     }
     asm volatile ("sync" : : : "memory");
@@ -52,20 +55,23 @@ void flush_icache_range(uintptr_t start, uintptr_t stop)
 
 #elif defined(__sparc__)
 
-void flush_icache_range(uintptr_t start, uintptr_t stop)
+void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
 {
-    uintptr_t p;
-
-    for (p = start & -8; p < ((stop + 7) & -8); p += 8) {
+    /* No additional data flush to the RW virtual address required. */
+    uintptr_t p, end = (rx + len + 7) & -8;
+    for (p = rx & -8; p < end; p += 8) {
         __asm__ __volatile__("flush\t%0" : : "r" (p));
     }
 }
 
 #else
 
-void flush_icache_range(uintptr_t start, uintptr_t stop)
+void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
 {
-    __builtin___clear_cache((char *)start, (char *)stop);
+    if (rw != rx) {
+        __builtin___clear_cache((char *)rw, (char *)rw + len);
+    }
+    __builtin___clear_cache((char *)rx, (char *)rx + len);
 }
 
 #endif
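The line-rounding arithmetic introduced in the ppc path above deserves a worked example: each loop aligns the start down and rounds the end up to a cache-line boundary, so partial lines at either end of the range are covered (a standalone illustration, not QEMU code; assumes the line size is a power of two):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uintptr_t rw = 0x1009, dsize = 16;
        size_t len = 16;

        uintptr_t b = rw & ~(dsize - 1);                      /* align start down: 0x1000 */
        uintptr_t e = (rw + len + dsize - 1) & ~(dsize - 1);  /* round end up: 0x1020 */

        /* [0x1009, 0x1019) straddles two 16-byte lines, and the loop
         * visits exactly those two: 0x1000 and 0x1010. */
        for (uintptr_t p = b; p < e; p += dsize) {
            printf("flush line at %#lx\n", (unsigned long)p);
        }
        return 0;
    }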
--- a/util/cacheinfo.c
+++ b/util/cacheinfo.c
@@ -166,9 +166,11 @@ static void fallback_cache_info(int *isize, int *dsize)
         *isize = *dsize;
     } else {
 #if defined(_ARCH_PPC)
-        /* For PPC, we're going to use the icache size computed for
-           flush_icache_range.  Which means that we must use the
-           architecture minimum.  */
+        /*
+         * For PPC, we're going to use the cache sizes computed for
+         * flush_idcache_range.  Which means that we must use the
+         * architecture minimum.
+         */
         *isize = *dsize = 16;
 #else
         /* Otherwise, 64 bytes is not uncommon. */