target-arm: A64: Implement DC ZVA

Implement the DC ZVA instruction, which zeroes a block of memory.
The fast path obtains a pointer to the underlying RAM via the TCG TLB
data structure so we can do a direct memset(), with fallback to a
simple byte-store loop in the slow path.
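
For orientation, the new helper's control flow condenses to this sketch
(illustrative only: it assumes the block fits within one target page and
omits the TLB-refill retry that the committed helper.c code below performs):

    static void dc_zva_sketch(CPUARMState *env, uint64_t vaddr)
    {
        ARMCPU *cpu = arm_env_get_cpu(env);
        uint64_t blocklen = 4 << cpu->dcz_blocksize;  /* block size in bytes */
        /* Fast path: ask the softmmu TLB for a direct host pointer to RAM */
        void *host = tlb_vaddr_to_host(env, vaddr, 1, cpu_mmu_index(env));

        if (host) {
            memset(host, 0, blocklen);
        } else {
            /* Slow path: byte stores through the softmmu helpers */
            uint64_t i;
            for (i = 0; i < blocklen; i++) {
                helper_ret_stb_mmu(env, vaddr + i, 0,
                                   cpu_mmu_index(env), GETRA());
            }
        }
    }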

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Acked-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
Peter Maydell 2014-04-15 19:18:41 +01:00
parent 9225d739e7
commit aca3f40b37
7 changed files with 180 additions and 6 deletions


@@ -162,3 +162,55 @@
#define stw(p, v) stw_data(p, v)
#define stl(p, v) stl_data(p, v)
#define stq(p, v) stq_data(p, v)
/**
* tlb_vaddr_to_host:
* @env: CPUArchState
* @addr: guest virtual address to look up
* @access_type: 0 for read, 1 for write, 2 for execute
* @mmu_idx: MMU index to use for lookup
*
* Look up the specified guest virtual address in the TCG softmmu TLB.
* If the TLB contains a host virtual address suitable for direct RAM
* access, then return it. Otherwise (TLB miss, TLB entry is for an
* I/O access, etc) return NULL.
*
* This is the equivalent of the initial fast-path code used by
* TCG backends for guest load and store accesses.
*/
static inline void *tlb_vaddr_to_host(CPUArchState *env, target_ulong addr,
int access_type, int mmu_idx)
{
int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
CPUTLBEntry *tlbentry = &env->tlb_table[mmu_idx][index];
target_ulong tlb_addr;
uintptr_t haddr;
switch (access_type) {
case 0:
tlb_addr = tlbentry->addr_read;
break;
case 1:
tlb_addr = tlbentry->addr_write;
break;
case 2:
tlb_addr = tlbentry->addr_code;
break;
default:
g_assert_not_reached();
}
if ((addr & TARGET_PAGE_MASK)
!= (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
/* TLB entry is for a different page */
return NULL;
}
if (tlb_addr & ~TARGET_PAGE_MASK) {
/* IO access */
return NULL;
}
haddr = addr + env->tlb_table[mmu_idx][index].addend;
return (void *)haddr;
}
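
A usage illustration (not part of this commit; the surrounding variables are
assumed): probe the TLB for a host pointer, and fall back to softmmu stores
when it returns NULL:

    void *host = tlb_vaddr_to_host(env, vaddr, 1, cpu_mmu_index(env));
    if (host) {
        memset(host, 0, TARGET_PAGE_SIZE);  /* direct write into guest RAM */
    } else {
        /* NULL means TLB miss or an I/O-backed page: take a slow path */
    }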


@@ -150,6 +150,8 @@ typedef struct ARMCPU {
    uint32_t reset_cbar;
    uint32_t reset_auxcr;
    bool reset_hivecs;
    /* DCZ blocksize, in log_2(words), ie low 4 bits of DCZID_EL0 */
    uint32_t dcz_blocksize;
} ARMCPU;

#define TYPE_AARCH64_CPU "aarch64-cpu"


@@ -758,7 +758,8 @@ static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid)
#define ARM_CP_WFI (ARM_CP_SPECIAL | (2 << 8))
#define ARM_CP_NZCV (ARM_CP_SPECIAL | (3 << 8))
#define ARM_CP_CURRENTEL (ARM_CP_SPECIAL | (4 << 8))
#define ARM_CP_DC_ZVA (ARM_CP_SPECIAL | (5 << 8))
#define ARM_LAST_SPECIAL ARM_CP_DC_ZVA
/* Used only as a terminator for ARMCPRegInfo lists */
#define ARM_CP_SENTINEL 0xffff
/* Mask of only the flag bits in a type field */


@@ -46,6 +46,7 @@ static void aarch64_any_initfn(Object *obj)
    set_feature(&cpu->env, ARM_FEATURE_V7MP);
    set_feature(&cpu->env, ARM_FEATURE_AARCH64);
    cpu->ctr = 0x80030003; /* 32 byte I and D cacheline size, VIPT icache */
    cpu->dcz_blocksize = 7; /* 512 bytes */
}
#endif


@@ -10,6 +10,8 @@
#include <zlib.h> /* For crc32 */

#ifndef CONFIG_USER_ONLY
#include "exec/softmmu_exec.h"

static inline int get_phys_addr(CPUARMState *env, target_ulong address,
                                int access_type, int is_user,
                                hwaddr *phys_ptr, int *prot,
@@ -1745,6 +1747,29 @@ static void tlbi_aa64_asid_write(CPUARMState *env, const ARMCPRegInfo *ri,
    tlb_flush(CPU(cpu), asid == 0);
}

static CPAccessResult aa64_zva_access(CPUARMState *env, const ARMCPRegInfo *ri)
{
/* We don't implement EL2, so the only control on DC ZVA is the
* bit in the SCTLR which can prohibit access for EL0.
*/
if (arm_current_pl(env) == 0 && !(env->cp15.c1_sys & SCTLR_DZE)) {
return CP_ACCESS_TRAP;
}
return CP_ACCESS_OK;
}

static uint64_t aa64_dczid_read(CPUARMState *env, const ARMCPRegInfo *ri)
{
    ARMCPU *cpu = arm_env_get_cpu(env);
    int dzp_bit = 1 << 4;

    /* DZP indicates whether DC ZVA access is prohibited */
    if (aa64_zva_access(env, NULL) == CP_ACCESS_OK) {
        dzp_bit = 0;
    }
    return cpu->dcz_blocksize | dzp_bit;
}
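
For illustration, a guest decoding the DCZID_EL0 value produced above would
do roughly the following (a hedged sketch; dczid_block_bytes is a
hypothetical name, not commit code):

    static uint64_t dczid_block_bytes(uint64_t dczid)
    {
        if (dczid & (1 << 4)) {
            return 0;               /* DZP set: DC ZVA is prohibited */
        }
        /* Low 4 bits: log2 of the block size in words (4-byte units) */
        return 4 << (dczid & 0xf);  /* dcz_blocksize 7 -> 512 bytes */
    }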

static const ARMCPRegInfo v8_cp_reginfo[] = {
    /* Minimal set of EL0-visible registers. This will need to be expanded
     * significantly for system emulation of AArch64 CPUs.
@@ -1764,13 +1789,18 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
    { .name = "FPSR", .state = ARM_CP_STATE_AA64,
      .opc0 = 3, .opc1 = 3, .opc2 = 1, .crn = 4, .crm = 4,
      .access = PL0_RW, .readfn = aa64_fpsr_read, .writefn = aa64_fpsr_write },
    { .name = "DCZID_EL0", .state = ARM_CP_STATE_AA64,
      .opc0 = 3, .opc1 = 3, .opc2 = 7, .crn = 0, .crm = 0,
      .access = PL0_R, .type = ARM_CP_NO_MIGRATE,
      .readfn = aa64_dczid_read },
    { .name = "DC_ZVA", .state = ARM_CP_STATE_AA64,
      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 4, .opc2 = 1,
      .access = PL0_W, .type = ARM_CP_DC_ZVA,
#ifndef CONFIG_USER_ONLY
      /* Avoid overhead of an access check that always passes in user-mode */
      .accessfn = aa64_zva_access,
#endif
    },
    { .name = "CURRENTEL", .state = ARM_CP_STATE_AA64,
      .opc0 = 3, .opc1 = 0, .opc2 = 2, .crn = 4, .crm = 2,
      .access = PL1_R, .type = ARM_CP_CURRENTEL },
@@ -3930,6 +3960,88 @@ void HELPER(v7m_msr)(CPUARMState *env, uint32_t reg, uint32_t val)
#endif

void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
{
/* Implement DC ZVA, which zeroes a fixed-length block of memory.
* Note that we do not implement the (architecturally mandated)
* alignment fault for attempts to use this on Device memory
* (which matches the usual QEMU behaviour of not implementing either
* alignment faults or any memory attribute handling).
*/
ARMCPU *cpu = arm_env_get_cpu(env);
uint64_t blocklen = 4 << cpu->dcz_blocksize;
uint64_t vaddr = vaddr_in & ~(blocklen - 1);
#ifndef CONFIG_USER_ONLY
{
/* Slightly awkwardly, QEMU's TARGET_PAGE_SIZE may be less than
* the block size so we might have to do more than one TLB lookup.
* We know that in fact for any v8 CPU the page size is at least 4K
* and the block size must be 2K or less, but TARGET_PAGE_SIZE is only
* 1K as an artefact of legacy v5 subpage support being present in the
* same QEMU executable.
*/
int maxidx = DIV_ROUND_UP(blocklen, TARGET_PAGE_SIZE);
void *hostaddr[maxidx];
int try, i;
for (try = 0; try < 2; try++) {
for (i = 0; i < maxidx; i++) {
hostaddr[i] = tlb_vaddr_to_host(env,
vaddr + TARGET_PAGE_SIZE * i,
1, cpu_mmu_index(env));
if (!hostaddr[i]) {
break;
}
}
if (i == maxidx) {
/* If it's all in the TLB it's fair game for just writing to;
* we know we don't need to update dirty status, etc.
*/
for (i = 0; i < maxidx - 1; i++) {
memset(hostaddr[i], 0, TARGET_PAGE_SIZE);
}
memset(hostaddr[i], 0, blocklen - (i * TARGET_PAGE_SIZE));
return;
}
/* OK, try a store and see if we can populate the tlb. This
* might cause an exception if the memory isn't writable,
* in which case we will longjmp out of here. We must for
* this purpose use the actual register value passed to us
* so that we get the fault address right.
*/
helper_ret_stb_mmu(env, vaddr_in, 0, cpu_mmu_index(env), GETRA());
/* Now we can populate the other TLB entries, if any */
for (i = 0; i < maxidx; i++) {
uint64_t va = vaddr + TARGET_PAGE_SIZE * i;
if (va != (vaddr_in & TARGET_PAGE_MASK)) {
helper_ret_stb_mmu(env, va, 0, cpu_mmu_index(env), GETRA());
}
}
}
/* Slow path (probably attempt to do this to an I/O device or
* similar, or clearing of a block of code we have translations
* cached for). Just do a series of byte writes as the architecture
* demands. It's not worth trying to use a cpu_physical_memory_map(),
* memset(), unmap() sequence here because:
* + we'd need to account for the blocksize being larger than a page
* + the direct-RAM access case is almost always going to be dealt
* with in the fastpath code above, so there's no speed benefit
* + we would have to deal with the map returning NULL because the
* bounce buffer was in use
*/
for (i = 0; i < blocklen; i++) {
helper_ret_stb_mmu(env, vaddr + i, 0, cpu_mmu_index(env), GETRA());
}
}
#else
memset(g2h(vaddr), 0, blocklen);
#endif
}
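
As a concrete check of the arithmetic in this helper (a standalone sketch,
not commit code): with dcz_blocksize = 7 the block is 4 << 7 = 512 bytes,
addresses round down to a 512-byte boundary, and with QEMU's 1K
TARGET_PAGE_SIZE the fast path needs only one TLB lookup:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t blocklen = 4 << 7;                   /* 512-byte ZVA block */
        uint64_t vaddr_in = 0x40001234;
        uint64_t vaddr = vaddr_in & ~(blocklen - 1);  /* round down to block */
        uint64_t page = 1024;                         /* TARGET_PAGE_SIZE here */
        uint64_t maxidx = (blocklen + page - 1) / page; /* DIV_ROUND_UP */

        assert(vaddr == 0x40001200);
        assert(maxidx == 1);  /* one tlb_vaddr_to_host() covers the block */
        return 0;
    }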

/* Note that signed overflow is undefined in C. The following routines are
   careful to use unsigned types where modulo arithmetic is required.
   Failure to do so _will_ break on newer gcc. */


@@ -515,6 +515,7 @@ DEF_HELPER_4(crypto_aesmc, void, env, i32, i32, i32)
DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
DEF_HELPER_2(dc_zva, void, env, i64)

#ifdef TARGET_AARCH64
#include "helper-a64.h"


@@ -1335,6 +1335,11 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
        tcg_rt = cpu_reg(s, rt);
        tcg_gen_movi_i64(tcg_rt, s->current_pl << 2);
        return;
case ARM_CP_DC_ZVA:
/* Writes clear the aligned block of memory which rt points into. */
tcg_rt = cpu_reg(s, rt);
gen_helper_dc_zva(cpu_env, tcg_rt);
return;
    default:
        break;
    }