target/s390x: Use cpu_{ld,st}*_mmu in do_csst

Use cpu_ld16_mmu and cpu_st16_mmu to eliminate the special-case
handling of 128-bit accesses, and change all of the *_data_ra calls
to the corresponding *_mmu functions to match.

Note that we check the alignment of both compare and store
pointers at the top of the function, so MO_ALIGN* may be
safely removed from the individual memory operations.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Richard Henderson 2023-05-19 17:38:41 -07:00
parent fbea7a4084
commit ddc0ab5aab
1 changed file with 27 additions and 39 deletions

View File

@ -1737,6 +1737,11 @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
uint64_t a2, bool parallel) uint64_t a2, bool parallel)
{ {
uint32_t mem_idx = cpu_mmu_index(env, false); uint32_t mem_idx = cpu_mmu_index(env, false);
MemOpIdx oi16 = make_memop_idx(MO_TE | MO_128, mem_idx);
MemOpIdx oi8 = make_memop_idx(MO_TE | MO_64, mem_idx);
MemOpIdx oi4 = make_memop_idx(MO_TE | MO_32, mem_idx);
MemOpIdx oi2 = make_memop_idx(MO_TE | MO_16, mem_idx);
MemOpIdx oi1 = make_memop_idx(MO_8, mem_idx);
uintptr_t ra = GETPC(); uintptr_t ra = GETPC();
uint32_t fc = extract32(env->regs[0], 0, 8); uint32_t fc = extract32(env->regs[0], 0, 8);
uint32_t sc = extract32(env->regs[0], 8, 8); uint32_t sc = extract32(env->regs[0], 8, 8);
@ -1780,15 +1785,17 @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
} }
} }
/* All loads happen before all stores. For simplicity, load the entire /*
store value area from the parameter list. */ * All loads happen before all stores. For simplicity, load the entire
svh = cpu_ldq_data_ra(env, pl + 16, ra); * store value area from the parameter list.
svl = cpu_ldq_data_ra(env, pl + 24, ra); */
svh = cpu_ldq_mmu(env, pl + 16, oi8, ra);
svl = cpu_ldq_mmu(env, pl + 24, oi8, ra);
switch (fc) { switch (fc) {
case 0: case 0:
{ {
uint32_t nv = cpu_ldl_data_ra(env, pl, ra); uint32_t nv = cpu_ldl_mmu(env, pl, oi4, ra);
uint32_t cv = env->regs[r3]; uint32_t cv = env->regs[r3];
uint32_t ov; uint32_t ov;
@ -1801,8 +1808,8 @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
ov = cpu_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi, ra); ov = cpu_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi, ra);
#endif #endif
} else { } else {
ov = cpu_ldl_data_ra(env, a1, ra); ov = cpu_ldl_mmu(env, a1, oi4, ra);
cpu_stl_data_ra(env, a1, (ov == cv ? nv : ov), ra); cpu_stl_mmu(env, a1, (ov == cv ? nv : ov), oi4, ra);
} }
cc = (ov != cv); cc = (ov != cv);
env->regs[r3] = deposit64(env->regs[r3], 32, 32, ov); env->regs[r3] = deposit64(env->regs[r3], 32, 32, ov);
@ -1811,21 +1818,20 @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
case 1: case 1:
{ {
uint64_t nv = cpu_ldq_data_ra(env, pl, ra); uint64_t nv = cpu_ldq_mmu(env, pl, oi8, ra);
uint64_t cv = env->regs[r3]; uint64_t cv = env->regs[r3];
uint64_t ov; uint64_t ov;
if (parallel) { if (parallel) {
#ifdef CONFIG_ATOMIC64 #ifdef CONFIG_ATOMIC64
MemOpIdx oi = make_memop_idx(MO_TEUQ | MO_ALIGN, mem_idx); ov = cpu_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi8, ra);
ov = cpu_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi, ra);
#else #else
/* Note that we asserted !parallel above. */ /* Note that we asserted !parallel above. */
g_assert_not_reached(); g_assert_not_reached();
#endif #endif
} else { } else {
ov = cpu_ldq_data_ra(env, a1, ra); ov = cpu_ldq_mmu(env, a1, oi8, ra);
cpu_stq_data_ra(env, a1, (ov == cv ? nv : ov), ra); cpu_stq_mmu(env, a1, (ov == cv ? nv : ov), oi8, ra);
} }
cc = (ov != cv); cc = (ov != cv);
env->regs[r3] = ov; env->regs[r3] = ov;
@ -1834,27 +1840,19 @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
case 2: case 2:
{ {
uint64_t nvh = cpu_ldq_data_ra(env, pl, ra); Int128 nv = cpu_ld16_mmu(env, pl, oi16, ra);
uint64_t nvl = cpu_ldq_data_ra(env, pl + 8, ra);
Int128 nv = int128_make128(nvl, nvh);
Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]); Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
Int128 ov; Int128 ov;
if (!parallel) { if (!parallel) {
uint64_t oh = cpu_ldq_data_ra(env, a1 + 0, ra); ov = cpu_ld16_mmu(env, a1, oi16, ra);
uint64_t ol = cpu_ldq_data_ra(env, a1 + 8, ra);
ov = int128_make128(ol, oh);
cc = !int128_eq(ov, cv); cc = !int128_eq(ov, cv);
if (cc) { if (cc) {
nv = ov; nv = ov;
} }
cpu_st16_mmu(env, a1, nv, oi16, ra);
cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra);
cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra);
} else if (HAVE_CMPXCHG128) { } else if (HAVE_CMPXCHG128) {
MemOpIdx oi = make_memop_idx(MO_TE | MO_128 | MO_ALIGN, mem_idx); ov = cpu_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi16, ra);
ov = cpu_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra);
cc = !int128_eq(ov, cv); cc = !int128_eq(ov, cv);
} else { } else {
/* Note that we asserted !parallel above. */ /* Note that we asserted !parallel above. */
@ -1876,29 +1874,19 @@ static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
if (cc == 0) { if (cc == 0) {
switch (sc) { switch (sc) {
case 0: case 0:
cpu_stb_data_ra(env, a2, svh >> 56, ra); cpu_stb_mmu(env, a2, svh >> 56, oi1, ra);
break; break;
case 1: case 1:
cpu_stw_data_ra(env, a2, svh >> 48, ra); cpu_stw_mmu(env, a2, svh >> 48, oi2, ra);
break; break;
case 2: case 2:
cpu_stl_data_ra(env, a2, svh >> 32, ra); cpu_stl_mmu(env, a2, svh >> 32, oi4, ra);
break; break;
case 3: case 3:
cpu_stq_data_ra(env, a2, svh, ra); cpu_stq_mmu(env, a2, svh, oi8, ra);
break; break;
case 4: case 4:
if (!parallel) { cpu_st16_mmu(env, a2, int128_make128(svl, svh), oi16, ra);
cpu_stq_data_ra(env, a2 + 0, svh, ra);
cpu_stq_data_ra(env, a2 + 8, svl, ra);
} else if (HAVE_ATOMIC128) {
MemOpIdx oi = make_memop_idx(MO_TEUQ | MO_ALIGN_16, mem_idx);
Int128 sv = int128_make128(svl, svh);
cpu_atomic_sto_be_mmu(env, a2, sv, oi, ra);
} else {
/* Note that we asserted !parallel above. */
g_assert_not_reached();
}
break; break;
default: default:
g_assert_not_reached(); g_assert_not_reached();