mirror of https://github.com/xemu-project/xemu.git
target/ppc: Moved VMLADDUHM to decodetree and use gvec
This patch moves VMLADDUHM to decodetree and creates a gvec implementation using mul_vec and add_vec.

rept    loop     master        patch
8       12500    0,01810500    0,00903100 (-50.1%)
25      4000     0,01739400    0,00747700 (-57.0%)
100     1000     0,01843600    0,00901400 (-51.1%)
500     200      0,02574600    0,01971000 (-23.4%)
2500    40       0,05921600    0,07121800 (+20.3%)
8000    12       0,15326700    0,21725200 (+41.7%)

The significant difference in performance when REPT is low and LOOP is high is, I think, due to the fact that the new implementation has a higher translation time: when using a helper only 5 TCGops are used, but with the patch a total of 10 TCGops are needed (Power lacks a direct mul_vec equivalent, so this instruction is implemented with the help of 5 others: vmuleu, vmulou, vmrgh, vmrgl and vpkum).

Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20221019125040.48028-2-lucas.araujo@eldorado.org.br>
Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
This commit is contained in:
parent
b35bf5f2d7
commit
dc46167a22
|
@ -264,7 +264,7 @@ DEF_HELPER_FLAGS_4(VMSUMUHM, TCG_CALL_NO_RWG, void, avr, avr, avr, avr)
|
||||||
DEF_HELPER_5(VMSUMUHS, void, env, avr, avr, avr, avr)
|
DEF_HELPER_5(VMSUMUHS, void, env, avr, avr, avr, avr)
|
||||||
DEF_HELPER_FLAGS_4(VMSUMSHM, TCG_CALL_NO_RWG, void, avr, avr, avr, avr)
|
DEF_HELPER_FLAGS_4(VMSUMSHM, TCG_CALL_NO_RWG, void, avr, avr, avr, avr)
|
||||||
DEF_HELPER_5(VMSUMSHS, void, env, avr, avr, avr, avr)
|
DEF_HELPER_5(VMSUMSHS, void, env, avr, avr, avr, avr)
|
||||||
DEF_HELPER_FLAGS_4(vmladduhm, TCG_CALL_NO_RWG, void, avr, avr, avr, avr)
|
DEF_HELPER_FLAGS_5(VMLADDUHM, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32)
|
||||||
DEF_HELPER_FLAGS_2(mtvscr, TCG_CALL_NO_RWG, void, env, i32)
|
DEF_HELPER_FLAGS_2(mtvscr, TCG_CALL_NO_RWG, void, env, i32)
|
||||||
DEF_HELPER_FLAGS_1(mfvscr, TCG_CALL_NO_RWG, i32, env)
|
DEF_HELPER_FLAGS_1(mfvscr, TCG_CALL_NO_RWG, i32, env)
|
||||||
DEF_HELPER_3(lvebx, void, env, avr, tl)
|
DEF_HELPER_3(lvebx, void, env, avr, tl)
|
||||||
|
|
|
@ -693,6 +693,8 @@ VMSUMUHS 000100 ..... ..... ..... ..... 100111 @VA
|
||||||
VMSUMCUD 000100 ..... ..... ..... ..... 010111 @VA
|
VMSUMCUD 000100 ..... ..... ..... ..... 010111 @VA
|
||||||
VMSUMUDM 000100 ..... ..... ..... ..... 100011 @VA
|
VMSUMUDM 000100 ..... ..... ..... ..... 100011 @VA
|
||||||
|
|
||||||
|
VMLADDUHM 000100 ..... ..... ..... ..... 100010 @VA
|
||||||
|
|
||||||
## Vector String Instructions
|
## Vector String Instructions
|
||||||
|
|
||||||
VSTRIBL 000100 ..... 00000 ..... . 0000001101 @VX_tb_rc
|
VSTRIBL 000100 ..... 00000 ..... . 0000001101 @VX_tb_rc
|
||||||
|
|
|
@ -974,7 +974,8 @@ void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
|
void helper_VMLADDUHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c,
|
||||||
|
uint32_t v)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
|
|
|
@ -6855,7 +6855,6 @@ GEN_HANDLER(lvsl, 0x1f, 0x06, 0x00, 0x00000001, PPC_ALTIVEC),
|
||||||
GEN_HANDLER(lvsr, 0x1f, 0x06, 0x01, 0x00000001, PPC_ALTIVEC),
|
GEN_HANDLER(lvsr, 0x1f, 0x06, 0x01, 0x00000001, PPC_ALTIVEC),
|
||||||
GEN_HANDLER(mfvscr, 0x04, 0x2, 0x18, 0x001ff800, PPC_ALTIVEC),
|
GEN_HANDLER(mfvscr, 0x04, 0x2, 0x18, 0x001ff800, PPC_ALTIVEC),
|
||||||
GEN_HANDLER(mtvscr, 0x04, 0x2, 0x19, 0x03ff0000, PPC_ALTIVEC),
|
GEN_HANDLER(mtvscr, 0x04, 0x2, 0x19, 0x03ff0000, PPC_ALTIVEC),
|
||||||
GEN_HANDLER(vmladduhm, 0x04, 0x11, 0xFF, 0x00000000, PPC_ALTIVEC),
|
|
||||||
#if defined(TARGET_PPC64)
|
#if defined(TARGET_PPC64)
|
||||||
GEN_HANDLER_E(maddhd_maddhdu, 0x04, 0x18, 0xFF, 0x00000000, PPC_NONE,
|
GEN_HANDLER_E(maddhd_maddhdu, 0x04, 0x18, 0xFF, 0x00000000, PPC_NONE,
|
||||||
PPC2_ISA300),
|
PPC2_ISA300),
|
||||||
|
|
|
@ -2523,24 +2523,6 @@ static void glue(gen_, name0##_##name1)(DisasContext *ctx) \
|
||||||
|
|
||||||
GEN_VAFORM_PAIRED(vmhaddshs, vmhraddshs, 16)
|
GEN_VAFORM_PAIRED(vmhaddshs, vmhraddshs, 16)
|
||||||
|
|
||||||
static void gen_vmladduhm(DisasContext *ctx)
|
|
||||||
{
|
|
||||||
TCGv_ptr ra, rb, rc, rd;
|
|
||||||
if (unlikely(!ctx->altivec_enabled)) {
|
|
||||||
gen_exception(ctx, POWERPC_EXCP_VPU);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
ra = gen_avr_ptr(rA(ctx->opcode));
|
|
||||||
rb = gen_avr_ptr(rB(ctx->opcode));
|
|
||||||
rc = gen_avr_ptr(rC(ctx->opcode));
|
|
||||||
rd = gen_avr_ptr(rD(ctx->opcode));
|
|
||||||
gen_helper_vmladduhm(rd, ra, rb, rc);
|
|
||||||
tcg_temp_free_ptr(ra);
|
|
||||||
tcg_temp_free_ptr(rb);
|
|
||||||
tcg_temp_free_ptr(rc);
|
|
||||||
tcg_temp_free_ptr(rd);
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool do_va_helper(DisasContext *ctx, arg_VA *a,
|
static bool do_va_helper(DisasContext *ctx, arg_VA *a,
|
||||||
void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr))
|
void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr))
|
||||||
{
|
{
|
||||||
|
@ -2569,6 +2551,36 @@ TRANS_FLAGS2(ALTIVEC_207, VSUBECUQ, do_va_helper, gen_helper_VSUBECUQ)
|
||||||
TRANS_FLAGS(ALTIVEC, VPERM, do_va_helper, gen_helper_VPERM)
|
TRANS_FLAGS(ALTIVEC, VPERM, do_va_helper, gen_helper_VPERM)
|
||||||
TRANS_FLAGS2(ISA300, VPERMR, do_va_helper, gen_helper_VPERMR)
|
TRANS_FLAGS2(ISA300, VPERMR, do_va_helper, gen_helper_VPERMR)
|
||||||
|
|
||||||
|
static void gen_vmladduhm_vec(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b,
|
||||||
|
TCGv_vec c)
|
||||||
|
{
|
||||||
|
tcg_gen_mul_vec(vece, t, a, b);
|
||||||
|
tcg_gen_add_vec(vece, t, t, c);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool trans_VMLADDUHM(DisasContext *ctx, arg_VA *a)
|
||||||
|
{
|
||||||
|
static const TCGOpcode vecop_list[] = {
|
||||||
|
INDEX_op_add_vec, INDEX_op_mul_vec, 0
|
||||||
|
};
|
||||||
|
|
||||||
|
static const GVecGen4 op = {
|
||||||
|
.fno = gen_helper_VMLADDUHM,
|
||||||
|
.fniv = gen_vmladduhm_vec,
|
||||||
|
.opt_opc = vecop_list,
|
||||||
|
.vece = MO_16
|
||||||
|
};
|
||||||
|
|
||||||
|
REQUIRE_INSNS_FLAGS(ctx, ALTIVEC);
|
||||||
|
REQUIRE_VECTOR(ctx);
|
||||||
|
|
||||||
|
tcg_gen_gvec_4(avr_full_offset(a->vrt), avr_full_offset(a->vra),
|
||||||
|
avr_full_offset(a->vrb), avr_full_offset(a->rc),
|
||||||
|
16, 16, &op);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
static bool trans_VSEL(DisasContext *ctx, arg_VA *a)
|
static bool trans_VSEL(DisasContext *ctx, arg_VA *a)
|
||||||
{
|
{
|
||||||
REQUIRE_INSNS_FLAGS(ctx, ALTIVEC);
|
REQUIRE_INSNS_FLAGS(ctx, ALTIVEC);
|
||||||
|
|
Loading…
Reference in New Issue