diff --git a/target/mips/tcg/mxu_translate.c b/target/mips/tcg/mxu_translate.c
index f8c3fdb103..6a82849ef7 100644
--- a/target/mips/tcg/mxu_translate.c
+++ b/target/mips/tcg/mxu_translate.c
@@ -326,7 +326,7 @@
  *          ├─ 110111 ─ OPC_MXU_Q16SAR
  *          │                               23..22
  *          ├─ 111000 ─ OPC_MXU__POOL19 ─┬─ 00 ─ OPC_MXU_Q8MUL
- *          │                            └─ 01 ─ OPC_MXU_Q8MULSU
+ *          │                            └─ 10 ─ OPC_MXU_Q8MULSU
  *          │
  *          │                               20..18
  *          ├─ 111001 ─ OPC_MXU__POOL20 ─┬─ 000 ─ OPC_MXU_Q8MOVZ
@@ -404,6 +404,7 @@ enum {
     OPC_MXU_Q16SAR   = 0x37,
     OPC_MXU__POOL19  = 0x38,
     OPC_MXU__POOL20  = 0x39,
+    OPC_MXU__POOL21  = 0x3A,
 };
 
 
@@ -539,7 +540,7 @@ enum {
  */
 enum {
     OPC_MXU_Q8MUL   = 0x00,
-    OPC_MXU_Q8MULSU = 0x01,
+    OPC_MXU_Q8MULSU = 0x02,
 };
 
 /*
@@ -554,6 +555,15 @@ enum {
     OPC_MXU_S32MOVN = 0x05,
 };
 
+/*
+ * MXU pool 21 (Q8MAC/Q8MACSU), minor opcode in bits 23..22
+ */
+enum {
+    OPC_MXU_Q8MAC   = 0x00,
+    OPC_MXU_Q8MACSU = 0x02,
+};
+
+
 /* MXU accumulate add/subtract 1-bit pattern 'aptn1' */
 #define MXU_APTN1_A    0
 #define MXU_APTN1_S    1
@@ -1328,13 +1338,17 @@ static void gen_mxu_s16mad(DisasContext *ctx)
 }
 
 /*
- * Q8MUL   XRa, XRb, XRc, XRd - Parallel unsigned 8 bit pattern multiply
- * Q8MULSU XRa, XRb, XRc, XRd - Parallel signed 8 bit pattern multiply
+ * Q8MUL   XRa, XRb, XRc, XRd - Parallel quad unsigned 8 bit multiply
+ * Q8MULSU XRa, XRb, XRc, XRd - Parallel quad signed x unsigned 8 bit multiply
+ * Q8MAC   XRa, XRb, XRc, XRd - Parallel quad unsigned 8 bit multiply
+ *                               and accumulate (add/sub chosen by aptn2)
+ * Q8MACSU XRa, XRb, XRc, XRd - Parallel quad signed x unsigned 8 bit multiply
+ *                               and accumulate (add/sub chosen by aptn2)
  */
-static void gen_mxu_q8mul_q8mulsu(DisasContext *ctx)
+static void gen_mxu_q8mul_mac(DisasContext *ctx, bool su, bool mac)
 {
     TCGv t0, t1, t2, t3, t4, t5, t6, t7;
-    uint32_t XRa, XRb, XRc, XRd, sel;
+    uint32_t XRa, XRb, XRc, XRd, aptn2;
 
     t0 = tcg_temp_new();
     t1 = tcg_temp_new();
@@ -1349,54 +1363,60 @@ static void gen_mxu_q8mul_q8mulsu(DisasContext *ctx)
     XRb = extract32(ctx->opcode, 10, 4);
     XRc = extract32(ctx->opcode, 14, 4);
     XRd = extract32(ctx->opcode, 18, 4);
-    sel = extract32(ctx->opcode, 22, 2);
+    aptn2 = extract32(ctx->opcode, 24, 2); /* only read when mac is set */
 
     gen_load_mxu_gpr(t3, XRb);
     gen_load_mxu_gpr(t7, XRc);
 
-    if (sel == 0x2) {
-        /* Q8MULSU */
-        tcg_gen_ext8s_tl(t0, t3);
-        tcg_gen_shri_tl(t3, t3, 8);
-        tcg_gen_ext8s_tl(t1, t3);
-        tcg_gen_shri_tl(t3, t3, 8);
-        tcg_gen_ext8s_tl(t2, t3);
-        tcg_gen_shri_tl(t3, t3, 8);
-        tcg_gen_ext8s_tl(t3, t3);
+    if (su) {
+        /* Q8MULSU / Q8MACSU: the four XRb bytes are sign-extended */
+        tcg_gen_sextract_tl(t0, t3,  0, 8);
+        tcg_gen_sextract_tl(t1, t3,  8, 8);
+        tcg_gen_sextract_tl(t2, t3, 16, 8);
+        tcg_gen_sextract_tl(t3, t3, 24, 8);
     } else {
-        /* Q8MUL */
-        tcg_gen_ext8u_tl(t0, t3);
-        tcg_gen_shri_tl(t3, t3, 8);
-        tcg_gen_ext8u_tl(t1, t3);
-        tcg_gen_shri_tl(t3, t3, 8);
-        tcg_gen_ext8u_tl(t2, t3);
-        tcg_gen_shri_tl(t3, t3, 8);
-        tcg_gen_ext8u_tl(t3, t3);
+        /* Q8MUL / Q8MAC: the four XRb bytes are zero-extended */
+        tcg_gen_extract_tl(t0, t3,  0, 8);
+        tcg_gen_extract_tl(t1, t3,  8, 8);
+        tcg_gen_extract_tl(t2, t3, 16, 8);
+        tcg_gen_extract_tl(t3, t3, 24, 8);
     }
 
-    tcg_gen_ext8u_tl(t4, t7);
-    tcg_gen_shri_tl(t7, t7, 8);
-    tcg_gen_ext8u_tl(t5, t7);
-    tcg_gen_shri_tl(t7, t7, 8);
-    tcg_gen_ext8u_tl(t6, t7);
-    tcg_gen_shri_tl(t7, t7, 8);
-    tcg_gen_ext8u_tl(t7, t7);
+    tcg_gen_extract_tl(t4, t7,  0, 8);  /* XRc bytes: always zero-extended */
+    tcg_gen_extract_tl(t5, t7,  8, 8);
+    tcg_gen_extract_tl(t6, t7, 16, 8);
+    tcg_gen_extract_tl(t7, t7, 24, 8);
 
     tcg_gen_mul_tl(t0, t0, t4);
     tcg_gen_mul_tl(t1, t1, t5);
     tcg_gen_mul_tl(t2, t2, t6);
     tcg_gen_mul_tl(t3, t3, t7);
 
-    tcg_gen_andi_tl(t0, t0, 0xFFFF);
-    tcg_gen_andi_tl(t1, t1, 0xFFFF);
-    tcg_gen_andi_tl(t2, t2, 0xFFFF);
-    tcg_gen_andi_tl(t3, t3, 0xFFFF);
+    if (mac) {
+        gen_load_mxu_gpr(t4, XRd);          /* accumulators for t0, t1 */
+        gen_load_mxu_gpr(t5, XRa);          /* accumulators for t2, t3 */
+        tcg_gen_extract_tl(t6, t4,  0, 16);
+        tcg_gen_extract_tl(t7, t4, 16, 16);
+        if (aptn2 & 1) {                    /* bit 0: XRd half - product */
+            tcg_gen_sub_tl(t0, t6, t0);
+            tcg_gen_sub_tl(t1, t7, t1);
+        } else {                            /* XRd half + product */
+            tcg_gen_add_tl(t0, t6, t0);
+            tcg_gen_add_tl(t1, t7, t1);
+        }
+        tcg_gen_extract_tl(t6, t5,  0, 16);
+        tcg_gen_extract_tl(t7, t5, 16, 16);
+        if (aptn2 & 2) {                    /* bit 1: XRa half - product */
+            tcg_gen_sub_tl(t2, t6, t2);
+            tcg_gen_sub_tl(t3, t7, t3);
+        } else {                            /* XRa half + product */
+            tcg_gen_add_tl(t2, t6, t2);
+            tcg_gen_add_tl(t3, t7, t3);
+        }
+    }
 
-    tcg_gen_shli_tl(t1, t1, 16);
-    tcg_gen_shli_tl(t3, t3, 16);
-
-    tcg_gen_or_tl(t0, t0, t1);
-    tcg_gen_or_tl(t1, t2, t3);
+    tcg_gen_deposit_tl(t0, t0, t1, 16, 16); /* t0[31:16] = t1[15:0] */
+    tcg_gen_deposit_tl(t1, t2, t3, 16, 16); /* t1 = t2, [31:16] = t3[15:0] */
 
     gen_store_mxu_gpr(t0, XRd);
     gen_store_mxu_gpr(t1, XRa);
@@ -4548,12 +4568,14 @@ static void decode_opc_mxu__pool18(DisasContext *ctx)
 
 static void decode_opc_mxu__pool19(DisasContext *ctx)
 {
-    uint32_t opcode = extract32(ctx->opcode, 22, 2);
+    uint32_t opcode = extract32(ctx->opcode, 22, 4); /* bits 25..22 */
 
     switch (opcode) {
     case OPC_MXU_Q8MUL:
+        gen_mxu_q8mul_mac(ctx, false, false);
+        break;
     case OPC_MXU_Q8MULSU:
-        gen_mxu_q8mul_q8mulsu(ctx);
+        gen_mxu_q8mul_mac(ctx, true, false);
         break;
     default:
         MIPS_INVAL("decode_opc_mxu");
@@ -4592,6 +4614,25 @@ static void decode_opc_mxu__pool20(DisasContext *ctx)
     }
 }
 
+static void decode_opc_mxu__pool21(DisasContext *ctx)
+{
+    uint32_t opcode = extract32(ctx->opcode, 22, 2);
+
+    switch (opcode) {
+    case OPC_MXU_Q8MAC:
+        gen_mxu_q8mul_mac(ctx, false, true);
+        break;
+    case OPC_MXU_Q8MACSU:
+        gen_mxu_q8mul_mac(ctx, true, true);
+        break;
+    default:
+        MIPS_INVAL("decode_opc_mxu");
+        gen_reserved_instruction(ctx);
+        break;
+    }
+}
+
+
 bool decode_ase_mxu(DisasContext *ctx, uint32_t insn)
 {
     uint32_t opcode = extract32(insn, 0, 6);
@@ -4755,6 +4796,9 @@ bool decode_ase_mxu(DisasContext *ctx, uint32_t insn)
         case OPC_MXU__POOL20:
             decode_opc_mxu__pool20(ctx);
             break;
+        case OPC_MXU__POOL21:
+            decode_opc_mxu__pool21(ctx);
+            break;
         default:
             return false;
         }