mirror of https://github.com/xemu-project/xemu.git
Hexagon (target/hexagon) Add v68 HVX instructions
The following instructions are added V6_v6mpyvubs10_vxx V6_v6mpyhubs10_vxx V6_v6mpyvubs10 V6_v6mpyhubs10 Signed-off-by: Taylor Simpson <tsimpson@quicinc.com> Reviewed-by: Anton Johansson <anjo@rev.ng> Message-Id: <20230427224057.3766963-5-tsimpson@quicinc.com>
This commit is contained in:
parent
860132e295
commit
f128c0fe10
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
|
||||
* Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -730,6 +730,8 @@ DEF_ENC(V6_vmaxb, ICLASS_CJ" 1 111 001 vvvvv PP 0 uuuuu 101 ddddd") //
|
|||
DEF_ENC(V6_vsatuwuh, ICLASS_CJ" 1 111 001 vvvvv PP 0 uuuuu 110 ddddd") //
|
||||
DEF_ENC(V6_vdealb4w, ICLASS_CJ" 1 111 001 vvvvv PP 0 uuuuu 111 ddddd") //
|
||||
|
||||
DEF_ENC(V6_v6mpyvubs10_vxx, ICLASS_CJ" 1 111 001 vvvvv PP 1 uuuuu 0ii xxxxx")
|
||||
DEF_ENC(V6_v6mpyhubs10_vxx, ICLASS_CJ" 1 111 001 vvvvv PP 1 uuuuu 1ii xxxxx")
|
||||
|
||||
DEF_ENC(V6_vmpyowh_rnd, ICLASS_CJ" 1 111 010 vvvvv PP 0 uuuuu 000 ddddd") //
|
||||
DEF_ENC(V6_vshuffeb, ICLASS_CJ" 1 111 010 vvvvv PP 0 uuuuu 001 ddddd") //
|
||||
|
@ -740,6 +742,10 @@ DEF_ENC(V6_vshufoeh, ICLASS_CJ" 1 111 010 vvvvv PP 0 uuuuu 101 ddddd") //
|
|||
DEF_ENC(V6_vshufoeb, ICLASS_CJ" 1 111 010 vvvvv PP 0 uuuuu 110 ddddd") //
|
||||
DEF_ENC(V6_vcombine, ICLASS_CJ" 1 111 010 vvvvv PP 0 uuuuu 111 ddddd") //
|
||||
|
||||
DEF_ENC(V6_v6mpyvubs10, ICLASS_CJ" 1 111 010 vvvvv PP 1 uuuuu 0ii ddddd")
|
||||
DEF_ENC(V6_v6mpyhubs10, ICLASS_CJ" 1 111 010 vvvvv PP 1 uuuuu 1ii ddddd")
|
||||
|
||||
|
||||
DEF_ENC(V6_vmpyieoh, ICLASS_CJ" 1 111 011 vvvvv PP 0 uuuuu 000 ddddd") //
|
||||
DEF_ENC(V6_vadduwsat, ICLASS_CJ" 1 111 011 vvvvv PP 0 uuuuu 001 ddddd") //
|
||||
DEF_ENC(V6_vsathub, ICLASS_CJ" 1 111 011 vvvvv PP 0 uuuuu 010 ddddd") //
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
|
||||
* Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -116,6 +116,10 @@ ITERATOR_INSN_MPY_SLOT_LATE(WIDTH,TAG, SYNTAX2,DESCR,CODE)
|
|||
EXTINSN(V6_##TAG, SYNTAX, ATTRIBS(A_EXTENSION,A_CVI,A_CVI_VX_DV), \
|
||||
DESCR, DO_FOR_EACH_CODE(WIDTH, CODE))
|
||||
|
||||
#define ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC_VX_FWD(WIDTH,TAG,SYNTAX,DESCR,CODE) \
|
||||
EXTINSN(V6_##TAG, SYNTAX, ATTRIBS(A_EXTENSION,A_CVI,A_CVI_VX_DV), \
|
||||
DESCR, DO_FOR_EACH_CODE(WIDTH, CODE))
|
||||
|
||||
#define ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(WIDTH,TAG,SYNTAX,SYNTAX2,DESCR,CODE) \
|
||||
ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC(WIDTH,TAG,SYNTAX2,DESCR,CODE)
|
||||
|
||||
|
@ -2507,6 +2511,281 @@ EXTINSN(V6_vscattermhw , "vscatter(Rt32,Mu2,Vvv32.w).h=Vw32", ATTRIBS(A_EXTENSIO
|
|||
})
|
||||
|
||||
|
||||
ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC_VX_FWD(32, v6mpyvubs10_vxx, "Vxx32.w+=v6mpy(Vuu32.ub,Vvv32.b,#u2):v", "",
|
||||
fHIDE(size2s_t c00;)
|
||||
fGET10BIT(c00, VvvV.v[0].uw[i], 0)
|
||||
fHIDE(size2s_t c01;)
|
||||
fGET10BIT(c01, VvvV.v[0].uw[i], 1)
|
||||
fHIDE(size2s_t c02;)
|
||||
fGET10BIT(c02, VvvV.v[0].uw[i], 2)
|
||||
|
||||
fHIDE(size2s_t c10;)
|
||||
fGET10BIT(c10, VvvV.v[1].uw[i], 0)
|
||||
fHIDE(size2s_t c11;)
|
||||
fGET10BIT(c11, VvvV.v[1].uw[i], 1)
|
||||
fHIDE(size2s_t c12;)
|
||||
fGET10BIT(c12, VvvV.v[1].uw[i], 2)
|
||||
|
||||
if (uiV == 0) {
|
||||
VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c10);
|
||||
VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c11);
|
||||
VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c12);
|
||||
|
||||
VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c00);
|
||||
VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01);
|
||||
VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c02);
|
||||
|
||||
VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c10);
|
||||
VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c11);
|
||||
VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c12);
|
||||
|
||||
} else if (uiV == 1) {
|
||||
VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c00);
|
||||
VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c01);
|
||||
VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c02);
|
||||
|
||||
VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c10);
|
||||
VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c11);
|
||||
VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c12);
|
||||
|
||||
VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c00);
|
||||
VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01);
|
||||
VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c02);
|
||||
|
||||
} else if (uiV == 2) {
|
||||
VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c10);
|
||||
VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11);
|
||||
VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c12);
|
||||
|
||||
VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c00);
|
||||
VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c01);
|
||||
VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c02);
|
||||
|
||||
VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c10);
|
||||
VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c11);
|
||||
VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c12);
|
||||
|
||||
} else if (uiV == 3) {
|
||||
VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c00);
|
||||
VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c01);
|
||||
VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c02);
|
||||
|
||||
VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c10);
|
||||
VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11);
|
||||
VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c12);
|
||||
|
||||
VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c00);
|
||||
VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c01);
|
||||
VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c02);
|
||||
}
|
||||
)
|
||||
ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC_VX_FWD(32, v6mpyhubs10_vxx, "Vxx32.w+=v6mpy(Vuu32.ub,Vvv32.b,#u2):h", "",
|
||||
fHIDE(size2s_t c00;)
|
||||
fGET10BIT(c00, VvvV.v[0].uw[i], 0)
|
||||
fHIDE(size2s_t c01;)
|
||||
fGET10BIT(c01, VvvV.v[0].uw[i], 1)
|
||||
fHIDE(size2s_t c02;)
|
||||
fGET10BIT(c02, VvvV.v[0].uw[i], 2)
|
||||
fHIDE(size2s_t c10;)
|
||||
fGET10BIT(c10, VvvV.v[1].uw[i], 0)
|
||||
fHIDE(size2s_t c11;)
|
||||
fGET10BIT(c11, VvvV.v[1].uw[i], 1)
|
||||
fHIDE(size2s_t c12;)
|
||||
fGET10BIT(c12, VvvV.v[1].uw[i], 2)
|
||||
|
||||
if (uiV == 0) {
|
||||
VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c10);
|
||||
VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c11);
|
||||
VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c12);
|
||||
|
||||
VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c00);
|
||||
VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01);
|
||||
VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c02);
|
||||
|
||||
VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c10);
|
||||
VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c11);
|
||||
VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c12);
|
||||
|
||||
} else if (uiV == 1) {
|
||||
VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c00);
|
||||
VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c01);
|
||||
VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c02);
|
||||
|
||||
VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c10);
|
||||
VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c11);
|
||||
VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c12);
|
||||
|
||||
VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c00);
|
||||
VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01);
|
||||
VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c02);
|
||||
|
||||
} else if (uiV == 2) {
|
||||
VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c10);
|
||||
VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11);
|
||||
VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c12);
|
||||
|
||||
VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c00);
|
||||
VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c01);
|
||||
VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c02);
|
||||
|
||||
VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c10);
|
||||
VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c11);
|
||||
VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c12);
|
||||
|
||||
} else if (uiV == 3) {
|
||||
VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c00);
|
||||
VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c01);
|
||||
VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c02);
|
||||
|
||||
VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c10);
|
||||
VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11);
|
||||
VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c12);
|
||||
|
||||
VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c00);
|
||||
VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c01);
|
||||
VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c02);
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC(32, v6mpyvubs10, "Vdd32.w=v6mpy(Vuu32.ub,Vvv32.b,#u2):v", "",
|
||||
fHIDE(short c00;)
|
||||
fGET10BIT(c00, VvvV.v[0].uw[i], 0)
|
||||
fHIDE(short c01;)
|
||||
fGET10BIT(c01, VvvV.v[0].uw[i], 1)
|
||||
fHIDE(short c02;)
|
||||
fGET10BIT(c02, VvvV.v[0].uw[i], 2)
|
||||
fHIDE(short c10;)
|
||||
fGET10BIT(c10, VvvV.v[1].uw[i], 0)
|
||||
fHIDE(short c11;)
|
||||
fGET10BIT(c11, VvvV.v[1].uw[i], 1)
|
||||
fHIDE(short c12;)
|
||||
fGET10BIT(c12, VvvV.v[1].uw[i], 2)
|
||||
|
||||
|
||||
|
||||
if (uiV == 0) {
|
||||
VddV.v[1].w[i] = fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c10);
|
||||
VddV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c11);
|
||||
VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c12);
|
||||
|
||||
VddV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c00);
|
||||
VddV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01);
|
||||
VddV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c02);
|
||||
|
||||
VddV.v[0].w[i] = fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c10);
|
||||
VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c11);
|
||||
VddV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c12);
|
||||
|
||||
} else if (uiV == 1) {
|
||||
VddV.v[1].w[i] = fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c00);
|
||||
VddV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c01);
|
||||
VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c02);
|
||||
|
||||
VddV.v[0].w[i] = fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c10);
|
||||
VddV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c11);
|
||||
VddV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c12);
|
||||
|
||||
VddV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c00);
|
||||
VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01);
|
||||
VddV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c02);
|
||||
|
||||
} else if (uiV == 2) {
|
||||
VddV.v[1].w[i] = fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c10);
|
||||
VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11);
|
||||
VddV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c12);
|
||||
|
||||
VddV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c00);
|
||||
VddV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c01);
|
||||
VddV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c02);
|
||||
|
||||
VddV.v[0].w[i] = fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c10);
|
||||
VddV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c11);
|
||||
VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c12);
|
||||
|
||||
} else if (uiV == 3) {
|
||||
VddV.v[1].w[i] = fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c00);
|
||||
VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c01);
|
||||
VddV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c02);
|
||||
|
||||
VddV.v[0].w[i] = fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c10);
|
||||
VddV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11);
|
||||
VddV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c12);
|
||||
|
||||
VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c00);
|
||||
VddV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c01);
|
||||
VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c02);
|
||||
}
|
||||
)
|
||||
|
||||
ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC(32, v6mpyhubs10, "Vdd32.w=v6mpy(Vuu32.ub,Vvv32.b,#u2):h", "",
|
||||
fHIDE(short c00;)
|
||||
fGET10BIT(c00, VvvV.v[0].uw[i], 0)
|
||||
fHIDE(short c01;)
|
||||
fGET10BIT(c01, VvvV.v[0].uw[i], 1)
|
||||
fHIDE(short c02;)
|
||||
fGET10BIT(c02, VvvV.v[0].uw[i], 2)
|
||||
fHIDE(short c10;)
|
||||
fGET10BIT(c10, VvvV.v[1].uw[i], 0)
|
||||
fHIDE(short c11;)
|
||||
fGET10BIT(c11, VvvV.v[1].uw[i], 1)
|
||||
fHIDE(short c12;)
|
||||
fGET10BIT(c12, VvvV.v[1].uw[i], 2)
|
||||
|
||||
if (uiV == 0) {
|
||||
VddV.v[1].w[i] = fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c10);
|
||||
VddV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c11);
|
||||
VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c12);
|
||||
|
||||
VddV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c00);
|
||||
VddV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01);
|
||||
VddV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c02);
|
||||
|
||||
VddV.v[0].w[i] = fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c10);
|
||||
VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c11);
|
||||
VddV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c12);
|
||||
|
||||
} else if (uiV == 1) {
|
||||
VddV.v[1].w[i] = fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c00);
|
||||
VddV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c01);
|
||||
VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c02);
|
||||
|
||||
VddV.v[0].w[i] = fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c10);
|
||||
VddV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c11);
|
||||
VddV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c12);
|
||||
|
||||
VddV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c00);
|
||||
VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01);
|
||||
VddV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c02);
|
||||
|
||||
} else if (uiV == 2) {
|
||||
VddV.v[1].w[i] = fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c10);
|
||||
VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11);
|
||||
VddV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c12);
|
||||
|
||||
VddV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c00);
|
||||
VddV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c01);
|
||||
VddV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c02);
|
||||
|
||||
VddV.v[0].w[i] = fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c10);
|
||||
VddV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c11);
|
||||
VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c12);
|
||||
|
||||
} else if (uiV == 3) {
|
||||
VddV.v[1].w[i] = fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c00);
|
||||
VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c01);
|
||||
VddV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c02);
|
||||
|
||||
VddV.v[0].w[i] = fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c10);
|
||||
VddV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11);
|
||||
VddV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c12);
|
||||
|
||||
VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c00);
|
||||
VddV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c01);
|
||||
VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c02);
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
EXTINSN(V6_vscattermhwq, "if (Qs4) vscatter(Rt32,Mu2,Vvv32.w).h=Vw32", ATTRIBS(A_EXTENSION,A_CVI,A_CVI_SCATTER,A_CVI_VA_DV,A_CVI_VM,A_MEMLIKE), "Scatter halfwords conditional",
|
||||
{
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
|
||||
* Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -346,4 +346,11 @@
|
|||
#define fUARCH_NOTE_PUMP_2X()
|
||||
|
||||
#define IV1DEAD()
|
||||
|
||||
#define fGET10BIT(COE, VAL, POS) \
|
||||
do { \
|
||||
COE = (sextract32(VAL, 24 + 2 * POS, 2) << 8) | \
|
||||
extract32(VAL, POS * 8, 8); \
|
||||
} while (0);
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue