Hexagon (target/hexagon) update

-----BEGIN PGP SIGNATURE-----
 
 iQEzBAABCgAdFiEENjXHiM5iuR/UxZq0ewJE+xLeRCIFAmQGzGYACgkQewJE+xLe
 RCJfxggAgAY9fqUxKk5FxziMX6sVxx2SiYGU1biABtHyduQvJ43U7In4cnbC65FL
 Pn+ZSixPRh51Ij2s9FTzDQ8IUNt/k1W7gCoEC3I0wDbQHdICEIlx30ry68QlUgOa
 T6ZDquJkanwFjb7kMnyM4LRBBxLG+OuYvKe1e+nzun0LDlfTS/sHitRlf4AJEBOK
 9h/Bdy81RcWlLzlc5tmD0f9rhtmkkFCTu/TGLt4G6sfn1xZbRdh5N1cFfUShPlQM
 qAgj+JgBOQoPKaBhQZnA3Ps9ZEM4+/8KPsr5oGweAcjqD8+kMGCmi2jv+60ES5Uq
 EDpn25COw7BVAo6vP/JfBEiZTg+YpA==
 =FDSG
 -----END PGP SIGNATURE-----

Merge tag 'pull-hex-20230306' of https://github.com/quic/qemu into staging

Hexagon (target/hexagon) update

# -----BEGIN PGP SIGNATURE-----
#
# iQEzBAABCgAdFiEENjXHiM5iuR/UxZq0ewJE+xLeRCIFAmQGzGYACgkQewJE+xLe
# RCJfxggAgAY9fqUxKk5FxziMX6sVxx2SiYGU1biABtHyduQvJ43U7In4cnbC65FL
# Pn+ZSixPRh51Ij2s9FTzDQ8IUNt/k1W7gCoEC3I0wDbQHdICEIlx30ry68QlUgOa
# T6ZDquJkanwFjb7kMnyM4LRBBxLG+OuYvKe1e+nzun0LDlfTS/sHitRlf4AJEBOK
# 9h/Bdy81RcWlLzlc5tmD0f9rhtmkkFCTu/TGLt4G6sfn1xZbRdh5N1cFfUShPlQM
# qAgj+JgBOQoPKaBhQZnA3Ps9ZEM4+/8KPsr5oGweAcjqD8+kMGCmi2jv+60ES5Uq
# EDpn25COw7BVAo6vP/JfBEiZTg+YpA==
# =FDSG
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 07 Mar 2023 05:32:22 GMT
# gpg:                using RSA key 3635C788CE62B91FD4C59AB47B0244FB12DE4422
# gpg: Good signature from "Taylor Simpson (Rock on) <tsimpson@quicinc.com>" [undefined]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 3635 C788 CE62 B91F D4C5  9AB4 7B02 44FB 12DE 4422

* tag 'pull-hex-20230306' of https://github.com/quic/qemu:
  Hexagon (target/hexagon) Improve code gen for predicated HVX instructions
  Hexagon (target/hexagon) Reduce manipulation of slot_cancelled
  Hexagon (target/hexagon) Remove gen_log_predicated_reg_write[_pair]
  Hexagon (target/hexagon) Change subtract from zero to change sign
  Hexagon (tests/tcg/hexagon) Enable HVX tests
  Hexagon (tests/tcg/hexagon) Remove __builtin from scatter_gather
  Hexagon (tests/tcg/hexagon) Update preg_alias.c
  Hexagon (target/hexagon) Analyze packet for HVX
  Hexagon (target/hexagon) Don't set pkt_has_store_s1 when not needed
  Hexagon (target/hexagon) Analyze packet before generating TCG
  Hexagon (target/hexagon) Add overrides for dealloc-return instructions
  Hexagon (target/hexagon) Add overrides for endloop1/endloop01
  Hexagon (target/hexagon) Add overrides for callr
  Hexagon (target/hexagon) Add overrides for jumpr31 instructions
  target/hexagon/idef-parser: Remove unused code paths
  target/hexagon/idef-parser: Elide mov in gen_pred_assign
  Hexagon (target/hexagon) Restore --disable-hexagon-idef-parser build

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2023-03-10 11:31:22 +00:00
commit aa4af82129
26 changed files with 1198 additions and 818 deletions

View File

@ -52,6 +52,7 @@ header files in <BUILD_DIR>/target/hexagon
gen_tcg_func_table.py -> tcg_func_table_generated.c.inc gen_tcg_func_table.py -> tcg_func_table_generated.c.inc
gen_helper_funcs.py -> helper_funcs_generated.c.inc gen_helper_funcs.py -> helper_funcs_generated.c.inc
gen_idef_parser_funcs.py -> idef_parser_input.h gen_idef_parser_funcs.py -> idef_parser_input.h
gen_analyze_funcs.py -> analyze_funcs_generated.c.inc
Qemu helper functions have 3 parts Qemu helper functions have 3 parts
DEF_HELPER declaration indicates the signature of the helper DEF_HELPER declaration indicates the signature of the helper
@ -87,7 +88,6 @@ tcg_funcs_generated.c.inc
TCGv RtV = hex_gpr[insn->regno[2]]; TCGv RtV = hex_gpr[insn->regno[2]];
gen_helper_A2_add(RdV, cpu_env, RsV, RtV); gen_helper_A2_add(RdV, cpu_env, RsV, RtV);
gen_log_reg_write(RdN, RdV); gen_log_reg_write(RdN, RdV);
ctx_log_reg_write(ctx, RdN);
} }
helper_funcs_generated.c.inc helper_funcs_generated.c.inc
@ -136,12 +136,9 @@ For HVX vectors, the generator behaves slightly differently. The wide vectors
won't fit in a TCGv or TCGv_i64, so we pass TCGv_ptr variables to pass the won't fit in a TCGv or TCGv_i64, so we pass TCGv_ptr variables to pass the
address to helper functions. Here's an example for an HVX vector-add-word address to helper functions. Here's an example for an HVX vector-add-word
instruction. instruction.
static void generate_V6_vaddw( static void generate_V6_vaddw(DisasContext *ctx)
CPUHexagonState *env,
DisasContext *ctx,
Insn *insn,
Packet *pkt)
{ {
Insn *insn __attribute__((unused)) = ctx->insn;
const int VdN = insn->regno[0]; const int VdN = insn->regno[0];
const intptr_t VdV_off = const intptr_t VdV_off =
ctx_future_vreg_off(ctx, VdN, 1, true); ctx_future_vreg_off(ctx, VdN, 1, true);
@ -157,10 +154,7 @@ istruction.
TCGv_ptr VvV = tcg_temp_new_ptr(); TCGv_ptr VvV = tcg_temp_new_ptr();
tcg_gen_addi_ptr(VuV, cpu_env, VuV_off); tcg_gen_addi_ptr(VuV, cpu_env, VuV_off);
tcg_gen_addi_ptr(VvV, cpu_env, VvV_off); tcg_gen_addi_ptr(VvV, cpu_env, VvV_off);
TCGv slot = tcg_constant_tl(insn->slot); gen_helper_V6_vaddw(cpu_env, VdV, VuV, VvV);
gen_helper_V6_vaddw(cpu_env, VdV, VuV, VvV, slot);
gen_log_vreg_write(ctx, VdV_off, VdN, EXT_DFL, insn->slot, false);
ctx_log_vreg_write(ctx, VdN, EXT_DFL, false);
} }
Notice that we also generate a variable named <operand>_off for each operand of Notice that we also generate a variable named <operand>_off for each operand of
@ -173,12 +167,9 @@ functions from tcg-op-gvec.h. Here's the override for this instruction.
Finally, we notice that the override doesn't use the TCGv_ptr variables, so Finally, we notice that the override doesn't use the TCGv_ptr variables, so
we don't generate them when an override is present. Here is what we generate we don't generate them when an override is present. Here is what we generate
when the override is present. when the override is present.
static void generate_V6_vaddw( static void generate_V6_vaddw(DisasContext *ctx)
CPUHexagonState *env,
DisasContext *ctx,
Insn *insn,
Packet *pkt)
{ {
Insn *insn __attribute__((unused)) = ctx->insn;
const int VdN = insn->regno[0]; const int VdN = insn->regno[0];
const intptr_t VdV_off = const intptr_t VdV_off =
ctx_future_vreg_off(ctx, VdN, 1, true); ctx_future_vreg_off(ctx, VdN, 1, true);
@ -189,10 +180,14 @@ when the override is present.
const intptr_t VvV_off = const intptr_t VvV_off =
vreg_src_off(ctx, VvN); vreg_src_off(ctx, VvN);
fGEN_TCG_V6_vaddw({ fHIDE(int i;) fVFOREACH(32, i) { VdV.w[i] = VuV.w[i] + VvV.w[i] ; } }); fGEN_TCG_V6_vaddw({ fHIDE(int i;) fVFOREACH(32, i) { VdV.w[i] = VuV.w[i] + VvV.w[i] ; } });
gen_log_vreg_write(ctx, VdV_off, VdN, EXT_DFL, insn->slot, false);
ctx_log_vreg_write(ctx, VdN, EXT_DFL, false);
} }
We also generate an analyze_<tag> function for each instruction. Currently,
these functions record the writes to registers by calling ctx_log_*. During
gen_start_packet, we invoke the analyze_<tag> function for each instruction in
the packet, and we mark the implicit writes. After the analysis is performed,
we initialize hex_new_value for each of the predicated assignments.
In addition to instruction semantics, we use a generator to create the decode In addition to instruction semantics, we use a generator to create the decode
tree. This generation is also a two step process. The first step is to run tree. This generation is also a two step process. The first step is to run
target/hexagon/gen_dectree_import.c to produce target/hexagon/gen_dectree_import.c to produce
@ -277,10 +272,8 @@ For Hexagon Vector eXtensions (HVX), the following fields are used
VRegs Vector registers VRegs Vector registers
future_VRegs Registers to be stored during packet commit future_VRegs Registers to be stored during packet commit
tmp_VRegs Temporary registers *not* stored during commit tmp_VRegs Temporary registers *not* stored during commit
VRegs_updated Mask of predicated vector writes
QRegs Q (vector predicate) registers QRegs Q (vector predicate) registers
future_QRegs Registers to be stored during packet commit future_QRegs Registers to be stored during packet commit
QRegs_updated Mask of predicated vector writes
*** Debugging *** *** Debugging ***

View File

@ -44,6 +44,7 @@ DEF_ATTRIB(MEMSIZE_1B, "Memory width is 1 byte", "", "")
DEF_ATTRIB(MEMSIZE_2B, "Memory width is 2 bytes", "", "") DEF_ATTRIB(MEMSIZE_2B, "Memory width is 2 bytes", "", "")
DEF_ATTRIB(MEMSIZE_4B, "Memory width is 4 bytes", "", "") DEF_ATTRIB(MEMSIZE_4B, "Memory width is 4 bytes", "", "")
DEF_ATTRIB(MEMSIZE_8B, "Memory width is 8 bytes", "", "") DEF_ATTRIB(MEMSIZE_8B, "Memory width is 8 bytes", "", "")
DEF_ATTRIB(SCALAR_LOAD, "Load is scalar", "", "")
DEF_ATTRIB(SCALAR_STORE, "Store is scalar", "", "") DEF_ATTRIB(SCALAR_STORE, "Store is scalar", "", "")
DEF_ATTRIB(REGWRSIZE_1B, "Memory width is 1 byte", "", "") DEF_ATTRIB(REGWRSIZE_1B, "Memory width is 1 byte", "", "")
DEF_ATTRIB(REGWRSIZE_2B, "Memory width is 2 bytes", "", "") DEF_ATTRIB(REGWRSIZE_2B, "Memory width is 2 bytes", "", "")

View File

@ -1,5 +1,5 @@
/* /*
* Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
* *
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@ -111,11 +111,8 @@ typedef struct CPUArchState {
MMVector future_VRegs[VECTOR_TEMPS_MAX] QEMU_ALIGNED(16); MMVector future_VRegs[VECTOR_TEMPS_MAX] QEMU_ALIGNED(16);
MMVector tmp_VRegs[VECTOR_TEMPS_MAX] QEMU_ALIGNED(16); MMVector tmp_VRegs[VECTOR_TEMPS_MAX] QEMU_ALIGNED(16);
VRegMask VRegs_updated;
MMQReg QRegs[NUM_QREGS] QEMU_ALIGNED(16); MMQReg QRegs[NUM_QREGS] QEMU_ALIGNED(16);
MMQReg future_QRegs[NUM_QREGS] QEMU_ALIGNED(16); MMQReg future_QRegs[NUM_QREGS] QEMU_ALIGNED(16);
QRegMask QRegs_updated;
/* Temporaries used within instructions */ /* Temporaries used within instructions */
MMVectorPair VuuV QEMU_ALIGNED(16); MMVectorPair VuuV QEMU_ALIGNED(16);

View File

@ -0,0 +1,252 @@
#!/usr/bin/env python3
##
## Copyright(c) 2022-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, see <http://www.gnu.org/licenses/>.
##
import sys
import re
import string
import hex_common
##
## Helpers for gen_analyze_func
##
## Return True when the attribute collection recorded for `tag` in
## hex_common.attribdict contains 'A_CONDEXEC', False otherwise.
def is_predicated(tag):
    return 'A_CONDEXEC' in hex_common.attribdict[tag]
## Emit the analysis code for one register operand handled on the
## "old value" path (analyze_opn also routes every pair operand here).
##   f       - text stream the generated C code is written to
##   tag     - instruction tag; selects the predicated flag and, for
##             V registers, the EXT_* kind passed to ctx_log_vreg_write*
##   regtype - register type letter: R, P, C, M, V, Q, G or S
##   regid   - operand id letters, e.g. "d", "ss", "xx"
##   regno   - index into insn->regno[] for this operand
## Depending on regtype/regid the operand gets either a declaration
## followed by a ctx_log_* call, or only a commented-out declaration.
## Unrecognized combinations are reported with print() and nothing is
## written to f.
def analyze_opn_old(f, tag, regtype, regid, regno):
    regN = "%s%sN" % (regtype, regid)
    predicated = "true" if is_predicated(tag) else "false"

    ## Live declaration of the register number; `offset` is appended to
    ## the initializer (used for the control register base).
    def declare(offset=""):
        f.write(" const int %s = insn->regno[%d]%s;\n" %
                (regN, regno, offset))

    ## Same declaration, emitted as a C++-style comment.
    def declare_commented_out(offset=""):
        f.write("// const int %s = insn->regno[%d]%s;\n" %
                (regN, regno, offset))

    def bad_parse():
        print("Bad register parse: ", regtype, regid)

    if regtype == "R":
        if regid in {"ss", "tt"}:
            declare_commented_out()
        elif regid in {"dd", "ee", "xx", "yy"}:
            declare()
            f.write(" ctx_log_reg_write_pair(ctx, %s, %s);\n" %
                    (regN, predicated))
        elif regid in {"s", "t", "u", "v"}:
            declare_commented_out()
        elif regid in {"d", "e", "x", "y"}:
            declare()
            f.write(" ctx_log_reg_write(ctx, %s, %s);\n" %
                    (regN, predicated))
        else:
            bad_parse()
    elif regtype == "P":
        if regid in {"s", "t", "u", "v"}:
            declare_commented_out()
        elif regid in {"d", "e", "x"}:
            declare()
            f.write(" ctx_log_pred_write(ctx, %s);\n" % (regN))
        else:
            bad_parse()
    elif regtype == "C":
        ## Control register numbers are offset by HEX_REG_SA0
        sa0 = " + HEX_REG_SA0"
        if regid == "ss":
            declare_commented_out(sa0)
        elif regid == "dd":
            declare(sa0)
            f.write(" ctx_log_reg_write_pair(ctx, %s, %s);\n" %
                    (regN, predicated))
        elif regid == "s":
            declare_commented_out(sa0)
        elif regid == "d":
            declare(sa0)
            f.write(" ctx_log_reg_write(ctx, %s, %s);\n" %
                    (regN, predicated))
        else:
            bad_parse()
    elif regtype == "M":
        if regid == "u":
            declare_commented_out()
        else:
            bad_parse()
    elif regtype == "V":
        ## Pick the EXT_* kind before looking at regid, as the helper
        ## queries are made for every V operand
        newv = "EXT_DFL"
        if hex_common.is_new_result(tag):
            newv = "EXT_NEW"
        elif hex_common.is_tmp_result(tag):
            newv = "EXT_TMP"
        if regid in {"dd", "xx"}:
            declare()
            f.write(" ctx_log_vreg_write_pair(ctx, %s, %s, %s);\n" %
                    (regN, newv, predicated))
        elif regid in {"uu", "vv"}:
            declare_commented_out()
        elif regid in {"s", "u", "v", "w"}:
            declare_commented_out()
        elif regid in {"d", "x", "y"}:
            declare()
            f.write(" ctx_log_vreg_write(ctx, %s, %s, %s);\n" %
                    (regN, newv, predicated))
        else:
            bad_parse()
    elif regtype == "Q":
        if regid in {"d", "e", "x"}:
            declare()
            f.write(" ctx_log_qreg_write(ctx, %s);\n" % (regN))
        elif regid in {"s", "t", "u", "v"}:
            declare_commented_out()
        else:
            bad_parse()
    elif regtype in {"G", "S"}:
        ## G and S operands only ever get the commented-out declaration
        if regid in {"dd", "d", "ss", "s"}:
            declare_commented_out()
        else:
            bad_parse()
    else:
        bad_parse()
## Emit the analysis code for one ".new" register operand.
##   f       - text stream the generated C code is written to
##   tag     - instruction tag (not used here)
##   regtype - register type letter: N, P or O
##   regid   - operand id letter
##   regno   - index into insn->regno[] for this operand
## Recognized operands only get a commented-out declaration; anything
## else is reported with print() and nothing is written to f.
def analyze_opn_new(f, tag, regtype, regid, regno):
    regN = "%s%sN" % (regtype, regid)
    ## Accepted operand ids for each register type
    accepted_ids = {
        "N": {"s", "t"},
        "P": {"t", "u", "v"},
        "O": {"s"},
    }
    if regid in accepted_ids.get(regtype, ()):
        f.write("// const int %s = insn->regno[%d];\n" % (regN, regno))
    else:
        print("Bad register parse: ", regtype, regid)
## Route one register operand to the matching analysis emitter.
##   f, tag         - output stream and instruction tag
##   regtype, regid - register type letter and operand id letters
##   toss, numregs  - only used in the diagnostic message
##   i              - index of the operand in insn->regno[]
## Pairs always go to analyze_opn_old.  Singles go to analyze_opn_old or
## analyze_opn_new according to hex_common.is_old_val / is_new_val.
## Anything else is reported with print().
def analyze_opn(f, tag, regtype, regid, toss, numregs, i):
    if hex_common.is_pair(regid):
        analyze_opn_old(f, tag, regtype, regid, i)
    elif hex_common.is_single(regid):
        if hex_common.is_old_val(regtype, regid, tag):
            analyze_opn_old(f, tag, regtype, regid, i)
        elif hex_common.is_new_val(regtype, regid, tag):
            analyze_opn_new(f, tag, regtype, regid, i)
        else:
            print("Bad register parse: ", regtype, regid, toss, numregs)
    else:
        print("Bad register parse: ", regtype, regid, toss, numregs)
##
## Generate the code to analyze the instruction
## For A2_add: Rd32=add(Rs32,Rt32), { RdV=RsV+RtV;}
## We produce:
## static void analyze_A2_add(DisasContext *ctx)
## {
## Insn *insn G_GNUC_UNUSED = ctx->insn;
## const int RdN = insn->regno[0];
## ctx_log_reg_write(ctx, RdN, false);
## // const int RsN = insn->regno[1];
## // const int RtN = insn->regno[2];
## }
##
def gen_analyze_func(f, tag, regs, imms):
    ## f    - text stream the generated C function is written to
    ## tag  - instruction tag; the function is named analyze_<tag>
    ## regs - iterable of (regtype, regid, toss, numregs) tuples
    ## imms - accepted for symmetry with the other generators, not used
    f.write("static void analyze_%s(DisasContext *ctx)\n" % tag)
    f.write('{\n')

    f.write(" Insn *insn G_GNUC_UNUSED = ctx->insn;\n")

    ## Analyze all the registers; the position in regs is the index
    ## into insn->regno[]
    for regno, (regtype, regid, toss, numregs) in enumerate(regs):
        analyze_opn(f, tag, regtype, regid, toss, numregs, regno)

    ## The attribute lookup is only reached when both helper checks pass
    has_generated_helper = (not hex_common.skip_qemu_helper(tag) and
                            not hex_common.is_idef_parser_enabled(tag))
    if (has_generated_helper and
            'A_SCALAR_LOAD' in hex_common.attribdict[tag]):
        f.write(" ctx->need_pkt_has_store_s1 = true;\n")

    f.write("}\n\n")
## Script entry point.  Command line layout:
##   argv[1]     semantics file
##   argv[2]     attributes file
##   argv[3..4]  override files
##   argv[5]     idef-parser list (only when idef-parser is enabled)
##   argv[-1]    output file to generate
def main():
    hex_common.read_semantics_file(sys.argv[1])
    hex_common.read_attribs_file(sys.argv[2])
    hex_common.read_overrides_file(sys.argv[3])
    hex_common.read_overrides_file(sys.argv[4])
    ## Whether or not idef-parser is enabled is
    ## determined by the number of arguments to
    ## this script:
    ##
    ## 5 args. -> not enabled,
    ## 6 args. -> idef-parser enabled.
    ##
    ## The 6th arg. then holds a list of the successfully
    ## parsed instructions.
    is_idef_parser_enabled = len(sys.argv) > 6
    if is_idef_parser_enabled:
        hex_common.read_idef_parser_enabled_file(sys.argv[5])
    hex_common.calculate_attribs()
    tagregs = hex_common.get_tagregs()
    tagimms = hex_common.get_tagimms()

    ## The include guard is named after this generator's output rather
    ## than reusing a TCG_FUNCS name, so it cannot clash with the guard
    ## of another generated file pulled into the same translation unit.
    guard = "HEXAGON_ANALYZE_FUNCS_C_INC"

    with open(sys.argv[-1], 'w') as f:
        f.write("#ifndef %s\n" % guard)
        f.write("#define %s\n\n" % guard)

        for tag in hex_common.tags:
            gen_analyze_func(f, tag, tagregs[tag], tagimms[tag])

        f.write("#endif /* %s */\n" % guard)
## Run the generator only when executed as a script, not when imported
if __name__ == "__main__":
    main()

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
## ##
## Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. ## Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
## ##
## This program is free software; you can redistribute it and/or modify ## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by ## it under the terms of the GNU General Public License as published by
@ -226,6 +226,14 @@ def gen_helper_function(f, tag, tagregs, tagimms):
print("Bad register parse: ",regtype,regid,toss,numregs) print("Bad register parse: ",regtype,regid,toss,numregs)
i += 1 i += 1
## For conditional instructions, we pass in the destination register
if 'A_CONDEXEC' in hex_common.attribdict[tag]:
for regtype, regid, toss, numregs in regs:
if (hex_common.is_writeonly(regid) and
not hex_common.is_hvx_reg(regtype)):
gen_helper_arg_opn(f, regtype, regid, i, tag)
i += 1
## Arguments to the helper function are the source regs and immediates ## Arguments to the helper function are the source regs and immediates
for regtype,regid,toss,numregs in regs: for regtype,regid,toss,numregs in regs:
if (hex_common.is_read(regid)): if (hex_common.is_read(regid)):
@ -262,10 +270,11 @@ def gen_helper_function(f, tag, tagregs, tagimms):
if hex_common.need_ea(tag): gen_decl_ea(f) if hex_common.need_ea(tag): gen_decl_ea(f)
## Declare the return variable ## Declare the return variable
i=0 i=0
for regtype,regid,toss,numregs in regs: if 'A_CONDEXEC' not in hex_common.attribdict[tag]:
if (hex_common.is_writeonly(regid)): for regtype,regid,toss,numregs in regs:
gen_helper_dest_decl_opn(f,regtype,regid,i) if (hex_common.is_writeonly(regid)):
i += 1 gen_helper_dest_decl_opn(f,regtype,regid,i)
i += 1
for regtype,regid,toss,numregs in regs: for regtype,regid,toss,numregs in regs:
if (hex_common.is_read(regid)): if (hex_common.is_read(regid)):

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
## ##
## Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. ## Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
## ##
## This program is free software; you can redistribute it and/or modify ## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by ## it under the terms of the GNU General Public License as published by
@ -87,6 +87,7 @@ def gen_helper_prototype(f, tag, tagregs, tagimms):
if hex_common.need_slot(tag): def_helper_size += 1 if hex_common.need_slot(tag): def_helper_size += 1
if hex_common.need_PC(tag): def_helper_size += 1 if hex_common.need_PC(tag): def_helper_size += 1
if hex_common.helper_needs_next_PC(tag): def_helper_size += 1 if hex_common.helper_needs_next_PC(tag): def_helper_size += 1
if hex_common.need_condexec_reg(tag, regs): def_helper_size += 1
f.write('DEF_HELPER_%s(%s' % (def_helper_size, tag)) f.write('DEF_HELPER_%s(%s' % (def_helper_size, tag))
## The return type is void ## The return type is void
f.write(', void' ) f.write(', void' )
@ -96,6 +97,7 @@ def gen_helper_prototype(f, tag, tagregs, tagimms):
if hex_common.need_part1(tag): def_helper_size += 1 if hex_common.need_part1(tag): def_helper_size += 1
if hex_common.need_slot(tag): def_helper_size += 1 if hex_common.need_slot(tag): def_helper_size += 1
if hex_common.need_PC(tag): def_helper_size += 1 if hex_common.need_PC(tag): def_helper_size += 1
if hex_common.need_condexec_reg(tag, regs): def_helper_size += 1
if hex_common.helper_needs_next_PC(tag): def_helper_size += 1 if hex_common.helper_needs_next_PC(tag): def_helper_size += 1
f.write('DEF_HELPER_%s(%s' % (def_helper_size, tag)) f.write('DEF_HELPER_%s(%s' % (def_helper_size, tag))
@ -121,6 +123,14 @@ def gen_helper_prototype(f, tag, tagregs, tagimms):
gen_def_helper_opn(f, tag, regtype, regid, toss, numregs, i) gen_def_helper_opn(f, tag, regtype, regid, toss, numregs, i)
i += 1 i += 1
## For conditional instructions, we pass in the destination register
if 'A_CONDEXEC' in hex_common.attribdict[tag]:
for regtype, regid, toss, numregs in regs:
if (hex_common.is_writeonly(regid) and
not hex_common.is_hvx_reg(regtype)):
gen_def_helper_opn(f, tag, regtype, regid, toss, numregs, i)
i += 1
## Generate the qemu type for each input operand (regs and immediates) ## Generate the qemu type for each input operand (regs and immediates)
for regtype,regid,toss,numregs in regs: for regtype,regid,toss,numregs in regs:
if (hex_common.is_read(regid)): if (hex_common.is_read(regid)):

View File

@ -1,5 +1,5 @@
/* /*
* Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
* *
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@ -332,8 +332,6 @@
tcg_gen_movi_tl(EA, 0); \ tcg_gen_movi_tl(EA, 0); \
PRED; \ PRED; \
CHECK_NOSHUF_PRED(GET_EA, SIZE, LSB); \ CHECK_NOSHUF_PRED(GET_EA, SIZE, LSB); \
PRED_LOAD_CANCEL(LSB, EA); \
tcg_gen_movi_tl(RdV, 0); \
tcg_gen_brcondi_tl(TCG_COND_EQ, LSB, 0, label); \ tcg_gen_brcondi_tl(TCG_COND_EQ, LSB, 0, label); \
fLOAD(1, SIZE, SIGN, EA, RdV); \ fLOAD(1, SIZE, SIGN, EA, RdV); \
gen_set_label(label); \ gen_set_label(label); \
@ -391,8 +389,6 @@
tcg_gen_movi_tl(EA, 0); \ tcg_gen_movi_tl(EA, 0); \
PRED; \ PRED; \
CHECK_NOSHUF_PRED(GET_EA, 8, LSB); \ CHECK_NOSHUF_PRED(GET_EA, 8, LSB); \
PRED_LOAD_CANCEL(LSB, EA); \
tcg_gen_movi_i64(RddV, 0); \
tcg_gen_brcondi_tl(TCG_COND_EQ, LSB, 0, label); \ tcg_gen_brcondi_tl(TCG_COND_EQ, LSB, 0, label); \
fLOAD(1, 8, u, EA, RddV); \ fLOAD(1, 8, u, EA, RddV); \
gen_set_label(label); \ gen_set_label(label); \
@ -419,16 +415,16 @@
#define fGEN_TCG_STORE(SHORTCODE) \ #define fGEN_TCG_STORE(SHORTCODE) \
do { \ do { \
TCGv HALF = tcg_temp_new(); \ TCGv HALF G_GNUC_UNUSED = tcg_temp_new(); \
TCGv BYTE = tcg_temp_new(); \ TCGv BYTE G_GNUC_UNUSED = tcg_temp_new(); \
SHORTCODE; \ SHORTCODE; \
} while (0) } while (0)
#define fGEN_TCG_STORE_pcr(SHIFT, STORE) \ #define fGEN_TCG_STORE_pcr(SHIFT, STORE) \
do { \ do { \
TCGv ireg = tcg_temp_new(); \ TCGv ireg = tcg_temp_new(); \
TCGv HALF = tcg_temp_new(); \ TCGv HALF G_GNUC_UNUSED = tcg_temp_new(); \
TCGv BYTE = tcg_temp_new(); \ TCGv BYTE G_GNUC_UNUSED = tcg_temp_new(); \
tcg_gen_mov_tl(EA, RxV); \ tcg_gen_mov_tl(EA, RxV); \
gen_read_ireg(ireg, MuV, SHIFT); \ gen_read_ireg(ireg, MuV, SHIFT); \
gen_helper_fcircadd(RxV, RxV, ireg, MuV, hex_gpr[HEX_REG_CS0 + MuN]); \ gen_helper_fcircadd(RxV, RxV, ireg, MuV, hex_gpr[HEX_REG_CS0 + MuN]); \
@ -491,6 +487,59 @@
#define fGEN_TCG_S2_storerinew_pcr(SHORTCODE) \ #define fGEN_TCG_S2_storerinew_pcr(SHORTCODE) \
fGEN_TCG_STORE_pcr(2, fSTORE(1, 4, EA, NtN)) fGEN_TCG_STORE_pcr(2, fSTORE(1, 4, EA, NtN))
/*
* dealloc_return
* Assembler mapped to
* r31:30 = dealloc_return(r30):raw
*/
#define fGEN_TCG_L4_return(SHORTCODE) \
gen_return(ctx, RddV, RsV)
/*
* sub-instruction version (no RddV, so handle it manually)
*/
#define fGEN_TCG_SL2_return(SHORTCODE) \
do { \
TCGv_i64 RddV = get_result_gpr_pair(ctx, HEX_REG_FP); \
gen_return(ctx, RddV, hex_gpr[HEX_REG_FP]); \
gen_log_reg_write_pair(HEX_REG_FP, RddV); \
} while (0)
/*
* Conditional returns follow this naming convention
* _t predicate true
* _f predicate false
* _tnew_pt predicate.new true predict taken
* _fnew_pt predicate.new false predict taken
* _tnew_pnt predicate.new true predict not taken
* _fnew_pnt predicate.new false predict not taken
* Predictions are not modelled in QEMU
*
* Example:
* if (p1) r31:30 = dealloc_return(r30):raw
*/
/*
 * Predicate-true conditional dealloc_return override.
 * The body has no trailing semicolon, matching the sibling
 * fGEN_TCG_L4_return_* overrides, so the macro expands to a plain
 * call expression.
 */
#define fGEN_TCG_L4_return_t(SHORTCODE) \
    gen_cond_return(ctx, RddV, RsV, PvV, TCG_COND_EQ)
#define fGEN_TCG_L4_return_f(SHORTCODE) \
gen_cond_return(ctx, RddV, RsV, PvV, TCG_COND_NE)
#define fGEN_TCG_L4_return_tnew_pt(SHORTCODE) \
gen_cond_return(ctx, RddV, RsV, PvN, TCG_COND_EQ)
#define fGEN_TCG_L4_return_fnew_pt(SHORTCODE) \
gen_cond_return(ctx, RddV, RsV, PvN, TCG_COND_NE)
#define fGEN_TCG_L4_return_tnew_pnt(SHORTCODE) \
gen_cond_return(ctx, RddV, RsV, PvN, TCG_COND_EQ)
#define fGEN_TCG_L4_return_fnew_pnt(SHORTCODE) \
gen_cond_return(ctx, RddV, RsV, PvN, TCG_COND_NE)
#define fGEN_TCG_SL2_return_t(SHORTCODE) \
gen_cond_return_subinsn(ctx, TCG_COND_EQ, hex_pred[0])
#define fGEN_TCG_SL2_return_f(SHORTCODE) \
gen_cond_return_subinsn(ctx, TCG_COND_NE, hex_pred[0])
#define fGEN_TCG_SL2_return_tnew(SHORTCODE) \
gen_cond_return_subinsn(ctx, TCG_COND_EQ, hex_new_pred_value[0])
#define fGEN_TCG_SL2_return_fnew(SHORTCODE) \
gen_cond_return_subinsn(ctx, TCG_COND_NE, hex_new_pred_value[0])
/* /*
* Mathematical operations with more than one definition require * Mathematical operations with more than one definition require
* special handling * special handling
@ -589,14 +638,24 @@
#define fGEN_TCG_J2_call(SHORTCODE) \ #define fGEN_TCG_J2_call(SHORTCODE) \
gen_call(ctx, riV) gen_call(ctx, riV)
#define fGEN_TCG_J2_callr(SHORTCODE) \
gen_callr(ctx, RsV)
#define fGEN_TCG_J2_callt(SHORTCODE) \ #define fGEN_TCG_J2_callt(SHORTCODE) \
gen_cond_call(ctx, PuV, TCG_COND_EQ, riV) gen_cond_call(ctx, PuV, TCG_COND_EQ, riV)
#define fGEN_TCG_J2_callf(SHORTCODE) \ #define fGEN_TCG_J2_callf(SHORTCODE) \
gen_cond_call(ctx, PuV, TCG_COND_NE, riV) gen_cond_call(ctx, PuV, TCG_COND_NE, riV)
#define fGEN_TCG_J2_callrt(SHORTCODE) \
gen_cond_callr(ctx, TCG_COND_EQ, PuV, RsV)
#define fGEN_TCG_J2_callrf(SHORTCODE) \
gen_cond_callr(ctx, TCG_COND_NE, PuV, RsV)
#define fGEN_TCG_J2_endloop0(SHORTCODE) \ #define fGEN_TCG_J2_endloop0(SHORTCODE) \
gen_endloop0(ctx) gen_endloop0(ctx)
#define fGEN_TCG_J2_endloop1(SHORTCODE) \
gen_endloop1(ctx)
#define fGEN_TCG_J2_endloop01(SHORTCODE) \
gen_endloop01(ctx)
/* /*
* Compound compare and jump instructions * Compound compare and jump instructions
@ -986,6 +1045,19 @@
#define fGEN_TCG_S2_asl_r_r_sat(SHORTCODE) \ #define fGEN_TCG_S2_asl_r_r_sat(SHORTCODE) \
gen_asl_r_r_sat(RdV, RsV, RtV) gen_asl_r_r_sat(RdV, RsV, RtV)
#define fGEN_TCG_SL2_jumpr31(SHORTCODE) \
gen_jumpr(ctx, hex_gpr[HEX_REG_LR])
#define fGEN_TCG_SL2_jumpr31_t(SHORTCODE) \
gen_cond_jumpr31(ctx, TCG_COND_EQ, hex_pred[0])
#define fGEN_TCG_SL2_jumpr31_f(SHORTCODE) \
gen_cond_jumpr31(ctx, TCG_COND_NE, hex_pred[0])
#define fGEN_TCG_SL2_jumpr31_tnew(SHORTCODE) \
gen_cond_jumpr31(ctx, TCG_COND_EQ, hex_new_pred_value[0])
#define fGEN_TCG_SL2_jumpr31_fnew(SHORTCODE) \
gen_cond_jumpr31(ctx, TCG_COND_NE, hex_new_pred_value[0])
/* Floating point */ /* Floating point */
#define fGEN_TCG_F2_conv_sf2df(SHORTCODE) \ #define fGEN_TCG_F2_conv_sf2df(SHORTCODE) \
gen_helper_conv_sf2df(RddV, cpu_env, RsV) gen_helper_conv_sf2df(RddV, cpu_env, RsV)

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
## ##
## Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. ## Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
## ##
## This program is free software; you can redistribute it and/or modify ## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by ## it under the terms of the GNU General Public License as published by
@ -30,37 +30,33 @@ def gen_decl_ea_tcg(f, tag):
def genptr_decl_pair_writable(f, tag, regtype, regid, regno): def genptr_decl_pair_writable(f, tag, regtype, regid, regno):
regN="%s%sN" % (regtype,regid) regN="%s%sN" % (regtype,regid)
f.write(" TCGv_i64 %s%sV = tcg_temp_new_i64();\n" % \ if (regtype == "R"):
(regtype, regid)) f.write(" const int %s = insn->regno[%d];\n" % (regN, regno))
if (regtype == "C"): elif (regtype == "C"):
f.write(" const int %s = insn->regno[%d] + HEX_REG_SA0;\n" % \ f.write(" const int %s = insn->regno[%d] + HEX_REG_SA0;\n" % \
(regN, regno)) (regN, regno))
else: else:
f.write(" const int %s = insn->regno[%d];\n" % (regN, regno)) print("Bad register parse: ", regtype, regid)
if ('A_CONDEXEC' in hex_common.attribdict[tag]): f.write(" TCGv_i64 %s%sV = get_result_gpr_pair(ctx, %s);\n" % \
f.write(" if (!is_preloaded(ctx, %s)) {\n" % regN) (regtype, regid, regN))
f.write(" tcg_gen_mov_tl(hex_new_value[%s], hex_gpr[%s]);\n" % \
(regN, regN))
f.write(" }\n")
f.write(" if (!is_preloaded(ctx, %s + 1)) {\n" % regN)
f.write(" tcg_gen_mov_tl(hex_new_value[%s + 1], hex_gpr[%s + 1]);\n" % \
(regN, regN))
f.write(" }\n")
def genptr_decl_writable(f, tag, regtype, regid, regno): def genptr_decl_writable(f, tag, regtype, regid, regno):
regN="%s%sN" % (regtype,regid) regN="%s%sN" % (regtype,regid)
f.write(" TCGv %s%sV = tcg_temp_new();\n" % \ if (regtype == "R"):
(regtype, regid)) f.write(" const int %s = insn->regno[%d];\n" % (regN, regno))
if (regtype == "C"): f.write(" TCGv %s%sV = get_result_gpr(ctx, %s);\n" % \
(regtype, regid, regN))
elif (regtype == "C"):
f.write(" const int %s = insn->regno[%d] + HEX_REG_SA0;\n" % \ f.write(" const int %s = insn->regno[%d] + HEX_REG_SA0;\n" % \
(regN, regno)) (regN, regno))
else: f.write(" TCGv %s%sV = get_result_gpr(ctx, %s);\n" % \
(regtype, regid, regN))
elif (regtype == "P"):
f.write(" const int %s = insn->regno[%d];\n" % (regN, regno)) f.write(" const int %s = insn->regno[%d];\n" % (regN, regno))
if ('A_CONDEXEC' in hex_common.attribdict[tag]): f.write(" TCGv %s%sV = tcg_temp_new();\n" % \
f.write(" if (!is_preloaded(ctx, %s)) {\n" % regN) (regtype, regid))
f.write(" tcg_gen_mov_tl(hex_new_value[%s], hex_gpr[%s]);\n" % \ else:
(regN, regN)) print("Bad register parse: ", regtype, regid)
f.write(" }\n")
def genptr_decl(f, tag, regtype, regid, regno): def genptr_decl(f, tag, regtype, regid, regno):
regN="%s%sN" % (regtype,regid) regN="%s%sN" % (regtype,regid)
@ -166,17 +162,6 @@ def genptr_decl(f, tag, regtype, regid, regno):
f.write(" ctx_future_vreg_off(ctx, %s%sN," % \ f.write(" ctx_future_vreg_off(ctx, %s%sN," % \
(regtype, regid)) (regtype, regid))
f.write(" 1, true);\n"); f.write(" 1, true);\n");
if 'A_CONDEXEC' in hex_common.attribdict[tag]:
f.write(" if (!is_vreg_preloaded(ctx, %s)) {\n" % (regN))
f.write(" intptr_t src_off =")
f.write(" offsetof(CPUHexagonState, VRegs[%s%sN]);\n"% \
(regtype, regid))
f.write(" tcg_gen_gvec_mov(MO_64, %s%sV_off,\n" % \
(regtype, regid))
f.write(" src_off,\n")
f.write(" sizeof(MMVector),\n")
f.write(" sizeof(MMVector));\n")
f.write(" }\n")
if (not hex_common.skip_qemu_helper(tag)): if (not hex_common.skip_qemu_helper(tag)):
f.write(" TCGv_ptr %s%sV = tcg_temp_new_ptr();\n" % \ f.write(" TCGv_ptr %s%sV = tcg_temp_new_ptr();\n" % \
@ -191,8 +176,7 @@ def genptr_decl(f, tag, regtype, regid, regno):
(regtype, regid, regno)) (regtype, regid, regno))
f.write(" const intptr_t %s%sV_off =\n" % \ f.write(" const intptr_t %s%sV_off =\n" % \
(regtype, regid)) (regtype, regid))
f.write(" offsetof(CPUHexagonState,\n") f.write(" get_result_qreg(ctx, %s%sN);\n" % \
f.write(" future_QRegs[%s%sN]);\n" % \
(regtype, regid)) (regtype, regid))
if (not hex_common.skip_qemu_helper(tag)): if (not hex_common.skip_qemu_helper(tag)):
f.write(" TCGv_ptr %s%sV = tcg_temp_new_ptr();\n" % \ f.write(" TCGv_ptr %s%sV = tcg_temp_new_ptr();\n" % \
@ -274,8 +258,12 @@ def genptr_src_read(f, tag, regtype, regid):
f.write(" hex_gpr[%s%sN + 1]);\n" % \ f.write(" hex_gpr[%s%sN + 1]);\n" % \
(regtype, regid)) (regtype, regid))
elif (regid in {"x", "y"}): elif (regid in {"x", "y"}):
f.write(" tcg_gen_mov_tl(%s%sV, hex_gpr[%s%sN]);\n" % \ ## For read/write registers, we need to get the original value into
(regtype,regid,regtype,regid)) ## the result TCGv. For conditional instructions, this is done in
## gen_start_packet. For unconditional instructions, we do it here.
if ('A_CONDEXEC' not in hex_common.attribdict[tag]):
f.write(" tcg_gen_mov_tl(%s%sV, hex_gpr[%s%sN]);\n" % \
(regtype, regid, regtype, regid))
elif (regid not in {"s", "t", "u", "v"}): elif (regid not in {"s", "t", "u", "v"}):
print("Bad register parse: ", regtype, regid) print("Bad register parse: ", regtype, regid)
elif (regtype == "P"): elif (regtype == "P"):
@ -385,37 +373,22 @@ def gen_helper_call_imm(f,immlett):
f.write(", tcgv_%s" % hex_common.imm_name(immlett)) f.write(", tcgv_%s" % hex_common.imm_name(immlett))
def genptr_dst_write_pair(f, tag, regtype, regid): def genptr_dst_write_pair(f, tag, regtype, regid):
if ('A_CONDEXEC' in hex_common.attribdict[tag]): f.write(" gen_log_reg_write_pair(%s%sN, %s%sV);\n" % \
f.write(" gen_log_predicated_reg_write_pair(%s%sN, %s%sV, insn->slot);\n" % \ (regtype, regid, regtype, regid))
(regtype, regid, regtype, regid))
else:
f.write(" gen_log_reg_write_pair(%s%sN, %s%sV);\n" % \
(regtype, regid, regtype, regid))
f.write(" ctx_log_reg_write_pair(ctx, %s%sN);\n" % \
(regtype, regid))
def genptr_dst_write(f, tag, regtype, regid): def genptr_dst_write(f, tag, regtype, regid):
if (regtype == "R"): if (regtype == "R"):
if (regid in {"dd", "xx", "yy"}): if (regid in {"dd", "xx", "yy"}):
genptr_dst_write_pair(f, tag, regtype, regid) genptr_dst_write_pair(f, tag, regtype, regid)
elif (regid in {"d", "e", "x", "y"}): elif (regid in {"d", "e", "x", "y"}):
if ('A_CONDEXEC' in hex_common.attribdict[tag]): f.write(" gen_log_reg_write(%s%sN, %s%sV);\n" % \
f.write(" gen_log_predicated_reg_write(%s%sN, %s%sV,\n" % \ (regtype, regid, regtype, regid))
(regtype, regid, regtype, regid))
f.write(" insn->slot);\n")
else:
f.write(" gen_log_reg_write(%s%sN, %s%sV);\n" % \
(regtype, regid, regtype, regid))
f.write(" ctx_log_reg_write(ctx, %s%sN);\n" % \
(regtype, regid))
else: else:
print("Bad register parse: ", regtype, regid) print("Bad register parse: ", regtype, regid)
elif (regtype == "P"): elif (regtype == "P"):
if (regid in {"d", "e", "x"}): if (regid in {"d", "e", "x"}):
f.write(" gen_log_pred_write(ctx, %s%sN, %s%sV);\n" % \ f.write(" gen_log_pred_write(ctx, %s%sN, %s%sV);\n" % \
(regtype, regid, regtype, regid)) (regtype, regid, regtype, regid))
f.write(" ctx_log_pred_write(ctx, %s%sN);\n" % \
(regtype, regid))
else: else:
print("Bad register parse: ", regtype, regid) print("Bad register parse: ", regtype, regid)
elif (regtype == "C"): elif (regtype == "C"):
@ -432,43 +405,18 @@ def genptr_dst_write(f, tag, regtype, regid):
def genptr_dst_write_ext(f, tag, regtype, regid, newv="EXT_DFL"): def genptr_dst_write_ext(f, tag, regtype, regid, newv="EXT_DFL"):
if (regtype == "V"): if (regtype == "V"):
if (regid in {"dd", "xx", "yy"}): if (regid in {"xx"}):
if ('A_CONDEXEC' in hex_common.attribdict[tag]):
is_predicated = "true"
else:
is_predicated = "false"
f.write(" gen_log_vreg_write_pair(ctx, %s%sV_off, %s%sN, " % \ f.write(" gen_log_vreg_write_pair(ctx, %s%sV_off, %s%sN, " % \
(regtype, regid, regtype, regid)) (regtype, regid, regtype, regid))
f.write("%s, insn->slot, %s);\n" % \ f.write("%s);\n" % \
(newv, is_predicated)) (newv))
f.write(" ctx_log_vreg_write_pair(ctx, %s%sN, %s,\n" % \ elif (regid in {"y"}):
(regtype, regid, newv)) f.write(" gen_log_vreg_write(ctx, %s%sV_off, %s%sN, %s);\n" % \
f.write(" %s);\n" % (is_predicated))
elif (regid in {"d", "x", "y"}):
if ('A_CONDEXEC' in hex_common.attribdict[tag]):
is_predicated = "true"
else:
is_predicated = "false"
f.write(" gen_log_vreg_write(ctx, %s%sV_off, %s%sN, %s, " % \
(regtype, regid, regtype, regid, newv)) (regtype, regid, regtype, regid, newv))
f.write("insn->slot, %s);\n" % \ elif (regid not in {"dd", "d", "x"}):
(is_predicated))
f.write(" ctx_log_vreg_write(ctx, %s%sN, %s, %s);\n" % \
(regtype, regid, newv, is_predicated))
else:
print("Bad register parse: ", regtype, regid) print("Bad register parse: ", regtype, regid)
elif (regtype == "Q"): elif (regtype == "Q"):
if (regid in {"d", "e", "x"}): if (regid not in {"d", "e", "x"}):
if ('A_CONDEXEC' in hex_common.attribdict[tag]):
is_predicated = "true"
else:
is_predicated = "false"
f.write(" gen_log_qreg_write(%s%sV_off, %s%sN, %s, " % \
(regtype, regid, regtype, regid, newv))
f.write("insn->slot, %s);\n" % (is_predicated))
f.write(" ctx_log_qreg_write(ctx, %s%sN, %s);\n" % \
(regtype, regid, is_predicated))
else:
print("Bad register parse: ", regtype, regid) print("Bad register parse: ", regtype, regid)
else: else:
print("Bad register parse: ", regtype, regid) print("Bad register parse: ", regtype, regid)
@ -500,15 +448,15 @@ def genptr_dst_write_opn(f,regtype, regid, tag):
## For A2_add: Rd32=add(Rs32,Rt32), { RdV=RsV+RtV;} ## For A2_add: Rd32=add(Rs32,Rt32), { RdV=RsV+RtV;}
## We produce: ## We produce:
## static void generate_A2_add(DisasContext *ctx) ## static void generate_A2_add(DisasContext *ctx)
## { ## {
## TCGv RdV = tcg_temp_new(); ## Insn *insn __attribute__((unused)) = ctx->insn;
## const int RdN = insn->regno[0]; ## const int RdN = insn->regno[0];
## TCGv RsV = hex_gpr[insn->regno[1]]; ## TCGv RdV = get_result_gpr(ctx, RdN);
## TCGv RtV = hex_gpr[insn->regno[2]]; ## TCGv RsV = hex_gpr[insn->regno[1]];
## <GEN> ## TCGv RtV = hex_gpr[insn->regno[2]];
## gen_log_reg_write(RdN, RdV); ## <GEN>
## ctx_log_reg_write(ctx, RdN); ## gen_log_reg_write(RdN, RdV);
## } ## }
## ##
## where <GEN> depends on hex_common.skip_qemu_helper(tag) ## where <GEN> depends on hex_common.skip_qemu_helper(tag)
## if hex_common.skip_qemu_helper(tag) is True ## if hex_common.skip_qemu_helper(tag) is True
@ -592,6 +540,14 @@ def gen_tcg_func(f, tag, regs, imms):
if (i > 0): f.write(", ") if (i > 0): f.write(", ")
f.write("cpu_env") f.write("cpu_env")
i=1 i=1
## For conditional instructions, we pass in the destination register
if 'A_CONDEXEC' in hex_common.attribdict[tag]:
for regtype, regid, toss, numregs in regs:
if (hex_common.is_writeonly(regid) and
not hex_common.is_hvx_reg(regtype)):
gen_helper_call_opn(f, tag, regtype, regid, toss, \
numregs, i)
i += 1
for regtype,regid,toss,numregs in regs: for regtype,regid,toss,numregs in regs:
if (hex_common.is_written(regid)): if (hex_common.is_written(regid)):
if (not hex_common.is_hvx_reg(regtype)): if (not hex_common.is_hvx_reg(regtype)):

View File

@ -1,5 +1,5 @@
/* /*
* Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
* *
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@ -133,16 +133,11 @@ static inline void assert_vhist_tmp(DisasContext *ctx)
do { \ do { \
TCGv lsb = tcg_temp_new(); \ TCGv lsb = tcg_temp_new(); \
TCGLabel *false_label = gen_new_label(); \ TCGLabel *false_label = gen_new_label(); \
TCGLabel *end_label = gen_new_label(); \
tcg_gen_andi_tl(lsb, PsV, 1); \ tcg_gen_andi_tl(lsb, PsV, 1); \
tcg_gen_brcondi_tl(TCG_COND_NE, lsb, PRED, false_label); \ tcg_gen_brcondi_tl(TCG_COND_NE, lsb, PRED, false_label); \
tcg_gen_gvec_mov(MO_64, VdV_off, VuV_off, \ tcg_gen_gvec_mov(MO_64, VdV_off, VuV_off, \
sizeof(MMVector), sizeof(MMVector)); \ sizeof(MMVector), sizeof(MMVector)); \
tcg_gen_br(end_label); \
gen_set_label(false_label); \ gen_set_label(false_label); \
tcg_gen_ori_tl(hex_slot_cancelled, hex_slot_cancelled, \
1 << insn->slot); \
gen_set_label(end_label); \
} while (0) } while (0)
@ -547,17 +542,12 @@ static inline void assert_vhist_tmp(DisasContext *ctx)
do { \ do { \
TCGv LSB = tcg_temp_new(); \ TCGv LSB = tcg_temp_new(); \
TCGLabel *false_label = gen_new_label(); \ TCGLabel *false_label = gen_new_label(); \
TCGLabel *end_label = gen_new_label(); \
GET_EA; \ GET_EA; \
PRED; \ PRED; \
tcg_gen_brcondi_tl(TCG_COND_EQ, LSB, 0, false_label); \ tcg_gen_brcondi_tl(TCG_COND_EQ, LSB, 0, false_label); \
gen_vreg_load(ctx, DSTOFF, EA, true); \ gen_vreg_load(ctx, DSTOFF, EA, true); \
INC; \ INC; \
tcg_gen_br(end_label); \
gen_set_label(false_label); \ gen_set_label(false_label); \
tcg_gen_ori_tl(hex_slot_cancelled, hex_slot_cancelled, \
1 << insn->slot); \
gen_set_label(end_label); \
} while (0) } while (0)
#define fGEN_TCG_PRED_VEC_LOAD_pred_pi \ #define fGEN_TCG_PRED_VEC_LOAD_pred_pi \
@ -717,17 +707,12 @@ static inline void assert_vhist_tmp(DisasContext *ctx)
do { \ do { \
TCGv LSB = tcg_temp_new(); \ TCGv LSB = tcg_temp_new(); \
TCGLabel *false_label = gen_new_label(); \ TCGLabel *false_label = gen_new_label(); \
TCGLabel *end_label = gen_new_label(); \
GET_EA; \ GET_EA; \
PRED; \ PRED; \
tcg_gen_brcondi_tl(TCG_COND_EQ, LSB, 0, false_label); \ tcg_gen_brcondi_tl(TCG_COND_EQ, LSB, 0, false_label); \
gen_vreg_store(ctx, EA, SRCOFF, insn->slot, ALIGN); \ gen_vreg_store(ctx, EA, SRCOFF, insn->slot, ALIGN); \
INC; \ INC; \
tcg_gen_br(end_label); \
gen_set_label(false_label); \ gen_set_label(false_label); \
tcg_gen_ori_tl(hex_slot_cancelled, hex_slot_cancelled, \
1 << insn->slot); \
gen_set_label(end_label); \
} while (0) } while (0)
#define fGEN_TCG_PRED_VEC_STORE_pred_pi(ALIGN) \ #define fGEN_TCG_PRED_VEC_STORE_pred_pi(ALIGN) \

View File

@ -1,5 +1,5 @@
/* /*
* Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
* *
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@ -68,26 +68,17 @@ static inline void gen_masked_reg_write(TCGv new_val, TCGv cur_val,
} }
} }
static inline void gen_log_predicated_reg_write(int rnum, TCGv val, static TCGv get_result_gpr(DisasContext *ctx, int rnum)
uint32_t slot)
{ {
TCGv zero = tcg_constant_tl(0); return hex_new_value[rnum];
TCGv slot_mask = tcg_temp_new(); }
tcg_gen_andi_tl(slot_mask, hex_slot_cancelled, 1 << slot); static TCGv_i64 get_result_gpr_pair(DisasContext *ctx, int rnum)
tcg_gen_movcond_tl(TCG_COND_EQ, hex_new_value[rnum], slot_mask, zero, {
val, hex_new_value[rnum]); TCGv_i64 result = tcg_temp_new_i64();
if (HEX_DEBUG) { tcg_gen_concat_i32_i64(result, hex_new_value[rnum],
/* hex_new_value[rnum + 1]);
* Do this so HELPER(debug_commit_end) will know return result;
*
* Note that slot_mask indicates the value is not written
* (i.e., slot was cancelled), so we create a true/false value before
* or'ing with hex_reg_written[rnum].
*/
tcg_gen_setcond_tl(TCG_COND_EQ, slot_mask, slot_mask, zero);
tcg_gen_or_tl(hex_reg_written[rnum], hex_reg_written[rnum], slot_mask);
}
} }
void gen_log_reg_write(int rnum, TCGv val) void gen_log_reg_write(int rnum, TCGv val)
@ -102,39 +93,6 @@ void gen_log_reg_write(int rnum, TCGv val)
} }
} }
static void gen_log_predicated_reg_write_pair(int rnum, TCGv_i64 val,
uint32_t slot)
{
TCGv val32 = tcg_temp_new();
TCGv zero = tcg_constant_tl(0);
TCGv slot_mask = tcg_temp_new();
tcg_gen_andi_tl(slot_mask, hex_slot_cancelled, 1 << slot);
/* Low word */
tcg_gen_extrl_i64_i32(val32, val);
tcg_gen_movcond_tl(TCG_COND_EQ, hex_new_value[rnum],
slot_mask, zero,
val32, hex_new_value[rnum]);
/* High word */
tcg_gen_extrh_i64_i32(val32, val);
tcg_gen_movcond_tl(TCG_COND_EQ, hex_new_value[rnum + 1],
slot_mask, zero,
val32, hex_new_value[rnum + 1]);
if (HEX_DEBUG) {
/*
* Do this so HELPER(debug_commit_end) will know
*
* Note that slot_mask indicates the value is not written
* (i.e., slot was cancelled), so we create a true/false value before
* or'ing with hex_reg_written[rnum].
*/
tcg_gen_setcond_tl(TCG_COND_EQ, slot_mask, slot_mask, zero);
tcg_gen_or_tl(hex_reg_written[rnum], hex_reg_written[rnum], slot_mask);
tcg_gen_or_tl(hex_reg_written[rnum + 1], hex_reg_written[rnum + 1],
slot_mask);
}
}
static void gen_log_reg_write_pair(int rnum, TCGv_i64 val) static void gen_log_reg_write_pair(int rnum, TCGv_i64 val)
{ {
const target_ulong reg_mask_low = reg_immut_masks[rnum]; const target_ulong reg_mask_low = reg_immut_masks[rnum];
@ -180,6 +138,7 @@ void gen_log_pred_write(DisasContext *ctx, int pnum, TCGv val)
hex_new_pred_value[pnum], base_val); hex_new_pred_value[pnum], base_val);
} }
tcg_gen_ori_tl(hex_pred_written, hex_pred_written, 1 << pnum); tcg_gen_ori_tl(hex_pred_written, hex_pred_written, 1 << pnum);
set_bit(pnum, ctx->pregs_written);
} }
static inline void gen_read_p3_0(TCGv control_reg) static inline void gen_read_p3_0(TCGv control_reg)
@ -256,7 +215,6 @@ static void gen_write_p3_0(DisasContext *ctx, TCGv control_reg)
for (int i = 0; i < NUM_PREGS; i++) { for (int i = 0; i < NUM_PREGS; i++) {
tcg_gen_extract_tl(hex_p8, control_reg, i * 8, 8); tcg_gen_extract_tl(hex_p8, control_reg, i * 8, 8);
gen_log_pred_write(ctx, i, hex_p8); gen_log_pred_write(ctx, i, hex_p8);
ctx_log_pred_write(ctx, i);
} }
} }
@ -274,7 +232,6 @@ static inline void gen_write_ctrl_reg(DisasContext *ctx, int reg_num,
gen_write_p3_0(ctx, val); gen_write_p3_0(ctx, val);
} else { } else {
gen_log_reg_write(reg_num, val); gen_log_reg_write(reg_num, val);
ctx_log_reg_write(ctx, reg_num);
if (reg_num == HEX_REG_QEMU_PKT_CNT) { if (reg_num == HEX_REG_QEMU_PKT_CNT) {
ctx->num_packets = 0; ctx->num_packets = 0;
} }
@ -291,15 +248,14 @@ static inline void gen_write_ctrl_reg_pair(DisasContext *ctx, int reg_num,
TCGv_i64 val) TCGv_i64 val)
{ {
if (reg_num == HEX_REG_P3_0_ALIASED) { if (reg_num == HEX_REG_P3_0_ALIASED) {
TCGv result = get_result_gpr(ctx, reg_num + 1);
TCGv val32 = tcg_temp_new(); TCGv val32 = tcg_temp_new();
tcg_gen_extrl_i64_i32(val32, val); tcg_gen_extrl_i64_i32(val32, val);
gen_write_p3_0(ctx, val32); gen_write_p3_0(ctx, val32);
tcg_gen_extrh_i64_i32(val32, val); tcg_gen_extrh_i64_i32(val32, val);
gen_log_reg_write(reg_num + 1, val32); tcg_gen_mov_tl(result, val32);
ctx_log_reg_write(ctx, reg_num + 1);
} else { } else {
gen_log_reg_write_pair(reg_num, val); gen_log_reg_write_pair(reg_num, val);
ctx_log_reg_write_pair(ctx, reg_num);
if (reg_num == HEX_REG_QEMU_PKT_CNT) { if (reg_num == HEX_REG_QEMU_PKT_CNT) {
ctx->num_packets = 0; ctx->num_packets = 0;
ctx->num_insns = 0; ctx->num_insns = 0;
@ -571,6 +527,13 @@ static void gen_cond_jumpr(DisasContext *ctx, TCGv dst_pc,
gen_write_new_pc_addr(ctx, dst_pc, cond, pred); gen_write_new_pc_addr(ctx, dst_pc, cond, pred);
} }
static void gen_cond_jumpr31(DisasContext *ctx, TCGCond cond, TCGv pred)
{
TCGv LSB = tcg_temp_new();
tcg_gen_andi_tl(LSB, pred, 1);
gen_cond_jumpr(ctx, hex_gpr[HEX_REG_LR], cond, LSB);
}
static void gen_cond_jump(DisasContext *ctx, TCGCond cond, TCGv pred, static void gen_cond_jump(DisasContext *ctx, TCGCond cond, TCGv pred,
int pc_off) int pc_off)
{ {
@ -669,27 +632,99 @@ static void gen_jumpr(DisasContext *ctx, TCGv new_pc)
static void gen_call(DisasContext *ctx, int pc_off) static void gen_call(DisasContext *ctx, int pc_off)
{ {
TCGv next_PC = TCGv lr = get_result_gpr(ctx, HEX_REG_LR);
tcg_constant_tl(ctx->pkt->pc + ctx->pkt->encod_pkt_size_in_bytes); tcg_gen_movi_tl(lr, ctx->next_PC);
gen_log_reg_write(HEX_REG_LR, next_PC);
gen_write_new_pc_pcrel(ctx, pc_off, TCG_COND_ALWAYS, NULL); gen_write_new_pc_pcrel(ctx, pc_off, TCG_COND_ALWAYS, NULL);
} }
static void gen_callr(DisasContext *ctx, TCGv new_pc)
{
TCGv lr = get_result_gpr(ctx, HEX_REG_LR);
tcg_gen_movi_tl(lr, ctx->next_PC);
gen_write_new_pc_addr(ctx, new_pc, TCG_COND_ALWAYS, NULL);
}
static void gen_cond_call(DisasContext *ctx, TCGv pred, static void gen_cond_call(DisasContext *ctx, TCGv pred,
TCGCond cond, int pc_off) TCGCond cond, int pc_off)
{ {
TCGv next_PC; TCGv lr = get_result_gpr(ctx, HEX_REG_LR);
TCGv lsb = tcg_temp_new(); TCGv lsb = tcg_temp_new();
TCGLabel *skip = gen_new_label(); TCGLabel *skip = gen_new_label();
tcg_gen_andi_tl(lsb, pred, 1); tcg_gen_andi_tl(lsb, pred, 1);
gen_write_new_pc_pcrel(ctx, pc_off, cond, lsb); gen_write_new_pc_pcrel(ctx, pc_off, cond, lsb);
tcg_gen_brcondi_tl(cond, lsb, 0, skip); tcg_gen_brcondi_tl(cond, lsb, 0, skip);
next_PC = tcg_gen_movi_tl(lr, ctx->next_PC);
tcg_constant_tl(ctx->pkt->pc + ctx->pkt->encod_pkt_size_in_bytes);
gen_log_reg_write(HEX_REG_LR, next_PC);
gen_set_label(skip); gen_set_label(skip);
} }
static void gen_cond_callr(DisasContext *ctx,
TCGCond cond, TCGv pred, TCGv new_pc)
{
TCGv lsb = tcg_temp_new();
TCGLabel *skip = gen_new_label();
tcg_gen_andi_tl(lsb, pred, 1);
tcg_gen_brcondi_tl(cond, lsb, 0, skip);
gen_callr(ctx, new_pc);
gen_set_label(skip);
}
/* frame ^= (int64_t)FRAMEKEY << 32 */
static void gen_frame_unscramble(TCGv_i64 frame)
{
TCGv_i64 framekey = tcg_temp_new_i64();
tcg_gen_extu_i32_i64(framekey, hex_gpr[HEX_REG_FRAMEKEY]);
tcg_gen_shli_i64(framekey, framekey, 32);
tcg_gen_xor_i64(frame, frame, framekey);
}
static void gen_load_frame(DisasContext *ctx, TCGv_i64 frame, TCGv EA)
{
Insn *insn = ctx->insn; /* Needed for CHECK_NOSHUF */
CHECK_NOSHUF(EA, 8);
tcg_gen_qemu_ld64(frame, EA, ctx->mem_idx);
}
static void gen_return(DisasContext *ctx, TCGv_i64 dst, TCGv src)
{
/*
* frame = *src
* dst = frame_unscramble(frame)
* SP = src + 8
* PC = dst.w[1]
*/
TCGv_i64 frame = tcg_temp_new_i64();
TCGv r31 = tcg_temp_new();
TCGv r29 = get_result_gpr(ctx, HEX_REG_SP);
gen_load_frame(ctx, frame, src);
gen_frame_unscramble(frame);
tcg_gen_mov_i64(dst, frame);
tcg_gen_addi_tl(r29, src, 8);
tcg_gen_extrh_i64_i32(r31, dst);
gen_jumpr(ctx, r31);
}
/* if (pred) dst = dealloc_return(src):raw */
static void gen_cond_return(DisasContext *ctx, TCGv_i64 dst, TCGv src,
TCGv pred, TCGCond cond)
{
TCGv LSB = tcg_temp_new();
TCGLabel *skip = gen_new_label();
tcg_gen_andi_tl(LSB, pred, 1);
tcg_gen_brcondi_tl(cond, LSB, 0, skip);
gen_return(ctx, dst, src);
gen_set_label(skip);
}
/* sub-instruction version (no RddV, so handle it manually) */
static void gen_cond_return_subinsn(DisasContext *ctx, TCGCond cond, TCGv pred)
{
TCGv_i64 RddV = get_result_gpr_pair(ctx, HEX_REG_FP);
gen_cond_return(ctx, RddV, hex_gpr[HEX_REG_FP], pred, cond);
gen_log_reg_write_pair(HEX_REG_FP, RddV);
}
static void gen_endloop0(DisasContext *ctx) static void gen_endloop0(DisasContext *ctx)
{ {
TCGv lpcfg = tcg_temp_new(); TCGv lpcfg = tcg_temp_new();
@ -737,14 +772,95 @@ static void gen_endloop0(DisasContext *ctx)
TCGLabel *label3 = gen_new_label(); TCGLabel *label3 = gen_new_label();
tcg_gen_brcondi_tl(TCG_COND_LEU, hex_gpr[HEX_REG_LC0], 1, label3); tcg_gen_brcondi_tl(TCG_COND_LEU, hex_gpr[HEX_REG_LC0], 1, label3);
{ {
TCGv lc0 = get_result_gpr(ctx, HEX_REG_LC0);
gen_jumpr(ctx, hex_gpr[HEX_REG_SA0]); gen_jumpr(ctx, hex_gpr[HEX_REG_SA0]);
tcg_gen_subi_tl(hex_new_value[HEX_REG_LC0], tcg_gen_subi_tl(lc0, hex_gpr[HEX_REG_LC0], 1);
hex_gpr[HEX_REG_LC0], 1);
} }
gen_set_label(label3); gen_set_label(label3);
} }
} }
static void gen_endloop1(DisasContext *ctx)
{
/*
* if (hex_gpr[HEX_REG_LC1] > 1) {
* PC = hex_gpr[HEX_REG_SA1];
* hex_new_value[HEX_REG_LC1] = hex_gpr[HEX_REG_LC1] - 1;
* }
*/
TCGLabel *label = gen_new_label();
tcg_gen_brcondi_tl(TCG_COND_LEU, hex_gpr[HEX_REG_LC1], 1, label);
{
TCGv lc1 = get_result_gpr(ctx, HEX_REG_LC1);
gen_jumpr(ctx, hex_gpr[HEX_REG_SA1]);
tcg_gen_subi_tl(lc1, hex_gpr[HEX_REG_LC1], 1);
}
gen_set_label(label);
}
static void gen_endloop01(DisasContext *ctx)
{
TCGv lpcfg = tcg_temp_new();
TCGLabel *label1 = gen_new_label();
TCGLabel *label2 = gen_new_label();
TCGLabel *label3 = gen_new_label();
TCGLabel *done = gen_new_label();
GET_USR_FIELD(USR_LPCFG, lpcfg);
/*
* if (lpcfg == 1) {
* hex_new_pred_value[3] = 0xff;
* hex_pred_written |= 1 << 3;
* }
*/
tcg_gen_brcondi_tl(TCG_COND_NE, lpcfg, 1, label1);
{
tcg_gen_movi_tl(hex_new_pred_value[3], 0xff);
tcg_gen_ori_tl(hex_pred_written, hex_pred_written, 1 << 3);
}
gen_set_label(label1);
/*
* if (lpcfg) {
* SET_USR_FIELD(USR_LPCFG, lpcfg - 1);
* }
*/
tcg_gen_brcondi_tl(TCG_COND_EQ, lpcfg, 0, label2);
{
tcg_gen_subi_tl(lpcfg, lpcfg, 1);
SET_USR_FIELD(USR_LPCFG, lpcfg);
}
gen_set_label(label2);
/*
* if (hex_gpr[HEX_REG_LC0] > 1) {
* PC = hex_gpr[HEX_REG_SA0];
* hex_new_value[HEX_REG_LC0] = hex_gpr[HEX_REG_LC0] - 1;
* } else {
* if (hex_gpr[HEX_REG_LC1] > 1) {
* hex_next_pc = hex_gpr[HEX_REG_SA1];
* hex_new_value[HEX_REG_LC1] = hex_gpr[HEX_REG_LC1] - 1;
* }
* }
*/
tcg_gen_brcondi_tl(TCG_COND_LEU, hex_gpr[HEX_REG_LC0], 1, label3);
{
TCGv lc0 = get_result_gpr(ctx, HEX_REG_LC0);
gen_jumpr(ctx, hex_gpr[HEX_REG_SA0]);
tcg_gen_subi_tl(lc0, hex_gpr[HEX_REG_LC0], 1);
tcg_gen_br(done);
}
gen_set_label(label3);
tcg_gen_brcondi_tl(TCG_COND_LEU, hex_gpr[HEX_REG_LC1], 1, done);
{
TCGv lc1 = get_result_gpr(ctx, HEX_REG_LC1);
gen_jumpr(ctx, hex_gpr[HEX_REG_SA1]);
tcg_gen_subi_tl(lc1, hex_gpr[HEX_REG_LC1], 1);
}
gen_set_label(done);
}
static void gen_cmp_jumpnv(DisasContext *ctx, static void gen_cmp_jumpnv(DisasContext *ctx,
TCGCond cond, TCGv val, TCGv src, int pc_off) TCGCond cond, TCGv val, TCGv src, int pc_off)
{ {
@ -869,68 +985,32 @@ static intptr_t vreg_src_off(DisasContext *ctx, int num)
} }
static void gen_log_vreg_write(DisasContext *ctx, intptr_t srcoff, int num, static void gen_log_vreg_write(DisasContext *ctx, intptr_t srcoff, int num,
VRegWriteType type, int slot_num, VRegWriteType type)
bool is_predicated)
{ {
TCGLabel *label_end = NULL;
intptr_t dstoff; intptr_t dstoff;
if (is_predicated) {
TCGv cancelled = tcg_temp_new();
label_end = gen_new_label();
/* Don't do anything if the slot was cancelled */
tcg_gen_extract_tl(cancelled, hex_slot_cancelled, slot_num, 1);
tcg_gen_brcondi_tl(TCG_COND_NE, cancelled, 0, label_end);
}
if (type != EXT_TMP) { if (type != EXT_TMP) {
dstoff = ctx_future_vreg_off(ctx, num, 1, true); dstoff = ctx_future_vreg_off(ctx, num, 1, true);
tcg_gen_gvec_mov(MO_64, dstoff, srcoff, tcg_gen_gvec_mov(MO_64, dstoff, srcoff,
sizeof(MMVector), sizeof(MMVector)); sizeof(MMVector), sizeof(MMVector));
tcg_gen_ori_tl(hex_VRegs_updated, hex_VRegs_updated, 1 << num);
} else { } else {
dstoff = ctx_tmp_vreg_off(ctx, num, 1, false); dstoff = ctx_tmp_vreg_off(ctx, num, 1, false);
tcg_gen_gvec_mov(MO_64, dstoff, srcoff, tcg_gen_gvec_mov(MO_64, dstoff, srcoff,
sizeof(MMVector), sizeof(MMVector)); sizeof(MMVector), sizeof(MMVector));
} }
if (is_predicated) {
gen_set_label(label_end);
}
} }
static void gen_log_vreg_write_pair(DisasContext *ctx, intptr_t srcoff, int num, static void gen_log_vreg_write_pair(DisasContext *ctx, intptr_t srcoff, int num,
VRegWriteType type, int slot_num, VRegWriteType type)
bool is_predicated)
{ {
gen_log_vreg_write(ctx, srcoff, num ^ 0, type, slot_num, is_predicated); gen_log_vreg_write(ctx, srcoff, num ^ 0, type);
srcoff += sizeof(MMVector); srcoff += sizeof(MMVector);
gen_log_vreg_write(ctx, srcoff, num ^ 1, type, slot_num, is_predicated); gen_log_vreg_write(ctx, srcoff, num ^ 1, type);
} }
static void gen_log_qreg_write(intptr_t srcoff, int num, int vnew, static intptr_t get_result_qreg(DisasContext *ctx, int qnum)
int slot_num, bool is_predicated)
{ {
TCGLabel *label_end = NULL; return offsetof(CPUHexagonState, future_QRegs[qnum]);
intptr_t dstoff;
if (is_predicated) {
TCGv cancelled = tcg_temp_new();
label_end = gen_new_label();
/* Don't do anything if the slot was cancelled */
tcg_gen_extract_tl(cancelled, hex_slot_cancelled, slot_num, 1);
tcg_gen_brcondi_tl(TCG_COND_NE, cancelled, 0, label_end);
}
dstoff = offsetof(CPUHexagonState, future_QRegs[num]);
tcg_gen_gvec_mov(MO_64, dstoff, srcoff, sizeof(MMQReg), sizeof(MMQReg));
if (is_predicated) {
tcg_gen_ori_tl(hex_QRegs_updated, hex_QRegs_updated, 1 << num);
gen_set_label(label_end);
}
} }
static void gen_vreg_load(DisasContext *ctx, intptr_t dstoff, TCGv src, static void gen_vreg_load(DisasContext *ctx, intptr_t dstoff, TCGv src,

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
## ##
## Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. ## Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
## ##
## This program is free software; you can redistribute it and/or modify ## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by ## it under the terms of the GNU General Public License as published by
@ -89,6 +89,7 @@ def calculate_attribs():
add_qemu_macro_attrib('fWRITE_P3', 'A_WRITES_PRED_REG') add_qemu_macro_attrib('fWRITE_P3', 'A_WRITES_PRED_REG')
add_qemu_macro_attrib('fSET_OVERFLOW', 'A_IMPLICIT_WRITES_USR') add_qemu_macro_attrib('fSET_OVERFLOW', 'A_IMPLICIT_WRITES_USR')
add_qemu_macro_attrib('fSET_LPCFG', 'A_IMPLICIT_WRITES_USR') add_qemu_macro_attrib('fSET_LPCFG', 'A_IMPLICIT_WRITES_USR')
add_qemu_macro_attrib('fLOAD', 'A_SCALAR_LOAD')
add_qemu_macro_attrib('fSTORE', 'A_SCALAR_STORE') add_qemu_macro_attrib('fSTORE', 'A_SCALAR_STORE')
# Recurse down macros, find attributes from sub-macros # Recurse down macros, find attributes from sub-macros
@ -236,6 +237,13 @@ def helper_needs_next_PC(tag):
def need_pkt_has_multi_cof(tag): def need_pkt_has_multi_cof(tag):
return 'A_COF' in attribdict[tag] return 'A_COF' in attribdict[tag]
def need_condexec_reg(tag, regs):
if 'A_CONDEXEC' in attribdict[tag]:
for regtype, regid, toss, numregs in regs:
if is_writeonly(regid) and not is_hvx_reg(regtype):
return True
return False
def skip_qemu_helper(tag): def skip_qemu_helper(tag):
return tag in overrides.keys() return tag in overrides.keys()

View File

@ -82,7 +82,6 @@ enum ImmUnionTag {
VALUE, VALUE,
QEMU_TMP, QEMU_TMP,
IMM_PC, IMM_PC,
IMM_NPC,
IMM_CONSTEXT, IMM_CONSTEXT,
}; };

View File

@ -5,7 +5,7 @@
%{ %{
/* /*
* Copyright(c) 2019-2022 rev.ng Labs Srl. All Rights Reserved. * Copyright(c) 2019-2023 rev.ng Labs Srl. All Rights Reserved.
* *
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@ -140,8 +140,6 @@ STRING_LIT \"(\\.|[^"\\])*\"
yylval->rvalue.is_dotnew = true; yylval->rvalue.is_dotnew = true;
yylval->rvalue.signedness = SIGNED; yylval->rvalue.signedness = SIGNED;
return PRED; } return PRED; }
"IV1DEAD()" |
"fPAUSE(uiV);" { return ';'; }
"+=" { return INC; } "+=" { return INC; }
"-=" { return DEC; } "-=" { return DEC; }
"++" { return PLUSPLUS; } "++" { return PLUSPLUS; }
@ -159,9 +157,8 @@ STRING_LIT \"(\\.|[^"\\])*\"
"else" { return ELSE; } "else" { return ELSE; }
"for" { return FOR; } "for" { return FOR; }
"fREAD_IREG" { return ICIRC; } "fREAD_IREG" { return ICIRC; }
"fPART1" { return PART1; }
"if" { return IF; } "if" { return IF; }
"fFRAME_SCRAMBLE" { return FSCR; } "fFRAME_SCRAMBLE" |
"fFRAME_UNSCRAMBLE" { return FSCR; } "fFRAME_UNSCRAMBLE" { return FSCR; }
"fFRAMECHECK" { return FCHK; } "fFRAMECHECK" { return FCHK; }
"Constant_extended" { return CONSTEXT; } "Constant_extended" { return CONSTEXT; }
@ -312,14 +309,10 @@ STRING_LIT \"(\\.|[^"\\])*\"
"(unsigned int)" { yylval->cast.bit_width = 32; "(unsigned int)" { yylval->cast.bit_width = 32;
yylval->cast.signedness = UNSIGNED; yylval->cast.signedness = UNSIGNED;
return CAST; } return CAST; }
"fREAD_PC()" | "fREAD_PC()" { return PC; }
"PC" { return PC; }
"fREAD_NPC()" |
"NPC" { return NPC; }
"fGET_LPCFG" |
"USR.LPCFG" { return LPCFG; } "USR.LPCFG" { return LPCFG; }
"LOAD_CANCEL(EA)" { return LOAD_CANCEL; } "LOAD_CANCEL(EA)" { return LOAD_CANCEL; }
"STORE_CANCEL(EA)" | "STORE_CANCEL(EA)" { return STORE_CANCEL; }
"CANCEL" { return CANCEL; } "CANCEL" { return CANCEL; }
"N"{LOWER_ID}"N" { yylval->rvalue.type = REGISTER_ARG; "N"{LOWER_ID}"N" { yylval->rvalue.type = REGISTER_ARG;
yylval->rvalue.reg.type = DOTNEW; yylval->rvalue.reg.type = DOTNEW;
@ -360,14 +353,6 @@ STRING_LIT \"(\\.|[^"\\])*\"
yylval->rvalue.bit_width = 32; yylval->rvalue.bit_width = 32;
yylval->rvalue.signedness = UNSIGNED; yylval->rvalue.signedness = UNSIGNED;
return REG; } return REG; }
"fREAD_LC"[01] { yylval->rvalue.type = REGISTER;
yylval->rvalue.reg.type = CONTROL;
yylval->rvalue.reg.id = HEX_REG_LC0
+ (yytext[8] - '0') * 2;
yylval->rvalue.reg.bit_width = 32;
yylval->rvalue.bit_width = 32;
yylval->rvalue.signedness = UNSIGNED;
return REG; }
"LC"[01] { yylval->rvalue.type = REGISTER; "LC"[01] { yylval->rvalue.type = REGISTER;
yylval->rvalue.reg.type = CONTROL; yylval->rvalue.reg.type = CONTROL;
yylval->rvalue.reg.id = HEX_REG_LC0 yylval->rvalue.reg.id = HEX_REG_LC0
@ -376,14 +361,6 @@ STRING_LIT \"(\\.|[^"\\])*\"
yylval->rvalue.bit_width = 32; yylval->rvalue.bit_width = 32;
yylval->rvalue.signedness = UNSIGNED; yylval->rvalue.signedness = UNSIGNED;
return REG; } return REG; }
"fREAD_SA"[01] { yylval->rvalue.type = REGISTER;
yylval->rvalue.reg.type = CONTROL;
yylval->rvalue.reg.id = HEX_REG_SA0
+ (yytext[8] - '0') * 2;
yylval->rvalue.reg.bit_width = 32;
yylval->rvalue.bit_width = 32;
yylval->rvalue.signedness = UNSIGNED;
return REG; }
"SA"[01] { yylval->rvalue.type = REGISTER; "SA"[01] { yylval->rvalue.type = REGISTER;
yylval->rvalue.reg.type = CONTROL; yylval->rvalue.reg.type = CONTROL;
yylval->rvalue.reg.id = HEX_REG_SA0 yylval->rvalue.reg.id = HEX_REG_SA0

View File

@ -1,6 +1,6 @@
%{ %{
/* /*
* Copyright(c) 2019-2022 rev.ng Labs Srl. All Rights Reserved. * Copyright(c) 2019-2023 rev.ng Labs Srl. All Rights Reserved.
* *
* This program is distributed in the hope that it will be useful, * This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of * but WITHOUT ANY WARRANTY; without even the implied warranty of
@ -52,8 +52,8 @@
%token IN INAME VAR %token IN INAME VAR
%token ABS CROUND ROUND CIRCADD COUNTONES INC DEC ANDA ORA XORA PLUSPLUS ASL %token ABS CROUND ROUND CIRCADD COUNTONES INC DEC ANDA ORA XORA PLUSPLUS ASL
%token ASR LSR EQ NEQ LTE GTE MIN MAX ANDL FOR ICIRC IF MUN FSCR FCHK SXT %token ASR LSR EQ NEQ LTE GTE MIN MAX ANDL FOR ICIRC IF MUN FSCR FCHK SXT
%token ZXT CONSTEXT LOCNT BREV SIGN LOAD STORE PC NPC LPCFG %token ZXT CONSTEXT LOCNT BREV SIGN LOAD STORE PC LPCFG
%token LOAD_CANCEL CANCEL IDENTITY PART1 ROTL INSBITS SETBITS EXTRANGE %token LOAD_CANCEL STORE_CANCEL CANCEL IDENTITY ROTL INSBITS SETBITS EXTRANGE
%token CAST4_8U FAIL CARRY_FROM_ADD ADDSAT64 LSBNEW %token CAST4_8U FAIL CARRY_FROM_ADD ADDSAT64 LSBNEW
%token TYPE_SIZE_T TYPE_INT TYPE_SIGNED TYPE_UNSIGNED TYPE_LONG %token TYPE_SIZE_T TYPE_INT TYPE_SIGNED TYPE_UNSIGNED TYPE_LONG
@ -336,15 +336,6 @@ assign_statement : lvalue '=' rvalue
OUT(c, &@1, &$1, " = ", &$3, ";\n"); OUT(c, &@1, &$1, " = ", &$3, ";\n");
$$ = $1; $$ = $1;
} }
| PC '=' rvalue
{
@1.last_column = @3.last_column;
yyassert(c, &@1, !is_inside_ternary(c),
"Assignment side-effect not modeled!");
$3 = gen_rvalue_truncate(c, &@1, &$3);
$3 = rvalue_materialize(c, &@1, &$3);
OUT(c, &@1, "gen_write_new_pc(", &$3, ");\n");
}
| LOAD '(' IMM ',' IMM ',' SIGN ',' var ',' lvalue ')' | LOAD '(' IMM ',' IMM ',' SIGN ',' var ',' lvalue ')'
{ {
@1.last_column = @12.last_column; @1.last_column = @12.last_column;
@ -412,7 +403,6 @@ control_statement : frame_check
| cancel_statement | cancel_statement
| if_statement | if_statement
| for_statement | for_statement
| fpart1_statement
; ;
frame_check : FCHK '(' rvalue ',' rvalue ')' ';' frame_check : FCHK '(' rvalue ',' rvalue ')' ';'
@ -422,10 +412,11 @@ cancel_statement : LOAD_CANCEL
{ {
gen_load_cancel(c, &@1); gen_load_cancel(c, &@1);
} }
| CANCEL | STORE_CANCEL
{ {
gen_cancel(c, &@1); gen_cancel(c, &@1);
} }
| CANCEL
; ;
if_statement : if_stmt if_statement : if_stmt
@ -462,17 +453,6 @@ for_statement : FOR '(' IMM '=' IMM ';' IMM '<' IMM ';' IMM PLUSPLUS ')'
} }
; ;
/*
 * PART1 '(' statements ')'
 *
 * The code emitted for the enclosed statements is wrapped in
 * "if (insn->part1) { ... return; }".
 */
fpart1_statement : PART1
{
/* Mid-rule action: open the guard before the statements are emitted */
OUT(c, &@1, "if (insn->part1) {\n");
}
'(' statements ')'
{
@1.last_column = @3.last_column;
/* Close the guard; the generated code returns at the end of the block */
OUT(c, &@1, "return; }\n");
}
;
if_stmt : IF '(' rvalue ')' if_stmt : IF '(' rvalue ')'
{ {
@1.last_column = @3.last_column; @1.last_column = @3.last_column;
@ -512,20 +492,6 @@ rvalue : FAIL
rvalue.signedness = UNSIGNED; rvalue.signedness = UNSIGNED;
$$ = rvalue; $$ = rvalue;
} }
| NPC
{
/*
* NPC is only read from CALLs, so we can hardcode it
* at translation time
*/
HexValue rvalue;
memset(&rvalue, 0, sizeof(HexValue));
rvalue.type = IMMEDIATE;
rvalue.imm.type = IMM_NPC;
rvalue.bit_width = 32;
rvalue.signedness = UNSIGNED;
$$ = rvalue;
}
| CONSTEXT | CONSTEXT
{ {
HexValue rvalue; HexValue rvalue;
@ -781,11 +747,6 @@ rvalue : FAIL
/* Ones count */ /* Ones count */
$$ = gen_ctpop_op(c, &@1, &$3); $$ = gen_ctpop_op(c, &@1, &$3);
} }
| LPCFG
{
$$ = gen_tmp(c, &@1, 32, UNSIGNED);
OUT(c, &@1, "GET_USR_FIELD(USR_LPCFG, ", &$$, ");\n");
}
| EXTRACT '(' rvalue ',' rvalue ')' | EXTRACT '(' rvalue ',' rvalue ')'
{ {
@1.last_column = @6.last_column; @1.last_column = @6.last_column;

View File

@ -97,16 +97,8 @@
#define fWRITE_LR(A) (LR = A) #define fWRITE_LR(A) (LR = A)
#define fWRITE_FP(A) (FP = A) #define fWRITE_FP(A) (FP = A)
#define fWRITE_SP(A) (SP = A) #define fWRITE_SP(A) (SP = A)
/*
* Note: There is a rule in the parser that matches `PC = ...` and emits
* a call to `gen_write_new_pc`. We need to call `gen_write_new_pc` to
* get the correct semantics when there are multiple stores in a packet.
*/
#define fBRANCH(LOC, TYPE) (PC = LOC)
#define fJUMPR(REGNO, TARGET, TYPE) (PC = TARGET)
#define fWRITE_LOOP_REGS0(START, COUNT) SA0 = START; (LC0 = COUNT) #define fWRITE_LOOP_REGS0(START, COUNT) SA0 = START; (LC0 = COUNT)
#define fWRITE_LOOP_REGS1(START, COUNT) SA1 = START; (LC1 = COUNT) #define fWRITE_LOOP_REGS1(START, COUNT) SA1 = START; (LC1 = COUNT)
#define fWRITE_LC0(VAL) (LC0 = VAL)
#define fWRITE_LC1(VAL) (LC1 = VAL) #define fWRITE_LC1(VAL) (LC1 = VAL)
#define fSET_LPCFG(VAL) (USR.LPCFG = VAL) #define fSET_LPCFG(VAL) (USR.LPCFG = VAL)
#define fWRITE_P0(VAL) P0 = VAL; #define fWRITE_P0(VAL) P0 = VAL;
@ -121,7 +113,6 @@
#define fEA_GPI(IMM) (EA = fREAD_GP() + IMM) #define fEA_GPI(IMM) (EA = fREAD_GP() + IMM)
#define fPM_I(REG, IMM) (REG = REG + IMM) #define fPM_I(REG, IMM) (REG = REG + IMM)
#define fPM_M(REG, MVAL) (REG = REG + MVAL) #define fPM_M(REG, MVAL) (REG = REG + MVAL)
#define fWRITE_NPC(VAL) (PC = VAL)
/* Unary operators */ /* Unary operators */
#define fROUND(A) (A + 0x8000) #define fROUND(A) (A + 0x8000)

View File

@ -1,5 +1,5 @@
/* /*
* Copyright(c) 2019-2022 rev.ng Labs Srl. All Rights Reserved. * Copyright(c) 2019-2023 rev.ng Labs Srl. All Rights Reserved.
* *
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@ -185,9 +185,6 @@ void imm_print(Context *c, YYLTYPE *locp, HexImm *imm)
case IMM_PC: case IMM_PC:
EMIT(c, "ctx->base.pc_next"); EMIT(c, "ctx->base.pc_next");
break; break;
case IMM_NPC:
EMIT(c, "ctx->npc");
break;
case IMM_CONSTEXT: case IMM_CONSTEXT:
EMIT(c, "insn->extension_valid"); EMIT(c, "insn->extension_valid");
break; break;
@ -1323,10 +1320,6 @@ void gen_write_reg(Context *c, YYLTYPE *locp, HexValue *reg, HexValue *value)
locp, locp,
"gen_log_reg_write(", &reg->reg.id, ", ", "gen_log_reg_write(", &reg->reg.id, ", ",
&value_m, ");\n"); &value_m, ");\n");
OUT(c,
locp,
"ctx_log_reg_write(ctx, ", &reg->reg.id,
");\n");
} }
void gen_assign(Context *c, void gen_assign(Context *c,
@ -1675,9 +1668,7 @@ void gen_inst_init_args(Context *c, YYLTYPE *locp)
for (unsigned i = 0; i < c->inst.init_list->len; i++) { for (unsigned i = 0; i < c->inst.init_list->len; i++) {
HexValue *val = &g_array_index(c->inst.init_list, HexValue, i); HexValue *val = &g_array_index(c->inst.init_list, HexValue, i);
if (val->type == REGISTER_ARG) { if (val->type == REGISTER_ARG) {
char reg_id[5]; /* Nothing to do here */
reg_compose(c, locp, &val->reg, reg_id);
EMIT_HEAD(c, "tcg_gen_movi_i%u(%s, 0);\n", val->bit_width, reg_id);
} else if (val->type == PREDICATE) { } else if (val->type == PREDICATE) {
char suffix = val->is_dotnew ? 'N' : 'V'; char suffix = val->is_dotnew ? 'N' : 'V';
EMIT_HEAD(c, "tcg_gen_movi_i%u(P%c%c, 0);\n", val->bit_width, EMIT_HEAD(c, "tcg_gen_movi_i%u(P%c%c, 0);\n", val->bit_width,
@ -1722,13 +1713,10 @@ void gen_pred_assign(Context *c, YYLTYPE *locp, HexValue *left_pred,
*left_pred = gen_tmp(c, locp, 32, UNSIGNED); *left_pred = gen_tmp(c, locp, 32, UNSIGNED);
} }
/* Extract first 8 bits, and store new predicate value */ /* Extract first 8 bits, and store new predicate value */
OUT(c, locp, "tcg_gen_mov_i32(", left_pred, ", ", &r, ");\n"); OUT(c, locp, "tcg_gen_andi_i32(", left_pred, ", ", &r, ", 0xff);\n");
OUT(c, locp, "tcg_gen_andi_i32(", left_pred, ", ", left_pred,
", 0xff);\n");
if (is_direct) { if (is_direct) {
OUT(c, locp, "gen_log_pred_write(ctx, ", pred_id, ", ", left_pred, OUT(c, locp, "gen_log_pred_write(ctx, ", pred_id, ", ", left_pred,
");\n"); ");\n");
OUT(c, locp, "ctx_log_pred_write(ctx, ", pred_id, ");\n");
} }
} }
@ -1739,7 +1727,6 @@ void gen_cancel(Context *c, YYLTYPE *locp)
void gen_load_cancel(Context *c, YYLTYPE *locp) void gen_load_cancel(Context *c, YYLTYPE *locp)
{ {
gen_cancel(c, locp);
OUT(c, locp, "if (insn->slot == 0 && pkt->pkt_has_store_s1) {\n"); OUT(c, locp, "if (insn->slot == 0 && pkt->pkt_has_store_s1) {\n");
OUT(c, locp, "ctx->s1_store_processed = false;\n"); OUT(c, locp, "ctx->s1_store_processed = false;\n");
OUT(c, locp, "process_store(ctx, 1);\n"); OUT(c, locp, "process_store(ctx, 1);\n");

View File

@ -1,5 +1,5 @@
/* /*
* Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
* *
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@ -205,26 +205,11 @@ static inline void gen_cancel(uint32_t slot)
#define CANCEL gen_cancel(slot); #define CANCEL gen_cancel(slot);
#else #else
#define CANCEL cancel_slot(env, slot) #define CANCEL do { } while (0)
#endif #endif
#define LOAD_CANCEL(EA) do { CANCEL; } while (0) #define LOAD_CANCEL(EA) do { CANCEL; } while (0)
#ifdef QEMU_GENERATE
/*
 * Emit TCG ops that set bit slot_num in hex_slot_cancelled when the
 * least-significant bit of pred is clear.  When that bit is set,
 * hex_slot_cancelled keeps its current value.
 */
static inline void gen_pred_cancel(TCGv pred, uint32_t slot_num)
{
    TCGv cancelled_with_slot = tcg_temp_new();
    TCGv pred_lsb = tcg_temp_new();
    TCGv zero = tcg_constant_tl(0);

    /* Candidate value: the current mask with this slot's bit added */
    tcg_gen_ori_tl(cancelled_with_slot, hex_slot_cancelled, 1 << slot_num);

    /* Only bit 0 of the predicate takes part in the decision */
    tcg_gen_andi_tl(pred_lsb, pred, 1);

    /* hex_slot_cancelled = (pred_lsb == 0) ? candidate : hex_slot_cancelled */
    tcg_gen_movcond_tl(TCG_COND_EQ, hex_slot_cancelled, pred_lsb, zero,
                       cancelled_with_slot, hex_slot_cancelled);
}
#define PRED_LOAD_CANCEL(PRED, EA) \
gen_pred_cancel(PRED, insn->is_endloop ? 4 : insn->slot)
#endif
#define STORE_CANCEL(EA) { env->slot_cancelled |= (1 << slot); } #define STORE_CANCEL(EA) { env->slot_cancelled |= (1 << slot); }
#define fMAX(A, B) (((A) > (B)) ? (A) : (B)) #define fMAX(A, B) (((A) > (B)) ? (A) : (B))
@ -415,16 +400,6 @@ static inline TCGv gen_read_ireg(TCGv result, TCGv val, int shift)
#define fBRANCH(LOC, TYPE) fWRITE_NPC(LOC) #define fBRANCH(LOC, TYPE) fWRITE_NPC(LOC)
#define fJUMPR(REGNO, TARGET, TYPE) fBRANCH(TARGET, COF_TYPE_JUMPR) #define fJUMPR(REGNO, TARGET, TYPE) fBRANCH(TARGET, COF_TYPE_JUMPR)
#define fHINTJR(TARGET) { /* Not modelled in qemu */} #define fHINTJR(TARGET) { /* Not modelled in qemu */}
/*
 * fCALL(A): write the value of fREAD_NPC() to LR through fWRITE_LR, then
 * branch to A through fBRANCH with COF_TYPE_CALL.
 */
#define fCALL(A) \
do { \
fWRITE_LR(fREAD_NPC()); \
fBRANCH(A, COF_TYPE_CALL); \
} while (0)
/* fCALLR(A): same sequence as fCALL, but the branch uses COF_TYPE_CALLR */
#define fCALLR(A) \
do { \
fWRITE_LR(fREAD_NPC()); \
fBRANCH(A, COF_TYPE_CALLR); \
} while (0)
#define fWRITE_LOOP_REGS0(START, COUNT) \ #define fWRITE_LOOP_REGS0(START, COUNT) \
do { \ do { \
WRITE_RREG(HEX_REG_LC0, COUNT); \ WRITE_RREG(HEX_REG_LC0, COUNT); \

View File

@ -1,5 +1,5 @@
## ##
## Copyright(c) 2020-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. ## Copyright(c) 2020-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
## ##
## This program is free software; you can redistribute it and/or modify ## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by ## it under the terms of the GNU General Public License as published by
@ -276,4 +276,13 @@ tcg_funcs_generated = custom_target(
) )
hexagon_ss.add(tcg_funcs_generated) hexagon_ss.add(tcg_funcs_generated)
# Produce analyze_funcs_generated.c.inc by running gen_analyze_funcs.py with
# helper_in and the output path as its arguments.  The target is rebuilt when
# any of the listed depend_files change.
analyze_funcs_generated = custom_target(
'analyze_funcs_generated.c.inc',
output: 'analyze_funcs_generated.c.inc',
depends: helper_dep,
depend_files: [hex_common_py, attribs_def, gen_tcg_h, gen_tcg_hvx_h],
command: [python, files('gen_analyze_funcs.py'), helper_in, '@OUTPUT@'],
)
# Make the generated file part of the hexagon source set
hexagon_ss.add(analyze_funcs_generated)
target_arch += {'hexagon': hexagon_ss} target_arch += {'hexagon': hexagon_ss}

View File

@ -1,5 +1,5 @@
/* /*
* Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
* *
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@ -30,6 +30,7 @@
#include "mmvec/mmvec.h" #include "mmvec/mmvec.h"
#include "mmvec/macros.h" #include "mmvec/macros.h"
#include "op_helper.h" #include "op_helper.h"
#include "translate.h"
#define SF_BIAS 127 #define SF_BIAS 127
#define SF_MANTBITS 23 #define SF_MANTBITS 23
@ -105,30 +106,6 @@ void log_store64(CPUHexagonState *env, target_ulong addr,
env->mem_log_stores[slot].data64 = val; env->mem_log_stores[slot].data64 = val;
} }
/*
 * Set the PC to addr.
 *
 * When pkt_has_multi_cof is true, env->branch_taken records that a branch
 * has already been taken; a later request is then only logged and
 * otherwise ignored.  When pkt_has_multi_cof is false, the PC is always
 * written and env->branch_taken is left untouched.
 */
void write_new_pc(CPUHexagonState *env, bool pkt_has_multi_cof,
                  target_ulong addr)
{
    HEX_DEBUG_LOG("write_new_pc(0x" TARGET_FMT_lx ")\n", addr);

    /*
     * If more than one branch is taken in a packet, only the first one
     * is actually done.
     */
    if (pkt_has_multi_cof && env->branch_taken) {
        HEX_DEBUG_LOG("INFO: multiple branches taken in same packet, "
                      "ignoring the second one\n");
        return;
    }

    fCHECK_PCALIGN(addr);
    env->gpr[HEX_REG_PC] = addr;

    /* Remember the taken branch only when a second one is possible */
    if (pkt_has_multi_cof) {
        env->branch_taken = 1;
    }
}
/* Handy place to set a breakpoint */ /* Handy place to set a breakpoint */
void HELPER(debug_start_packet)(CPUHexagonState *env) void HELPER(debug_start_packet)(CPUHexagonState *env)
{ {
@ -439,9 +416,10 @@ int32_t HELPER(vacsh_pred)(CPUHexagonState *env,
return PeV; return PeV;
} }
static void probe_store(CPUHexagonState *env, int slot, int mmu_idx) static void probe_store(CPUHexagonState *env, int slot, int mmu_idx,
bool is_predicated)
{ {
if (!(env->slot_cancelled & (1 << slot))) { if (!is_predicated || !(env->slot_cancelled & (1 << slot))) {
size1u_t width = env->mem_log_stores[slot].width; size1u_t width = env->mem_log_stores[slot].width;
target_ulong va = env->mem_log_stores[slot].va; target_ulong va = env->mem_log_stores[slot].va;
uintptr_t ra = GETPC(); uintptr_t ra = GETPC();
@ -461,9 +439,12 @@ void HELPER(probe_noshuf_load)(CPUHexagonState *env, target_ulong va,
} }
/* Called during packet commit when there are two scalar stores */ /* Called during packet commit when there are two scalar stores */
void HELPER(probe_pkt_scalar_store_s0)(CPUHexagonState *env, int mmu_idx) void HELPER(probe_pkt_scalar_store_s0)(CPUHexagonState *env, int args)
{ {
probe_store(env, 0, mmu_idx); int mmu_idx = FIELD_EX32(args, PROBE_PKT_SCALAR_STORE_S0, MMU_IDX);
bool is_predicated =
FIELD_EX32(args, PROBE_PKT_SCALAR_STORE_S0, IS_PREDICATED);
probe_store(env, 0, mmu_idx, is_predicated);
} }
void HELPER(probe_hvx_stores)(CPUHexagonState *env, int mmu_idx) void HELPER(probe_hvx_stores)(CPUHexagonState *env, int mmu_idx)
@ -510,15 +491,18 @@ void HELPER(probe_hvx_stores)(CPUHexagonState *env, int mmu_idx)
void HELPER(probe_pkt_scalar_hvx_stores)(CPUHexagonState *env, int mask, void HELPER(probe_pkt_scalar_hvx_stores)(CPUHexagonState *env, int mask,
int mmu_idx) int mmu_idx)
{ {
bool has_st0 = (mask >> 0) & 1; bool has_st0 = FIELD_EX32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_ST0);
bool has_st1 = (mask >> 1) & 1; bool has_st1 = FIELD_EX32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_ST1);
bool has_hvx_stores = (mask >> 2) & 1; bool has_hvx_stores =
FIELD_EX32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_HVX_STORES);
bool s0_is_pred = FIELD_EX32(mask, PROBE_PKT_SCALAR_HVX_STORES, S0_IS_PRED);
bool s1_is_pred = FIELD_EX32(mask, PROBE_PKT_SCALAR_HVX_STORES, S1_IS_PRED);
if (has_st0) { if (has_st0) {
probe_store(env, 0, mmu_idx); probe_store(env, 0, mmu_idx, s0_is_pred);
} }
if (has_st1) { if (has_st1) {
probe_store(env, 1, mmu_idx); probe_store(env, 1, mmu_idx, s1_is_pred);
} }
if (has_hvx_stores) { if (has_hvx_stores) {
HELPER(probe_hvx_stores)(env, mmu_idx); HELPER(probe_hvx_stores)(env, mmu_idx);
@ -1193,7 +1177,7 @@ float32 HELPER(sffms)(CPUHexagonState *env, float32 RxV,
{ {
float32 neg_RsV; float32 neg_RsV;
arch_fpop_start(env); arch_fpop_start(env);
neg_RsV = float32_sub(float32_zero, RsV, &env->fp_status); neg_RsV = float32_set_sign(RsV, float32_is_neg(RsV) ? 0 : 1);
RxV = internal_fmafx(neg_RsV, RtV, RxV, 0, &env->fp_status); RxV = internal_fmafx(neg_RsV, RtV, RxV, 0, &env->fp_status);
arch_fpop_end(env); arch_fpop_end(env);
return RxV; return RxV;
@ -1468,12 +1452,6 @@ void HELPER(vwhist128qm)(CPUHexagonState *env, int32_t uiV)
} }
} }
/* Log the cancellation and set the slot's bit in env->slot_cancelled */
void cancel_slot(CPUHexagonState *env, uint32_t slot)
{
    HEX_DEBUG_LOG("Slot %d cancelled\n", slot);

    env->slot_cancelled = env->slot_cancelled | (1 << slot);
}
/* These macros can be referenced in the generated helper functions */ /* These macros can be referenced in the generated helper functions */
#define warn(...) /* Nothing */ #define warn(...) /* Nothing */
#define fatal(...) g_assert_not_reached(); #define fatal(...) g_assert_not_reached();

View File

@ -1,5 +1,5 @@
/* /*
* Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
* *
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@ -19,7 +19,6 @@
#define HEXAGON_OP_HELPER_H #define HEXAGON_OP_HELPER_H
/* Misc functions */ /* Misc functions */
void cancel_slot(CPUHexagonState *env, uint32_t slot);
void write_new_pc(CPUHexagonState *env, bool pkt_has_multi_cof, target_ulong addr); void write_new_pc(CPUHexagonState *env, bool pkt_has_multi_cof, target_ulong addr);
uint8_t mem_load1(CPUHexagonState *env, uint32_t slot, target_ulong vaddr); uint8_t mem_load1(CPUHexagonState *env, uint32_t slot, target_ulong vaddr);

View File

@ -1,5 +1,5 @@
/* /*
* Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
* *
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@ -29,6 +29,15 @@
#include "translate.h" #include "translate.h"
#include "printinsn.h" #include "printinsn.h"
#include "analyze_funcs_generated.c.inc"
/* Signature of a per-opcode analysis routine; it receives the DisasContext */
typedef void (*AnalyzeInsn)(DisasContext *ctx);
/*
 * Table of analysis routines indexed by opcode.
 *
 * While opcodes_def_generated.h.inc is being included, every OPCODE(X) it
 * contains expands to the designated initializer "[X] = analyze_X".
 * Elements that receive no initializer are NULL, as for any partially
 * initialized static array.
 */
static const AnalyzeInsn opcode_analyze[XX_LAST_OPCODE] = {
#define OPCODE(X) [X] = analyze_##X
#include "opcodes_def_generated.h.inc"
#undef OPCODE
};
TCGv hex_gpr[TOTAL_PER_THREAD_REGS]; TCGv hex_gpr[TOTAL_PER_THREAD_REGS];
TCGv hex_pred[NUM_PREGS]; TCGv hex_pred[NUM_PREGS];
TCGv hex_this_PC; TCGv hex_this_PC;
@ -47,8 +56,6 @@ TCGv hex_dczero_addr;
TCGv hex_llsc_addr; TCGv hex_llsc_addr;
TCGv hex_llsc_val; TCGv hex_llsc_val;
TCGv_i64 hex_llsc_val_i64; TCGv_i64 hex_llsc_val_i64;
TCGv hex_VRegs_updated;
TCGv hex_QRegs_updated;
TCGv hex_vstore_addr[VSTORES_MAX]; TCGv hex_vstore_addr[VSTORES_MAX];
TCGv hex_vstore_size[VSTORES_MAX]; TCGv hex_vstore_size[VSTORES_MAX];
TCGv hex_vstore_pending[VSTORES_MAX]; TCGv hex_vstore_pending[VSTORES_MAX];
@ -239,7 +246,15 @@ static bool check_for_attrib(Packet *pkt, int attrib)
static bool need_slot_cancelled(Packet *pkt) static bool need_slot_cancelled(Packet *pkt)
{ {
return check_for_attrib(pkt, A_CONDEXEC); /* We only need slot_cancelled for conditional store instructions */
for (int i = 0; i < pkt->num_insns; i++) {
uint16_t opcode = pkt->insn[i].opcode;
if (GET_ATTRIB(opcode, A_CONDEXEC) &&
GET_ATTRIB(opcode, A_SCALAR_STORE)) {
return true;
}
}
return false;
} }
static bool need_pred_written(Packet *pkt) static bool need_pred_written(Packet *pkt)
@ -265,6 +280,77 @@ static bool need_next_PC(DisasContext *ctx)
return false; return false;
} }
/*
 * The opcode_analyze functions record most of the register writes in a
 * packet.  Some writes are implicit: they are described by attributes of
 * the instruction and are recorded here instead.
 *
 * If the current instruction (ctx->insn) has @attrib, log a write to
 * register @rnum and tell ctx_log_reg_write whether the write is
 * conditional.
 */
static void mark_implicit_reg_write(DisasContext *ctx, int attrib, int rnum)
{
    uint16_t opcode = ctx->insn->opcode;
    bool is_loop_count;
    bool is_endloop;
    bool conditional;

    if (!GET_ATTRIB(opcode, attrib)) {
        return;
    }

    is_loop_count = rnum == HEX_REG_LC0 || rnum == HEX_REG_LC1;
    is_endloop = opcode == J2_endloop0 ||
                 opcode == J2_endloop1 ||
                 opcode == J2_endloop01;

    /*
     * A write is treated as conditional when
     *   - the instruction has A_CONDEXEC, or
     *   - the register is USR, which is used to set overflow and
     *     FP exceptions, or
     *   - the register is LC0/LC1 and the instruction is an endloop.
     */
    conditional = GET_ATTRIB(opcode, A_CONDEXEC) ||
                  rnum == HEX_REG_USR ||
                  (is_loop_count && is_endloop);

    ctx_log_reg_write(ctx, rnum, conditional);
}
/*
 * Check the current instruction against every implicit-register-write
 * attribute and record the matching register writes.
 */
static void mark_implicit_reg_writes(DisasContext *ctx)
{
    /* Attribute / register pairs, visited in this order */
    const struct {
        int attrib;
        int rnum;
    } implicit[] = {
        { A_IMPLICIT_WRITES_FP,  HEX_REG_FP  },
        { A_IMPLICIT_WRITES_SP,  HEX_REG_SP  },
        { A_IMPLICIT_WRITES_LR,  HEX_REG_LR  },
        { A_IMPLICIT_WRITES_LC0, HEX_REG_LC0 },
        { A_IMPLICIT_WRITES_SA0, HEX_REG_SA0 },
        { A_IMPLICIT_WRITES_LC1, HEX_REG_LC1 },
        { A_IMPLICIT_WRITES_SA1, HEX_REG_SA1 },
        { A_IMPLICIT_WRITES_USR, HEX_REG_USR },
        { A_FPOP,                HEX_REG_USR },
    };
    const int count = (int)(sizeof(implicit) / sizeof(implicit[0]));

    for (int k = 0; k < count; k++) {
        mark_implicit_reg_write(ctx, implicit[k].attrib, implicit[k].rnum);
    }
}
/*
 * Log a write to predicate register @pnum when the current instruction
 * (ctx->insn) has @attrib; otherwise do nothing.
 */
static void mark_implicit_pred_write(DisasContext *ctx, int attrib, int pnum)
{
    if (!GET_ATTRIB(ctx->insn->opcode, attrib)) {
        return;
    }

    ctx_log_pred_write(ctx, pnum);
}
/*
 * Check the current instruction against the implicit-predicate-write
 * attributes for P0 through P3 and record the matching writes.
 */
static void mark_implicit_pred_writes(DisasContext *ctx)
{
    /* The position in this table is the predicate register number */
    const int pred_attribs[] = {
        A_IMPLICIT_WRITES_P0,
        A_IMPLICIT_WRITES_P1,
        A_IMPLICIT_WRITES_P2,
        A_IMPLICIT_WRITES_P3,
    };
    const int count = (int)(sizeof(pred_attribs) / sizeof(pred_attribs[0]));

    for (int pnum = 0; pnum < count; pnum++) {
        mark_implicit_pred_write(ctx, pred_attribs[pnum], pnum);
    }
}
/*
 * Run the analysis pass over every instruction of ctx->pkt.
 *
 * For each instruction, ctx->insn is pointed at it, its opcode_analyze
 * entry is called when one exists, and the implicit register and
 * predicate writes are recorded.  ctx->need_pkt_has_store_s1 is cleared
 * before the first instruction is visited.
 */
static void analyze_packet(DisasContext *ctx)
{
    Packet *pkt = ctx->pkt;
    int idx;

    ctx->need_pkt_has_store_s1 = false;

    for (idx = 0; idx < pkt->num_insns; idx++) {
        ctx->insn = &pkt->insn[idx];

        /* Not every opcode necessarily has an analyze function */
        if (opcode_analyze[ctx->insn->opcode]) {
            opcode_analyze[ctx->insn->opcode](ctx);
        }

        mark_implicit_reg_writes(ctx);
        mark_implicit_pred_writes(ctx);
    }
}
static void gen_start_packet(DisasContext *ctx) static void gen_start_packet(DisasContext *ctx)
{ {
Packet *pkt = ctx->pkt; Packet *pkt = ctx->pkt;
@ -275,6 +361,7 @@ static void gen_start_packet(DisasContext *ctx)
ctx->next_PC = next_PC; ctx->next_PC = next_PC;
ctx->reg_log_idx = 0; ctx->reg_log_idx = 0;
bitmap_zero(ctx->regs_written, TOTAL_PER_THREAD_REGS); bitmap_zero(ctx->regs_written, TOTAL_PER_THREAD_REGS);
bitmap_zero(ctx->predicated_regs, TOTAL_PER_THREAD_REGS);
ctx->preg_log_idx = 0; ctx->preg_log_idx = 0;
bitmap_zero(ctx->pregs_written, NUM_PREGS); bitmap_zero(ctx->pregs_written, NUM_PREGS);
ctx->future_vregs_idx = 0; ctx->future_vregs_idx = 0;
@ -283,14 +370,27 @@ static void gen_start_packet(DisasContext *ctx)
bitmap_zero(ctx->vregs_updated_tmp, NUM_VREGS); bitmap_zero(ctx->vregs_updated_tmp, NUM_VREGS);
bitmap_zero(ctx->vregs_updated, NUM_VREGS); bitmap_zero(ctx->vregs_updated, NUM_VREGS);
bitmap_zero(ctx->vregs_select, NUM_VREGS); bitmap_zero(ctx->vregs_select, NUM_VREGS);
bitmap_zero(ctx->predicated_future_vregs, NUM_VREGS);
bitmap_zero(ctx->predicated_tmp_vregs, NUM_VREGS);
ctx->qreg_log_idx = 0; ctx->qreg_log_idx = 0;
for (i = 0; i < STORES_MAX; i++) { for (i = 0; i < STORES_MAX; i++) {
ctx->store_width[i] = 0; ctx->store_width[i] = 0;
} }
tcg_gen_movi_tl(hex_pkt_has_store_s1, pkt->pkt_has_store_s1);
ctx->s1_store_processed = false; ctx->s1_store_processed = false;
ctx->pre_commit = true; ctx->pre_commit = true;
analyze_packet(ctx);
if (ctx->need_pkt_has_store_s1) {
tcg_gen_movi_tl(hex_pkt_has_store_s1, pkt->pkt_has_store_s1);
}
/*
* pregs_written is used both in the analyze phase as well as the code
* gen phase, so clear it again.
*/
bitmap_zero(ctx->pregs_written, NUM_PREGS);
if (HEX_DEBUG) { if (HEX_DEBUG) {
/* Handy place to set a breakpoint before the packet executes */ /* Handy place to set a breakpoint before the packet executes */
gen_helper_debug_start_packet(cpu_env); gen_helper_debug_start_packet(cpu_env);
@ -313,9 +413,42 @@ static void gen_start_packet(DisasContext *ctx)
tcg_gen_movi_tl(hex_pred_written, 0); tcg_gen_movi_tl(hex_pred_written, 0);
} }
if (pkt->pkt_has_hvx) { /* Preload the predicated registers into hex_new_value[i] */
tcg_gen_movi_tl(hex_VRegs_updated, 0); if (!bitmap_empty(ctx->predicated_regs, TOTAL_PER_THREAD_REGS)) {
tcg_gen_movi_tl(hex_QRegs_updated, 0); int i = find_first_bit(ctx->predicated_regs, TOTAL_PER_THREAD_REGS);
while (i < TOTAL_PER_THREAD_REGS) {
tcg_gen_mov_tl(hex_new_value[i], hex_gpr[i]);
i = find_next_bit(ctx->predicated_regs, TOTAL_PER_THREAD_REGS,
i + 1);
}
}
/* Preload the predicated HVX registers into future_VRegs and tmp_VRegs */
if (!bitmap_empty(ctx->predicated_future_vregs, NUM_VREGS)) {
int i = find_first_bit(ctx->predicated_future_vregs, NUM_VREGS);
while (i < NUM_VREGS) {
const intptr_t VdV_off =
ctx_future_vreg_off(ctx, i, 1, true);
intptr_t src_off = offsetof(CPUHexagonState, VRegs[i]);
tcg_gen_gvec_mov(MO_64, VdV_off,
src_off,
sizeof(MMVector),
sizeof(MMVector));
i = find_next_bit(ctx->predicated_future_vregs, NUM_VREGS, i + 1);
}
}
if (!bitmap_empty(ctx->predicated_tmp_vregs, NUM_VREGS)) {
int i = find_first_bit(ctx->predicated_tmp_vregs, NUM_VREGS);
while (i < NUM_VREGS) {
const intptr_t VdV_off =
ctx_tmp_vreg_off(ctx, i, 1, true);
intptr_t src_off = offsetof(CPUHexagonState, VRegs[i]);
tcg_gen_gvec_mov(MO_64, VdV_off,
src_off,
sizeof(MMVector),
sizeof(MMVector));
i = find_next_bit(ctx->predicated_tmp_vregs, NUM_VREGS, i + 1);
}
} }
} }
@ -336,66 +469,6 @@ bool is_gather_store_insn(DisasContext *ctx)
return false; return false;
} }
/*
 * The LOG_*_WRITE macros mark most of the writes in a packet.  Implicit
 * writes are described by attributes of the instruction instead and are
 * handled here.
 *
 * If the current instruction (ctx->insn) has @attrib, log a write to
 * register @rnum.  For a conditional write, hex_new_value[rnum] is first
 * loaded from hex_gpr[rnum], unless is_preloaded(ctx, rnum) is true.
 */
static void mark_implicit_reg_write(DisasContext *ctx, int attrib, int rnum)
{
    uint16_t opcode = ctx->insn->opcode;
    bool conditional;

    if (!GET_ATTRIB(opcode, attrib)) {
        return;
    }

    if (rnum == HEX_REG_USR) {
        /*
         * USR is used to set overflow and FP exceptions,
         * so treat it as conditional
         */
        conditional = true;
    } else if ((rnum == HEX_REG_LC0 || rnum == HEX_REG_LC1) &&
               (opcode == J2_endloop0 ||
                opcode == J2_endloop1 ||
                opcode == J2_endloop01)) {
        /* LC0/LC1 is conditionally written by endloop instructions */
        conditional = true;
    } else {
        conditional = GET_ATTRIB(opcode, A_CONDEXEC) ? true : false;
    }

    if (conditional && !is_preloaded(ctx, rnum)) {
        tcg_gen_mov_tl(hex_new_value[rnum], hex_gpr[rnum]);
    }

    ctx_log_reg_write(ctx, rnum);
}
/*
 * If the current instruction (ctx->insn) has @attrib, log a write to
 * predicate register @pnum.
 */
static void mark_implicit_pred_write(DisasContext *ctx, int attrib, int pnum)
{
    uint16_t opcode = ctx->insn->opcode;

    if (!GET_ATTRIB(opcode, attrib)) {
        return;
    }

    ctx_log_pred_write(ctx, pnum);
}
/*
 * Apply mark_implicit_reg_write for every implicit-register-write
 * attribute, in a fixed order.
 */
static void mark_implicit_reg_writes(DisasContext *ctx)
{
    /* Each row is { attribute, register written when it is present } */
    const int pairs[][2] = {
        { A_IMPLICIT_WRITES_FP,  HEX_REG_FP  },
        { A_IMPLICIT_WRITES_SP,  HEX_REG_SP  },
        { A_IMPLICIT_WRITES_LR,  HEX_REG_LR  },
        { A_IMPLICIT_WRITES_LC0, HEX_REG_LC0 },
        { A_IMPLICIT_WRITES_SA0, HEX_REG_SA0 },
        { A_IMPLICIT_WRITES_LC1, HEX_REG_LC1 },
        { A_IMPLICIT_WRITES_SA1, HEX_REG_SA1 },
        { A_IMPLICIT_WRITES_USR, HEX_REG_USR },
        { A_FPOP,                HEX_REG_USR },
    };
    const int rows = (int)(sizeof(pairs) / sizeof(pairs[0]));
    int row = 0;

    while (row < rows) {
        mark_implicit_reg_write(ctx, pairs[row][0], pairs[row][1]);
        row++;
    }
}
/*
 * Apply mark_implicit_pred_write for the implicit-write attributes of
 * P0, P1, P2 and P3, in that order.
 */
static void mark_implicit_pred_writes(DisasContext *ctx)
{
    /* Attribute for predicate register N is stored at index N */
    const int attrib_for_pred[] = {
        A_IMPLICIT_WRITES_P0,
        A_IMPLICIT_WRITES_P1,
        A_IMPLICIT_WRITES_P2,
        A_IMPLICIT_WRITES_P3,
    };
    const int npreds =
        (int)(sizeof(attrib_for_pred) / sizeof(attrib_for_pred[0]));
    int pnum = 0;

    while (pnum < npreds) {
        mark_implicit_pred_write(ctx, attrib_for_pred[pnum], pnum);
        pnum++;
    }
}
static void mark_store_width(DisasContext *ctx) static void mark_store_width(DisasContext *ctx)
{ {
uint16_t opcode = ctx->insn->opcode; uint16_t opcode = ctx->insn->opcode;
@ -423,9 +496,7 @@ static void mark_store_width(DisasContext *ctx)
static void gen_insn(DisasContext *ctx) static void gen_insn(DisasContext *ctx)
{ {
if (ctx->insn->generate) { if (ctx->insn->generate) {
mark_implicit_reg_writes(ctx);
ctx->insn->generate(ctx); ctx->insn->generate(ctx);
mark_implicit_pred_writes(ctx);
mark_store_width(ctx); mark_store_width(ctx);
} else { } else {
gen_exception_end_tb(ctx, HEX_EXCP_INVALID_OPCODE); gen_exception_end_tb(ctx, HEX_EXCP_INVALID_OPCODE);
@ -646,65 +717,31 @@ static void gen_commit_hvx(DisasContext *ctx)
/* /*
* for (i = 0; i < ctx->vreg_log_idx; i++) { * for (i = 0; i < ctx->vreg_log_idx; i++) {
* int rnum = ctx->vreg_log[i]; * int rnum = ctx->vreg_log[i];
* if (ctx->vreg_is_predicated[i]) { * env->VRegs[rnum] = env->future_VRegs[rnum];
* if (env->VRegs_updated & (1 << rnum)) {
* env->VRegs[rnum] = env->future_VRegs[rnum];
* }
* } else {
* env->VRegs[rnum] = env->future_VRegs[rnum];
* }
* } * }
*/ */
for (i = 0; i < ctx->vreg_log_idx; i++) { for (i = 0; i < ctx->vreg_log_idx; i++) {
int rnum = ctx->vreg_log[i]; int rnum = ctx->vreg_log[i];
bool is_predicated = ctx->vreg_is_predicated[i];
intptr_t dstoff = offsetof(CPUHexagonState, VRegs[rnum]); intptr_t dstoff = offsetof(CPUHexagonState, VRegs[rnum]);
intptr_t srcoff = ctx_future_vreg_off(ctx, rnum, 1, false); intptr_t srcoff = ctx_future_vreg_off(ctx, rnum, 1, false);
size_t size = sizeof(MMVector); size_t size = sizeof(MMVector);
if (is_predicated) { tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size);
TCGv cmp = tcg_temp_new();
TCGLabel *label_skip = gen_new_label();
tcg_gen_andi_tl(cmp, hex_VRegs_updated, 1 << rnum);
tcg_gen_brcondi_tl(TCG_COND_EQ, cmp, 0, label_skip);
tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size);
gen_set_label(label_skip);
} else {
tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size);
}
} }
/* /*
* for (i = 0; i < ctx->qreg_log_idx; i++) { * for (i = 0; i < ctx->qreg_log_idx; i++) {
* int rnum = ctx->qreg_log[i]; * int rnum = ctx->qreg_log[i];
* if (ctx->qreg_is_predicated[i]) { * env->QRegs[rnum] = env->future_QRegs[rnum];
* if (env->QRegs_updated) & (1 << rnum)) {
* env->QRegs[rnum] = env->future_QRegs[rnum];
* }
* } else {
* env->QRegs[rnum] = env->future_QRegs[rnum];
* }
* } * }
*/ */
for (i = 0; i < ctx->qreg_log_idx; i++) { for (i = 0; i < ctx->qreg_log_idx; i++) {
int rnum = ctx->qreg_log[i]; int rnum = ctx->qreg_log[i];
bool is_predicated = ctx->qreg_is_predicated[i];
intptr_t dstoff = offsetof(CPUHexagonState, QRegs[rnum]); intptr_t dstoff = offsetof(CPUHexagonState, QRegs[rnum]);
intptr_t srcoff = offsetof(CPUHexagonState, future_QRegs[rnum]); intptr_t srcoff = offsetof(CPUHexagonState, future_QRegs[rnum]);
size_t size = sizeof(MMQReg); size_t size = sizeof(MMQReg);
if (is_predicated) { tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size);
TCGv cmp = tcg_temp_new();
TCGLabel *label_skip = gen_new_label();
tcg_gen_andi_tl(cmp, hex_QRegs_updated, 1 << rnum);
tcg_gen_brcondi_tl(TCG_COND_EQ, cmp, 0, label_skip);
tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size);
gen_set_label(label_skip);
} else {
tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size);
}
} }
if (pkt_has_hvx_store(ctx->pkt)) { if (pkt_has_hvx_store(ctx->pkt)) {
@ -775,13 +812,27 @@ static void gen_commit_packet(DisasContext *ctx)
TCGv mask_tcgv; TCGv mask_tcgv;
if (has_store_s0) { if (has_store_s0) {
mask |= (1 << 0); mask =
FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_ST0, 1);
} }
if (has_store_s1) { if (has_store_s1) {
mask |= (1 << 1); mask =
FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_ST1, 1);
} }
if (has_hvx_store) { if (has_hvx_store) {
mask |= (1 << 2); mask =
FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES,
HAS_HVX_STORES, 1);
}
if (has_store_s0 && slot_is_predicated(pkt, 0)) {
mask =
FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES,
S0_IS_PRED, 1);
}
if (has_store_s1 && slot_is_predicated(pkt, 1)) {
mask =
FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES,
S1_IS_PRED, 1);
} }
mask_tcgv = tcg_constant_tl(mask); mask_tcgv = tcg_constant_tl(mask);
gen_helper_probe_pkt_scalar_hvx_stores(cpu_env, mask_tcgv, mem_idx); gen_helper_probe_pkt_scalar_hvx_stores(cpu_env, mask_tcgv, mem_idx);
@ -791,8 +842,15 @@ static void gen_commit_packet(DisasContext *ctx)
* process_store_log will execute the slot 1 store first, * process_store_log will execute the slot 1 store first,
* so we only have to probe the store in slot 0 * so we only have to probe the store in slot 0
*/ */
TCGv mem_idx = tcg_constant_tl(ctx->mem_idx); int args = 0;
gen_helper_probe_pkt_scalar_store_s0(cpu_env, mem_idx); args =
FIELD_DP32(args, PROBE_PKT_SCALAR_STORE_S0, MMU_IDX, ctx->mem_idx);
if (slot_is_predicated(pkt, 0)) {
args =
FIELD_DP32(args, PROBE_PKT_SCALAR_STORE_S0, IS_PREDICATED, 1);
}
TCGv args_tcgv = tcg_constant_tl(args);
gen_helper_probe_pkt_scalar_store_s0(cpu_env, args_tcgv);
} }
process_store_log(ctx); process_store_log(ctx);
@ -1029,10 +1087,6 @@ void hexagon_translate_init(void)
offsetof(CPUHexagonState, llsc_val), "llsc_val"); offsetof(CPUHexagonState, llsc_val), "llsc_val");
hex_llsc_val_i64 = tcg_global_mem_new_i64(cpu_env, hex_llsc_val_i64 = tcg_global_mem_new_i64(cpu_env,
offsetof(CPUHexagonState, llsc_val_i64), "llsc_val_i64"); offsetof(CPUHexagonState, llsc_val_i64), "llsc_val_i64");
hex_VRegs_updated = tcg_global_mem_new(cpu_env,
offsetof(CPUHexagonState, VRegs_updated), "VRegs_updated");
hex_QRegs_updated = tcg_global_mem_new(cpu_env,
offsetof(CPUHexagonState, QRegs_updated), "QRegs_updated");
for (i = 0; i < STORES_MAX; i++) { for (i = 0; i < STORES_MAX; i++) {
snprintf(store_addr_names[i], NAME_LEN, "store_addr_%d", i); snprintf(store_addr_names[i], NAME_LEN, "store_addr_%d", i);
hex_store_addr[i] = tcg_global_mem_new(cpu_env, hex_store_addr[i] = tcg_global_mem_new(cpu_env,

View File

@ -1,5 +1,5 @@
/* /*
* Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
* *
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@ -38,6 +38,7 @@ typedef struct DisasContext {
int reg_log[REG_WRITES_MAX]; int reg_log[REG_WRITES_MAX];
int reg_log_idx; int reg_log_idx;
DECLARE_BITMAP(regs_written, TOTAL_PER_THREAD_REGS); DECLARE_BITMAP(regs_written, TOTAL_PER_THREAD_REGS);
DECLARE_BITMAP(predicated_regs, TOTAL_PER_THREAD_REGS);
int preg_log[PRED_WRITES_MAX]; int preg_log[PRED_WRITES_MAX];
int preg_log_idx; int preg_log_idx;
DECLARE_BITMAP(pregs_written, NUM_PREGS); DECLARE_BITMAP(pregs_written, NUM_PREGS);
@ -48,52 +49,54 @@ typedef struct DisasContext {
int tmp_vregs_idx; int tmp_vregs_idx;
int tmp_vregs_num[VECTOR_TEMPS_MAX]; int tmp_vregs_num[VECTOR_TEMPS_MAX];
int vreg_log[NUM_VREGS]; int vreg_log[NUM_VREGS];
bool vreg_is_predicated[NUM_VREGS];
int vreg_log_idx; int vreg_log_idx;
DECLARE_BITMAP(vregs_updated_tmp, NUM_VREGS); DECLARE_BITMAP(vregs_updated_tmp, NUM_VREGS);
DECLARE_BITMAP(vregs_updated, NUM_VREGS); DECLARE_BITMAP(vregs_updated, NUM_VREGS);
DECLARE_BITMAP(vregs_select, NUM_VREGS); DECLARE_BITMAP(vregs_select, NUM_VREGS);
DECLARE_BITMAP(predicated_future_vregs, NUM_VREGS);
DECLARE_BITMAP(predicated_tmp_vregs, NUM_VREGS);
int qreg_log[NUM_QREGS]; int qreg_log[NUM_QREGS];
bool qreg_is_predicated[NUM_QREGS];
int qreg_log_idx; int qreg_log_idx;
bool pre_commit; bool pre_commit;
TCGCond branch_cond; TCGCond branch_cond;
target_ulong branch_dest; target_ulong branch_dest;
bool is_tight_loop; bool is_tight_loop;
bool need_pkt_has_store_s1;
} DisasContext; } DisasContext;
static inline void ctx_log_reg_write(DisasContext *ctx, int rnum)
{
if (test_bit(rnum, ctx->regs_written)) {
HEX_DEBUG_LOG("WARNING: Multiple writes to r%d\n", rnum);
}
ctx->reg_log[ctx->reg_log_idx] = rnum;
ctx->reg_log_idx++;
set_bit(rnum, ctx->regs_written);
}
static inline void ctx_log_reg_write_pair(DisasContext *ctx, int rnum)
{
ctx_log_reg_write(ctx, rnum);
ctx_log_reg_write(ctx, rnum + 1);
}
static inline void ctx_log_pred_write(DisasContext *ctx, int pnum) static inline void ctx_log_pred_write(DisasContext *ctx, int pnum)
{ {
ctx->preg_log[ctx->preg_log_idx] = pnum; if (!test_bit(pnum, ctx->pregs_written)) {
ctx->preg_log_idx++; ctx->preg_log[ctx->preg_log_idx] = pnum;
set_bit(pnum, ctx->pregs_written); ctx->preg_log_idx++;
set_bit(pnum, ctx->pregs_written);
}
} }
static inline bool is_preloaded(DisasContext *ctx, int num) static inline void ctx_log_reg_write(DisasContext *ctx, int rnum,
bool is_predicated)
{ {
return test_bit(num, ctx->regs_written); if (rnum == HEX_REG_P3_0_ALIASED) {
for (int i = 0; i < NUM_PREGS; i++) {
ctx_log_pred_write(ctx, i);
}
} else {
if (!test_bit(rnum, ctx->regs_written)) {
ctx->reg_log[ctx->reg_log_idx] = rnum;
ctx->reg_log_idx++;
set_bit(rnum, ctx->regs_written);
}
if (is_predicated) {
set_bit(rnum, ctx->predicated_regs);
}
}
} }
static inline bool is_vreg_preloaded(DisasContext *ctx, int num) static inline void ctx_log_reg_write_pair(DisasContext *ctx, int rnum,
bool is_predicated)
{ {
return test_bit(num, ctx->vregs_updated) || ctx_log_reg_write(ctx, rnum, is_predicated);
test_bit(num, ctx->vregs_updated_tmp); ctx_log_reg_write(ctx, rnum + 1, is_predicated);
} }
intptr_t ctx_future_vreg_off(DisasContext *ctx, int regnum, intptr_t ctx_future_vreg_off(DisasContext *ctx, int regnum,
@ -106,17 +109,25 @@ static inline void ctx_log_vreg_write(DisasContext *ctx,
bool is_predicated) bool is_predicated)
{ {
if (type != EXT_TMP) { if (type != EXT_TMP) {
ctx->vreg_log[ctx->vreg_log_idx] = rnum; if (!test_bit(rnum, ctx->vregs_updated)) {
ctx->vreg_is_predicated[ctx->vreg_log_idx] = is_predicated; ctx->vreg_log[ctx->vreg_log_idx] = rnum;
ctx->vreg_log_idx++; ctx->vreg_log_idx++;
set_bit(rnum, ctx->vregs_updated);
}
set_bit(rnum, ctx->vregs_updated); set_bit(rnum, ctx->vregs_updated);
if (is_predicated) {
set_bit(rnum, ctx->predicated_future_vregs);
}
} }
if (type == EXT_NEW) { if (type == EXT_NEW) {
set_bit(rnum, ctx->vregs_select); set_bit(rnum, ctx->vregs_select);
} }
if (type == EXT_TMP) { if (type == EXT_TMP) {
set_bit(rnum, ctx->vregs_updated_tmp); set_bit(rnum, ctx->vregs_updated_tmp);
if (is_predicated) {
set_bit(rnum, ctx->predicated_tmp_vregs);
}
} }
} }
@ -129,10 +140,9 @@ static inline void ctx_log_vreg_write_pair(DisasContext *ctx,
} }
static inline void ctx_log_qreg_write(DisasContext *ctx, static inline void ctx_log_qreg_write(DisasContext *ctx,
int rnum, bool is_predicated) int rnum)
{ {
ctx->qreg_log[ctx->qreg_log_idx] = rnum; ctx->qreg_log[ctx->qreg_log_idx] = rnum;
ctx->qreg_is_predicated[ctx->qreg_log_idx] = is_predicated;
ctx->qreg_log_idx++; ctx->qreg_log_idx++;
} }
@ -153,12 +163,20 @@ extern TCGv hex_dczero_addr;
extern TCGv hex_llsc_addr; extern TCGv hex_llsc_addr;
extern TCGv hex_llsc_val; extern TCGv hex_llsc_val;
extern TCGv_i64 hex_llsc_val_i64; extern TCGv_i64 hex_llsc_val_i64;
extern TCGv hex_VRegs_updated;
extern TCGv hex_QRegs_updated;
extern TCGv hex_vstore_addr[VSTORES_MAX]; extern TCGv hex_vstore_addr[VSTORES_MAX];
extern TCGv hex_vstore_size[VSTORES_MAX]; extern TCGv hex_vstore_size[VSTORES_MAX];
extern TCGv hex_vstore_pending[VSTORES_MAX]; extern TCGv hex_vstore_pending[VSTORES_MAX];
bool is_gather_store_insn(DisasContext *ctx); bool is_gather_store_insn(DisasContext *ctx);
void process_store(DisasContext *ctx, int slot_num); void process_store(DisasContext *ctx, int slot_num);
FIELD(PROBE_PKT_SCALAR_STORE_S0, MMU_IDX, 0, 2)
FIELD(PROBE_PKT_SCALAR_STORE_S0, IS_PREDICATED, 2, 1)
FIELD(PROBE_PKT_SCALAR_HVX_STORES, HAS_ST0, 0, 1)
FIELD(PROBE_PKT_SCALAR_HVX_STORES, HAS_ST1, 1, 1)
FIELD(PROBE_PKT_SCALAR_HVX_STORES, HAS_HVX_STORES, 2, 1)
FIELD(PROBE_PKT_SCALAR_HVX_STORES, S0_IS_PRED, 3, 1)
FIELD(PROBE_PKT_SCALAR_HVX_STORES, S1_IS_PRED, 4, 1)
#endif #endif

View File

@ -1,5 +1,5 @@
## ##
## Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. ## Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
## ##
## This program is free software; you can redistribute it and/or modify ## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by ## it under the terms of the GNU General Public License as published by
@ -45,6 +45,10 @@ HEX_TESTS += fpstuff
HEX_TESTS += overflow HEX_TESTS += overflow
HEX_TESTS += signal_context HEX_TESTS += signal_context
HEX_TESTS += reg_mut HEX_TESTS += reg_mut
HEX_TESTS += vector_add_int
HEX_TESTS += scatter_gather
HEX_TESTS += hvx_misc
HEX_TESTS += hvx_histogram
HEX_TESTS += test_abs HEX_TESTS += test_abs
HEX_TESTS += test_bitcnt HEX_TESTS += test_bitcnt
@ -78,3 +82,10 @@ TESTS += $(HEX_TESTS)
usr: usr.c usr: usr.c
$(CC) $(CFLAGS) -mv67t -O2 -Wno-inline-asm -Wno-expansion-to-defined $< -o $@ $(LDFLAGS) $(CC) $(CFLAGS) -mv67t -O2 -Wno-inline-asm -Wno-expansion-to-defined $< -o $@ $(LDFLAGS)
scatter_gather: CFLAGS += -mhvx
vector_add_int: CFLAGS += -mhvx -fvectorize
hvx_misc: CFLAGS += -mhvx
hvx_histogram: CFLAGS += -mhvx -Wno-gnu-folding-constant
hvx_histogram: hvx_histogram.c hvx_histogram_row.S
$(CC) $(CFLAGS) $(CROSS_CC_GUEST_CFLAGS) $^ -o $@ $(LDFLAGS)

View File

@ -1,5 +1,5 @@
/* /*
* Copyright(c) 2020-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. * Copyright(c) 2020-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
* *
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@ -40,6 +40,7 @@ const int SF_HEX_NAN = 0xffffffff;
const int SF_small_neg = 0xab98fba8; const int SF_small_neg = 0xab98fba8;
const int SF_denorm = 0x00000001; const int SF_denorm = 0x00000001;
const int SF_random = 0x346001d6; const int SF_random = 0x346001d6;
const int SF_neg_zero = 0x80000000;
const long long DF_QNaN = 0x7ff8000000000000ULL; const long long DF_QNaN = 0x7ff8000000000000ULL;
const long long DF_SNaN = 0x7ff7000000000000ULL; const long long DF_SNaN = 0x7ff7000000000000ULL;
@ -536,6 +537,33 @@ static void check_sffixupd(void)
check32(result, 0x146001d6); check32(result, 0x146001d6);
} }
static void check_sffms(void)
{
int result;
/* Check that sffms properly deals with -0 */
result = SF_neg_zero;
asm ("%0 -= sfmpy(%1 , %2)\n\t"
: "+r"(result)
: "r"(SF_ZERO), "r"(SF_ZERO)
: "r12", "r8");
check32(result, SF_neg_zero);
result = SF_ZERO;
asm ("%0 -= sfmpy(%1 , %2)\n\t"
: "+r"(result)
: "r"(SF_neg_zero), "r"(SF_ZERO)
: "r12", "r8");
check32(result, SF_ZERO);
result = SF_ZERO;
asm ("%0 -= sfmpy(%1 , %2)\n\t"
: "+r"(result)
: "r"(SF_ZERO), "r"(SF_neg_zero)
: "r12", "r8");
check32(result, SF_ZERO);
}
static void check_float2int_convs() static void check_float2int_convs()
{ {
int res32; int res32;
@ -688,6 +716,7 @@ int main()
check_invsqrta(); check_invsqrta();
check_sffixupn(); check_sffixupn();
check_sffixupd(); check_sffixupd();
check_sffms();
check_float2int_convs(); check_float2int_convs();
puts(err ? "FAIL" : "PASS"); puts(err ? "FAIL" : "PASS");

View File

@ -1,5 +1,5 @@
/* /*
* Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
* *
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@ -65,7 +65,7 @@ static inline void creg_alias(int cval, PRegs *pregs)
: "=r"(pregs->pregs.p0), "=r"(pregs->pregs.p1), : "=r"(pregs->pregs.p0), "=r"(pregs->pregs.p1),
"=r"(pregs->pregs.p2), "=r"(pregs->pregs.p3) "=r"(pregs->pregs.p2), "=r"(pregs->pregs.p3)
: "r"(cval) : "r"(cval)
: "p0", "p1", "p2", "p3"); : "c4", "p0", "p1", "p2", "p3");
} }
int err; int err;
@ -92,7 +92,7 @@ static inline void creg_alias_pair(unsigned int cval, PRegs *pregs)
: "=r"(pregs->pregs.p0), "=r"(pregs->pregs.p1), : "=r"(pregs->pregs.p0), "=r"(pregs->pregs.p1),
"=r"(pregs->pregs.p2), "=r"(pregs->pregs.p3), "=r"(c5) "=r"(pregs->pregs.p2), "=r"(pregs->pregs.p3), "=r"(c5)
: "r"(cval_pair) : "r"(cval_pair)
: "p0", "p1", "p2", "p3"); : "c4", "c5", "p0", "p1", "p2", "p3");
check(c5, 0xdeadbeef); check(c5, 0xdeadbeef);
} }
@ -117,7 +117,7 @@ static void test_packet(void)
"}\n\t" "}\n\t"
: "+r"(result) : "+r"(result)
: "r"(0xffffffff), "r"(0xff00ffff), "r"(0x837ed653) : "r"(0xffffffff), "r"(0xff00ffff), "r"(0x837ed653)
: "p0", "p1", "p2", "p3"); : "c4", "p0", "p1", "p2", "p3");
check(result, old_val); check(result, old_val);
/* Test a predicated store */ /* Test a predicated store */
@ -129,7 +129,7 @@ static void test_packet(void)
"}\n\t" "}\n\t"
: :
: "r"(0), "r"(0xffffffff), "r"(&result) : "r"(0), "r"(0xffffffff), "r"(&result)
: "p0", "p1", "p2", "p3", "memory"); : "c4", "p0", "p1", "p2", "p3", "memory");
check(result, 0x0); check(result, 0x0);
} }

View File

@ -1,5 +1,5 @@
/* /*
* Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
* *
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@ -40,47 +40,6 @@ typedef long HVX_VectorPair __attribute__((__vector_size__(256)))
typedef long HVX_VectorPred __attribute__((__vector_size__(128))) typedef long HVX_VectorPred __attribute__((__vector_size__(128)))
__attribute__((aligned(128))); __attribute__((aligned(128)));
#define VSCATTER_16(BASE, RGN, OFF, VALS) \
__builtin_HEXAGON_V6_vscattermh_128B((int)BASE, RGN, OFF, VALS)
#define VSCATTER_16_MASKED(MASK, BASE, RGN, OFF, VALS) \
__builtin_HEXAGON_V6_vscattermhq_128B(MASK, (int)BASE, RGN, OFF, VALS)
#define VSCATTER_32(BASE, RGN, OFF, VALS) \
__builtin_HEXAGON_V6_vscattermw_128B((int)BASE, RGN, OFF, VALS)
#define VSCATTER_32_MASKED(MASK, BASE, RGN, OFF, VALS) \
__builtin_HEXAGON_V6_vscattermwq_128B(MASK, (int)BASE, RGN, OFF, VALS)
#define VSCATTER_16_32(BASE, RGN, OFF, VALS) \
__builtin_HEXAGON_V6_vscattermhw_128B((int)BASE, RGN, OFF, VALS)
#define VSCATTER_16_32_MASKED(MASK, BASE, RGN, OFF, VALS) \
__builtin_HEXAGON_V6_vscattermhwq_128B(MASK, (int)BASE, RGN, OFF, VALS)
#define VSCATTER_16_ACC(BASE, RGN, OFF, VALS) \
__builtin_HEXAGON_V6_vscattermh_add_128B((int)BASE, RGN, OFF, VALS)
#define VSCATTER_32_ACC(BASE, RGN, OFF, VALS) \
__builtin_HEXAGON_V6_vscattermw_add_128B((int)BASE, RGN, OFF, VALS)
#define VSCATTER_16_32_ACC(BASE, RGN, OFF, VALS) \
__builtin_HEXAGON_V6_vscattermhw_add_128B((int)BASE, RGN, OFF, VALS)
#define VGATHER_16(DSTADDR, BASE, RGN, OFF) \
__builtin_HEXAGON_V6_vgathermh_128B(DSTADDR, (int)BASE, RGN, OFF)
#define VGATHER_16_MASKED(DSTADDR, MASK, BASE, RGN, OFF) \
__builtin_HEXAGON_V6_vgathermhq_128B(DSTADDR, MASK, (int)BASE, RGN, OFF)
#define VGATHER_32(DSTADDR, BASE, RGN, OFF) \
__builtin_HEXAGON_V6_vgathermw_128B(DSTADDR, (int)BASE, RGN, OFF)
#define VGATHER_32_MASKED(DSTADDR, MASK, BASE, RGN, OFF) \
__builtin_HEXAGON_V6_vgathermwq_128B(DSTADDR, MASK, (int)BASE, RGN, OFF)
#define VGATHER_16_32(DSTADDR, BASE, RGN, OFF) \
__builtin_HEXAGON_V6_vgathermhw_128B(DSTADDR, (int)BASE, RGN, OFF)
#define VGATHER_16_32_MASKED(DSTADDR, MASK, BASE, RGN, OFF) \
__builtin_HEXAGON_V6_vgathermhwq_128B(DSTADDR, MASK, (int)BASE, RGN, OFF)
#define VSHUFF_H(V) \
__builtin_HEXAGON_V6_vshuffh_128B(V)
#define VSPLAT_H(X) \
__builtin_HEXAGON_V6_lvsplath_128B(X)
#define VAND_VAL(PRED, VAL) \
__builtin_HEXAGON_V6_vandvrt_128B(PRED, VAL)
#define VDEAL_H(V) \
__builtin_HEXAGON_V6_vdealh_128B(V)
int err; int err;
/* define the number of rows/cols in a square matrix */ /* define the number of rows/cols in a square matrix */
@ -108,22 +67,22 @@ unsigned short vscatter16_32_ref[SCATTER_BUFFER_SIZE];
unsigned short vgather16_32_ref[MATRIX_SIZE]; unsigned short vgather16_32_ref[MATRIX_SIZE];
/* declare the arrays of offsets */ /* declare the arrays of offsets */
unsigned short half_offsets[MATRIX_SIZE]; unsigned short half_offsets[MATRIX_SIZE] __attribute__((aligned(128)));
unsigned int word_offsets[MATRIX_SIZE]; unsigned int word_offsets[MATRIX_SIZE] __attribute__((aligned(128)));
/* declare the arrays of values */ /* declare the arrays of values */
unsigned short half_values[MATRIX_SIZE]; unsigned short half_values[MATRIX_SIZE] __attribute__((aligned(128)));
unsigned short half_values_acc[MATRIX_SIZE]; unsigned short half_values_acc[MATRIX_SIZE] __attribute__((aligned(128)));
unsigned short half_values_masked[MATRIX_SIZE]; unsigned short half_values_masked[MATRIX_SIZE] __attribute__((aligned(128)));
unsigned int word_values[MATRIX_SIZE]; unsigned int word_values[MATRIX_SIZE] __attribute__((aligned(128)));
unsigned int word_values_acc[MATRIX_SIZE]; unsigned int word_values_acc[MATRIX_SIZE] __attribute__((aligned(128)));
unsigned int word_values_masked[MATRIX_SIZE]; unsigned int word_values_masked[MATRIX_SIZE] __attribute__((aligned(128)));
/* declare the arrays of predicates */ /* declare the arrays of predicates */
unsigned short half_predicates[MATRIX_SIZE]; unsigned short half_predicates[MATRIX_SIZE] __attribute__((aligned(128)));
unsigned int word_predicates[MATRIX_SIZE]; unsigned int word_predicates[MATRIX_SIZE] __attribute__((aligned(128)));
/* make this big enough for all the intrinsics */ /* make this big enough for all the operations */
const size_t region_len = sizeof(vtcm); const size_t region_len = sizeof(vtcm);
/* optionally add sync instructions */ /* optionally add sync instructions */
@ -261,164 +220,201 @@ void create_offsets_values_preds_16_32(void)
} }
} }
/* scatter the 16 bit elements using intrinsics */ /* scatter the 16 bit elements using HVX */
void vector_scatter_16(void) void vector_scatter_16(void)
{ {
/* copy the offsets and values to vectors */ asm ("m0 = %1\n\t"
HVX_Vector offsets = *(HVX_Vector *)half_offsets; "v0 = vmem(%2 + #0)\n\t"
HVX_Vector values = *(HVX_Vector *)half_values; "v1 = vmem(%3 + #0)\n\t"
"vscatter(%0, m0, v0.h).h = v1\n\t"
VSCATTER_16(&vtcm.vscatter16, region_len, offsets, values); : : "r"(vtcm.vscatter16), "r"(region_len),
"r"(half_offsets), "r"(half_values)
: "m0", "v0", "v1", "memory");
sync_scatter(vtcm.vscatter16); sync_scatter(vtcm.vscatter16);
} }
/* scatter-accumulate the 16 bit elements using intrinsics */ /* scatter-accumulate the 16 bit elements using HVX */
void vector_scatter_16_acc(void) void vector_scatter_16_acc(void)
{ {
/* copy the offsets and values to vectors */ asm ("m0 = %1\n\t"
HVX_Vector offsets = *(HVX_Vector *)half_offsets; "v0 = vmem(%2 + #0)\n\t"
HVX_Vector values = *(HVX_Vector *)half_values_acc; "v1 = vmem(%3 + #0)\n\t"
"vscatter(%0, m0, v0.h).h += v1\n\t"
VSCATTER_16_ACC(&vtcm.vscatter16, region_len, offsets, values); : : "r"(vtcm.vscatter16), "r"(region_len),
"r"(half_offsets), "r"(half_values_acc)
: "m0", "v0", "v1", "memory");
sync_scatter(vtcm.vscatter16); sync_scatter(vtcm.vscatter16);
} }
/* scatter the 16 bit elements using intrinsics */ /* masked scatter the 16 bit elements using HVX */
void vector_scatter_16_masked(void) void vector_scatter_16_masked(void)
{ {
/* copy the offsets and values to vectors */ asm ("r1 = #-1\n\t"
HVX_Vector offsets = *(HVX_Vector *)half_offsets; "v0 = vmem(%0 + #0)\n\t"
HVX_Vector values = *(HVX_Vector *)half_values_masked; "q0 = vand(v0, r1)\n\t"
HVX_Vector pred_reg = *(HVX_Vector *)half_predicates; "m0 = %2\n\t"
HVX_VectorPred preds = VAND_VAL(pred_reg, ~0); "v0 = vmem(%3 + #0)\n\t"
"v1 = vmem(%4 + #0)\n\t"
VSCATTER_16_MASKED(preds, &vtcm.vscatter16, region_len, offsets, values); "if (q0) vscatter(%1, m0, v0.h).h = v1\n\t"
: : "r"(half_predicates), "r"(vtcm.vscatter16), "r"(region_len),
"r"(half_offsets), "r"(half_values_masked)
: "r1", "q0", "m0", "q0", "v0", "v1", "memory");
sync_scatter(vtcm.vscatter16); sync_scatter(vtcm.vscatter16);
} }
/* scatter the 32 bit elements using intrinsics */ /* scatter the 32 bit elements using HVX */
void vector_scatter_32(void) void vector_scatter_32(void)
{ {
/* copy the offsets and values to vectors */ HVX_Vector *offsetslo = (HVX_Vector *)word_offsets;
HVX_Vector offsetslo = *(HVX_Vector *)word_offsets; HVX_Vector *offsetshi = (HVX_Vector *)&word_offsets[MATRIX_SIZE / 2];
HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2]; HVX_Vector *valueslo = (HVX_Vector *)word_values;
HVX_Vector valueslo = *(HVX_Vector *)word_values; HVX_Vector *valueshi = (HVX_Vector *)&word_values[MATRIX_SIZE / 2];
HVX_Vector valueshi = *(HVX_Vector *)&word_values[MATRIX_SIZE / 2];
VSCATTER_32(&vtcm.vscatter32, region_len, offsetslo, valueslo); asm ("m0 = %1\n\t"
VSCATTER_32(&vtcm.vscatter32, region_len, offsetshi, valueshi); "v0 = vmem(%2 + #0)\n\t"
"v1 = vmem(%3 + #0)\n\t"
"vscatter(%0, m0, v0.w).w = v1\n\t"
: : "r"(vtcm.vscatter32), "r"(region_len),
"r"(offsetslo), "r"(valueslo)
: "m0", "v0", "v1", "memory");
asm ("m0 = %1\n\t"
"v0 = vmem(%2 + #0)\n\t"
"v1 = vmem(%3 + #0)\n\t"
"vscatter(%0, m0, v0.w).w = v1\n\t"
: : "r"(vtcm.vscatter32), "r"(region_len),
"r"(offsetshi), "r"(valueshi)
: "m0", "v0", "v1", "memory");
sync_scatter(vtcm.vscatter32); sync_scatter(vtcm.vscatter32);
} }
/* scatter-acc the 32 bit elements using intrinsics */ /* scatter-accumulate the 32 bit elements using HVX */
void vector_scatter_32_acc(void) void vector_scatter_32_acc(void)
{ {
/* copy the offsets and values to vectors */ HVX_Vector *offsetslo = (HVX_Vector *)word_offsets;
HVX_Vector offsetslo = *(HVX_Vector *)word_offsets; HVX_Vector *offsetshi = (HVX_Vector *)&word_offsets[MATRIX_SIZE / 2];
HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2]; HVX_Vector *valueslo = (HVX_Vector *)word_values_acc;
HVX_Vector valueslo = *(HVX_Vector *)word_values_acc; HVX_Vector *valueshi = (HVX_Vector *)&word_values_acc[MATRIX_SIZE / 2];
HVX_Vector valueshi = *(HVX_Vector *)&word_values_acc[MATRIX_SIZE / 2];
VSCATTER_32_ACC(&vtcm.vscatter32, region_len, offsetslo, valueslo); asm ("m0 = %1\n\t"
VSCATTER_32_ACC(&vtcm.vscatter32, region_len, offsetshi, valueshi); "v0 = vmem(%2 + #0)\n\t"
"v1 = vmem(%3 + #0)\n\t"
"vscatter(%0, m0, v0.w).w += v1\n\t"
: : "r"(vtcm.vscatter32), "r"(region_len),
"r"(offsetslo), "r"(valueslo)
: "m0", "v0", "v1", "memory");
asm ("m0 = %1\n\t"
"v0 = vmem(%2 + #0)\n\t"
"v1 = vmem(%3 + #0)\n\t"
"vscatter(%0, m0, v0.w).w += v1\n\t"
: : "r"(vtcm.vscatter32), "r"(region_len),
"r"(offsetshi), "r"(valueshi)
: "m0", "v0", "v1", "memory");
sync_scatter(vtcm.vscatter32); sync_scatter(vtcm.vscatter32);
} }
/* scatter the 32 bit elements using intrinsics */ /* masked scatter the 32 bit elements using HVX */
void vector_scatter_32_masked(void) void vector_scatter_32_masked(void)
{ {
/* copy the offsets and values to vectors */ HVX_Vector *offsetslo = (HVX_Vector *)word_offsets;
HVX_Vector offsetslo = *(HVX_Vector *)word_offsets; HVX_Vector *offsetshi = (HVX_Vector *)&word_offsets[MATRIX_SIZE / 2];
HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2]; HVX_Vector *valueslo = (HVX_Vector *)word_values_masked;
HVX_Vector valueslo = *(HVX_Vector *)word_values_masked; HVX_Vector *valueshi = (HVX_Vector *)&word_values_masked[MATRIX_SIZE / 2];
HVX_Vector valueshi = *(HVX_Vector *)&word_values_masked[MATRIX_SIZE / 2]; HVX_Vector *predslo = (HVX_Vector *)word_predicates;
HVX_Vector pred_reglo = *(HVX_Vector *)word_predicates; HVX_Vector *predshi = (HVX_Vector *)&word_predicates[MATRIX_SIZE / 2];
HVX_Vector pred_reghi = *(HVX_Vector *)&word_predicates[MATRIX_SIZE / 2];
HVX_VectorPred predslo = VAND_VAL(pred_reglo, ~0);
HVX_VectorPred predshi = VAND_VAL(pred_reghi, ~0);
VSCATTER_32_MASKED(predslo, &vtcm.vscatter32, region_len, offsetslo, asm ("r1 = #-1\n\t"
valueslo); "v0 = vmem(%0 + #0)\n\t"
VSCATTER_32_MASKED(predshi, &vtcm.vscatter32, region_len, offsetshi, "q0 = vand(v0, r1)\n\t"
valueshi); "m0 = %2\n\t"
"v0 = vmem(%3 + #0)\n\t"
"v1 = vmem(%4 + #0)\n\t"
"if (q0) vscatter(%1, m0, v0.w).w = v1\n\t"
: : "r"(predslo), "r"(vtcm.vscatter32), "r"(region_len),
"r"(offsetslo), "r"(valueslo)
: "r1", "q0", "m0", "q0", "v0", "v1", "memory");
asm ("r1 = #-1\n\t"
"v0 = vmem(%0 + #0)\n\t"
"q0 = vand(v0, r1)\n\t"
"m0 = %2\n\t"
"v0 = vmem(%3 + #0)\n\t"
"v1 = vmem(%4 + #0)\n\t"
"if (q0) vscatter(%1, m0, v0.w).w = v1\n\t"
: : "r"(predshi), "r"(vtcm.vscatter32), "r"(region_len),
"r"(offsetshi), "r"(valueshi)
: "r1", "q0", "m0", "q0", "v0", "v1", "memory");
sync_scatter(vtcm.vscatter16); sync_scatter(vtcm.vscatter32);
} }
/* scatter the 16 bit elements with 32 bit offsets using intrinsics */ /* scatter the 16 bit elements with 32 bit offsets using HVX */
void vector_scatter_16_32(void) void vector_scatter_16_32(void)
{ {
HVX_VectorPair offsets; asm ("m0 = %1\n\t"
HVX_Vector values; "v0 = vmem(%2 + #0)\n\t"
"v1 = vmem(%2 + #1)\n\t"
/* get the word offsets in a vector pair */ "v2 = vmem(%3 + #0)\n\t"
offsets = *(HVX_VectorPair *)word_offsets; "v2.h = vshuff(v2.h)\n\t" /* shuffle the values for the scatter */
"vscatter(%0, m0, v1:0.w).h = v2\n\t"
/* these values need to be shuffled for the scatter */ : : "r"(vtcm.vscatter16_32), "r"(region_len),
values = *(HVX_Vector *)half_values; "r"(word_offsets), "r"(half_values)
values = VSHUFF_H(values); : "m0", "v0", "v1", "v2", "memory");
VSCATTER_16_32(&vtcm.vscatter16_32, region_len, offsets, values);
sync_scatter(vtcm.vscatter16_32); sync_scatter(vtcm.vscatter16_32);
} }
/* scatter-acc the 16 bit elements with 32 bit offsets using intrinsics */ /* scatter-accumulate the 16 bit elements with 32 bit offsets using HVX */
void vector_scatter_16_32_acc(void) void vector_scatter_16_32_acc(void)
{ {
HVX_VectorPair offsets; asm ("m0 = %1\n\t"
HVX_Vector values; "v0 = vmem(%2 + #0)\n\t"
"v1 = vmem(%2 + #1)\n\t"
/* get the word offsets in a vector pair */ "v2 = vmem(%3 + #0)\n\t" \
offsets = *(HVX_VectorPair *)word_offsets; "v2.h = vshuff(v2.h)\n\t" /* shuffle the values for the scatter */
"vscatter(%0, m0, v1:0.w).h += v2\n\t"
/* these values need to be shuffled for the scatter */ : : "r"(vtcm.vscatter16_32), "r"(region_len),
values = *(HVX_Vector *)half_values_acc; "r"(word_offsets), "r"(half_values_acc)
values = VSHUFF_H(values); : "m0", "v0", "v1", "v2", "memory");
VSCATTER_16_32_ACC(&vtcm.vscatter16_32, region_len, offsets, values);
sync_scatter(vtcm.vscatter16_32); sync_scatter(vtcm.vscatter16_32);
} }
/* masked scatter the 16 bit elements with 32 bit offsets using intrinsics */ /* masked scatter the 16 bit elements with 32 bit offsets using HVX */
void vector_scatter_16_32_masked(void) void vector_scatter_16_32_masked(void)
{ {
HVX_VectorPair offsets; asm ("r1 = #-1\n\t"
HVX_Vector values; "v0 = vmem(%0 + #0)\n\t"
HVX_Vector pred_reg; "v0.h = vshuff(v0.h)\n\t" /* shuffle the predicates */
"q0 = vand(v0, r1)\n\t"
/* get the word offsets in a vector pair */ "m0 = %2\n\t"
offsets = *(HVX_VectorPair *)word_offsets; "v0 = vmem(%3 + #0)\n\t"
"v1 = vmem(%3 + #1)\n\t"
/* these values need to be shuffled for the scatter */ "v2 = vmem(%4 + #0)\n\t" \
values = *(HVX_Vector *)half_values_masked; "v2.h = vshuff(v2.h)\n\t" /* shuffle the values for the scatter */
values = VSHUFF_H(values); "if (q0) vscatter(%1, m0, v1:0.w).h = v2\n\t"
: : "r"(half_predicates), "r"(vtcm.vscatter16_32), "r"(region_len),
pred_reg = *(HVX_Vector *)half_predicates; "r"(word_offsets), "r"(half_values_masked)
pred_reg = VSHUFF_H(pred_reg); : "r1", "q0", "m0", "v0", "v1", "v2", "memory");
HVX_VectorPred preds = VAND_VAL(pred_reg, ~0);
VSCATTER_16_32_MASKED(preds, &vtcm.vscatter16_32, region_len, offsets,
values);
sync_scatter(vtcm.vscatter16_32); sync_scatter(vtcm.vscatter16_32);
} }
/* gather the elements from the scatter16 buffer */ /* gather the elements from the scatter16 buffer using HVX */
void vector_gather_16(void) void vector_gather_16(void)
{ {
HVX_Vector *vgather = (HVX_Vector *)&vtcm.vgather16; asm ("m0 = %1\n\t"
HVX_Vector offsets = *(HVX_Vector *)half_offsets; "v0 = vmem(%2 + #0)\n\t"
"{ vtmp.h = vgather(%0, m0, v0.h).h\n\t"
" vmem(%3 + #0) = vtmp.new }\n\t"
: : "r"(vtcm.vscatter16), "r"(region_len),
"r"(half_offsets), "r"(vtcm.vgather16)
: "m0", "v0", "memory");
VGATHER_16(vgather, &vtcm.vscatter16, region_len, offsets); sync_gather(vtcm.vgather16);
sync_gather(vgather);
} }
static unsigned short gather_16_masked_init(void) static unsigned short gather_16_masked_init(void)
@ -427,31 +423,51 @@ static unsigned short gather_16_masked_init(void)
return letter | (letter << 8); return letter | (letter << 8);
} }
/* masked gather the elements from the scatter16 buffer using HVX */
void vector_gather_16_masked(void) void vector_gather_16_masked(void)
{ {
HVX_Vector *vgather = (HVX_Vector *)&vtcm.vgather16; unsigned short init = gather_16_masked_init();
HVX_Vector offsets = *(HVX_Vector *)half_offsets;
HVX_Vector pred_reg = *(HVX_Vector *)half_predicates;
HVX_VectorPred preds = VAND_VAL(pred_reg, ~0);
*vgather = VSPLAT_H(gather_16_masked_init()); asm ("v0.h = vsplat(%5)\n\t"
VGATHER_16_MASKED(vgather, preds, &vtcm.vscatter16, region_len, offsets); "vmem(%4 + #0) = v0\n\t" /* initialize the write area */
"r1 = #-1\n\t"
"v0 = vmem(%0 + #0)\n\t"
"q0 = vand(v0, r1)\n\t"
"m0 = %2\n\t"
"v0 = vmem(%3 + #0)\n\t"
"{ if (q0) vtmp.h = vgather(%1, m0, v0.h).h\n\t"
" vmem(%4 + #0) = vtmp.new }\n\t"
: : "r"(half_predicates), "r"(vtcm.vscatter16), "r"(region_len),
"r"(half_offsets), "r"(vtcm.vgather16), "r"(init)
: "r1", "q0", "m0", "v0", "memory");
sync_gather(vgather); sync_gather(vtcm.vgather16);
} }
/* gather the elements from the scatter32 buffer */ /* gather the elements from the scatter32 buffer using HVX */
void vector_gather_32(void) void vector_gather_32(void)
{ {
HVX_Vector *vgatherlo = (HVX_Vector *)&vtcm.vgather32; HVX_Vector *vgatherlo = (HVX_Vector *)vtcm.vgather32;
HVX_Vector *vgatherhi = HVX_Vector *vgatherhi = (HVX_Vector *)&vtcm.vgather32[MATRIX_SIZE / 2];
(HVX_Vector *)((int)&vtcm.vgather32 + (MATRIX_SIZE * 2)); HVX_Vector *offsetslo = (HVX_Vector *)word_offsets;
HVX_Vector offsetslo = *(HVX_Vector *)word_offsets; HVX_Vector *offsetshi = (HVX_Vector *)&word_offsets[MATRIX_SIZE / 2];
HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2];
VGATHER_32(vgatherlo, &vtcm.vscatter32, region_len, offsetslo); asm ("m0 = %1\n\t"
VGATHER_32(vgatherhi, &vtcm.vscatter32, region_len, offsetshi); "v0 = vmem(%2 + #0)\n\t"
"{ vtmp.w = vgather(%0, m0, v0.w).w\n\t"
" vmem(%3 + #0) = vtmp.new }\n\t"
: : "r"(vtcm.vscatter32), "r"(region_len),
"r"(offsetslo), "r"(vgatherlo)
: "m0", "v0", "memory");
asm ("m0 = %1\n\t"
"v0 = vmem(%2 + #0)\n\t"
"{ vtmp.w = vgather(%0, m0, v0.w).w\n\t"
" vmem(%3 + #0) = vtmp.new }\n\t"
: : "r"(vtcm.vscatter32), "r"(region_len),
"r"(offsetshi), "r"(vgatherhi)
: "m0", "v0", "memory");
sync_gather(vgatherlo);
sync_gather(vgatherhi); sync_gather(vgatherhi);
} }
@ -461,79 +477,88 @@ static unsigned int gather_32_masked_init(void)
return letter | (letter << 8) | (letter << 16) | (letter << 24); return letter | (letter << 8) | (letter << 16) | (letter << 24);
} }
/* masked gather the elements from the scatter32 buffer using HVX */
void vector_gather_32_masked(void) void vector_gather_32_masked(void)
{ {
HVX_Vector *vgatherlo = (HVX_Vector *)&vtcm.vgather32; unsigned int init = gather_32_masked_init();
HVX_Vector *vgatherhi = HVX_Vector *vgatherlo = (HVX_Vector *)vtcm.vgather32;
(HVX_Vector *)((int)&vtcm.vgather32 + (MATRIX_SIZE * 2)); HVX_Vector *vgatherhi = (HVX_Vector *)&vtcm.vgather32[MATRIX_SIZE / 2];
HVX_Vector offsetslo = *(HVX_Vector *)word_offsets; HVX_Vector *offsetslo = (HVX_Vector *)word_offsets;
HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2]; HVX_Vector *offsetshi = (HVX_Vector *)&word_offsets[MATRIX_SIZE / 2];
HVX_Vector pred_reglo = *(HVX_Vector *)word_predicates; HVX_Vector *predslo = (HVX_Vector *)word_predicates;
HVX_VectorPred predslo = VAND_VAL(pred_reglo, ~0); HVX_Vector *predshi = (HVX_Vector *)&word_predicates[MATRIX_SIZE / 2];
HVX_Vector pred_reghi = *(HVX_Vector *)&word_predicates[MATRIX_SIZE / 2];
HVX_VectorPred predshi = VAND_VAL(pred_reghi, ~0);
*vgatherlo = VSPLAT_H(gather_32_masked_init()); asm ("v0.h = vsplat(%5)\n\t"
*vgatherhi = VSPLAT_H(gather_32_masked_init()); "vmem(%4 + #0) = v0\n\t" /* initialize the write area */
VGATHER_32_MASKED(vgatherlo, predslo, &vtcm.vscatter32, region_len, "r1 = #-1\n\t"
offsetslo); "v0 = vmem(%0 + #0)\n\t"
VGATHER_32_MASKED(vgatherhi, predshi, &vtcm.vscatter32, region_len, "q0 = vand(v0, r1)\n\t"
offsetshi); "m0 = %2\n\t"
"v0 = vmem(%3 + #0)\n\t"
"{ if (q0) vtmp.w = vgather(%1, m0, v0.w).w\n\t"
" vmem(%4 + #0) = vtmp.new }\n\t"
: : "r"(predslo), "r"(vtcm.vscatter32), "r"(region_len),
"r"(offsetslo), "r"(vgatherlo), "r"(init)
: "r1", "q0", "m0", "v0", "memory");
asm ("v0.h = vsplat(%5)\n\t"
"vmem(%4 + #0) = v0\n\t" /* initialize the write area */
"r1 = #-1\n\t"
"v0 = vmem(%0 + #0)\n\t"
"q0 = vand(v0, r1)\n\t"
"m0 = %2\n\t"
"v0 = vmem(%3 + #0)\n\t"
"{ if (q0) vtmp.w = vgather(%1, m0, v0.w).w\n\t"
" vmem(%4 + #0) = vtmp.new }\n\t"
: : "r"(predshi), "r"(vtcm.vscatter32), "r"(region_len),
"r"(offsetshi), "r"(vgatherhi), "r"(init)
: "r1", "q0", "m0", "v0", "memory");
sync_gather(vgatherlo); sync_gather(vgatherlo);
sync_gather(vgatherhi); sync_gather(vgatherhi);
} }
/* gather the elements from the scatter16_32 buffer */ /* gather the elements from the scatter16_32 buffer using HVX */
void vector_gather_16_32(void) void vector_gather_16_32(void)
{ {
HVX_Vector *vgather; asm ("m0 = %1\n\t"
HVX_VectorPair offsets; "v0 = vmem(%2 + #0)\n\t"
HVX_Vector values; "v1 = vmem(%2 + #1)\n\t"
"{ vtmp.h = vgather(%0, m0, v1:0.w).h\n\t"
" vmem(%3 + #0) = vtmp.new }\n\t"
"v0 = vmem(%3 + #0)\n\t"
"v0.h = vdeal(v0.h)\n\t" /* deal the elements to get the order back */
"vmem(%3 + #0) = v0\n\t"
: : "r"(vtcm.vscatter16_32), "r"(region_len),
"r"(word_offsets), "r"(vtcm.vgather16_32)
: "m0", "v0", "v1", "memory");
/* get the vtcm address to gather from */ sync_gather(vtcm.vgather16_32);
vgather = (HVX_Vector *)&vtcm.vgather16_32;
/* get the word offsets in a vector pair */
offsets = *(HVX_VectorPair *)word_offsets;
VGATHER_16_32(vgather, &vtcm.vscatter16_32, region_len, offsets);
/* deal the elements to get the order back */
values = *(HVX_Vector *)vgather;
values = VDEAL_H(values);
/* write it back to vtcm address */
*(HVX_Vector *)vgather = values;
} }
/* masked gather the elements from the scatter16_32 buffer using HVX */
void vector_gather_16_32_masked(void) void vector_gather_16_32_masked(void)
{ {
HVX_Vector *vgather; unsigned short init = gather_16_masked_init();
HVX_VectorPair offsets;
HVX_Vector pred_reg;
HVX_VectorPred preds;
HVX_Vector values;
/* get the vtcm address to gather from */ asm ("v0.h = vsplat(%5)\n\t"
vgather = (HVX_Vector *)&vtcm.vgather16_32; "vmem(%4 + #0) = v0\n\t" /* initialize the write area */
"r1 = #-1\n\t"
"v0 = vmem(%0 + #0)\n\t"
"v0.h = vshuff(v0.h)\n\t" /* shuffle the predicates */
"q0 = vand(v0, r1)\n\t"
"m0 = %2\n\t"
"v0 = vmem(%3 + #0)\n\t"
"v1 = vmem(%3 + #1)\n\t"
"{ if (q0) vtmp.h = vgather(%1, m0, v1:0.w).h\n\t"
" vmem(%4 + #0) = vtmp.new }\n\t"
"v0 = vmem(%4 + #0)\n\t"
"v0.h = vdeal(v0.h)\n\t" /* deal the elements to get the order back */
"vmem(%4 + #0) = v0\n\t"
: : "r"(half_predicates), "r"(vtcm.vscatter16_32), "r"(region_len),
"r"(word_offsets), "r"(vtcm.vgather16_32), "r"(init)
: "r1", "q0", "m0", "v0", "v1", "memory");
/* get the word offsets in a vector pair */ sync_gather(vtcm.vgather16_32);
offsets = *(HVX_VectorPair *)word_offsets;
pred_reg = *(HVX_Vector *)half_predicates;
pred_reg = VSHUFF_H(pred_reg);
preds = VAND_VAL(pred_reg, ~0);
*vgather = VSPLAT_H(gather_16_masked_init());
VGATHER_16_32_MASKED(vgather, preds, &vtcm.vscatter16_32, region_len,
offsets);
/* deal the elements to get the order back */
values = *(HVX_Vector *)vgather;
values = VDEAL_H(values);
/* write it back to vtcm address */
*(HVX_Vector *)vgather = values;
} }
static void check_buffer(const char *name, void *c, void *r, size_t size) static void check_buffer(const char *name, void *c, void *r, size_t size)
@ -579,6 +604,7 @@ void scalar_scatter_16_acc(unsigned short *vscatter16)
} }
} }
/* scatter-accumulate the 16 bit elements using C */
void check_scatter_16_acc() void check_scatter_16_acc()
{ {
memset(vscatter16_ref, FILL_CHAR, memset(vscatter16_ref, FILL_CHAR,
@ -589,7 +615,7 @@ void check_scatter_16_acc()
SCATTER_BUFFER_SIZE * sizeof(unsigned short)); SCATTER_BUFFER_SIZE * sizeof(unsigned short));
} }
/* scatter the 16 bit elements using C */ /* masked scatter the 16 bit elements using C */
void scalar_scatter_16_masked(unsigned short *vscatter16) void scalar_scatter_16_masked(unsigned short *vscatter16)
{ {
for (int i = 0; i < MATRIX_SIZE; i++) { for (int i = 0; i < MATRIX_SIZE; i++) {
@ -628,7 +654,7 @@ void check_scatter_32()
SCATTER_BUFFER_SIZE * sizeof(unsigned int)); SCATTER_BUFFER_SIZE * sizeof(unsigned int));
} }
/* scatter the 32 bit elements using C */ /* scatter-accumulate the 32 bit elements using C */
void scalar_scatter_32_acc(unsigned int *vscatter32) void scalar_scatter_32_acc(unsigned int *vscatter32)
{ {
for (int i = 0; i < MATRIX_SIZE; ++i) { for (int i = 0; i < MATRIX_SIZE; ++i) {
@ -646,7 +672,7 @@ void check_scatter_32_acc()
SCATTER_BUFFER_SIZE * sizeof(unsigned int)); SCATTER_BUFFER_SIZE * sizeof(unsigned int));
} }
/* scatter the 32 bit elements using C */ /* masked scatter the 32 bit elements using C */
void scalar_scatter_32_masked(unsigned int *vscatter32) void scalar_scatter_32_masked(unsigned int *vscatter32)
{ {
for (int i = 0; i < MATRIX_SIZE; i++) { for (int i = 0; i < MATRIX_SIZE; i++) {
@ -667,7 +693,7 @@ void check_scatter_32_masked()
SCATTER_BUFFER_SIZE * sizeof(unsigned int)); SCATTER_BUFFER_SIZE * sizeof(unsigned int));
} }
/* scatter the 32 bit elements using C */ /* scatter the 16 bit elements with 32 bit offsets using C */
void scalar_scatter_16_32(unsigned short *vscatter16_32) void scalar_scatter_16_32(unsigned short *vscatter16_32)
{ {
for (int i = 0; i < MATRIX_SIZE; ++i) { for (int i = 0; i < MATRIX_SIZE; ++i) {
@ -684,7 +710,7 @@ void check_scatter_16_32()
SCATTER_BUFFER_SIZE * sizeof(unsigned short)); SCATTER_BUFFER_SIZE * sizeof(unsigned short));
} }
/* scatter the 32 bit elements using C */ /* scatter-accumulate the 16 bit elements with 32 bit offsets using C */
void scalar_scatter_16_32_acc(unsigned short *vscatter16_32) void scalar_scatter_16_32_acc(unsigned short *vscatter16_32)
{ {
for (int i = 0; i < MATRIX_SIZE; ++i) { for (int i = 0; i < MATRIX_SIZE; ++i) {
@ -702,6 +728,7 @@ void check_scatter_16_32_acc()
SCATTER_BUFFER_SIZE * sizeof(unsigned short)); SCATTER_BUFFER_SIZE * sizeof(unsigned short));
} }
/* masked scatter the 16 bit elements with 32 bit offsets using C */
void scalar_scatter_16_32_masked(unsigned short *vscatter16_32) void scalar_scatter_16_32_masked(unsigned short *vscatter16_32)
{ {
for (int i = 0; i < MATRIX_SIZE; i++) { for (int i = 0; i < MATRIX_SIZE; i++) {
@ -738,6 +765,7 @@ void check_gather_16()
MATRIX_SIZE * sizeof(unsigned short)); MATRIX_SIZE * sizeof(unsigned short));
} }
/* masked gather the elements from the scatter buffer using C */
void scalar_gather_16_masked(unsigned short *vgather16) void scalar_gather_16_masked(unsigned short *vgather16)
{ {
for (int i = 0; i < MATRIX_SIZE; ++i) { for (int i = 0; i < MATRIX_SIZE; ++i) {
@ -756,7 +784,7 @@ void check_gather_16_masked()
MATRIX_SIZE * sizeof(unsigned short)); MATRIX_SIZE * sizeof(unsigned short));
} }
/* gather the elements from the scatter buffer using C */ /* gather the elements from the scatter32 buffer using C */
void scalar_gather_32(unsigned int *vgather32) void scalar_gather_32(unsigned int *vgather32)
{ {
for (int i = 0; i < MATRIX_SIZE; ++i) { for (int i = 0; i < MATRIX_SIZE; ++i) {
@ -772,6 +800,7 @@ void check_gather_32(void)
MATRIX_SIZE * sizeof(unsigned int)); MATRIX_SIZE * sizeof(unsigned int));
} }
/* masked gather the elements from the scatter32 buffer using C */
void scalar_gather_32_masked(unsigned int *vgather32) void scalar_gather_32_masked(unsigned int *vgather32)
{ {
for (int i = 0; i < MATRIX_SIZE; ++i) { for (int i = 0; i < MATRIX_SIZE; ++i) {
@ -781,7 +810,6 @@ void scalar_gather_32_masked(unsigned int *vgather32)
} }
} }
void check_gather_32_masked(void) void check_gather_32_masked(void)
{ {
memset(vgather32_ref, gather_32_masked_init(), memset(vgather32_ref, gather_32_masked_init(),
@ -791,7 +819,7 @@ void check_gather_32_masked(void)
vgather32_ref, MATRIX_SIZE * sizeof(unsigned int)); vgather32_ref, MATRIX_SIZE * sizeof(unsigned int));
} }
/* gather the elements from the scatter buffer using C */ /* gather the elements from the scatter16_32 buffer using C */
void scalar_gather_16_32(unsigned short *vgather16_32) void scalar_gather_16_32(unsigned short *vgather16_32)
{ {
for (int i = 0; i < MATRIX_SIZE; ++i) { for (int i = 0; i < MATRIX_SIZE; ++i) {
@ -807,6 +835,7 @@ void check_gather_16_32(void)
MATRIX_SIZE * sizeof(unsigned short)); MATRIX_SIZE * sizeof(unsigned short));
} }
/* masked gather the elements from the scatter16_32 buffer using C */
void scalar_gather_16_32_masked(unsigned short *vgather16_32) void scalar_gather_16_32_masked(unsigned short *vgather16_32)
{ {
for (int i = 0; i < MATRIX_SIZE; ++i) { for (int i = 0; i < MATRIX_SIZE; ++i) {