commit
c44493cd8a
|
@ -82,7 +82,7 @@ target_compile_definitions(${PROJECT_NAME} PRIVATE
|
|||
$<$<BOOL:${TEST_AUTOMATION}>:TEST_AUTOMATION>
|
||||
$<$<BOOL:${ENABLE_LOG}>:DEBUGFAST>)
|
||||
|
||||
target_include_directories(${PROJECT_NAME} PRIVATE core core/deps core/deps/stb core/deps/xbyak core/khronos)
|
||||
target_include_directories(${PROJECT_NAME} PRIVATE core core/deps core/deps/stb core/khronos)
|
||||
|
||||
find_package(OpenMP)
|
||||
if(OpenMP_CXX_FOUND AND NOT APPLE AND USE_OPENMP)
|
||||
|
@ -912,6 +912,8 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)")
|
|||
core/deps/vixl/utils-vixl.h)
|
||||
target_sources(${PROJECT_NAME} PRIVATE core/rec-ARM64/rec_arm64.cpp core/rec-ARM64/arm64_regalloc.h)
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "i686.*|i386.*|x86.*|amd64.*|x86_64.*|AMD64.*")
|
||||
add_subdirectory(core/deps/xbyak)
|
||||
target_link_libraries(${PROJECT_NAME} PRIVATE xbyak::xbyak)
|
||||
if(CMAKE_SIZEOF_VOID_P EQUAL 4)
|
||||
target_sources(${PROJECT_NAME} PRIVATE
|
||||
core/rec-x64/xbyak_base.h
|
||||
|
|
|
@ -0,0 +1,11 @@
|
|||
# CI workflow: on every push, install the assembler/toolchain packages and run `make test`.
name: test
on: [push]

jobs:
  build:
    name: test
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    # Refresh the package index first so the install does not hit stale mirror
    # entries, and use the script-stable, non-interactive apt-get front end.
    - run: |
        sudo apt-get update
        sudo apt-get install -y nasm yasm g++-multilib tcsh
    - run: make test
|
|
@ -0,0 +1 @@
|
|||
# cmake build directories (a comment must be on its own line: text after a
# pattern is treated as part of the pattern)
/build*
|
|
@ -1,12 +0,0 @@
|
|||
# Travis CI job: C++ build on Ubuntu bionic for the gcc and clang compilers,
# with the listed apt packages installed, running `make test`.
sudo: true
dist: bionic
language: cpp
compiler: [gcc, clang]
addons:
  apt:
    packages: ["nasm yasm g++-multilib tcsh"]
script: ["make test"]
|
|
@ -1,6 +1,46 @@
|
|||
cmake_minimum_required(VERSION 2.6...3.0.2)

project(xbyak CXX)

# Public headers of the header-only library; installed at the end of this file.
file(GLOB headers xbyak/*.h)

# The INTERFACE target and the exported package files are only set up on
# CMake >= 3.0.2. The gate is spelled `NOT ... VERSION_LESS` because
# `VERSION_GREATER_EQUAL` only exists since CMake 3.7 and would be a
# configure-time error on 3.0.2 - 3.6.
if(DEFINED CMAKE_VERSION AND NOT CMAKE_VERSION VERSION_LESS 3.0.2)
  include(GNUInstallDirs)
  add_library(${PROJECT_NAME} INTERFACE)
  add_library(${PROJECT_NAME}::${PROJECT_NAME} ALIAS ${PROJECT_NAME})

  # Build tree: headers are found relative to this directory.
  # Install tree: headers are found under the install include directory.
  target_include_directories(
    ${PROJECT_NAME} INTERFACE
    "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>"
    "$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>"
  )

  install(
    TARGETS ${PROJECT_NAME}
    EXPORT ${PROJECT_NAME}-targets
  )

  # Generate <name>Config.cmake in the binary directory from the template.
  configure_file(
    cmake/config.cmake.in
    ${PROJECT_NAME}Config.cmake
    @ONLY
  )

  install(
    FILES ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake
    DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}
  )

  # The targets file goes next to the config file, which includes it.
  install(
    EXPORT ${PROJECT_NAME}-targets
    NAMESPACE ${PROJECT_NAME}::
    DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}
  )
elseif(NOT DEFINED CMAKE_INSTALL_INCLUDEDIR)
  # Older CMake: no GNUInstallDirs include here, so provide the default ourselves.
  set(CMAKE_INSTALL_INCLUDEDIR "include")
endif()

# Single header install rule, used on every CMake version.
install(
  FILES ${headers}
  DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/xbyak
)
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Install prefix. `?=` assigns only when PREFIX is not already set, so a value
# from the environment or the command line is kept; an unconditional
# assignment ahead of it would defeat that.
PREFIX?=/usr/local
INSTALL_DIR=$(PREFIX)/include/xbyak
|
||||
|
||||
all:
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
# Loads the exported targets file located in the same directory as this file.
# The project-name placeholder below is written in at-sign form, so it is
# presumably filled in by configure_file(... @ONLY) -- confirm against the
# CMakeLists.txt that configures this template.
include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@-targets.cmake")
|
|
@ -0,0 +1,26 @@
|
|||
# Regenerates $(TARGET) by building the generator programs in this directory
# and concatenating their output.
TARGET=../xbyak/xbyak_mnemonic.h
BIN=sortline gen_code gen_avx512
CFLAGS=-I../ -O2 -DXBYAK_NO_OP_NAMES -Wall -Wextra -Wno-missing-field-initializers
all: $(TARGET)

# `all` and `clean` name actions, not files; without this a stray file called
# `all` or `clean` would silently disable the rule.
.PHONY: all clean

# $(TARGET) is written incrementally by many commands; if one of them fails,
# delete the partial header so the next run regenerates it instead of treating
# the truncated file as up to date.
.DELETE_ON_ERROR:

sortline: sortline.cpp
	$(CXX) $(CFLAGS) $< -o $@
gen_code: gen_code.cpp ../xbyak/xbyak.h avx_type.hpp
	$(CXX) $(CFLAGS) $< -o $@
gen_avx512: gen_avx512.cpp ../xbyak/xbyak.h avx_type.hpp
	$(CXX) $(CFLAGS) $< -o $@

# The first command truncates the header (`>`); every later command appends (`>>`).
$(TARGET): $(BIN)
	./gen_code | ./sortline > $@
	echo "#ifdef XBYAK_ENABLE_OMITTED_OPERAND" >> $@
	./gen_code omit | ./sortline >> $@
	echo "#endif" >>$@
	./gen_code fixed >> $@
	echo "#ifndef XBYAK_DISABLE_AVX512" >> $@
	./gen_avx512 | ./sortline >> $@
	echo "#ifdef XBYAK64" >> $@
	./gen_avx512 64 | ./sortline >> $@
	echo "#endif" >> $@
	echo "#endif" >> $@

clean:
	$(RM) $(BIN) $(TARGET)
|
|
@ -0,0 +1,170 @@
|
|||
#include <assert.h>
#include <string>
|
||||
// copy CodeGenerator::AVXtype
|
||||
/*
	Attribute flags describing how an AVX/AVX-512 instruction is encoded.
	The low 3 bits hold one T_N* selector (an ordinal 1..6, not a bit mask);
	every other enumerator is an independent single-bit flag.
*/
enum AVXtype {
	// low 3 bit
	T_N1        = 1,
	T_N2        = 2,
	T_N4        = 3,
	T_N8        = 4,
	T_N16       = 5,
	T_N32       = 6,
	T_NX_MASK   = 7,       // mask that extracts the T_N* selector
	//
	T_N_VL      = 1 << 3,  // N * (1, 2, 4) for VL
	T_DUP       = 1 << 4,  // N = (8, 32, 64)
	T_66        = 1 << 5,
	T_F3        = 1 << 6,
	T_F2        = 1 << 7,
	T_0F        = 1 << 8,
	T_0F38      = 1 << 9,
	T_0F3A      = 1 << 10,
	T_L0        = 1 << 11,
	T_L1        = 1 << 12,
	T_W0        = 1 << 13,
	T_W1        = 1 << 14,
	T_EW0       = 1 << 15,
	T_EW1       = 1 << 16,
	T_YMM       = 1 << 17, // support YMM, ZMM
	T_EVEX      = 1 << 18,
	T_ER_X      = 1 << 19, // xmm{er}
	T_ER_Y      = 1 << 20, // ymm{er}
	T_ER_Z      = 1 << 21, // zmm{er}
	T_SAE_X     = 1 << 22, // xmm{sae}
	T_SAE_Y     = 1 << 23, // ymm{sae}
	T_SAE_Z     = 1 << 24, // zmm{sae}
	T_MUST_EVEX = 1 << 25, // contains T_EVEX
	T_B32       = 1 << 26, // m32bcst
	T_B64       = 1 << 27, // m64bcst
	T_M_K       = 1 << 28, // mem{k}
	T_VSIB      = 1 << 29,
	T_MEM_EVEX  = 1 << 30, // use evex if mem
	T_XXX                  // trailing enumerator; implicitly T_MEM_EVEX + 1
};

const int NONE = 256; // same as Xbyak::CodeGenerator::NONE
|
||||
|
||||
std::string type2String(int type)
|
||||
{
|
||||
std::string str;
|
||||
int low = type & T_NX_MASK;
|
||||
if (0 < low) {
|
||||
const char *tbl[8] = {
|
||||
"T_N1", "T_N2", "T_N4", "T_N8", "T_N16", "T_N32"
|
||||
};
|
||||
assert(low < int(sizeof(tbl) / sizeof(tbl[0])));
|
||||
str = tbl[low - 1];
|
||||
}
|
||||
if (type & T_N_VL) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_N_VL";
|
||||
}
|
||||
if (type & T_DUP) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_DUP";
|
||||
}
|
||||
if (type & T_66) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_66";
|
||||
}
|
||||
if (type & T_F3) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_F3";
|
||||
}
|
||||
if (type & T_F2) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_F2";
|
||||
}
|
||||
if (type & T_0F) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_0F";
|
||||
}
|
||||
if (type & T_0F38) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_0F38";
|
||||
}
|
||||
if (type & T_0F3A) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_0F3A";
|
||||
}
|
||||
if (type & T_L0) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "VEZ_L0";
|
||||
}
|
||||
if (type & T_L1) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "VEZ_L1";
|
||||
}
|
||||
if (type & T_W0) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_W0";
|
||||
}
|
||||
if (type & T_W1) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_W1";
|
||||
}
|
||||
if (type & T_EW0) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_EW0";
|
||||
}
|
||||
if (type & T_EW1) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_EW1";
|
||||
}
|
||||
if (type & T_YMM) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_YMM";
|
||||
}
|
||||
if (type & T_EVEX) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_EVEX";
|
||||
}
|
||||
if (type & T_ER_X) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_ER_X";
|
||||
}
|
||||
if (type & T_ER_Y) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_ER_Y";
|
||||
}
|
||||
if (type & T_ER_Z) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_ER_Z";
|
||||
}
|
||||
if (type & T_SAE_X) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_SAE_X";
|
||||
}
|
||||
if (type & T_SAE_Y) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_SAE_Y";
|
||||
}
|
||||
if (type & T_SAE_Z) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_SAE_Z";
|
||||
}
|
||||
if (type & T_MUST_EVEX) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_MUST_EVEX";
|
||||
}
|
||||
if (type & T_B32) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_B32";
|
||||
}
|
||||
if (type & T_B64) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_B64";
|
||||
}
|
||||
if (type & T_M_K) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_M_K";
|
||||
}
|
||||
if (type & T_VSIB) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_VSIB";
|
||||
}
|
||||
if (type & T_MEM_EVEX) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_MEM_EVEX";
|
||||
}
|
||||
return str;
|
||||
}
|
|
@ -0,0 +1,17 @@
|
|||
#include <stdio.h>
|
||||
|
||||
/*
	Print an anonymous enum with one enumerator per byte value, named "B"
	followed by the value's 8 binary digits (most significant bit first).
	Every line but the last ends with a comma.
*/
int main()
{
	puts("enum {");
	for (int value = 0; value < 256; value++) {
		char bits[9];
		for (int pos = 0; pos < 8; pos++) {
			const int bit = (value >> (7 - pos)) & 1;
			bits[pos] = bit ? '1' : '0';
		}
		bits[8] = '\0';
		const char *separator = (value < 255) ? "," : "";
		printf(" B%s= %d%s\n", bits, value, separator);
	}
	puts("};");
	return 0;
}
|
|
@ -0,0 +1,750 @@
|
|||
#define XBYAK_DONT_READ_LIST
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "../xbyak/xbyak.h"
|
||||
// Element count of a true array (not a pointer). The argument is parenthesised
// before indexing so an expression argument is indexed as a whole.
#define NUM_OF_ARRAY(x) (sizeof(x) / sizeof((x)[0]))
|
||||
|
||||
using namespace Xbyak;
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(disable : 4996) // scanf
|
||||
#define snprintf _snprintf_s
|
||||
#endif
|
||||
|
||||
#include "avx_type.hpp"
|
||||
|
||||
void putOpmask(bool only64bit)
|
||||
{
|
||||
if (only64bit) {
|
||||
puts("void kmovq(const Opmask& k, const Reg64& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W1, 0x92); }");
|
||||
puts("void kmovq(const Reg64& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W1, 0x93); }");
|
||||
return;
|
||||
}
|
||||
|
||||
{
|
||||
const struct Tbl {
|
||||
const char *name;
|
||||
uint8_t code;
|
||||
} tbl[] = {
|
||||
{ "kadd", 0x4A },
|
||||
{ "kand", 0x41 },
|
||||
{ "kandn", 0x42 },
|
||||
{ "kor", 0x45 },
|
||||
{ "kxnor", 0x46 },
|
||||
{ "kxor", 0x47 },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl& p = tbl[i];
|
||||
printf("void %sw(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W0, 0x%02X); }\n", p.name, p.code);
|
||||
printf("void %sq(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W1, 0x%02X); }\n", p.name, p.code);
|
||||
printf("void %sb(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_66 | T_W0, 0x%02X); }\n", p.name, p.code);
|
||||
printf("void %sd(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_66 | T_W1, 0x%02X); }\n", p.name, p.code);
|
||||
}
|
||||
printf("void kunpckbw(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_66 | T_W0, 0x4B); }\n");
|
||||
printf("void kunpckwd(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W0, 0x4B); }\n");
|
||||
printf("void kunpckdq(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W1, 0x4B); }\n");
|
||||
}
|
||||
{
|
||||
const struct Tbl {
|
||||
const char *name;
|
||||
uint8_t code;
|
||||
} tbl[] = {
|
||||
{ "knot", 0x44 },
|
||||
{ "kortest", 0x98 },
|
||||
{ "ktest", 0x99 },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl& p = tbl[i];
|
||||
printf("void %sw(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_W0, 0x%02X); }\n", p.name, p.code);
|
||||
printf("void %sq(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_W1, 0x%02X); }\n", p.name, p.code);
|
||||
printf("void %sb(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_66 | T_W0, 0x%02X); }\n", p.name, p.code);
|
||||
printf("void %sd(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_66 | T_W1, 0x%02X); }\n", p.name, p.code);
|
||||
}
|
||||
}
|
||||
{
|
||||
const struct Tbl {
|
||||
const char *name;
|
||||
uint8_t code;
|
||||
} tbl[] = {
|
||||
{ "kshiftl", 0x32 },
|
||||
{ "kshiftr", 0x30 },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl& p = tbl[i];
|
||||
printf("void %sw(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x%02X, imm); }\n", p.name, p.code);
|
||||
printf("void %sq(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x%02X, imm); }\n", p.name, p.code + 1);
|
||||
printf("void %sb(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x%02X, imm); }\n", p.name, p.code);
|
||||
printf("void %sd(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x%02X, imm); }\n", p.name, p.code + 1);
|
||||
}
|
||||
}
|
||||
puts("void kmovw(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_W0, 0x90); }");
|
||||
puts("void kmovq(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_W1, 0x90); }");
|
||||
puts("void kmovb(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W0, 0x90); }");
|
||||
puts("void kmovd(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W1, 0x90); }");
|
||||
|
||||
puts("void kmovw(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_W0, 0x91); }");
|
||||
puts("void kmovq(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_W1, 0x91); }");
|
||||
puts("void kmovb(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_66 | T_W0, 0x91); }");
|
||||
puts("void kmovd(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_66 | T_W1, 0x91); }");
|
||||
|
||||
puts("void kmovw(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_W0, 0x92); }");
|
||||
puts("void kmovw(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_W0, 0x93); }");
|
||||
puts("void kmovb(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_66 | T_W0, 0x92); }");
|
||||
puts("void kmovb(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_66 | T_W0, 0x93); }");
|
||||
puts("void kmovd(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W0, 0x92); }");
|
||||
puts("void kmovd(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W0, 0x93); }");
|
||||
}
|
||||
|
||||
// vcmppd(k, x, op)
|
||||
void putVcmp()
|
||||
{
|
||||
const struct Tbl {
|
||||
uint8_t code;
|
||||
const char *name;
|
||||
int type;
|
||||
bool hasIMM;
|
||||
} tbl[] = {
|
||||
{ 0xC2, "vcmppd", T_0F | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_YMM | T_66 | T_B64, true },
|
||||
{ 0xC2, "vcmpps", T_0F | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_YMM | T_B32, true },
|
||||
{ 0xC2, "vcmpsd", T_0F | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_F2 | T_N8, true },
|
||||
{ 0xC2, "vcmpss", T_0F | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_F3 | T_N4, true },
|
||||
|
||||
{ 0x74, "vpcmpeqb", T_66 | T_0F | T_MUST_EVEX | T_YMM, false },
|
||||
{ 0x75, "vpcmpeqw", T_66 | T_0F | T_MUST_EVEX | T_YMM, false },
|
||||
{ 0x76, "vpcmpeqd", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_B32, false },
|
||||
{ 0x29, "vpcmpeqq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
|
||||
|
||||
{ 0x64, "vpcmpgtb", T_66 | T_0F | T_MUST_EVEX | T_YMM, false },
|
||||
{ 0x65, "vpcmpgtw", T_66 | T_0F | T_MUST_EVEX | T_YMM, false },
|
||||
{ 0x66, "vpcmpgtd", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false },
|
||||
{ 0x37, "vpcmpgtq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
|
||||
|
||||
{ 0x3F, "vpcmpb", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0, true },
|
||||
{ 0x3E, "vpcmpub", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0, true },
|
||||
|
||||
{ 0x3F, "vpcmpw", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1, true },
|
||||
{ 0x3E, "vpcmpuw", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1, true },
|
||||
{ 0x1F, "vpcmpd", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, true },
|
||||
{ 0x1E, "vpcmpud", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, true },
|
||||
{ 0x1F, "vpcmpq", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, true },
|
||||
{ 0x1E, "vpcmpuq", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, true },
|
||||
|
||||
{ 0x26, "vptestmb", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, false },
|
||||
{ 0x26, "vptestmw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false },
|
||||
{ 0x27, "vptestmd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false },
|
||||
{ 0x27, "vptestmq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
|
||||
|
||||
{ 0x26, "vptestnmb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, false },
|
||||
{ 0x26, "vptestnmw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false },
|
||||
{ 0x27, "vptestnmd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false },
|
||||
{ 0x27, "vptestnmq", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
std::string type = type2String(p->type);
|
||||
printf("void %s(const Opmask& k, const Xmm& x, const Operand& op%s) { opAVX_K_X_XM(k, x, op, %s, 0x%02X%s); }\n"
|
||||
, p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : "");
|
||||
}
|
||||
}
|
||||
|
||||
// XM_X
|
||||
void putX_XM()
|
||||
{
|
||||
const struct Tbl {
|
||||
uint8_t code;
|
||||
const char *name;
|
||||
int type;
|
||||
} tbl[] = {
|
||||
{ 0x6F, "vmovdqa32", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z },
|
||||
{ 0x6F, "vmovdqa64", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z },
|
||||
{ 0x6F, "vmovdqu8", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z },
|
||||
{ 0x6F, "vmovdqu16", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z },
|
||||
{ 0x6F, "vmovdqu32", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z },
|
||||
{ 0x6F, "vmovdqu64", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z },
|
||||
{ 0x7B, "vcvtpd2qq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_ER_Z },
|
||||
// putCvt
|
||||
{ 0x79, "vcvtpd2uqq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_ER_Z },
|
||||
{ 0x79, "vcvtps2udq", T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_ER_Z },
|
||||
{ 0xE6, "vcvtqq2pd", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_ER_Z },
|
||||
{ 0x7A, "vcvttpd2qq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z },
|
||||
{ 0x78, "vcvttpd2uqq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z },
|
||||
{ 0x78, "vcvttps2udq", T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z },
|
||||
{ 0x7A, "vcvtudq2ps", T_F2 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_ER_Z },
|
||||
{ 0x7A, "vcvtuqq2pd", T_F3 | T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z },
|
||||
|
||||
{ 0x88, "vexpandpd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N8 },
|
||||
{ 0x88, "vexpandps", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 },
|
||||
|
||||
{ 0x89, "vpexpandd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 },
|
||||
{ 0x89, "vpexpandq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N8 },
|
||||
{ 0x42, "vgetexppd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z },
|
||||
{ 0x42, "vgetexpps", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
std::string type = type2String(p->type);
|
||||
printf("void %s(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p->name, type.c_str(), p->code);
|
||||
}
|
||||
puts("void vpabsq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_B64 | T_YMM, 0x1F); }");
|
||||
|
||||
puts("void vexp2pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, 0xC8); }");
|
||||
puts("void vexp2ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xC8); }");
|
||||
puts("void vrcp28pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, 0xCA); }");
|
||||
puts("void vrcp28ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xCA); }");
|
||||
|
||||
puts("void vrsqrt28pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, 0xCC); }");
|
||||
puts("void vrsqrt28ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xCC); }");
|
||||
}
|
||||
|
||||
void putM_X()
|
||||
{
|
||||
const struct Tbl {
|
||||
uint8_t code;
|
||||
const char *name;
|
||||
int type;
|
||||
} tbl[] = {
|
||||
{ 0x7F, "vmovdqa32", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
|
||||
{ 0x7F, "vmovdqa64", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
|
||||
{ 0x7F, "vmovdqu8", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
|
||||
{ 0x7F, "vmovdqu16", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
|
||||
{ 0x7F, "vmovdqu32", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
|
||||
{ 0x7F, "vmovdqu64", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
std::string type = type2String(p->type);
|
||||
printf("void %s(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, %s, 0x%02X); }\n", p->name, type.c_str(), p->code);
|
||||
}
|
||||
}
|
||||
|
||||
void putXM_X()
|
||||
{
|
||||
const struct Tbl {
|
||||
uint8_t code;
|
||||
const char *name;
|
||||
int type;
|
||||
} tbl[] = {
|
||||
{ 0x8A, "vcompresspd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N8 },
|
||||
{ 0x8A, "vcompressps", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 },
|
||||
|
||||
{ 0x8B, "vpcompressd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 },
|
||||
{ 0x8B, "vpcompressq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N8 },
|
||||
|
||||
{ 0x63, "vcompressb", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N1 },
|
||||
{ 0x63, "vcompressw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N2 },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
std::string type = type2String(p->type);
|
||||
printf("void %s(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p->name, type.c_str(), p->code);
|
||||
}
|
||||
}
|
||||
|
||||
void putX_X_XM_IMM()
|
||||
{
|
||||
const struct Tbl {
|
||||
uint8_t code;
|
||||
const char *name;
|
||||
int type;
|
||||
bool hasIMM;
|
||||
} tbl[] = {
|
||||
{ 0x03, "valignd", T_MUST_EVEX | T_66 | T_0F3A | T_EW0 | T_YMM, true },
|
||||
{ 0x03, "valignq", T_MUST_EVEX | T_66 | T_0F3A | T_EW1 | T_YMM, true },
|
||||
{ 0xDB, "vpandd", T_MUST_EVEX | T_YMM | T_66 | T_0F | T_EW0 | T_B32, false },
|
||||
{ 0xDB, "vpandq", T_MUST_EVEX | T_YMM | T_66 | T_0F | T_EW1 | T_B64, false },
|
||||
{ 0xDF, "vpandnd", T_MUST_EVEX | T_YMM | T_66 | T_0F | T_EW0 | T_B32, false },
|
||||
{ 0xDF, "vpandnq", T_MUST_EVEX | T_YMM | T_66 | T_0F | T_EW1 | T_B64, false },
|
||||
{ 0x3D, "vpmaxsq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
|
||||
{ 0x3F, "vpmaxuq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
|
||||
{ 0x39, "vpminsq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
|
||||
{ 0x3B, "vpminuq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
|
||||
{ 0xE2, "vpsraq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_N16, false },
|
||||
{ 0x46, "vpsravq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
|
||||
{ 0x11, "vpsravw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false },
|
||||
{ 0x12, "vpsllvw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false },
|
||||
{ 0x10, "vpsrlvw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false },
|
||||
{ 0xEB, "vpord", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false },
|
||||
{ 0xEB, "vporq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
|
||||
|
||||
{ 0xEF, "vpxord", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false },
|
||||
{ 0xEF, "vpxorq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
|
||||
|
||||
{ 0x40, "vpmullq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
|
||||
|
||||
{ 0x8D, "vpermb", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, false },
|
||||
{ 0x8D, "vpermw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false },
|
||||
|
||||
{ 0x65, "vblendmpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||
{ 0x65, "vblendmps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
|
||||
{ 0x66, "vpblendmb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, false },
|
||||
{ 0x66, "vpblendmw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, false },
|
||||
{ 0x64, "vpblendmd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
|
||||
{ 0x64, "vpblendmq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||
|
||||
{ 0x7D, "vpermt2b", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, false },
|
||||
{ 0x7D, "vpermt2w", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, false },
|
||||
{ 0x7E, "vpermt2d", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
|
||||
{ 0x7E, "vpermt2q", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||
{ 0x7F, "vpermt2ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
|
||||
{ 0x7F, "vpermt2pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||
|
||||
{ 0x75, "vpermi2b", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, false },
|
||||
{ 0x75, "vpermi2w", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, false },
|
||||
{ 0x76, "vpermi2d", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
|
||||
{ 0x76, "vpermi2q", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||
{ 0x77, "vpermi2ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
|
||||
{ 0x77, "vpermi2pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||
|
||||
{ 0x25, "vpternlogd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, true },
|
||||
{ 0x25, "vpternlogq", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, true },
|
||||
|
||||
{ 0x43, "vgetexpsd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_SAE_X | T_N8, false },
|
||||
{ 0x43, "vgetexpss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, false },
|
||||
{ 0x27, "vgetmantsd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_SAE_X | T_N8, true },
|
||||
{ 0x27, "vgetmantss", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, true },
|
||||
|
||||
{ 0x54, "vfixupimmpd", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, true },
|
||||
{ 0x54, "vfixupimmps", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, true },
|
||||
{ 0x55, "vfixupimmsd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_N8, true },
|
||||
{ 0x55, "vfixupimmss", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_N4, true },
|
||||
|
||||
{ 0x4D, "vrcp14sd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_N8, false },
|
||||
{ 0x4D, "vrcp14ss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_N4, false },
|
||||
|
||||
{ 0x4F, "vrsqrt14sd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, false },
|
||||
{ 0x4F, "vrsqrt14ss", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, false },
|
||||
|
||||
{ 0x0B, "vrndscalesd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, true },
|
||||
{ 0x0A, "vrndscaless", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, true },
|
||||
|
||||
{ 0x2C, "vscalefpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, false },
|
||||
{ 0x2C, "vscalefps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_ER_Z, false },
|
||||
{ 0x2D, "vscalefsd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_ER_X | T_N8, false },
|
||||
{ 0x2D, "vscalefss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_ER_X | T_N4, false },
|
||||
|
||||
{ 0x42, "vdbpsadbw", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0, true },
|
||||
{ 0x83, "vpmultishiftqb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||
|
||||
{ 0x15, "vprolvd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
|
||||
{ 0x15, "vprolvq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||
|
||||
{ 0x14, "vprorvd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
|
||||
{ 0x14, "vprorvq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||
|
||||
{ 0xCB, "vrcp28sd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_N8 | T_SAE_X, false },
|
||||
{ 0xCB, "vrcp28ss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_N4 | T_SAE_X, false },
|
||||
|
||||
{ 0xCD, "vrsqrt28sd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_N8 | T_SAE_X, false },
|
||||
{ 0xCD, "vrsqrt28ss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_N4 | T_SAE_X, false },
|
||||
|
||||
{ 0x50, "vrangepd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, true },
|
||||
{ 0x50, "vrangeps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_SAE_Z, true },
|
||||
{ 0x51, "vrangesd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_SAE_X | T_N8, true },
|
||||
{ 0x51, "vrangess", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, true },
|
||||
|
||||
{ 0x57, "vreducesd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_SAE_X | T_N8, true },
|
||||
{ 0x57, "vreducess", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, true },
|
||||
|
||||
{ 0xB4, "vpmadd52luq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||
{ 0xB5, "vpmadd52huq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||
|
||||
{ 0x70, "vpshldw", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, true },
|
||||
{ 0x71, "vpshldd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, true },
|
||||
{ 0x71, "vpshldq", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_B64, true },
|
||||
|
||||
{ 0x70, "vpshldvw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, false },
|
||||
{ 0x71, "vpshldvd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false },
|
||||
{ 0x71, "vpshldvq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_B64, false },
|
||||
|
||||
{ 0x72, "vpshrdw", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, true },
|
||||
{ 0x73, "vpshrdd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, true },
|
||||
{ 0x73, "vpshrdq", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_B64, true },
|
||||
|
||||
{ 0x72, "vpshrdvw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, false },
|
||||
{ 0x73, "vpshrdvd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false },
|
||||
{ 0x73, "vpshrdvq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_B64, false },
|
||||
|
||||
{ 0x72, "vcvtne2ps2bf16", T_F2 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false },
|
||||
{ 0x52, "vdpbf16ps", T_F3 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
std::string type = type2String(p->type);
|
||||
printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op%s) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X%s); }\n"
|
||||
, p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : "");
|
||||
}
|
||||
}
|
||||
|
||||
void putShift()
|
||||
{
|
||||
const struct Tbl {
|
||||
const char *name;
|
||||
uint8_t code;
|
||||
int idx;
|
||||
int type;
|
||||
} tbl[] = {
|
||||
{ "vpsraq", 0x72, 4, T_0F | T_66 | T_YMM | T_MUST_EVEX |T_EW1 | T_B64 },
|
||||
{ "vprold", 0x72, 1, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 },
|
||||
{ "vprolq", 0x72, 1, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 },
|
||||
{ "vprord", 0x72, 0, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 },
|
||||
{ "vprorq", 0x72, 0, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl& p = tbl[i];
|
||||
std::string type = type2String(p.type);
|
||||
printf("void %s(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), %d), x, op, %s, 0x%02X, imm); }\n", p.name, p.idx, type.c_str(), p.code);
|
||||
}
|
||||
}
|
||||
|
||||
void putExtractInsert()
|
||||
{
|
||||
{
|
||||
const struct Tbl {
|
||||
const char *name;
|
||||
uint8_t code;
|
||||
int type;
|
||||
bool isZMM;
|
||||
} tbl[] = {
|
||||
{ "vextractf32x4", 0x19, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N16, false },
|
||||
{ "vextractf64x2", 0x19, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N16, false },
|
||||
{ "vextractf32x8", 0x1B, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N32, true },
|
||||
{ "vextractf64x4", 0x1B, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N32, true },
|
||||
|
||||
{ "vextracti32x4", 0x39, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N16, false },
|
||||
{ "vextracti64x2", 0x39, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N16, false },
|
||||
{ "vextracti32x8", 0x3B, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N32, true },
|
||||
{ "vextracti64x4", 0x3B, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N32, true },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl& p = tbl[i];
|
||||
std::string type = type2String(p.type);
|
||||
const char *kind = p.isZMM ? "Operand::MEM | Operand::YMM" : "Operand::MEM | Operand::XMM";
|
||||
printf("void %s(const Operand& op, const %s& r, uint8_t imm) { if (!op.is(%s)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, %s, 0x%2X, imm); }\n", p.name, p.isZMM ? "Zmm" : "Ymm", kind, type.c_str(), p.code);
|
||||
}
|
||||
}
|
||||
{
|
||||
const struct Tbl {
|
||||
const char *name;
|
||||
uint8_t code;
|
||||
int type;
|
||||
bool isZMM;
|
||||
} tbl[] = {
|
||||
{ "vinsertf32x4", 0x18, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N16, false },
|
||||
{ "vinsertf64x2", 0x18, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N16, false },
|
||||
{ "vinsertf32x8", 0x1A, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N32, true },
|
||||
{ "vinsertf64x4", 0x1A, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N32, true },
|
||||
|
||||
{ "vinserti32x4", 0x38, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N16, false },
|
||||
{ "vinserti64x2", 0x38, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N16, false },
|
||||
{ "vinserti32x8", 0x3A, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N32, true },
|
||||
{ "vinserti64x4", 0x3A, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N32, true },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl& p = tbl[i];
|
||||
std::string type = type2String(p.type);
|
||||
const char *x = p.isZMM ? "Zmm" : "Ymm";
|
||||
const char *cond = p.isZMM ? "op.is(Operand::MEM | Operand::YMM)" : "(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))";
|
||||
printf("void %s(const %s& r1, const %s& r2, const Operand& op, uint8_t imm) {"
|
||||
"if (!%s) XBYAK_THROW(ERR_BAD_COMBINATION) "
|
||||
"opVex(r1, &r2, op, %s, 0x%2X, imm); }\n", p.name, x, x, cond, type.c_str(), p.code);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void putBroadcast(bool only64bit)
|
||||
{
|
||||
{
|
||||
const struct Tbl {
|
||||
uint8_t code;
|
||||
const char *name;
|
||||
int type;
|
||||
int reg;
|
||||
} tbl[] = {
|
||||
{ 0x7A, "vpbroadcastb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 8 },
|
||||
{ 0x7B, "vpbroadcastw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 16 },
|
||||
{ 0x7C, "vpbroadcastd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 32 },
|
||||
{ 0x7C, "vpbroadcastq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, 64},
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl& p = tbl[i];
|
||||
std::string type = type2String(p.type);
|
||||
if ((only64bit && p.reg == 64) || (!only64bit && p.reg != 64)) {
|
||||
printf("void %s(const Xmm& x, const Reg%d& r) { opVex(x, 0, r, %s, 0x%02X); }\n", p.name, p.reg, type.c_str(), p.code);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (only64bit) return;
|
||||
puts("void vbroadcastf32x2(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N8, 0x19); }");
|
||||
puts("void vbroadcastf32x4(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N16, 0x1A); }");
|
||||
puts("void vbroadcastf64x2(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N16, 0x1A); }");
|
||||
puts("void vbroadcastf64x4(const Zmm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N32, 0x1B); }");
|
||||
puts("void vbroadcastf32x8(const Zmm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N32, 0x1B); }");
|
||||
|
||||
puts("void vbroadcasti32x2(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N8, 0x59); }");
|
||||
puts("void vbroadcasti32x4(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N16, 0x5A); }");
|
||||
puts("void vbroadcasti64x2(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N16, 0x5A); }");
|
||||
puts("void vbroadcasti32x8(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N32, 0x5B); }");
|
||||
puts("void vbroadcasti64x4(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N32, 0x5B); }");
|
||||
}
|
||||
|
||||
// Prints the fixed set of AVX-512 conversion wrappers (vcvt*), one complete
// method definition per line, in the order listed below.
void putCvt()
{
	static const char *const defs[] = {
		"void vcvtpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x79); }",
		"void vcvtps2qq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_ER_Y, 0x7B); }",
		"void vcvtps2uqq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_ER_Y, 0x79); }",
		"void vcvtqq2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x5B); }",
		"void vcvttpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, 0x78); }",
		"void vcvttps2qq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_SAE_Y, 0x7A); }",
		"void vcvttps2uqq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_SAE_Y, 0x78); }",
		"void vcvtudq2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_F3 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL, 0x7A); }",

		"void vcvtsd2usi(const Reg32e& r, const Operand& op) { int type = (T_F2 | T_0F | T_MUST_EVEX | T_N8 | T_ER_X) | (r.isREG(64) ? T_EW1 : T_EW0); opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, type, 0x79); }",
		"void vcvtss2usi(const Reg32e& r, const Operand& op) { int type = (T_F3 | T_0F | T_MUST_EVEX | T_N4 | T_ER_X) | (r.isREG(64) ? T_EW1 : T_EW0); opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, type, 0x79); }",
		"void vcvttsd2usi(const Reg32e& r, const Operand& op) { int type = (T_F2 | T_0F | T_MUST_EVEX | T_N8 | T_SAE_X) | (r.isREG(64) ? T_EW1 : T_EW0); opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, type, 0x78); }",
		"void vcvttss2usi(const Reg32e& r, const Operand& op) { int type = (T_F3 | T_0F | T_MUST_EVEX | T_N4 | T_SAE_X) | (r.isREG(64) ? T_EW1 : T_EW0); opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, type, 0x78); }",
		"void vcvtuqq2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F2 | T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x7A); }",
		"void vcvtusi2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F2 | T_0F | T_MUST_EVEX, T_W1 | T_EW1 | T_ER_X | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); }",
		"void vcvtusi2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F3 | T_0F | T_MUST_EVEX | T_ER_X, T_W1 | T_EW1 | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); }",
	};
	const size_t count = sizeof(defs) / sizeof(defs[0]);
	for (size_t i = 0; i < count; ++i) {
		puts(defs[i]);
	}
}
|
||||
|
||||
// Mode selectors stored in the gather/scatter tables and printed as the last
// argument of the generated opGather2 calls. Same as xbyak.h: the numeric
// values must stay identical to the constants defined there.
enum {
	xx_yy_zz, // 0
	xx_yx_zy, // 1
	xx_xy_yz  // 2
};
|
||||
void putGather()
|
||||
{
|
||||
const struct Tbl {
|
||||
const char *name;
|
||||
int type;
|
||||
uint8_t code;
|
||||
int mode;
|
||||
} tbl[] = {
|
||||
{ "vpgatherdd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, 0x90, xx_yy_zz },
|
||||
{ "vpgatherdq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, 0x90, xx_yx_zy },
|
||||
{ "vpgatherqd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, 0x91, xx_xy_yz },
|
||||
{ "vpgatherqq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, 0x91, xx_yy_zz },
|
||||
{ "vgatherdps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, 0x92, xx_yy_zz },
|
||||
{ "vgatherdpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, 0x92, xx_yx_zy },
|
||||
{ "vgatherqps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, 0x93, xx_xy_yz },
|
||||
{ "vgatherqpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, 0x93, xx_yy_zz },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl& p = tbl[i];
|
||||
std::string type = type2String(p.type | T_VSIB);
|
||||
printf("void %s(const Xmm& x, const Address& addr) { opGather2(x, addr, %s, 0x%02X, %d); }\n", p.name, type.c_str(), p.code, p.mode);
|
||||
}
|
||||
}
|
||||
void putScatter()
|
||||
{
|
||||
const struct Tbl {
|
||||
const char *name;
|
||||
int type;
|
||||
uint8_t code;
|
||||
int mode; // reverse of gather
|
||||
} tbl[] = {
|
||||
{ "vpscatterdd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4 | T_M_K, 0xA0, xx_yy_zz },
|
||||
{ "vpscatterdq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8 | T_M_K, 0xA0, xx_yx_zy },
|
||||
{ "vpscatterqd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4 | T_M_K, 0xA1, xx_xy_yz },
|
||||
{ "vpscatterqq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8 | T_M_K, 0xA1, xx_yy_zz },
|
||||
|
||||
{ "vscatterdps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4 | T_M_K, 0xA2, xx_yy_zz },
|
||||
{ "vscatterdpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8 | T_M_K, 0xA2, xx_yx_zy },
|
||||
{ "vscatterqps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4 | T_M_K, 0xA3, xx_xy_yz },
|
||||
{ "vscatterqpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8 | T_M_K, 0xA3, xx_yy_zz },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl& p = tbl[i];
|
||||
std::string type = type2String(p.type | T_VSIB);
|
||||
printf("void %s(const Address& addr, const Xmm& x) { opGather2(x, addr, %s, 0x%02X, %d); }\n", p.name, type.c_str(), p.code, p.mode);
|
||||
}
|
||||
}
|
||||
|
||||
// Prints the four fixed vshuf{f,i}{32x4,64x2} wrapper definitions, one per
// line, in the order listed below.
void putShuff()
{
	static const char *const defs[] = {
		"void vshuff32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x23, imm); }",
		"void vshuff64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x23, imm); }",
		"void vshufi32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x43, imm); }",
		"void vshufi64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x43, imm); }",
	};
	for (size_t i = 0; i < sizeof(defs) / sizeof(defs[0]); ++i) {
		puts(defs[i]);
	}
}
|
||||
|
||||
void putMov()
|
||||
{
|
||||
puts("void vpmovm2b(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x28); }");
|
||||
puts("void vpmovm2w(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x28); }");
|
||||
puts("void vpmovm2d(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x38); }");
|
||||
puts("void vpmovm2q(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x38); }");
|
||||
|
||||
puts("void vpmovb2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x29); }");
|
||||
puts("void vpmovw2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x29); }");
|
||||
puts("void vpmovd2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x39); }");
|
||||
puts("void vpmovq2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x39); }");
|
||||
|
||||
{
|
||||
const struct Tbl {
|
||||
uint8_t code;
|
||||
const char *name;
|
||||
int type;
|
||||
int mode;
|
||||
} tbl[] = {
|
||||
{ 0x32, "vpmovqb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL, false },
|
||||
{ 0x22, "vpmovsqb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL, false },
|
||||
{ 0x12, "vpmovusqb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL, false },
|
||||
|
||||
{ 0x34, "vpmovqw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false },
|
||||
{ 0x24, "vpmovsqw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false },
|
||||
{ 0x14, "vpmovusqw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false },
|
||||
|
||||
{ 0x35, "vpmovqd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
|
||||
{ 0x25, "vpmovsqd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
|
||||
{ 0x15, "vpmovusqd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
|
||||
|
||||
{ 0x31, "vpmovdb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false },
|
||||
{ 0x21, "vpmovsdb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false },
|
||||
{ 0x11, "vpmovusdb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false },
|
||||
|
||||
{ 0x33, "vpmovdw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
|
||||
{ 0x23, "vpmovsdw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
|
||||
{ 0x13, "vpmovusdw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
|
||||
|
||||
{ 0x30, "vpmovwb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
|
||||
{ 0x20, "vpmovswb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
|
||||
{ 0x10, "vpmovuswb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl& p = tbl[i];
|
||||
std::string type = type2String(p.type);
|
||||
printf("void %s(const Operand& op, const Xmm& x) { opVmov(op, x, %s, 0x%02X, %s); }\n", p.name, type.c_str(), p.code, p.mode ? "true" : "false");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void putX_XM_IMM()
|
||||
{
|
||||
const struct Tbl {
|
||||
uint8_t code;
|
||||
const char *name;
|
||||
int type;
|
||||
bool hasIMM;
|
||||
} tbl[] = {
|
||||
{ 0x26, "vgetmantpd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, true },
|
||||
{ 0x26, "vgetmantps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_SAE_Z, true },
|
||||
{ 0x4C, "vrcp14pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||
{ 0x4C, "vrcp14ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
|
||||
|
||||
{ 0x4E, "vrsqrt14pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||
{ 0x4E, "vrsqrt14ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
|
||||
|
||||
{ 0x09, "vrndscalepd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, true },
|
||||
{ 0x08, "vrndscaleps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, true },
|
||||
|
||||
{ 0xC4, "vpconflictd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
|
||||
{ 0xC4, "vpconflictq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||
|
||||
{ 0x44, "vplzcntd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
|
||||
{ 0x44, "vplzcntq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||
|
||||
{ 0x56, "vreducepd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, true },
|
||||
{ 0x56, "vreduceps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_SAE_Z, true },
|
||||
|
||||
{ 0x54, "vpopcntb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z, false },
|
||||
{ 0x54, "vpopcntw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, false },
|
||||
{ 0x55, "vpopcntd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false },
|
||||
{ 0x55, "vpopcntq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_B64, false },
|
||||
|
||||
{ 0x62, "vpexpandb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_N1, false },
|
||||
{ 0x62, "vpexpandw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_N2, false },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
std::string type = type2String(p->type);
|
||||
printf("void %s(const Xmm& x, const Operand& op%s) { opAVX_X_XM_IMM(x, op, %s, 0x%02X%s); }\n"
|
||||
, p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : "");
|
||||
}
|
||||
}
|
||||
|
||||
void putMisc()
|
||||
{
|
||||
puts("void vpbroadcastmb2q(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, 0x2A); }");
|
||||
puts("void vpbroadcastmw2d(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 0x3A); }");
|
||||
{
|
||||
const struct Tbl {
|
||||
const char *name;
|
||||
int zm;
|
||||
int type;
|
||||
uint8_t code;
|
||||
bool isZmm;
|
||||
} tbl[] = {
|
||||
{ "vgatherpf0dps", 1, T_EW0 | T_N4, 0xC6, true },
|
||||
{ "vgatherpf0qps", 1, T_EW0 | T_N4, 0xC7, true },
|
||||
{ "vgatherpf0dpd", 1, T_EW1 | T_N8, 0xC6, false },
|
||||
{ "vgatherpf0qpd", 1, T_EW1 | T_N8, 0xC7, true },
|
||||
|
||||
{ "vgatherpf1dps", 2, T_EW0 | T_N4, 0xC6, true },
|
||||
{ "vgatherpf1qps", 2, T_EW0 | T_N4, 0xC7, true },
|
||||
{ "vgatherpf1dpd", 2, T_EW1 | T_N8, 0xC6, false },
|
||||
{ "vgatherpf1qpd", 2, T_EW1 | T_N8, 0xC7, true },
|
||||
|
||||
{ "vscatterpf0dps", 5, T_EW0 | T_N4, 0xC6, true },
|
||||
{ "vscatterpf0qps", 5, T_EW0 | T_N4, 0xC7, true },
|
||||
{ "vscatterpf0dpd", 5, T_EW1 | T_N8, 0xC6, false },
|
||||
{ "vscatterpf0qpd", 5, T_EW1 | T_N8, 0xC7, true },
|
||||
|
||||
{ "vscatterpf1dps", 6, T_EW0 | T_N4, 0xC6, true },
|
||||
{ "vscatterpf1qps", 6, T_EW0 | T_N4, 0xC7, true },
|
||||
{ "vscatterpf1dpd", 6, T_EW1 | T_N8, 0xC6, false },
|
||||
{ "vscatterpf1qpd", 6, T_EW1 | T_N8, 0xC7, true },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl& p = tbl[i];
|
||||
std::string type = type2String(p.type | T_66 | T_0F38 | T_MUST_EVEX | T_M_K | T_VSIB);
|
||||
printf("void %s(const Address& addr) { opGatherFetch(addr, zm%d, %s, 0x%2X, Operand::%s); }\n"
|
||||
, p.name, p.zm, type.c_str(), p.code, p.isZmm ? "ZMM" : "YMM");
|
||||
}
|
||||
}
|
||||
|
||||
puts("void vfpclasspd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); }");
|
||||
puts("void vfpclassps(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }");
|
||||
puts("void vfpclasssd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, 0x67, imm); }");
|
||||
puts("void vfpclassss(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, 0x67, imm); }");
|
||||
|
||||
puts("void vpshufbitqmb(const Opmask& k, const Xmm& x, const Operand& op) { opVex(k, &x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8F); }");
|
||||
puts("void vcvtneps2bf16(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); }");
|
||||
|
||||
puts("void vp2intersectd(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW0 | T_B32, 0x68); }");
|
||||
puts("void vp2intersectq(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW1 | T_B64, 0x68); }");
|
||||
}
|
||||
|
||||
// Prints the six fixed v4f[n]madd{ps,ss} and vp4dpwssd[s] wrapper
// definitions, one per line, in the order listed below.
void putV4FMA()
{
	static const char *const defs[] = {
		"void v4fmaddps(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x9A); }",
		"void v4fnmaddps(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0xAA); }",
		"void v4fmaddss(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_F2 | T_EW0 | T_MUST_EVEX | T_N16, 0x9B); }",
		"void v4fnmaddss(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_F2 | T_EW0 | T_MUST_EVEX | T_N16, 0xAB); }",
		"void vp4dpwssd(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x52); }",
		"void vp4dpwssds(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x53); }",
	};
	for (size_t i = 0; i < sizeof(defs) / sizeof(defs[0]); ++i) {
		puts(defs[i]);
	}
}
|
||||
|
||||
int main(int argc, char *[])
|
||||
{
|
||||
bool only64bit = argc == 2;
|
||||
putOpmask(only64bit);
|
||||
putBroadcast(only64bit);
|
||||
if (only64bit) {
|
||||
return 0;
|
||||
}
|
||||
putVcmp();
|
||||
putX_XM();
|
||||
putM_X();
|
||||
putXM_X();
|
||||
putX_X_XM_IMM();
|
||||
putShift();
|
||||
putExtractInsert();
|
||||
putCvt();
|
||||
putGather();
|
||||
putShuff();
|
||||
putMov();
|
||||
putX_XM_IMM();
|
||||
putMisc();
|
||||
putScatter();
|
||||
putV4FMA();
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,23 @@
|
|||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include <set>
|
||||
|
||||
typedef std::set<std::string> StrSet;
|
||||
|
||||
int main()
|
||||
{
|
||||
StrSet ss;
|
||||
std::string line;
|
||||
while (std::getline(std::cin, line)) {
|
||||
if (!line.empty() && line[line.size() - 1] == '\n') {
|
||||
line.resize(line.size() - 1);
|
||||
}
|
||||
if (!line.empty()) {
|
||||
ss.insert(line);
|
||||
}
|
||||
}
|
||||
for (StrSet::const_iterator i = ss.begin(), ie = ss.end(); i != ie; ++i) {
|
||||
std::cout << *i << std::endl;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,17 @@
|
|||
@echo off
rem Regenerates the file named by TARGET from the generator programs.
rem Uses the cl compiler and the sortline filter named by SORT.
|
||||
rem Compiler options passed to every cl invocation below.
set OPT=/EHsc -I../ /W4 -D_CRT_SECURE_NO_WARNINGS
|
||||
rem Header file that receives all generated output.
set TARGET=..\\xbyak\\xbyak_mnemonic.h
|
||||
rem Filter program that every generator output except "fixed" is piped through.
set SORT=sortline
|
||||
rem Build gen_code, then start a fresh TARGET with its filtered default output.
cl gen_code.cpp %OPT%
|
||||
gen_code | %SORT% > %TARGET%
|
||||
rem Append the "omit" output wrapped in an XBYAK_ENABLE_OMITTED_OPERAND guard.
echo #ifdef XBYAK_ENABLE_OMITTED_OPERAND>> %TARGET%
|
||||
gen_code omit | %SORT% >> %TARGET%
|
||||
echo #endif>>%TARGET%
|
||||
rem Append the "fixed" output as is, without the filter.
gen_code fixed >> %TARGET%
|
||||
rem Build gen_avx512 and append its output unless XBYAK_DISABLE_AVX512 is defined.
cl gen_avx512.cpp %OPT%
|
||||
echo #ifndef XBYAK_DISABLE_AVX512>> %TARGET%
|
||||
gen_avx512 | %SORT% >> %TARGET%
|
||||
rem The output of the one-argument run is guarded by XBYAK64.
echo #ifdef XBYAK64>> %TARGET%
|
||||
gen_avx512 64 | %SORT% >> %TARGET%
|
||||
rem Close the XBYAK64 guard, then the XBYAK_DISABLE_AVX512 guard.
echo #endif>> %TARGET%
|
||||
echo #endif>> %TARGET%
|
|
@ -1,11 +1,14 @@
|
|||
[](https://travis-ci.org/herumi/xbyak)
|
||||
[![Build Status](https://github.com/herumi/xbyak/actions/workflows/main.yml/badge.svg)](https://github.com/herumi/xbyak/actions/workflows/main.yml)
|
||||
|
||||
# Xbyak 5.891 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
|
||||
# Xbyak 5.992 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
|
||||
|
||||
## Abstract
|
||||
|
||||
Xbyak is a C++ header library that enables dynamic assembly of x86(IA32) and x64(AMD64, x86-64) mnemonics.
|
||||
|
||||
The pronunciation of Xbyak is `kəi-bja-k`.
|
||||
It is named after the Japanese word [開闢](https://translate.google.com/?hl=ja&sl=ja&tl=en&text=%E9%96%8B%E9%97%A2&op=translate), which means the beginning of the world.
|
||||
|
||||
## Feature
|
||||
* header file only
|
||||
* Intel/MASM like syntax
|
||||
|
@ -15,6 +18,13 @@ Xbyak is a C++ header library that enables dynamically to assemble x86(IA32), x6
|
|||
Use `and_()`, `or_()`, ... instead of `and()`, `or()`.
|
||||
If you want to use them, then specify `-fno-operator-names` option to gcc/clang.
|
||||
|
||||
### News
|
||||
- VNNI instructions such as vpdpbusd support VEX encoding.
|
||||
- (breaks backward compatibility) `push(byte, imm)` (resp. `push(word, imm)`) now casts `imm` to 8 (resp. 16) bits.
|
||||
- (Windows) `#include <winsock2.h>` has been removed from xbyak.h, so add it explicitly if you need it.
|
||||
- support exception-less mode; see [Exception-less mode](#exception-less-mode).
|
||||
- `XBYAK_USE_MMAP_ALLOCATOR` will be defined on Linux/macOS unless `XBYAK_DONT_USE_MMAP_ALLOCATOR` is defined.
|
||||
|
||||
### Supported OS
|
||||
|
||||
* Windows Xp, Vista, Windows 7, Windows 10(32bit, 64bit)
|
||||
|
@ -148,9 +158,15 @@ vcvtpd2dq xmm19, [eax+32]{1to4} --> vcvtpd2dq(xmm19, yword_b [eax+32]);
|
|||
vfpclassps k5{k3}, zword [rax+64], 5 --> vfpclassps(k5|k3, zword [rax+64], 5); // specify m512
|
||||
vfpclasspd k5{k3}, [rax+64]{1to2}, 5 --> vfpclasspd(k5|k3, xword_b [rax+64], 5); // broadcast 64-bit to 128-bit
|
||||
vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, yword_b [rax+64], 5); // broadcast 64-bit to 256-bit
|
||||
|
||||
vpdpbusd(xm0, xm1, xm2); // default encoding is EVEX
|
||||
vpdpbusd(xm0, xm1, xm2, EvexEncoding); // same as the above
|
||||
vpdpbusd(xm0, xm1, xm2, VexEncoding); // VEX encoding
|
||||
```
|
||||
### Remark
|
||||
* `k1`, ..., `k7` are opmask registers.
|
||||
- `k0` is dealt as no mask.
|
||||
- e.g. `vmovaps(zmm0|k0, ptr[rax]);` and `vmovaps(zmm0|T_z, ptr[rax]);` are same to `vmovaps(zmm0, ptr[rax]);`.
|
||||
* use `| T_z`, `| T_sae`, `| T_rn_sae`, `| T_rd_sae`, `| T_ru_sae`, `| T_rz_sae` instead of `,{z}`, `,{sae}`, `,{rn-sae}`, `,{rd-sae}`, `,{ru-sae}`, `,{rz-sae}` respectively.
|
||||
* `k4 | k3` is different from `k3 | k4`.
|
||||
* use `ptr_b` for broadcast `{1toX}`. X is automatically determined.
|
||||
|
@ -331,9 +347,9 @@ public:
|
|||
|
||||
## User allocated memory
|
||||
|
||||
You can make jit code on prepaired memory.
|
||||
You can make jit code on prepared memory.
|
||||
|
||||
Call `setProtectModeRE` yourself to change memory mode if using the prepaired memory.
|
||||
Call `setProtectModeRE` yourself to change memory mode if using the prepared memory.
|
||||
|
||||
```
|
||||
uint8_t alignas(4096) buf[8192]; // C++11 or later
|
||||
|
@ -400,15 +416,22 @@ c.setProtectModeRE();
|
|||
Call `readyRE()` instead of `ready()` when using `AutoGrow` mode.
|
||||
See [protect-re.cpp](sample/protect-re.cpp).
|
||||
|
||||
## Exception-less mode
|
||||
If `XBYAK_NO_EXCEPTION` is defined, then gcc/clang can compile xbyak with `-fno-exceptions`.
|
||||
Instead of throwing an exception, `Xbyak::GetError()` returns a non-zero value (e.g. `ERR_BAD_ADDRESSING`) if something has gone wrong.
|
||||
The error status is not cleared automatically, so you should reset it with `Xbyak::ClearError()`.
|
||||
`CodeGenerator::reset()` calls `ClearError()`.
|
||||
|
||||
## Macro
|
||||
|
||||
* **XBYAK32** is defined on 32bit.
|
||||
* **XBYAK64** is defined on 64bit.
|
||||
* **XBYAK64_WIN** is defined on 64bit Windows(VC)
|
||||
* **XBYAK64_GCC** is defined on 64bit gcc, cygwin
|
||||
* **XBYAK64_WIN** is defined on 64bit Windows(VC).
|
||||
* **XBYAK64_GCC** is defined on 64bit gcc, cygwin.
|
||||
* define **XBYAK_USE_OP_NAMES** on gcc with `-fno-operator-names` if you want to use `and()`, ....
|
||||
* define **XBYAK_ENABLE_OMITTED_OPERAND** if you use omitted destination such as `vaddps(xmm2, xmm3);`(deprecated in the future)
|
||||
* define **XBYAK_UNDEF_JNL** if Bessel function jnl is defined as macro
|
||||
* define **XBYAK_ENABLE_OMITTED_OPERAND** if you use omitted destination such as `vaddps(xmm2, xmm3);`(deprecated in the future).
|
||||
* define **XBYAK_UNDEF_JNL** if Bessel function jnl is defined as macro.
|
||||
* define **XBYAK_NO_EXCEPTION** for a compiler option `-fno-exceptions`.
|
||||
|
||||
## Sample
|
||||
|
||||
|
@ -423,6 +446,21 @@ modified new BSD License
|
|||
http://opensource.org/licenses/BSD-3-Clause
|
||||
|
||||
## History
|
||||
* 2021/May/09 ver 5.992 support endbr32 and endbr64
|
||||
* 2020/Nov/16 ver 5.991 disable constexpr for gcc-5 with -std=c++14
|
||||
* 2020/Oct/19 ver 5.99 support VNNI instructions(Thanks to akharito)
|
||||
* 2020/Oct/17 ver 5.98 support the form of [scale * reg]
|
||||
* 2020/Sep/08 ver 5.97 replace uint32 with uint32_t etc.
|
||||
* 2020/Aug/28 ver 5.95 some constructors of register classes support constexpr if C++14 or later
|
||||
* 2020/Aug/04 ver 5.941 `CodeGenerator::reset()` calls `ClearError()`.
|
||||
* 2020/Jul/28 ver 5.94 remove #include <winsock2.h> (only windows)
|
||||
* 2020/Jul/21 ver 5.93 support exception-less mode
|
||||
* 2020/Jun/30 ver 5.92 support Intel AMX instruction set (Thanks to nshustrov)
|
||||
* 2020/Jun/22 ver 5.913 fix mov(r64, imm64) on 32-bit env with XBYAK64
|
||||
* 2020/Jun/19 ver 5.912 define MAP_JIT on macOS regardless of Xcode version (Thanks to rsdubtso)
|
||||
* 2020/May/10 ver 5.911 XBYAK_USE_MMAP_ALLOCATOR is defined unless XBYAK_DONT_USE_MMAP_ALLOCATOR is defined.
|
||||
* 2020/Apr/20 ver 5.91 accept mask register k0 (it means no mask)
|
||||
* 2020/Apr/09 ver 5.90 kmov{b,d,w,q} throws exception for an unsupported register
|
||||
* 2020/Feb/26 ver 5.891 fix typo of type
|
||||
* 2020/Jan/03 ver 5.89 fix error of vfpclasspd
|
||||
* 2019/Dec/20 ver 5.88 fix compile error on Windows
|
||||
|
@ -575,3 +613,5 @@ http://opensource.org/licenses/BSD-3-Clause
|
|||
## Author
|
||||
MITSUNARI Shigeo(herumi@nifty.com)
|
||||
|
||||
## Sponsors welcome
|
||||
[GitHub Sponsor](https://github.com/sponsors/herumi)
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.891
|
||||
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.992
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
◎概要
|
||||
|
@ -29,12 +29,14 @@ and, orなどを使いたい場合は-fno-operator-namesをgcc/clangに指定し
|
|||
◎準備
|
||||
xbyak.h
|
||||
xbyak_bin2hex.h
|
||||
xbyak_mnemonic.h
|
||||
これらを同一のパスに入れてインクルードパスに追加してください。
|
||||
|
||||
Linuxではmake installで/usr/local/include/xbyakにコピーされます。
|
||||
-----------------------------------------------------------------------------
|
||||
◎下位互換性の破れ
|
||||
* push byte, immまたはpush word, immが下位8bit, 16bitにキャストした値を使うように変更。
|
||||
* (Windows) `<winsock2.h>`をincludeしなくなったので必要なら明示的にincludeしてください。
|
||||
* XBYAK_USE_MMAP_ALLOCATORがデフォルトで有効になりました。従来の方式にする場合はXBYAK_DONT_USE_MMAP_ALLOCATORを定義してください。
|
||||
* Xbyak::Errorの型をenumからclassに変更
|
||||
** 従来のenumの値をとるにはintにキャストしてください。
|
||||
* (古い)Reg32eクラスを(新しい)Reg32eとRegExpに分ける。
|
||||
|
@ -44,6 +46,13 @@ Linuxではmake installで/usr/local/include/xbyakにコピーされます。
|
|||
-----------------------------------------------------------------------------
|
||||
◎新機能
|
||||
|
||||
例外なしモード追加
|
||||
XBYAK_NO_EXCEPTIONを定義してコンパイルするとgcc/clangで-fno-exceptionsオプションでコンパイルできます。
|
||||
エラーは例外の代わりに`Xbyak::GetError()`で通達されます。
|
||||
この値が0でなければ何か問題が発生しています。
|
||||
この値は自動的に変更されないので`Xbyak::ClearError()`でリセットしてください。
|
||||
`CodeGenerator::reset()`は`ClearError()`を呼びます。
|
||||
|
||||
MmapAllocator追加
|
||||
これはUnix系OSでのみの仕様です。XBYAK_USE_MMAP_ALLOCATORを使うと利用できます。
|
||||
デフォルトのAllocatorはメモリ確保時にposix_memalignを使います。
|
||||
|
@ -52,7 +61,6 @@ map countの最大値は/proc/sys/vm/max_map_countに書かれています。
|
|||
デフォルトでは3万個ほどのXbyak::CodeGeneratorインスタンスを生成するとエラーになります。
|
||||
test/mprotect_test.cppで確認できます。
|
||||
これを避けるためにはmmapを使うMmapAllocatorを使ってください。
|
||||
将来この挙動がデフォルトになるかもしれません。
|
||||
|
||||
|
||||
AutoGrowモード追加
|
||||
|
@ -155,6 +163,9 @@ vfpclassps k5{k3}, zword [rax+64], 5 --> vfpclassps(k5|k3, zword [rax+64], 5)
|
|||
vfpclasspd k5{k3}, [rax+64]{1to2}, 5 --> vfpclasspd(k5|k3, xword_b [rax+64], 5); // broadcast 64-bit to 128-bit
|
||||
vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, xword_b [rax+64], 5); // broadcast 32-bit to 128-bit
|
||||
|
||||
vpdpbusd(xm0, xm1, xm2); // default encoding is EVEX
|
||||
vpdpbusd(xm0, xm1, xm2, EvexEncoding); // same as the above
|
||||
vpdpbusd(xm0, xm1, xm2, VexEncoding); // VEX encoding
|
||||
|
||||
注意
|
||||
* k1, ..., k7 は新しいopmaskレジスタです。
|
||||
|
@ -371,6 +382,21 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から
|
|||
-----------------------------------------------------------------------------
|
||||
◎履歴
|
||||
|
||||
2021/05/09 ver 5.992 endbr32とendbr64のサポート
|
||||
2020/11/16 ver 5.991 g++-5のC++14でconstexpr機能の抑制
|
||||
2020/10/19 ver 5.99 VNNI命令サポート(Thanks to akharito)
|
||||
2020/10/17 ver 5.98 [scale * reg]のサポート
|
||||
2020/09/08 ver 5.97 uint32などをuint32_tに置換
|
||||
2020/08/28 ver 5.95 レジスタクラスのコンストラクタがconstexprに対応(C++14以降)
|
||||
2020/08/04 ver 5.941 `CodeGenerator::reset()`が`ClearError()`を呼ぶように変更
|
||||
2020/07/28 ver 5.94 #include <winsock2.h>の削除 (only windows)
|
||||
2020/07/21 ver 5.93 例外なしモード追加
|
||||
2020/06/30 ver 5.92 Intel AMX命令サポート (Thanks to nshustrov)
|
||||
2020/06/19 ver 5.913 32ビット環境でXBYAK64を定義したときのmov(r64, imm64)を修正
|
||||
2020/06/19 ver 5.912 macOSの古いXcodeでもMAP_JITを有効にする(Thanks to rsdubtso)
|
||||
2020/05/10 ver 5.911 Linux/macOSでXBYAK_USE_MMAP_ALLOCATORがデフォルト有効になる
|
||||
2020/04/20 ver 5.91 マスクレジスタk0を受け入れる(マスクをしない)
|
||||
2020/04/09 ver 5.90 kmov{b,w,d,q}がサポートされないレジスタを受けると例外を投げる
|
||||
2020/02/26 ver 5.891 zm0のtype修正
|
||||
2020/01/03 ver 5.89 vfpclasspdの処理エラー修正
|
||||
2019/12/20 ver 5.88 Windowsでのコンパイルエラー修正
|
||||
|
|
|
@ -1,10 +1,12 @@
|
|||
TARGET = test quantize bf toyvm test_util memfunc static_buf jmp_table
|
||||
XBYAK_INC=../xbyak/xbyak.h
|
||||
|
||||
BOOST_EXIST=$(shell echo "\#include <boost/spirit/core.hpp>" | (gcc -E - 2>/dev/null) | grep "boost/spirit/core.hpp" >/dev/null && echo "1")
|
||||
UNAME_M=$(shell uname -m)
|
||||
|
||||
ONLY_64BIT=0
|
||||
ifeq ($(shell uname -s),Darwin)
|
||||
ONLY_64BIT=1
|
||||
OS=mac
|
||||
ifeq ($(UNAME_M),x86_64)
|
||||
BIT=64
|
||||
endif
|
||||
|
@ -27,19 +29,27 @@ else
|
|||
endif
|
||||
|
||||
ifeq ($(BIT),64)
|
||||
TARGET += test64 bf64 memfunc64 test_util64 static_buf64 jmp_table64
|
||||
TARGET += test64 bf64 memfunc64 test_util64 jmp_table64
|
||||
ifeq ($(BOOST_EXIST),1)
|
||||
TARGET += calc64 #calc2_64
|
||||
endif
|
||||
endif
|
||||
|
||||
ifneq ($(OS),mac)
|
||||
TARGET += static_buf64
|
||||
endif
|
||||
|
||||
|
||||
ifneq ($(ONLY_64BIT),1)
|
||||
TARGET += test quantize bf toyvm test_util memfunc static_buf jmp_table
|
||||
ifeq ($(BOOST_EXIST),1)
|
||||
TARGET += calc #calc2
|
||||
TARGET += calc #calc2
|
||||
endif
|
||||
endif
|
||||
|
||||
all: $(TARGET)
|
||||
|
||||
CFLAGS_WARN=-Wall -Wextra -Wformat=2 -Wcast-qual -Wcast-align -Wwrite-strings -Wfloat-equal -Wpointer-arith -pedantic
|
||||
CFLAGS_WARN=-Wall -Wextra -Wformat=2 -Wcast-qual -Wcast-align -Wwrite-strings -Wfloat-equal -Wpointer-arith #-pedantic
|
||||
|
||||
CFLAGS=-g -O2 -fomit-frame-pointer -Wall -I../ $(CFLAGS_WARN)
|
||||
|
||||
|
|
|
@ -148,7 +148,7 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
void dump(const Xbyak::uint8 *code, size_t size)
|
||||
void dump(const uint8_t *code, size_t size)
|
||||
{
|
||||
puts("#include <stdio.h>\nstatic int stack[128 * 1024];");
|
||||
#ifdef _MSC_VER
|
||||
|
|
|
@ -102,7 +102,7 @@ private:
|
|||
MAX_CONST_NUM = 32
|
||||
};
|
||||
MIE_ALIGN(16) double constTbl_[MAX_CONST_NUM];
|
||||
Xbyak::uint64 negConst_;
|
||||
Xbyak::uint64_t negConst_;
|
||||
size_t constTblPos_;
|
||||
#ifdef XBYAK32
|
||||
const Xbyak::Reg32& varTbl_;
|
||||
|
@ -118,7 +118,7 @@ public:
|
|||
64bit: x [rcx](win), xmm0(gcc), return xmm0
|
||||
*/
|
||||
Jit()
|
||||
: negConst_(Xbyak::uint64(1) << 63)
|
||||
: negConst_(Xbyak::uint64_t(1) << 63)
|
||||
, constTblPos_(0)
|
||||
#ifdef XBYAK32
|
||||
, varTbl_(eax)
|
||||
|
|
|
@ -48,9 +48,6 @@
|
|||
#pragma warning(disable : 4996) // scanf
|
||||
#endif
|
||||
|
||||
typedef Xbyak::uint64 uint64;
|
||||
typedef Xbyak::uint32 uint32;
|
||||
|
||||
const int N = 64;
|
||||
|
||||
class Quantize : public Xbyak::CodeGenerator {
|
||||
|
@ -66,7 +63,7 @@ public:
|
|||
output : eax = [esi+offset] / dividend
|
||||
destroy : edx
|
||||
*/
|
||||
void udiv(uint32 dividend, int offset)
|
||||
void udiv(uint32_t dividend, int offset)
|
||||
{
|
||||
mov(eax, ptr[esi + offset]);
|
||||
|
||||
|
@ -83,11 +80,11 @@ public:
|
|||
return;
|
||||
}
|
||||
|
||||
uint64 mLow, mHigh;
|
||||
uint64_t mLow, mHigh;
|
||||
int len = ilog2(odd) + 1;
|
||||
{
|
||||
uint64 roundUp = uint64(1) << (32 + len);
|
||||
uint64 k = roundUp / (0xFFFFFFFFL - (0xFFFFFFFFL % odd));
|
||||
uint64_t roundUp = uint64_t(1) << (32 + len);
|
||||
uint64_t k = roundUp / (0xFFFFFFFFL - (0xFFFFFFFFL % odd));
|
||||
mLow = roundUp / odd;
|
||||
mHigh = (roundUp + k) / odd;
|
||||
}
|
||||
|
@ -96,12 +93,12 @@ public:
|
|||
mLow >>= 1; mHigh >>= 1; len--;
|
||||
}
|
||||
|
||||
uint64 m; int a;
|
||||
uint64_t m; int a;
|
||||
if ((mHigh >> 32) == 0) {
|
||||
m = mHigh; a = 0;
|
||||
} else {
|
||||
len = ilog2(odd);
|
||||
uint64 roundDown = uint64(1) << (32 + len);
|
||||
uint64_t roundDown = uint64_t(1) << (32 + len);
|
||||
mLow = roundDown / odd;
|
||||
int r = (int)(roundDown % odd);
|
||||
m = (r <= (odd >> 1)) ? mLow : mLow + 1;
|
||||
|
@ -124,9 +121,9 @@ public:
|
|||
mov(eax, edx);
|
||||
}
|
||||
/*
|
||||
quantize(uint32 dest[64], const uint32 src[64]);
|
||||
quantize(uint32_t dest[64], const uint32_t src[64]);
|
||||
*/
|
||||
Quantize(const uint32 qTbl[64])
|
||||
Quantize(const uint32_t qTbl[64])
|
||||
{
|
||||
push(esi);
|
||||
push(edi);
|
||||
|
@ -143,7 +140,7 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
void quantize(uint32 dest[64], const uint32 src[64], const uint32 qTbl[64])
|
||||
void quantize(uint32_t dest[64], const uint32_t src[64], const uint32_t qTbl[64])
|
||||
{
|
||||
for (int i = 0; i < N; i++) {
|
||||
dest[i] = src[i] / qTbl[i];
|
||||
|
@ -170,7 +167,7 @@ int main(int argc, char *argv[])
|
|||
}
|
||||
}
|
||||
printf("q=%d\n", q);
|
||||
uint32 qTbl[] = {
|
||||
uint32_t qTbl[] = {
|
||||
16, 11, 10, 16, 24, 40, 51, 61,
|
||||
12, 12, 14, 19, 26, 58, 60, 55,
|
||||
14, 13, 16, 24, 40, 57, 69, 56,
|
||||
|
@ -187,16 +184,16 @@ int main(int argc, char *argv[])
|
|||
}
|
||||
|
||||
try {
|
||||
uint32 src[N];
|
||||
uint32 dest[N];
|
||||
uint32 dest2[N];
|
||||
uint32_t src[N];
|
||||
uint32_t dest[N];
|
||||
uint32_t dest2[N];
|
||||
for (int i = 0; i < N; i++) {
|
||||
src[i] = rand() % 2048;
|
||||
}
|
||||
|
||||
Quantize jit(qTbl);
|
||||
//printf("jit size=%d, ptr=%p\n", jit.getSize(), jit.getCode());
|
||||
void (*quantize2)(uint32*, const uint32*, const uint32 *) = jit.getCode<void (*)(uint32*, const uint32*, const uint32 *)>();
|
||||
void (*quantize2)(uint32_t*, const uint32_t*, const uint32_t *) = jit.getCode<void (*)(uint32_t*, const uint32_t*, const uint32_t *)>();
|
||||
|
||||
quantize(dest, src, qTbl);
|
||||
quantize2(dest2, src, qTbl);
|
||||
|
|
|
@ -163,15 +163,15 @@ int main()
|
|||
// use memory allocated by user
|
||||
using namespace Xbyak;
|
||||
const size_t codeSize = 4096;
|
||||
uint8 buf[codeSize + 16];
|
||||
uint8 *p = CodeArray::getAlignedAddress(buf);
|
||||
uint8_t buf[codeSize + 16];
|
||||
uint8_t *p = CodeArray::getAlignedAddress(buf);
|
||||
Sample s(p, codeSize);
|
||||
if (!CodeArray::protect(p, codeSize, CodeArray::PROTECT_RWE)) {
|
||||
fprintf(stderr, "can't protect\n");
|
||||
return 1;
|
||||
}
|
||||
int (*func)(int) = s.getCode<int (*)(int)>();
|
||||
const uint8 *funcp = reinterpret_cast<const uint8*>(func);
|
||||
const uint8_t *funcp = reinterpret_cast<const uint8_t*>(func);
|
||||
if (funcp != p) {
|
||||
fprintf(stderr, "internal error %p %p\n", p, funcp);
|
||||
return 1;
|
||||
|
|
|
@ -1,12 +1,13 @@
|
|||
#include <stdio.h>
|
||||
#define XBYAK_NO_OP_NAMES
|
||||
#include "xbyak/xbyak_util.h"
|
||||
|
||||
#define NUM_OF_ARRAY(x) (sizeof(x) / sizeof(x[0]))
|
||||
|
||||
struct PopCountTest : public Xbyak::CodeGenerator {
|
||||
PopCountTest(int n)
|
||||
: Xbyak::CodeGenerator(4096, Xbyak::DontSetProtectRWE)
|
||||
{
|
||||
ret();
|
||||
mov(eax, n);
|
||||
popcnt(eax, eax);
|
||||
ret();
|
||||
|
@ -80,6 +81,10 @@ void putCPUinfo()
|
|||
{ Cpu::tAVX512_VPOPCNTDQ, "avx512_vpopcntdq" },
|
||||
{ Cpu::tAVX512_BF16, "avx512_bf16" },
|
||||
{ Cpu::tAVX512_VP2INTERSECT, "avx512_vp2intersect" },
|
||||
{ Cpu::tAMX_TILE, "amx(tile)" },
|
||||
{ Cpu::tAMX_INT8, "amx(int8)" },
|
||||
{ Cpu::tAMX_BF16, "amx(bf16)" },
|
||||
{ Cpu::tAVX_VNNI, "avx_vnni" },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
if (cpu.has(tbl[i].type)) printf(" %s", tbl[i].str);
|
||||
|
@ -88,12 +93,16 @@ void putCPUinfo()
|
|||
if (cpu.has(Cpu::tPOPCNT)) {
|
||||
const int n = 0x12345678; // bitcount = 13
|
||||
const int ok = 13;
|
||||
int r = PopCountTest(n).getCode<int (*)()>()();
|
||||
PopCountTest code(n);
|
||||
code.setProtectModeRE();
|
||||
int (*f)() = code.getCode<int (*)()>();
|
||||
int r = f();
|
||||
if (r == ok) {
|
||||
puts("popcnt ok");
|
||||
} else {
|
||||
printf("popcnt ng %d %d\n", r, ok);
|
||||
}
|
||||
code.setProtectModeRW();
|
||||
}
|
||||
/*
|
||||
displayFamily displayModel
|
||||
|
|
|
@ -39,7 +39,7 @@
|
|||
using namespace Xbyak;
|
||||
|
||||
class ToyVm : public Xbyak::CodeGenerator {
|
||||
typedef std::vector<uint32> Buffer;
|
||||
typedef std::vector<uint32_t> Buffer;
|
||||
public:
|
||||
enum Reg {
|
||||
A, B
|
||||
|
@ -53,14 +53,14 @@ public:
|
|||
{
|
||||
::memset(mem_, 0, sizeof(mem_));
|
||||
}
|
||||
void vldi(Reg r, uint16 imm) { encode(LDI, r, imm); }
|
||||
void vld(Reg r, uint16 idx) { encode(LD, r, idx); }
|
||||
void vst(Reg r, uint16 idx) { encode(ST, r, idx); }
|
||||
void vadd(Reg r, uint16 idx) { encode(ADD, r, idx); }
|
||||
void vaddi(Reg r, uint16 imm) { encode(ADDI, r, imm); }
|
||||
void vsub(Reg r, uint16 idx) { encode(SUB, r, idx); }
|
||||
void vsubi(Reg r, uint16 imm) { encode(SUBI, r, imm); }
|
||||
void vjnz(Reg r, int offset) { encode(JNZ, r, static_cast<uint16>(offset)); }
|
||||
void vldi(Reg r, uint16_t imm) { encode(LDI, r, imm); }
|
||||
void vld(Reg r, uint16_t idx) { encode(LD, r, idx); }
|
||||
void vst(Reg r, uint16_t idx) { encode(ST, r, idx); }
|
||||
void vadd(Reg r, uint16_t idx) { encode(ADD, r, idx); }
|
||||
void vaddi(Reg r, uint16_t imm) { encode(ADDI, r, imm); }
|
||||
void vsub(Reg r, uint16_t idx) { encode(SUB, r, idx); }
|
||||
void vsubi(Reg r, uint16_t imm) { encode(SUBI, r, imm); }
|
||||
void vjnz(Reg r, int offset) { encode(JNZ, r, static_cast<uint16_t>(offset)); }
|
||||
void vput(Reg r) { encode(PUT, r); }
|
||||
void setMark()
|
||||
{
|
||||
|
@ -73,12 +73,12 @@ public:
|
|||
void run()
|
||||
{
|
||||
bool debug = false;//true;
|
||||
uint32 reg[2] = { 0, 0 };
|
||||
uint32_t reg[2] = { 0, 0 };
|
||||
const size_t end = code_.size();
|
||||
uint32 pc = 0;
|
||||
uint32_t pc = 0;
|
||||
for (;;) {
|
||||
uint32 x = code_[pc];
|
||||
uint32 code, r, imm;
|
||||
uint32_t x = code_[pc];
|
||||
uint32_t code, r, imm;
|
||||
decode(code, r, imm, x);
|
||||
if (debug) {
|
||||
printf("---\n");
|
||||
|
@ -149,11 +149,11 @@ public:
|
|||
xor_(edi, edi);
|
||||
mov(mem, (size_t)mem_);
|
||||
const size_t end = code_.size();
|
||||
uint32 pc = 0;
|
||||
uint32 labelNum = 0;
|
||||
uint32_t pc = 0;
|
||||
uint32_t labelNum = 0;
|
||||
for (;;) {
|
||||
uint32 x = code_[pc];
|
||||
uint32 code, r, imm;
|
||||
uint32_t x = code_[pc];
|
||||
uint32_t code, r, imm;
|
||||
decode(code, r, imm, x);
|
||||
L(Label::toStr(labelNum++));
|
||||
switch (code) {
|
||||
|
@ -229,18 +229,18 @@ public:
|
|||
ret();
|
||||
}
|
||||
private:
|
||||
uint32 mem_[65536];
|
||||
uint32_t mem_[65536];
|
||||
Buffer code_;
|
||||
int mark_;
|
||||
void decode(uint32& code, uint32& r, uint32& imm, uint32 x)
|
||||
void decode(uint32_t& code, uint32_t& r, uint32_t& imm, uint32_t x)
|
||||
{
|
||||
code = x >> 24;
|
||||
r = (x >> 16) & 0xff;
|
||||
imm = x & 0xffff;
|
||||
}
|
||||
void encode(Code code, Reg r, uint16 imm = 0)
|
||||
void encode(Code code, Reg r, uint16_t imm = 0)
|
||||
{
|
||||
uint32 x = (code << 24) | (r << 16) | imm;
|
||||
uint32_t x = (code << 24) | (r << 16) | imm;
|
||||
code_.push_back(x);
|
||||
}
|
||||
};
|
||||
|
@ -262,7 +262,7 @@ public:
|
|||
*/
|
||||
vldi(A, 1); // c
|
||||
vst(A, 0); // p(1)
|
||||
vldi(B, static_cast<uint16>(n));
|
||||
vldi(B, static_cast<uint16_t>(n));
|
||||
vst(B, 2); // n
|
||||
// lp
|
||||
setMark();
|
||||
|
@ -283,9 +283,9 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
void fibC(uint32 n)
|
||||
void fibC(uint32_t n)
|
||||
{
|
||||
uint32 p, c, t;
|
||||
uint32_t p, c, t;
|
||||
p = 1;
|
||||
c = 1;
|
||||
lp:
|
||||
|
|
|
@ -1,9 +1,18 @@
|
|||
TARGET = make_nm normalize_prefix jmp address bad_address misc cvt_test cvt_test32
|
||||
TARGET = make_nm normalize_prefix bad_address misc cvt_test cvt_test32 noexception
|
||||
XBYAK_INC=../xbyak/xbyak.h
|
||||
UNAME_S=$(shell uname -s)
|
||||
BIT=32
|
||||
ifeq ($(shell uname -m),x86_64)
|
||||
BIT=64
|
||||
endif
|
||||
ONLY_64BIT=0
|
||||
ifeq ($(UNAME_S),Darwin)
|
||||
# 32-bit binary is not supported
|
||||
ONLY_64BIT=1
|
||||
endif
|
||||
ifeq ($(ONLY_64BIT),0)
|
||||
TARGET += jmp address
|
||||
endif
|
||||
|
||||
ifeq ($(BIT),64)
|
||||
TARGET += jmp64 address64
|
||||
|
@ -36,18 +45,24 @@ cvt_test: cvt_test.cpp ../xbyak/xbyak.h
|
|||
$(CXX) $(CFLAGS) $< -o $@
|
||||
cvt_test32: cvt_test.cpp ../xbyak/xbyak.h
|
||||
$(CXX) $(CFLAGS) $< -o $@ -DXBYAK32
|
||||
noexception: noexception.cpp ../xbyak/xbyak.h
|
||||
$(CXX) $(CFLAGS) $< -o $@ -fno-exceptions
|
||||
|
||||
test_nm: normalize_prefix jmp bad_address $(TARGET)
|
||||
test_nm: normalize_prefix $(TARGET)
|
||||
$(MAKE) -C ../gen
|
||||
ifneq ($(ONLY_64BIT),1)
|
||||
./test_nm.sh
|
||||
./test_nm.sh noexcept
|
||||
./noexception
|
||||
./test_nm.sh Y
|
||||
./test_nm.sh avx512
|
||||
./test_address.sh
|
||||
./jmp
|
||||
./cvt_test32
|
||||
endif
|
||||
./bad_address
|
||||
./misc
|
||||
./cvt_test
|
||||
./cvt_test32
|
||||
ifeq ($(BIT),64)
|
||||
./test_address.sh 64
|
||||
./test_nm.sh 64
|
||||
|
@ -56,8 +71,10 @@ ifeq ($(BIT),64)
|
|||
endif
|
||||
|
||||
test_avx: normalize_prefix
|
||||
ifneq ($(ONLY_64BIT),0)
|
||||
./test_avx.sh
|
||||
./test_avx.sh Y
|
||||
endif
|
||||
ifeq ($(BIT),64)
|
||||
./test_address.sh 64
|
||||
./test_avx.sh 64
|
||||
|
@ -65,7 +82,9 @@ ifeq ($(BIT),64)
|
|||
endif
|
||||
|
||||
test_avx512: normalize_prefix
|
||||
ifneq ($(ONLY_64BIT),0)
|
||||
./test_avx512.sh
|
||||
endif
|
||||
ifeq ($(BIT),64)
|
||||
./test_avx512.sh 64
|
||||
endif
|
||||
|
|
|
@ -117,7 +117,7 @@ CYBOZU_TEST_AUTO(test1)
|
|||
int offset;
|
||||
bool isBack;
|
||||
bool isShort;
|
||||
uint8 result[6];
|
||||
uint8_t result[6];
|
||||
int size;
|
||||
} tbl[] = {
|
||||
{ 0, true, true, { 0xeb, 0xfe }, 2 },
|
||||
|
@ -133,7 +133,7 @@ CYBOZU_TEST_AUTO(test1)
|
|||
const Tbl *p = &tbl[i];
|
||||
for (int k = 0; k < 2; k++) {
|
||||
TestJmp jmp(p->offset, p->isBack, p->isShort, k == 0);
|
||||
const uint8 *q = (const uint8*)jmp.getCode();
|
||||
const uint8_t *q = (const uint8_t*)jmp.getCode();
|
||||
if (p->isBack) q += p->offset; /* skip nop */
|
||||
for (int j = 0; j < p->size; j++) {
|
||||
CYBOZU_TEST_EQUAL(q[j], p->result[j]);
|
||||
|
@ -207,7 +207,7 @@ CYBOZU_TEST_AUTO(testJmpCx)
|
|||
|
||||
CYBOZU_TEST_AUTO(loop)
|
||||
{
|
||||
const uint8 ok[] = {
|
||||
const uint8_t ok[] = {
|
||||
// lp:
|
||||
0x31, 0xC0, // xor eax, eax
|
||||
0xE2, 0xFC, // loop lp
|
||||
|
@ -372,11 +372,11 @@ CYBOZU_TEST_AUTO(test3)
|
|||
}
|
||||
#endif
|
||||
|
||||
Xbyak::uint8 bufL[4096 * 32];
|
||||
Xbyak::uint8 bufS[4096 * 2];
|
||||
uint8_t bufL[4096 * 32];
|
||||
uint8_t bufS[4096 * 2];
|
||||
|
||||
struct MyAllocator : Xbyak::Allocator {
|
||||
Xbyak::uint8 *alloc(size_t size)
|
||||
uint8_t *alloc(size_t size)
|
||||
{
|
||||
if (size < sizeof(bufS)) {
|
||||
printf("test use bufS(%d)\n", (int)size);
|
||||
|
@ -389,7 +389,7 @@ struct MyAllocator : Xbyak::Allocator {
|
|||
fprintf(stderr, "no memory %d\n", (int)size);
|
||||
exit(1);
|
||||
}
|
||||
void free(Xbyak::uint8 *)
|
||||
void free(uint8_t *)
|
||||
{
|
||||
}
|
||||
} myAlloc;
|
||||
|
@ -428,6 +428,7 @@ CYBOZU_TEST_AUTO(test4)
|
|||
}
|
||||
}
|
||||
|
||||
#ifndef __APPLE__
|
||||
CYBOZU_TEST_AUTO(test5)
|
||||
{
|
||||
struct Test5 : Xbyak::CodeGenerator {
|
||||
|
@ -475,8 +476,9 @@ CYBOZU_TEST_AUTO(test5)
|
|||
gm.assign((const char*)gc.getCode(), gc.getSize());
|
||||
CYBOZU_TEST_EQUAL(fm, gm);
|
||||
}
|
||||
#endif
|
||||
|
||||
size_t getValue(const uint8* p)
|
||||
size_t getValue(const uint8_t* p)
|
||||
{
|
||||
size_t v = 0;
|
||||
for (size_t i = 0; i < sizeof(size_t); i++) {
|
||||
|
@ -485,7 +487,7 @@ size_t getValue(const uint8* p)
|
|||
return v;
|
||||
}
|
||||
|
||||
void checkAddr(const uint8 *p, size_t offset, size_t expect)
|
||||
void checkAddr(const uint8_t *p, size_t offset, size_t expect)
|
||||
{
|
||||
size_t v = getValue(p + offset);
|
||||
CYBOZU_TEST_EQUAL(v, size_t(p) + expect);
|
||||
|
@ -533,7 +535,7 @@ CYBOZU_TEST_AUTO(MovLabel)
|
|||
|
||||
const struct {
|
||||
int pos;
|
||||
uint8 ok;
|
||||
uint8_t ok;
|
||||
} tbl[] = {
|
||||
#ifdef XBYAK32
|
||||
{ 0x00, 0x90 },
|
||||
|
@ -567,11 +569,11 @@ CYBOZU_TEST_AUTO(MovLabel)
|
|||
const bool useNewLabel = k == 0;
|
||||
MovLabelCode code(grow, useNewLabel);
|
||||
if (grow) code.ready();
|
||||
const uint8* const p = code.getCode();
|
||||
const uint8_t* const p = code.getCode();
|
||||
for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
|
||||
int pos = tbl[i].pos;
|
||||
uint8 x = p[pos];
|
||||
uint8 ok = tbl[i].ok;
|
||||
uint8_t x = p[pos];
|
||||
uint8_t ok = tbl[i].ok;
|
||||
CYBOZU_TEST_EQUAL(x, ok);
|
||||
}
|
||||
#ifdef XBYAK32
|
||||
|
@ -1217,11 +1219,11 @@ CYBOZU_TEST_AUTO(rip_jmp)
|
|||
CYBOZU_TEST_EQUAL(ret, ret1234() + ret9999());
|
||||
}
|
||||
|
||||
#ifdef XBYAK64_GCC
|
||||
#if 0
|
||||
CYBOZU_TEST_AUTO(rip_addr)
|
||||
{
|
||||
/*
|
||||
assume |&x - &code| < 2GiB
|
||||
we can't assume |&x - &code| < 2GiB anymore
|
||||
*/
|
||||
static int x = 5;
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
|
@ -1236,6 +1238,8 @@ CYBOZU_TEST_AUTO(rip_addr)
|
|||
CYBOZU_TEST_EQUAL(x, 123);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef __APPLE__
|
||||
CYBOZU_TEST_AUTO(rip_addr_with_fixed_buf)
|
||||
{
|
||||
MIE_ALIGN(4096) static char buf[8192];
|
||||
|
@ -1260,6 +1264,7 @@ CYBOZU_TEST_AUTO(rip_addr_with_fixed_buf)
|
|||
code.setProtectModeRW();
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
struct ReleaseTestCode : Xbyak::CodeGenerator {
|
||||
ReleaseTestCode(Label& L1, Label& L2, Label& L3)
|
||||
|
|
|
@ -9,111 +9,111 @@ using namespace Xbyak;
|
|||
|
||||
const int bitEnd = 64;
|
||||
|
||||
const uint64 YMM_SAE = 1ULL << 0;
|
||||
const uint64 _XMM = 1ULL << 1;
|
||||
const uint64 _MEM = 1ULL << 2;
|
||||
const uint64 _REG32 = 1ULL << 3;
|
||||
const uint64 EAX = 1ULL << 4;
|
||||
const uint64 IMM32 = 1ULL << 5;
|
||||
const uint64 IMM8 = 1ULL << 6;
|
||||
const uint64 _REG8 = 1ULL << 7;
|
||||
const uint64 _REG16 = 1ULL << 8;
|
||||
const uint64 XMM_K = 1ULL << 9;
|
||||
const uint64 YMM_K = 1ULL << 10;
|
||||
const uint64 ZMM_K = 1ULL << 11;
|
||||
const uint64 AX = 1ULL << 12;
|
||||
const uint64 AL = 1ULL << 13;
|
||||
const uint64 IMM_1 = 1ULL << 14;
|
||||
const uint64 MEM8 = 1ULL << 15;
|
||||
const uint64 MEM16 = 1ULL << 16;
|
||||
const uint64 MEM32 = 1ULL << 17;
|
||||
const uint64 VM32Z = 1ULL << 19;
|
||||
const uint64 K_K = 1ULL << 20;
|
||||
const uint64 MEM_ONLY_DISP = 1ULL << 21;
|
||||
const uint64 VM32X_K = 1ULL << 23;
|
||||
const uint64 _YMM = 1ULL << 24;
|
||||
const uint64 VM32X_32 = 1ULL << 39;
|
||||
const uint64 VM32X_64 = 1ULL << 40;
|
||||
const uint64 VM32Y_32 = 1ULL << 41;
|
||||
const uint64 VM32Y_64 = 1ULL << 42;
|
||||
const uint64 VM32Z_K = 1ULL << 32;
|
||||
const uint64_t YMM_SAE = 1ULL << 0;
|
||||
const uint64_t _XMM = 1ULL << 1;
|
||||
const uint64_t _MEM = 1ULL << 2;
|
||||
const uint64_t _REG32 = 1ULL << 3;
|
||||
const uint64_t EAX = 1ULL << 4;
|
||||
const uint64_t IMM32 = 1ULL << 5;
|
||||
const uint64_t IMM8 = 1ULL << 6;
|
||||
const uint64_t _REG8 = 1ULL << 7;
|
||||
const uint64_t _REG16 = 1ULL << 8;
|
||||
const uint64_t XMM_K = 1ULL << 9;
|
||||
const uint64_t YMM_K = 1ULL << 10;
|
||||
const uint64_t ZMM_K = 1ULL << 11;
|
||||
const uint64_t AX = 1ULL << 12;
|
||||
const uint64_t AL = 1ULL << 13;
|
||||
const uint64_t IMM_1 = 1ULL << 14;
|
||||
const uint64_t MEM8 = 1ULL << 15;
|
||||
const uint64_t MEM16 = 1ULL << 16;
|
||||
const uint64_t MEM32 = 1ULL << 17;
|
||||
const uint64_t VM32Z = 1ULL << 19;
|
||||
const uint64_t K_K = 1ULL << 20;
|
||||
const uint64_t MEM_ONLY_DISP = 1ULL << 21;
|
||||
const uint64_t VM32X_K = 1ULL << 23;
|
||||
const uint64_t _YMM = 1ULL << 24;
|
||||
const uint64_t VM32X_32 = 1ULL << 39;
|
||||
const uint64_t VM32X_64 = 1ULL << 40;
|
||||
const uint64_t VM32Y_32 = 1ULL << 41;
|
||||
const uint64_t VM32Y_64 = 1ULL << 42;
|
||||
const uint64_t VM32Z_K = 1ULL << 32;
|
||||
#ifdef XBYAK64
|
||||
const uint64 _MEMe = 1ULL << 25;
|
||||
const uint64 REG32_2 = 1ULL << 26; // r8d, ...
|
||||
const uint64 REG16_2 = 1ULL << 27; // r8w, ...
|
||||
const uint64 REG8_2 = 1ULL << 28; // r8b, ...
|
||||
const uint64 REG8_3 = 1ULL << 29; // spl, ...
|
||||
const uint64 _REG64 = 1ULL << 30; // rax, ...
|
||||
const uint64 _REG64_2 = 1ULL << 31; // r8, ...
|
||||
const uint64 _XMM2 = 1ULL << 33;
|
||||
const uint64 _YMM2 = 1ULL << 34;
|
||||
const uint64 VM32X = VM32X_32 | VM32X_64;
|
||||
const uint64 VM32Y = VM32Y_32 | VM32Y_64;
|
||||
const uint64_t _MEMe = 1ULL << 25;
|
||||
const uint64_t REG32_2 = 1ULL << 26; // r8d, ...
|
||||
const uint64_t REG16_2 = 1ULL << 27; // r8w, ...
|
||||
const uint64_t REG8_2 = 1ULL << 28; // r8b, ...
|
||||
const uint64_t REG8_3 = 1ULL << 29; // spl, ...
|
||||
const uint64_t _REG64 = 1ULL << 30; // rax, ...
|
||||
const uint64_t _REG64_2 = 1ULL << 31; // r8, ...
|
||||
const uint64_t _XMM2 = 1ULL << 33;
|
||||
const uint64_t _YMM2 = 1ULL << 34;
|
||||
const uint64_t VM32X = VM32X_32 | VM32X_64;
|
||||
const uint64_t VM32Y = VM32Y_32 | VM32Y_64;
|
||||
#else
|
||||
const uint64 _MEMe = 0;
|
||||
const uint64 REG32_2 = 0;
|
||||
const uint64 REG16_2 = 0;
|
||||
const uint64 REG8_2 = 0;
|
||||
const uint64 REG8_3 = 0;
|
||||
const uint64 _REG64 = 0;
|
||||
const uint64 _REG64_2 = 0;
|
||||
const uint64 _XMM2 = 0;
|
||||
const uint64 _YMM2 = 0;
|
||||
const uint64 VM32X = VM32X_32;
|
||||
const uint64 VM32Y = VM32Y_32;
|
||||
const uint64_t _MEMe = 0;
|
||||
const uint64_t REG32_2 = 0;
|
||||
const uint64_t REG16_2 = 0;
|
||||
const uint64_t REG8_2 = 0;
|
||||
const uint64_t REG8_3 = 0;
|
||||
const uint64_t _REG64 = 0;
|
||||
const uint64_t _REG64_2 = 0;
|
||||
const uint64_t _XMM2 = 0;
|
||||
const uint64_t _YMM2 = 0;
|
||||
const uint64_t VM32X = VM32X_32;
|
||||
const uint64_t VM32Y = VM32Y_32;
|
||||
#endif
|
||||
const uint64 REG64 = _REG64 | _REG64_2;
|
||||
const uint64 REG32 = _REG32 | REG32_2 | EAX;
|
||||
const uint64 REG16 = _REG16 | REG16_2 | AX;
|
||||
const uint64 REG32e = REG32 | REG64;
|
||||
const uint64 REG8 = _REG8 | REG8_2|AL;
|
||||
const uint64 MEM = _MEM | _MEMe;
|
||||
const uint64 MEM64 = 1ULL << 35;
|
||||
const uint64 YMM_ER = 1ULL << 36;
|
||||
const uint64 VM32Y_K = 1ULL << 37;
|
||||
const uint64 IMM_2 = 1ULL << 38;
|
||||
const uint64 IMM = IMM_1 | IMM_2;
|
||||
const uint64 YMM = _YMM | _YMM2;
|
||||
const uint64 K = 1ULL << 43;
|
||||
const uint64 _ZMM = 1ULL << 44;
|
||||
const uint64 _ZMM2 = 1ULL << 45;
|
||||
const uint64_t REG64 = _REG64 | _REG64_2;
|
||||
const uint64_t REG32 = _REG32 | REG32_2 | EAX;
|
||||
const uint64_t REG16 = _REG16 | REG16_2 | AX;
|
||||
const uint64_t REG32e = REG32 | REG64;
|
||||
const uint64_t REG8 = _REG8 | REG8_2|AL;
|
||||
const uint64_t MEM = _MEM | _MEMe;
|
||||
const uint64_t MEM64 = 1ULL << 35;
|
||||
const uint64_t YMM_ER = 1ULL << 36;
|
||||
const uint64_t VM32Y_K = 1ULL << 37;
|
||||
const uint64_t IMM_2 = 1ULL << 38;
|
||||
const uint64_t IMM = IMM_1 | IMM_2;
|
||||
const uint64_t YMM = _YMM | _YMM2;
|
||||
const uint64_t K = 1ULL << 43;
|
||||
const uint64_t _ZMM = 1ULL << 44;
|
||||
const uint64_t _ZMM2 = 1ULL << 45;
|
||||
#ifdef XBYAK64
|
||||
const uint64 ZMM = _ZMM | _ZMM2;
|
||||
const uint64 _YMM3 = 1ULL << 46;
|
||||
const uint64_t ZMM = _ZMM | _ZMM2;
|
||||
const uint64_t _YMM3 = 1ULL << 46;
|
||||
#else
|
||||
const uint64 ZMM = _ZMM;
|
||||
const uint64 _YMM3 = 0;
|
||||
const uint64_t ZMM = _ZMM;
|
||||
const uint64_t _YMM3 = 0;
|
||||
#endif
|
||||
const uint64 K2 = 1ULL << 47;
|
||||
const uint64 ZMM_SAE = 1ULL << 48;
|
||||
const uint64 ZMM_ER = 1ULL << 49;
|
||||
const uint64_t K2 = 1ULL << 47;
|
||||
const uint64_t ZMM_SAE = 1ULL << 48;
|
||||
const uint64_t ZMM_ER = 1ULL << 49;
|
||||
#ifdef XBYAK64
|
||||
const uint64 _XMM3 = 1ULL << 50;
|
||||
const uint64_t _XMM3 = 1ULL << 50;
|
||||
#else
|
||||
const uint64 _XMM3 = 0;
|
||||
const uint64_t _XMM3 = 0;
|
||||
#endif
|
||||
const uint64 XMM = _XMM | _XMM2 | _XMM3;
|
||||
const uint64 XMM_SAE = 1ULL << 51;
|
||||
const uint64_t XMM = _XMM | _XMM2 | _XMM3;
|
||||
const uint64_t XMM_SAE = 1ULL << 51;
|
||||
#ifdef XBYAK64
|
||||
const uint64 XMM_KZ = 1ULL << 52;
|
||||
const uint64 YMM_KZ = 1ULL << 53;
|
||||
const uint64 ZMM_KZ = 1ULL << 54;
|
||||
const uint64_t XMM_KZ = 1ULL << 52;
|
||||
const uint64_t YMM_KZ = 1ULL << 53;
|
||||
const uint64_t ZMM_KZ = 1ULL << 54;
|
||||
#else
|
||||
const uint64 XMM_KZ = 0;
|
||||
const uint64 YMM_KZ = 0;
|
||||
const uint64 ZMM_KZ = 0;
|
||||
const uint64_t XMM_KZ = 0;
|
||||
const uint64_t YMM_KZ = 0;
|
||||
const uint64_t ZMM_KZ = 0;
|
||||
#endif
|
||||
const uint64 MEM_K = 1ULL << 55;
|
||||
const uint64 M_1to2 = 1ULL << 56;
|
||||
const uint64 M_1to4 = 1ULL << 57;
|
||||
const uint64 M_1to8 = 1ULL << 58;
|
||||
const uint64 M_1to16 = 1ULL << 59;
|
||||
const uint64 XMM_ER = 1ULL << 60;
|
||||
const uint64 M_xword = 1ULL << 61;
|
||||
const uint64 M_yword = 1ULL << 62;
|
||||
const uint64 MY_1to4 = 1ULL << 18;
|
||||
const uint64_t MEM_K = 1ULL << 55;
|
||||
const uint64_t M_1to2 = 1ULL << 56;
|
||||
const uint64_t M_1to4 = 1ULL << 57;
|
||||
const uint64_t M_1to8 = 1ULL << 58;
|
||||
const uint64_t M_1to16 = 1ULL << 59;
|
||||
const uint64_t XMM_ER = 1ULL << 60;
|
||||
const uint64_t M_xword = 1ULL << 61;
|
||||
const uint64_t M_yword = 1ULL << 62;
|
||||
const uint64_t MY_1to4 = 1ULL << 18;
|
||||
|
||||
const uint64 NOPARA = 1ULL << (bitEnd - 1);
|
||||
const uint64_t NOPARA = 1ULL << (bitEnd - 1);
|
||||
|
||||
class Test {
|
||||
Test(const Test&);
|
||||
|
@ -121,7 +121,7 @@ class Test {
|
|||
const bool isXbyak_;
|
||||
int funcNum_;
|
||||
// check all op1, op2, op3
|
||||
void put(const std::string& nm, uint64 op1 = NOPARA, uint64 op2 = NOPARA, uint64 op3 = NOPARA, uint64 op4 = NOPARA) const
|
||||
void put(const std::string& nm, uint64_t op1 = NOPARA, uint64_t op2 = NOPARA, uint64_t op3 = NOPARA, uint64_t op4 = NOPARA) const
|
||||
{
|
||||
for (int i = 0; i < bitEnd; i++) {
|
||||
if ((op1 & (1ULL << i)) == 0) continue;
|
||||
|
@ -144,7 +144,7 @@ class Test {
|
|||
}
|
||||
}
|
||||
}
|
||||
void put(const char *nm, uint64 op, const char *xbyak, const char *nasm) const
|
||||
void put(const char *nm, uint64_t op, const char *xbyak, const char *nasm) const
|
||||
{
|
||||
for (int i = 0; i < bitEnd; i++) {
|
||||
if ((op & (1ULL << i)) == 0) continue;
|
||||
|
@ -156,7 +156,7 @@ class Test {
|
|||
printf("\n");
|
||||
}
|
||||
}
|
||||
void put(const char *nm, const char *xbyak, const char *nasm = 0, uint64 op = NOPARA) const
|
||||
void put(const char *nm, const char *xbyak, const char *nasm = 0, uint64_t op = NOPARA) const
|
||||
{
|
||||
if (nasm == 0) nasm = xbyak;
|
||||
for (int i = 0; i < bitEnd; i++) {
|
||||
|
@ -169,7 +169,7 @@ class Test {
|
|||
printf("\n");
|
||||
}
|
||||
}
|
||||
const char *get(uint64 type) const
|
||||
const char *get(uint64_t type) const
|
||||
{
|
||||
int idx = (rand() / 31) & 7;
|
||||
switch (type) {
|
||||
|
@ -537,7 +537,7 @@ public:
|
|||
printf("vaddpd(%s%s%s, %s, %s%s); dump();\n", r1, pk, pz, r2, r3, saeTblXbyak[sae]);
|
||||
} else {
|
||||
if (kIdx) CYBOZU_SNPRINTF(pk, sizeof(pk), "{k%d}", kIdx);
|
||||
if (z) pz = "{z}";
|
||||
if (z && kIdx) pz = "{z}";
|
||||
printf("vaddpd %s%s%s, %s, %s%s\n", r1, pk, pz, r2, r3, saeTblNASM[sae]);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -10,111 +10,111 @@ using namespace Xbyak;
|
|||
|
||||
const int bitEnd = 64;
|
||||
|
||||
const uint64 MMX = 1ULL << 0;
|
||||
const uint64 _XMM = 1ULL << 1;
|
||||
const uint64 _MEM = 1ULL << 2;
|
||||
const uint64 _REG32 = 1ULL << 3;
|
||||
const uint64 EAX = 1ULL << 4;
|
||||
const uint64 IMM32 = 1ULL << 5;
|
||||
const uint64 IMM8 = 1ULL << 6;
|
||||
const uint64 _REG8 = 1ULL << 7;
|
||||
const uint64 _REG16 = 1ULL << 8;
|
||||
const uint64 NEG8 = 1ULL << 9;
|
||||
const uint64 IMM16 = 1ULL << 10;
|
||||
const uint64 NEG16 = 1ULL << 11;
|
||||
const uint64 AX = 1ULL << 12;
|
||||
const uint64 AL = 1ULL << 13;
|
||||
const uint64 IMM_1 = 1ULL << 14;
|
||||
const uint64 MEM8 = 1ULL << 15;
|
||||
const uint64 MEM16 = 1ULL << 16;
|
||||
const uint64 MEM32 = 1ULL << 17;
|
||||
const uint64 ONE = 1ULL << 19;
|
||||
const uint64 CL = 1ULL << 20;
|
||||
const uint64 MEM_ONLY_DISP = 1ULL << 21;
|
||||
const uint64 NEG32 = 1ULL << 23;
|
||||
const uint64 _YMM = 1ULL << 24;
|
||||
const uint64 VM32X_32 = 1ULL << 39;
|
||||
const uint64 VM32X_64 = 1ULL << 40;
|
||||
const uint64 VM32Y_32 = 1ULL << 41;
|
||||
const uint64 VM32Y_64 = 1ULL << 42;
|
||||
const uint64_t MMX = 1ULL << 0;
|
||||
const uint64_t _XMM = 1ULL << 1;
|
||||
const uint64_t _MEM = 1ULL << 2;
|
||||
const uint64_t _REG32 = 1ULL << 3;
|
||||
const uint64_t EAX = 1ULL << 4;
|
||||
const uint64_t IMM32 = 1ULL << 5;
|
||||
const uint64_t IMM8 = 1ULL << 6;
|
||||
const uint64_t _REG8 = 1ULL << 7;
|
||||
const uint64_t _REG16 = 1ULL << 8;
|
||||
const uint64_t NEG8 = 1ULL << 9;
|
||||
const uint64_t IMM16 = 1ULL << 10;
|
||||
const uint64_t NEG16 = 1ULL << 11;
|
||||
const uint64_t AX = 1ULL << 12;
|
||||
const uint64_t AL = 1ULL << 13;
|
||||
const uint64_t IMM_1 = 1ULL << 14;
|
||||
const uint64_t MEM8 = 1ULL << 15;
|
||||
const uint64_t MEM16 = 1ULL << 16;
|
||||
const uint64_t MEM32 = 1ULL << 17;
|
||||
const uint64_t ONE = 1ULL << 19;
|
||||
const uint64_t CL = 1ULL << 20;
|
||||
const uint64_t MEM_ONLY_DISP = 1ULL << 21;
|
||||
const uint64_t NEG32 = 1ULL << 23;
|
||||
const uint64_t _YMM = 1ULL << 24;
|
||||
const uint64_t VM32X_32 = 1ULL << 39;
|
||||
const uint64_t VM32X_64 = 1ULL << 40;
|
||||
const uint64_t VM32Y_32 = 1ULL << 41;
|
||||
const uint64_t VM32Y_64 = 1ULL << 42;
|
||||
#ifdef XBYAK64
|
||||
const uint64 _MEMe = 1ULL << 25;
|
||||
const uint64 REG32_2 = 1ULL << 26; // r8d, ...
|
||||
const uint64 REG16_2 = 1ULL << 27; // r8w, ...
|
||||
const uint64 REG8_2 = 1ULL << 28; // r8b, ...
|
||||
const uint64 REG8_3 = 1ULL << 29; // spl, ...
|
||||
const uint64 _REG64 = 1ULL << 30; // rax, ...
|
||||
const uint64 _REG64_2 = 1ULL << 31; // r8, ...
|
||||
const uint64 RAX = 1ULL << 32;
|
||||
const uint64 _XMM2 = 1ULL << 33;
|
||||
const uint64 _YMM2 = 1ULL << 34;
|
||||
const uint64 VM32X = VM32X_32 | VM32X_64;
|
||||
const uint64 VM32Y = VM32Y_32 | VM32Y_64;
|
||||
const uint64_t _MEMe = 1ULL << 25;
|
||||
const uint64_t REG32_2 = 1ULL << 26; // r8d, ...
|
||||
const uint64_t REG16_2 = 1ULL << 27; // r8w, ...
|
||||
const uint64_t REG8_2 = 1ULL << 28; // r8b, ...
|
||||
const uint64_t REG8_3 = 1ULL << 29; // spl, ...
|
||||
const uint64_t _REG64 = 1ULL << 30; // rax, ...
|
||||
const uint64_t _REG64_2 = 1ULL << 31; // r8, ...
|
||||
const uint64_t RAX = 1ULL << 32;
|
||||
const uint64_t _XMM2 = 1ULL << 33;
|
||||
const uint64_t _YMM2 = 1ULL << 34;
|
||||
const uint64_t VM32X = VM32X_32 | VM32X_64;
|
||||
const uint64_t VM32Y = VM32Y_32 | VM32Y_64;
|
||||
#else
|
||||
const uint64 _MEMe = 0;
|
||||
const uint64 REG32_2 = 0;
|
||||
const uint64 REG16_2 = 0;
|
||||
const uint64 REG8_2 = 0;
|
||||
const uint64 REG8_3 = 0;
|
||||
const uint64 _REG64 = 0;
|
||||
const uint64 _REG64_2 = 0;
|
||||
const uint64 RAX = 0;
|
||||
const uint64 _XMM2 = 0;
|
||||
const uint64 _YMM2 = 0;
|
||||
const uint64 VM32X = VM32X_32;
|
||||
const uint64 VM32Y = VM32Y_32;
|
||||
const uint64_t _MEMe = 0;
|
||||
const uint64_t REG32_2 = 0;
|
||||
const uint64_t REG16_2 = 0;
|
||||
const uint64_t REG8_2 = 0;
|
||||
const uint64_t REG8_3 = 0;
|
||||
const uint64_t _REG64 = 0;
|
||||
const uint64_t _REG64_2 = 0;
|
||||
const uint64_t RAX = 0;
|
||||
const uint64_t _XMM2 = 0;
|
||||
const uint64_t _YMM2 = 0;
|
||||
const uint64_t VM32X = VM32X_32;
|
||||
const uint64_t VM32Y = VM32Y_32;
|
||||
#endif
|
||||
const uint64 REG64 = _REG64 | _REG64_2 | RAX;
|
||||
const uint64 REG32 = _REG32 | REG32_2 | EAX;
|
||||
const uint64 REG16 = _REG16 | REG16_2 | AX;
|
||||
const uint64 REG32e = REG32 | REG64;
|
||||
const uint64 REG8 = _REG8 | REG8_2|AL;
|
||||
const uint64 MEM = _MEM | _MEMe;
|
||||
const uint64 MEM64 = 1ULL << 35;
|
||||
const uint64 ST0 = 1ULL << 36;
|
||||
const uint64 STi = 1ULL << 37;
|
||||
const uint64 IMM_2 = 1ULL << 38;
|
||||
const uint64 IMM = IMM_1 | IMM_2;
|
||||
const uint64 XMM = _XMM | _XMM2;
|
||||
const uint64 YMM = _YMM | _YMM2;
|
||||
const uint64 K = 1ULL << 43;
|
||||
const uint64 _ZMM = 1ULL << 44;
|
||||
const uint64 _ZMM2 = 1ULL << 45;
|
||||
const uint64_t REG64 = _REG64 | _REG64_2 | RAX;
|
||||
const uint64_t REG32 = _REG32 | REG32_2 | EAX;
|
||||
const uint64_t REG16 = _REG16 | REG16_2 | AX;
|
||||
const uint64_t REG32e = REG32 | REG64;
|
||||
const uint64_t REG8 = _REG8 | REG8_2|AL;
|
||||
const uint64_t MEM = _MEM | _MEMe;
|
||||
const uint64_t MEM64 = 1ULL << 35;
|
||||
const uint64_t ST0 = 1ULL << 36;
|
||||
const uint64_t STi = 1ULL << 37;
|
||||
const uint64_t IMM_2 = 1ULL << 38;
|
||||
const uint64_t IMM = IMM_1 | IMM_2;
|
||||
const uint64_t XMM = _XMM | _XMM2;
|
||||
const uint64_t YMM = _YMM | _YMM2;
|
||||
const uint64_t K = 1ULL << 43;
|
||||
const uint64_t _ZMM = 1ULL << 44;
|
||||
const uint64_t _ZMM2 = 1ULL << 45;
|
||||
#ifdef XBYAK64
|
||||
const uint64 ZMM = _ZMM | _ZMM2;
|
||||
const uint64 _YMM3 = 1ULL << 46;
|
||||
const uint64_t ZMM = _ZMM | _ZMM2;
|
||||
const uint64_t _YMM3 = 1ULL << 46;
|
||||
#else
|
||||
const uint64 ZMM = _ZMM;
|
||||
const uint64 _YMM3 = 0;
|
||||
const uint64_t ZMM = _ZMM;
|
||||
const uint64_t _YMM3 = 0;
|
||||
#endif
|
||||
const uint64 K2 = 1ULL << 47;
|
||||
const uint64 ZMM_SAE = 1ULL << 48;
|
||||
const uint64 ZMM_ER = 1ULL << 49;
|
||||
const uint64_t K2 = 1ULL << 47;
|
||||
const uint64_t ZMM_SAE = 1ULL << 48;
|
||||
const uint64_t ZMM_ER = 1ULL << 49;
|
||||
#ifdef XBYAK64
|
||||
const uint64 _XMM3 = 1ULL << 50;
|
||||
const uint64_t _XMM3 = 1ULL << 50;
|
||||
#endif
|
||||
const uint64 XMM_SAE = 1ULL << 51;
|
||||
const uint64_t XMM_SAE = 1ULL << 51;
|
||||
#ifdef XBYAK64
|
||||
const uint64 XMM_KZ = 1ULL << 52;
|
||||
const uint64 YMM_KZ = 1ULL << 53;
|
||||
const uint64 ZMM_KZ = 1ULL << 54;
|
||||
const uint64_t XMM_KZ = 1ULL << 52;
|
||||
const uint64_t YMM_KZ = 1ULL << 53;
|
||||
const uint64_t ZMM_KZ = 1ULL << 54;
|
||||
#else
|
||||
const uint64 XMM_KZ = 0;
|
||||
const uint64 YMM_KZ = 0;
|
||||
const uint64 ZMM_KZ = 0;
|
||||
const uint64_t XMM_KZ = 0;
|
||||
const uint64_t YMM_KZ = 0;
|
||||
const uint64_t ZMM_KZ = 0;
|
||||
#endif
|
||||
const uint64 MEM_K = 1ULL << 55;
|
||||
const uint64 M_1to2 = 1ULL << 56;
|
||||
const uint64 M_1to4 = 1ULL << 57;
|
||||
const uint64 M_1to8 = 1ULL << 58;
|
||||
const uint64 M_1to16 = 1ULL << 59;
|
||||
const uint64 XMM_ER = 1ULL << 60;
|
||||
const uint64 M_xword = 1ULL << 61;
|
||||
const uint64 M_yword = 1ULL << 62;
|
||||
const uint64 MY_1to4 = 1ULL << 18;
|
||||
const uint64 BNDREG = 1ULL << 22;
|
||||
const uint64_t MEM_K = 1ULL << 55;
|
||||
const uint64_t M_1to2 = 1ULL << 56;
|
||||
const uint64_t M_1to4 = 1ULL << 57;
|
||||
const uint64_t M_1to8 = 1ULL << 58;
|
||||
const uint64_t M_1to16 = 1ULL << 59;
|
||||
const uint64_t XMM_ER = 1ULL << 60;
|
||||
const uint64_t M_xword = 1ULL << 61;
|
||||
const uint64_t M_yword = 1ULL << 62;
|
||||
const uint64_t MY_1to4 = 1ULL << 18;
|
||||
const uint64_t BNDREG = 1ULL << 22;
|
||||
|
||||
const uint64 NOPARA = 1ULL << (bitEnd - 1);
|
||||
const uint64_t NOPARA = 1ULL << (bitEnd - 1);
|
||||
|
||||
class Test {
|
||||
Test(const Test&);
|
||||
|
@ -131,7 +131,7 @@ class Test {
|
|||
}
|
||||
|
||||
// check all op1, op2, op3
|
||||
void put(const std::string& nm, uint64 op1 = NOPARA, uint64 op2 = NOPARA, uint64 op3 = NOPARA, uint64 op4 = NOPARA) const
|
||||
void put(const std::string& nm, uint64_t op1 = NOPARA, uint64_t op2 = NOPARA, uint64_t op3 = NOPARA, uint64_t op4 = NOPARA) const
|
||||
{
|
||||
for (int i = 0; i < bitEnd; i++) {
|
||||
if ((op1 & (1ULL << i)) == 0) continue;
|
||||
|
@ -154,7 +154,7 @@ class Test {
|
|||
}
|
||||
}
|
||||
}
|
||||
void put(const char *nm, uint64 op, const char *xbyak, const char *nasm) const
|
||||
void put(const char *nm, uint64_t op, const char *xbyak, const char *nasm) const
|
||||
{
|
||||
for (int i = 0; i < bitEnd; i++) {
|
||||
if ((op & (1ULL << i)) == 0) continue;
|
||||
|
@ -166,7 +166,7 @@ class Test {
|
|||
printf("\n");
|
||||
}
|
||||
}
|
||||
void put(const char *nm, const char *xbyak, const char *nasm = 0, uint64 op = NOPARA) const
|
||||
void put(const char *nm, const char *xbyak, const char *nasm = 0, uint64_t op = NOPARA) const
|
||||
{
|
||||
if (nasm == 0) nasm = xbyak;
|
||||
for (int i = 0; i < bitEnd; i++) {
|
||||
|
@ -179,7 +179,7 @@ class Test {
|
|||
printf("\n");
|
||||
}
|
||||
}
|
||||
const char *get(uint64 type) const
|
||||
const char *get(uint64_t type) const
|
||||
{
|
||||
int idx = (rand() / 31) & 7;
|
||||
if (type == ST0) {
|
||||
|
@ -643,6 +643,7 @@ class Test {
|
|||
puts(isXbyak_ ? "out_(dx, al); dump();" : "out dx, al");
|
||||
puts(isXbyak_ ? "out_(dx, ax); dump();" : "out dx, ax");
|
||||
puts(isXbyak_ ? "out_(dx, eax); dump();" : "out dx, eax");
|
||||
puts(isXbyak_ ? "lea(eax, ptr[edi + 4 * eax]); dump();" : "lea eax, [edi + 4 * eax]");
|
||||
}
|
||||
void putJmp() const
|
||||
{
|
||||
|
@ -858,7 +859,7 @@ class Test {
|
|||
SD = 1 << 3
|
||||
};
|
||||
const struct {
|
||||
uint8 code;
|
||||
uint8_t code;
|
||||
const char *name;
|
||||
} sufTbl[] = {
|
||||
{ 0, "ps" },
|
||||
|
@ -867,7 +868,7 @@ class Test {
|
|||
{ 0xF2, "sd" },
|
||||
};
|
||||
static const struct XmmTbl1 {
|
||||
uint8 code;
|
||||
uint8_t code;
|
||||
int mode;
|
||||
const char *name;
|
||||
bool hasImm;
|
||||
|
@ -946,8 +947,8 @@ class Test {
|
|||
{
|
||||
static const struct Tbl {
|
||||
const char *name;
|
||||
uint64 op1;
|
||||
uint64 op2;
|
||||
uint64_t op1;
|
||||
uint64_t op2;
|
||||
} tbl[] = {
|
||||
{ "cvtpi2ps", XMM, MMX|MEM },
|
||||
{ "cvtps2pi", MMX, XMM|MEM },
|
||||
|
@ -1145,6 +1146,33 @@ class Test {
|
|||
put("pop", REG32|MEM32);
|
||||
#endif
|
||||
}
|
||||
void putPushPop8_16() const
|
||||
{
|
||||
const struct {
|
||||
int b;
|
||||
uint32_t v;
|
||||
} tbl[] = {
|
||||
{ 8, 0x7f },
|
||||
{ 8, 0x80 },
|
||||
{ 8, 0xff },
|
||||
{ 8, 0x100 },
|
||||
{ 8, 0x12345 },
|
||||
{ 16, 0x7fff },
|
||||
{ 16, 0x8000 },
|
||||
{ 16, 0xffff },
|
||||
{ 16, 0x10000 },
|
||||
{ 16, 0x12345 },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const char *b = tbl[i].b == 8 ? "byte" : "word";
|
||||
uint32_t v = tbl[i].v;
|
||||
if (isXbyak_) {
|
||||
printf("push(%s, 0x%x);dump();\n", b, v);
|
||||
} else {
|
||||
printf("push %s 0x%x\n", b, v);
|
||||
}
|
||||
}
|
||||
}
|
||||
void putTest() const
|
||||
{
|
||||
const char *p = "test";
|
||||
|
@ -1466,9 +1494,9 @@ class Test {
|
|||
void putMPX() const
|
||||
{
|
||||
#ifdef XBYAK64
|
||||
const uint64 reg = REG64;
|
||||
const uint64_t reg = REG64;
|
||||
#else
|
||||
const uint64 reg = REG32;
|
||||
const uint64_t reg = REG32;
|
||||
#endif
|
||||
put("bndcl", BNDREG, reg|MEM);
|
||||
put("bndcu", BNDREG, reg|MEM);
|
||||
|
@ -2496,6 +2524,7 @@ public:
|
|||
separateFunc();
|
||||
putSSE4_2();
|
||||
putSeg(); // same behavior as yasm for mov rax, cx
|
||||
putPushPop8_16();
|
||||
#else
|
||||
putSIMPLE();
|
||||
putReg1();
|
||||
|
@ -2628,7 +2657,7 @@ public:
|
|||
printf("vaddpd(%s%s%s, %s, %s%s); dump();\n", r1, pk, pz, r2, r3, saeTblXbyak[sae]);
|
||||
} else {
|
||||
if (kIdx) CYBOZU_SNPRINTF(pk, sizeof(pk), "{k%d}", kIdx);
|
||||
if (z) pz = "{z}";
|
||||
if (z && kIdx) pz = "{z}";
|
||||
printf("vaddpd %s%s%s, %s, %s%s\n", r1, pk, pz, r2, r3, saeTblNASM[sae]);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -97,13 +97,43 @@ CYBOZU_TEST_AUTO(align)
|
|||
CYBOZU_TEST_EQUAL(size_t(getCurr()) % alignSize, 0u);
|
||||
}
|
||||
align(alignSize);
|
||||
const uint8 *p = getCurr();
|
||||
const uint8_t *p = getCurr();
|
||||
// do nothing if aligned
|
||||
align(alignSize);
|
||||
CYBOZU_TEST_EQUAL(p, getCurr());
|
||||
}
|
||||
} c;
|
||||
}
|
||||
CYBOZU_TEST_AUTO(kmask)
{
	// Operand-size checks for kmov{b,w,d,q} with general-purpose registers,
	// plus a check on how T_z without an opmask is encoded.
	struct Code : Xbyak::CodeGenerator {
		// Encode vmovaps twice, without and with T_z, and require both
		// encodings to have the same length and the same bytes.
		void checkT_z()
		{
			const uint8_t *p1 = getCurr();
			vmovaps(zm0, ptr[eax]);
			const uint8_t *p2 = getCurr();
			vmovaps(zm0|T_z, ptr[eax]);
			const uint8_t *end = getCurr();
			CYBOZU_TEST_EQUAL(p2 - p1, end - p2);
			CYBOZU_TEST_EQUAL_ARRAY(p1, p2, end - p2);
		}
		Code()
		{
			// these register operands are expected to be rejected
			CYBOZU_TEST_EXCEPTION(kmovb(k1, ax), std::exception);
			CYBOZU_TEST_EXCEPTION(kmovw(k1, ax), std::exception);
			CYBOZU_TEST_EXCEPTION(kmovd(k1, ax), std::exception);
			CYBOZU_TEST_EXCEPTION(kmovq(k1, eax), std::exception);
#ifdef XBYAK64
			// with a 64-bit register only kmovq is expected to be accepted
			CYBOZU_TEST_EXCEPTION(kmovb(k1, rax), std::exception);
			CYBOZU_TEST_EXCEPTION(kmovw(k1, rax), std::exception);
			CYBOZU_TEST_EXCEPTION(kmovd(k1, rax), std::exception);
			CYBOZU_TEST_NO_EXCEPTION(kmovq(k1, rax));
#endif
			// masking with k0 must be accepted
			CYBOZU_TEST_NO_EXCEPTION(vmovaps(xm0|k0, ptr[eax]));
			checkT_z();
		}
	} c;
}
|
||||
|
||||
#ifdef XBYAK64
|
||||
CYBOZU_TEST_AUTO(vfmaddps)
|
||||
|
@ -721,4 +751,96 @@ CYBOZU_TEST_AUTO(bf16)
|
|||
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||
}
|
||||
|
||||
CYBOZU_TEST_AUTO(AMX)
{
	// Assemble one instance of each AMX instruction below and compare the
	// emitted bytes against a fixed reference table.
	struct Code : Xbyak::CodeGenerator {
		Code()
		{
			// tile configuration / load / store / zero
			ldtilecfg(ptr[rax + rcx * 4 + 64]);
			sttilecfg(ptr[rsp + rax * 8 + 128]);
			tileloadd(tmm3, ptr[rdi + rdx * 2 + 8]);
			tileloaddt1(tmm4, ptr[r8 + r9 + 32]);
			tilerelease();
			tilestored(ptr[r10 + r11 * 2 + 32], tmm2);
			tilezero(tmm7);
			// tile dot-product instructions
			tdpbssd(tmm1, tmm2, tmm3);
			tdpbsud(tmm2, tmm3, tmm4);
			tdpbusd(tmm3, tmm4, tmm5);
			tdpbuud(tmm4, tmm5, tmm6);
			tdpbf16ps(tmm5, tmm6, tmm7);
		}
	} c;
	// reference bytes (described in the original comment as "generated code by patch")
	const uint8_t tbl[] = {
		0xc4, 0xe2, 0x78, 0x49, 0x44, 0x88, 0x40, 0xc4, 0xe2, 0x79, 0x49, 0x84, 0xc4, 0x80, 0x00, 0x00,
		0x00, 0xc4, 0xe2, 0x7b, 0x4b, 0x5c, 0x57, 0x08, 0xc4, 0x82, 0x79, 0x4b, 0x64, 0x08, 0x20, 0xc4,
		0xe2, 0x78, 0x49, 0xc0, 0xc4, 0x82, 0x7a, 0x4b, 0x54, 0x5a, 0x20, 0xc4, 0xe2, 0x7b, 0x49, 0xf8,
		0xc4, 0xe2, 0x63, 0x5e, 0xca, 0xc4, 0xe2, 0x5a, 0x5e, 0xd3, 0xc4, 0xe2, 0x51, 0x5e, 0xdc, 0xc4,
		0xe2, 0x48, 0x5e, 0xe5, 0xc4, 0xe2, 0x42, 0x5c, 0xee,
	};
	// element count of the reference table
	const size_t n = sizeof tbl / sizeof *tbl;
	CYBOZU_TEST_EQUAL(c.getSize(), n);
	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
|
||||
|
||||
CYBOZU_TEST_AUTO(tileloadd)
{
	// tileloadd with three base+index address forms must match the reference
	// bytes; the two forms in notSupported*() must be rejected.
	struct Code : Xbyak::CodeGenerator {
		// base register only
		void notSupported()
		{
			tileloadd(tmm1, ptr[r8]);
		}
		// scaled index only
		void notSupported2()
		{
			tileloadd(tmm1, ptr[r8*2]);
		}
		Code()
		{
			tileloadd(tmm1, ptr[r8+r8]);
			tileloadd(tmm1, ptr[rax+rcx*4]);
			tileloadd(tmm1, ptr[r8+r9*1+0x40]);
		}
	} c;
	// one row per instruction emitted by the constructor
	const uint8_t tbl[] = {
		0xC4, 0x82, 0x7B, 0x4B, 0x0C, 0x00,
		0xC4, 0xE2, 0x7B, 0x4B, 0x0C, 0x88,
		0xC4, 0x82, 0x7B, 0x4B, 0x4C, 0x08, 0x40,
	};
	const size_t n = sizeof tbl / sizeof *tbl;
	CYBOZU_TEST_EQUAL(c.getSize(), n);
	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);

	// current version does not support this sibmem format
	CYBOZU_TEST_EXCEPTION(c.notSupported(), std::exception);
	CYBOZU_TEST_EXCEPTION(c.notSupported2(), std::exception);
}
|
||||
|
||||
CYBOZU_TEST_AUTO(vnni)
{
	// vpdpbusd must encode as EVEX by default and when EvexEncoding is given,
	// and as VEX when VexEncoding is given. badVex() must be rejected.
	struct Code : Xbyak::CodeGenerator {
		// xm31 combined with an explicit VEX encoding request
		void badVex()
		{
			vpdpbusd(xm0, xm1, xm31, VexEncoding);
		}
		Code()
		{
			// default encoding is EVEX
			vpdpbusd(xm0, xm1, xm2);
			vpdpbusd(xm0, xm1, xm2, EvexEncoding); // EVEX
			vpdpbusd(xm0, xm1, xm2, VexEncoding); // VEX
		}
	} c;
	// one row per instruction emitted by the constructor
	const uint8_t tbl[] = {
		0x62, 0xF2, 0x75, 0x08, 0x50, 0xC2,
		0x62, 0xF2, 0x75, 0x08, 0x50, 0xC2,
		0xC4, 0xE2, 0x71, 0x50, 0xC2,
	};
	const size_t n = sizeof tbl / sizeof *tbl;
	CYBOZU_TEST_EQUAL(c.getSize(), n);
	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);

	CYBOZU_TEST_EXCEPTION(c.badVex(), std::exception);
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -22,16 +22,19 @@ class ErrorSample : public CodeGenerator {
|
|||
public:
|
||||
void gen()
|
||||
{
|
||||
#ifndef XBYAK_NO_EXCEPTION
|
||||
CYBOZU_TEST_EXCEPTION(mov(ptr[eax],1), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(test(ptr[eax],1), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(adc(ptr[eax],1), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(setz(eax), std::exception);
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
int main()
|
||||
{
|
||||
CYBOZU_TEST_EQUAL(sizeof(Xbyak::Operand), 4u);
|
||||
// the size of Operand exceeds 32 bit.
|
||||
CYBOZU_TEST_EQUAL(sizeof(Xbyak::Operand), 8u);
|
||||
Sample s;
|
||||
s.gen();
|
||||
ErrorSample es;
|
||||
|
|
|
@ -0,0 +1,111 @@
|
|||
#define XBYAK_NO_EXCEPTION
|
||||
#include <xbyak/xbyak.h>
|
||||
|
||||
using namespace Xbyak;
|
||||
|
||||
int g_err = 0;
|
||||
int g_test = 0;
|
||||
|
||||
void assertEq(int x, int y)
|
||||
{
|
||||
if (x != y) {
|
||||
printf("ERR x=%d y=%d\n", x, y);
|
||||
g_err++;
|
||||
}
|
||||
g_test++;
|
||||
}
|
||||
|
||||
void assertBool(bool b)
|
||||
{
|
||||
if (!b) {
|
||||
printf("ERR assertBool\n");
|
||||
g_err++;
|
||||
}
|
||||
g_test++;
|
||||
}
|
||||
|
||||
void test1()
|
||||
{
|
||||
const int v = 123;
|
||||
struct Code : CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
mov(eax, v);
|
||||
ret();
|
||||
}
|
||||
} c;
|
||||
int (*f)() = c.getCode<int (*)()>();
|
||||
assertEq(f(), v);
|
||||
assertEq(Xbyak::GetError(), ERR_NONE);
|
||||
}
|
||||
|
||||
void test2()
|
||||
{
|
||||
struct Code : CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
Label lp;
|
||||
L(lp);
|
||||
L(lp);
|
||||
}
|
||||
} c;
|
||||
assertEq(Xbyak::GetError(), ERR_LABEL_IS_REDEFINED);
|
||||
Xbyak::ClearError();
|
||||
}
|
||||
|
||||
void test3()
|
||||
{
|
||||
static struct EmptyAllocator : Xbyak::Allocator {
|
||||
uint8_t *alloc() { return 0; }
|
||||
} emptyAllocator;
|
||||
struct Code : CodeGenerator {
|
||||
Code() : CodeGenerator(8, 0, &emptyAllocator)
|
||||
{
|
||||
mov(eax, 3);
|
||||
assertBool(Xbyak::GetError() == 0);
|
||||
mov(eax, 3);
|
||||
mov(eax, 3);
|
||||
assertBool(Xbyak::GetError() != 0);
|
||||
Xbyak::ClearError();
|
||||
assertBool(Xbyak::GetError() == 0);
|
||||
}
|
||||
} c;
|
||||
}
|
||||
|
||||
void test4()
|
||||
{
|
||||
struct Code : CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
mov(ptr[eax], 1);
|
||||
assertBool(Xbyak::GetError() != 0);
|
||||
Xbyak::ClearError();
|
||||
|
||||
test(ptr[eax], 1);
|
||||
assertBool(Xbyak::GetError() != 0);
|
||||
Xbyak::ClearError();
|
||||
|
||||
adc(ptr[eax], 1);
|
||||
assertBool(Xbyak::GetError() != 0);
|
||||
Xbyak::ClearError();
|
||||
|
||||
setz(eax);
|
||||
assertBool(Xbyak::GetError() != 0);
|
||||
Xbyak::ClearError();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
test1();
|
||||
test2();
|
||||
test3();
|
||||
test4();
|
||||
if (g_err) {
|
||||
printf("err %d/%d\n", g_err, g_test);
|
||||
} else {
|
||||
printf("all ok %d\n", g_test);
|
||||
}
|
||||
return g_err != 0;
|
||||
}
|
|
@ -6,7 +6,7 @@
|
|||
#include <iostream>
|
||||
#include <memory.h>
|
||||
|
||||
typedef unsigned char uint8;
|
||||
typedef unsigned char uint8_t;
|
||||
|
||||
std::string normalize(const std::string& line)
|
||||
{
|
||||
|
|
|
@ -218,7 +218,7 @@ void check(int x, int y)
|
|||
}
|
||||
}
|
||||
|
||||
void verify(const Xbyak::uint8 *f, int pNum)
|
||||
void verify(const Xbyak::uint8_t *f, int pNum)
|
||||
{
|
||||
switch (pNum) {
|
||||
case 0:
|
||||
|
@ -264,7 +264,7 @@ void testAll()
|
|||
}
|
||||
for (int tNum = 0; tNum < maxNum; tNum++) {
|
||||
// printf("pNum=%d, tNum=%d, stackSize=%d\n", pNum, tNum | opt, stackSize);
|
||||
const Xbyak::uint8 *f = code.getCurr();
|
||||
const Xbyak::uint8_t *f = code.getCurr();
|
||||
code.gen(pNum, tNum | opt, stackSize);
|
||||
verify(f, pNum);
|
||||
/*
|
||||
|
|
|
@ -12,7 +12,7 @@ g++ $CFLAGS address.cpp -o address
|
|||
./address $1 > a.asm
|
||||
echo "asm"
|
||||
$EXE -f$OPT3 a.asm -l a.lst
|
||||
awk '{if (index($3, "-")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = "" }} ' < a.lst | $FILTER > ok.lst
|
||||
awk '{printf "%s", sub(/-$/, "", $3) ? $3 : $3 ORS}' a.lst | $FILTER > ok.lst
|
||||
|
||||
echo "xbyak"
|
||||
./address $1 jit > nm.cpp
|
||||
|
|
|
@ -1,39 +1,44 @@
|
|||
#!/bin/tcsh
|
||||
#!/bin/sh
|
||||
|
||||
set FILTER="grep -v warning"
|
||||
FILTER="grep -v warning"
|
||||
|
||||
if ($1 == "Y") then
|
||||
case $1 in
|
||||
Y)
|
||||
echo "yasm(32bit)"
|
||||
set EXE=yasm
|
||||
set OPT2="-DUSE_YASM -DXBYAK32"
|
||||
set OPT3=win32
|
||||
else if ($1 == "64") then
|
||||
EXE=yasm
|
||||
OPT2="-DUSE_YASM -DXBYAK32"
|
||||
OPT3=win32
|
||||
;;
|
||||
64)
|
||||
echo "nasm(64bit)"
|
||||
set EXE=nasm
|
||||
set OPT2=-DXBYAK64
|
||||
set OPT3=win64
|
||||
set FILTER=./normalize_prefix
|
||||
else if ($1 == "Y64") then
|
||||
EXE=nasm
|
||||
OPT2=-DXBYAK64
|
||||
OPT3=win64
|
||||
FILTER=./normalize_prefix
|
||||
;;
|
||||
Y64)
|
||||
echo "yasm(64bit)"
|
||||
set EXE=yasm
|
||||
set OPT2="-DUSE_YASM -DXBYAK64"
|
||||
set OPT3=win64
|
||||
set FILTER=./normalize_prefix
|
||||
else
|
||||
EXE=yasm
|
||||
OPT2="-DUSE_YASM -DXBYAK64"
|
||||
OPT3=win64
|
||||
FILTER=./normalize_prefix
|
||||
;;
|
||||
*)
|
||||
echo "nasm(32bit)"
|
||||
set EXE=nasm
|
||||
set OPT2=-DXBYAK32
|
||||
set OPT3=win32
|
||||
endif
|
||||
EXE=nasm
|
||||
OPT2=-DXBYAK32
|
||||
OPT3=win32
|
||||
;;
|
||||
esac
|
||||
|
||||
set CFLAGS="-Wall -fno-operator-names -I../ $OPT2 -DUSE_AVX"
|
||||
CFLAGS="-Wall -fno-operator-names -I../ $OPT2 -DUSE_AVX"
|
||||
echo "compile make_nm.cpp"
|
||||
g++ $CFLAGS make_nm.cpp -o make_nm
|
||||
|
||||
./make_nm > a.asm
|
||||
echo "asm"
|
||||
$EXE -f$OPT3 a.asm -l a.lst
|
||||
awk '{if (index($3, "-")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = "" }} ' < a.lst | $FILTER | grep -v "1+1" > ok.lst
|
||||
awk '$3 != "1+1" {printf "%s", sub(/-$/, "", $3) ? $3 : $3 ORS}' a.lst | $FILTER > ok.lst
|
||||
|
||||
echo "xbyak"
|
||||
./make_nm jit > nm.cpp
|
||||
|
|
|
@ -1,28 +1,31 @@
|
|||
#!/bin/tcsh
|
||||
#!/bin/sh
|
||||
|
||||
set FILTER="grep -v warning"
|
||||
FILTER="grep -v warning"
|
||||
|
||||
if ($1 == "64") then
|
||||
case $1 in
|
||||
64)
|
||||
echo "nasm(64bit)"
|
||||
set EXE=nasm
|
||||
set OPT2=-DXBYAK64
|
||||
set OPT3=win64
|
||||
set FILTER=./normalize_prefix
|
||||
else
|
||||
EXE=nasm
|
||||
OPT2=-DXBYAK64
|
||||
OPT3=win64
|
||||
FILTER=./normalize_prefix
|
||||
;;
|
||||
*)
|
||||
echo "nasm(32bit)"
|
||||
set EXE=nasm
|
||||
set OPT2=-DXBYAK32
|
||||
set OPT3=win32
|
||||
endif
|
||||
EXE=nasm
|
||||
OPT2=-DXBYAK32
|
||||
OPT3=win32
|
||||
;;
|
||||
esac
|
||||
|
||||
set CFLAGS="-Wall -fno-operator-names -I../ $OPT2 -DUSE_AVX512"
|
||||
CFLAGS="-Wall -fno-operator-names -I../ $OPT2 -DUSE_AVX512"
|
||||
echo "compile make_512.cpp"
|
||||
g++ $CFLAGS make_512.cpp -o make_512
|
||||
|
||||
./make_512 > a.asm
|
||||
echo "asm"
|
||||
$EXE -f$OPT3 a.asm -l a.lst
|
||||
awk '{if (index($3, "-")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = "" }} ' < a.lst | $FILTER > ok.lst
|
||||
awk '{printf "%s", sub(/-$/, "", $3) ? $3 : $3 ORS}' a.lst | $FILTER > ok.lst
|
||||
|
||||
echo "xbyak"
|
||||
./make_512 jit > nm.cpp
|
||||
|
|
|
@ -17,6 +17,10 @@ if /i "%1"=="Y" (
|
|||
set OPT2=-DUSE_YASM -DXBYAK64
|
||||
set OPT3=win64
|
||||
set FILTER=normalize_prefix
|
||||
) else if /i "%1"=="noexcept" (
|
||||
set EXE=nasm.exe
|
||||
set OPT2=-DXBYAK32 -DXBYAK_NO_EXCEPTION
|
||||
set OPT3=win32
|
||||
) else (
|
||||
set EXE=nasm.exe
|
||||
set OPT2=-DXBYAK32
|
||||
|
@ -39,4 +43,4 @@ if /i "%Y%"=="1" (
|
|||
make_nm jit > nm.cpp
|
||||
cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2%
|
||||
nm_frame |%FILTER% > x.lst
|
||||
diff -wb x.lst ok.lst && echo "ok"
|
||||
diff -wb x.lst ok.lst && echo "ok"
|
||||
|
|
|
@ -1,45 +1,57 @@
|
|||
#!/bin/tcsh
|
||||
#!/bin/sh
|
||||
|
||||
set FILTER=cat
|
||||
FILTER=cat
|
||||
|
||||
if ($1 == "Y") then
|
||||
case $1 in
|
||||
Y)
|
||||
echo "yasm(32bit)"
|
||||
set EXE=yasm
|
||||
set OPT2="-DUSE_YASM -DXBYAK32"
|
||||
set OPT3=win32
|
||||
else if ($1 == "64") then
|
||||
EXE=yasm
|
||||
OPT2="-DUSE_YASM -DXBYAK32"
|
||||
OPT3=win32
|
||||
;;
|
||||
64)
|
||||
echo "nasm(64bit)"
|
||||
set EXE=nasm
|
||||
set OPT2=-DXBYAK64
|
||||
set OPT3=win64
|
||||
set FILTER=./normalize_prefix
|
||||
else if ($1 == "Y64") then
|
||||
EXE=nasm
|
||||
OPT2=-DXBYAK64
|
||||
OPT3=win64
|
||||
FILTER=./normalize_prefix
|
||||
;;
|
||||
Y64)
|
||||
echo "yasm(64bit)"
|
||||
set EXE=yasm
|
||||
set OPT2="-DUSE_YASM -DXBYAK64"
|
||||
set OPT3=win64
|
||||
set FILTER=./normalize_prefix
|
||||
else if ($1 == "avx512") then
|
||||
EXE=yasm
|
||||
OPT2="-DUSE_YASM -DXBYAK64"
|
||||
OPT3=win64
|
||||
FILTER=./normalize_prefix
|
||||
;;
|
||||
avx512)
|
||||
echo "nasm(64bit) + avx512"
|
||||
set EXE=nasm
|
||||
set OPT2="-DXBYAK64 -DUSE_AVX512"
|
||||
set OPT3=win64
|
||||
set FILTER=./normalize_prefix
|
||||
else
|
||||
EXE=nasm
|
||||
OPT2="-DXBYAK64 -DUSE_AVX512"
|
||||
OPT3=win64
|
||||
FILTER=./normalize_prefix
|
||||
;;
|
||||
noexcept)
|
||||
echo "nasm(32bit) without exception"
|
||||
EXE=nasm
|
||||
OPT2="-DXBYAK32 -DXBYAK_NO_EXCEPTION"
|
||||
OPT3=win32
|
||||
;;
|
||||
*)
|
||||
echo "nasm(32bit)"
|
||||
set EXE=nasm
|
||||
set OPT2=-DXBYAK32
|
||||
set OPT3=win32
|
||||
endif
|
||||
EXE=nasm
|
||||
OPT2=-DXBYAK32
|
||||
OPT3=win32
|
||||
;;
|
||||
esac
|
||||
|
||||
set CFLAGS="-Wall -fno-operator-names -I../ $OPT2"
|
||||
echo "compile make_nm.cpp"
|
||||
CFLAGS="-Wall -fno-operator-names -I../ $OPT2"
|
||||
echo "compile make_nm.cpp with $CFLAGS"
|
||||
g++ $CFLAGS make_nm.cpp -o make_nm
|
||||
|
||||
./make_nm > a.asm
|
||||
echo "asm"
|
||||
$EXE -f$OPT3 a.asm -l a.lst
|
||||
awk '{if (index($3, "-")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = "" }} ' < a.lst | $FILTER | grep -v "1+1" > ok.lst
|
||||
awk '$3 != "1+1" {printf "%s", sub(/-$/, "", $3) ? $3 : $3 ORS}' a.lst | $FILTER > ok.lst
|
||||
|
||||
echo "xbyak"
|
||||
./make_nm jit > nm.cpp
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,5 +1,16 @@
|
|||
#ifndef XBYAK_XBYAK_UTIL_H_
|
||||
#define XBYAK_XBYAK_UTIL_H_
|
||||
|
||||
#ifdef XBYAK_ONLY_CLASS_CPU
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <algorithm>
|
||||
#include <assert.h>
|
||||
#ifndef XBYAK_THROW
|
||||
#define XBYAK_THROW(x) ;
|
||||
#define XBYAK_THROW_RET(x, y) return y;
|
||||
#endif
|
||||
#else
|
||||
#include <string.h>
|
||||
|
||||
/**
|
||||
|
@ -9,6 +20,7 @@
|
|||
@note this header is UNDER CONSTRUCTION!
|
||||
*/
|
||||
#include "xbyak.h"
|
||||
#endif // XBYAK_ONLY_CLASS_CPU
|
||||
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
|
||||
#define XBYAK_INTEL_CPU_SPECIFIC
|
||||
|
@ -80,7 +92,7 @@ typedef enum {
|
|||
CPU detection class
|
||||
*/
|
||||
class Cpu {
|
||||
uint64 type_;
|
||||
uint64_t type_;
|
||||
//system topology
|
||||
bool x2APIC_supported_;
|
||||
static const size_t maxTopologyLevels = 2;
|
||||
|
@ -219,24 +231,24 @@ public:
|
|||
int displayFamily; // family + extFamily
|
||||
int displayModel; // model + extModel
|
||||
|
||||
unsigned int getNumCores(IntelCpuTopologyLevel level) {
|
||||
if (!x2APIC_supported_) throw Error(ERR_X2APIC_IS_NOT_SUPPORTED);
|
||||
unsigned int getNumCores(IntelCpuTopologyLevel level) const {
|
||||
if (!x2APIC_supported_) XBYAK_THROW_RET(ERR_X2APIC_IS_NOT_SUPPORTED, 0)
|
||||
switch (level) {
|
||||
case SmtLevel: return numCores_[level - 1];
|
||||
case CoreLevel: return numCores_[level - 1] / numCores_[SmtLevel - 1];
|
||||
default: throw Error(ERR_X2APIC_IS_NOT_SUPPORTED);
|
||||
default: XBYAK_THROW_RET(ERR_X2APIC_IS_NOT_SUPPORTED, 0)
|
||||
}
|
||||
}
|
||||
|
||||
unsigned int getDataCacheLevels() const { return dataCacheLevels_; }
|
||||
unsigned int getCoresSharingDataCache(unsigned int i) const
|
||||
{
|
||||
if (i >= dataCacheLevels_) throw Error(ERR_BAD_PARAMETER);
|
||||
if (i >= dataCacheLevels_) XBYAK_THROW_RET(ERR_BAD_PARAMETER, 0)
|
||||
return coresSharignDataCache_[i];
|
||||
}
|
||||
unsigned int getDataCacheSize(unsigned int i) const
|
||||
{
|
||||
if (i >= dataCacheLevels_) throw Error(ERR_BAD_PARAMETER);
|
||||
if (i >= dataCacheLevels_) XBYAK_THROW_RET(ERR_BAD_PARAMETER, 0)
|
||||
return dataCacheSize_[i];
|
||||
}
|
||||
|
||||
|
@ -270,7 +282,7 @@ public:
|
|||
(void)data;
|
||||
#endif
|
||||
}
|
||||
static inline uint64 getXfeature()
|
||||
static inline uint64_t getXfeature()
|
||||
{
|
||||
#ifdef XBYAK_INTEL_CPU_SPECIFIC
|
||||
#ifdef _MSC_VER
|
||||
|
@ -280,13 +292,13 @@ public:
|
|||
// xgetvb is not support on gcc 4.2
|
||||
// __asm__ volatile("xgetbv" : "=a"(eax), "=d"(edx) : "c"(0));
|
||||
__asm__ volatile(".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(0));
|
||||
return ((uint64)edx << 32) | eax;
|
||||
return ((uint64_t)edx << 32) | eax;
|
||||
#endif
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
typedef uint64 Type;
|
||||
typedef uint64_t Type;
|
||||
|
||||
static const Type NONE = 0;
|
||||
static const Type tMMX = 1 << 0;
|
||||
|
@ -323,36 +335,40 @@ public:
|
|||
static const Type tADX = 1 << 28; // adcx, adox
|
||||
static const Type tRDSEED = 1 << 29; // rdseed
|
||||
static const Type tSMAP = 1 << 30; // stac
|
||||
static const Type tHLE = uint64(1) << 31; // xacquire, xrelease, xtest
|
||||
static const Type tRTM = uint64(1) << 32; // xbegin, xend, xabort
|
||||
static const Type tF16C = uint64(1) << 33; // vcvtph2ps, vcvtps2ph
|
||||
static const Type tMOVBE = uint64(1) << 34; // mobve
|
||||
static const Type tAVX512F = uint64(1) << 35;
|
||||
static const Type tAVX512DQ = uint64(1) << 36;
|
||||
static const Type tAVX512_IFMA = uint64(1) << 37;
|
||||
static const Type tHLE = uint64_t(1) << 31; // xacquire, xrelease, xtest
|
||||
static const Type tRTM = uint64_t(1) << 32; // xbegin, xend, xabort
|
||||
static const Type tF16C = uint64_t(1) << 33; // vcvtph2ps, vcvtps2ph
|
||||
static const Type tMOVBE = uint64_t(1) << 34; // mobve
|
||||
static const Type tAVX512F = uint64_t(1) << 35;
|
||||
static const Type tAVX512DQ = uint64_t(1) << 36;
|
||||
static const Type tAVX512_IFMA = uint64_t(1) << 37;
|
||||
static const Type tAVX512IFMA = tAVX512_IFMA;
|
||||
static const Type tAVX512PF = uint64(1) << 38;
|
||||
static const Type tAVX512ER = uint64(1) << 39;
|
||||
static const Type tAVX512CD = uint64(1) << 40;
|
||||
static const Type tAVX512BW = uint64(1) << 41;
|
||||
static const Type tAVX512VL = uint64(1) << 42;
|
||||
static const Type tAVX512_VBMI = uint64(1) << 43;
|
||||
static const Type tAVX512PF = uint64_t(1) << 38;
|
||||
static const Type tAVX512ER = uint64_t(1) << 39;
|
||||
static const Type tAVX512CD = uint64_t(1) << 40;
|
||||
static const Type tAVX512BW = uint64_t(1) << 41;
|
||||
static const Type tAVX512VL = uint64_t(1) << 42;
|
||||
static const Type tAVX512_VBMI = uint64_t(1) << 43;
|
||||
static const Type tAVX512VBMI = tAVX512_VBMI; // changed by Intel's manual
|
||||
static const Type tAVX512_4VNNIW = uint64(1) << 44;
|
||||
static const Type tAVX512_4FMAPS = uint64(1) << 45;
|
||||
static const Type tPREFETCHWT1 = uint64(1) << 46;
|
||||
static const Type tPREFETCHW = uint64(1) << 47;
|
||||
static const Type tSHA = uint64(1) << 48;
|
||||
static const Type tMPX = uint64(1) << 49;
|
||||
static const Type tAVX512_VBMI2 = uint64(1) << 50;
|
||||
static const Type tGFNI = uint64(1) << 51;
|
||||
static const Type tVAES = uint64(1) << 52;
|
||||
static const Type tVPCLMULQDQ = uint64(1) << 53;
|
||||
static const Type tAVX512_VNNI = uint64(1) << 54;
|
||||
static const Type tAVX512_BITALG = uint64(1) << 55;
|
||||
static const Type tAVX512_VPOPCNTDQ = uint64(1) << 56;
|
||||
static const Type tAVX512_BF16 = uint64(1) << 57;
|
||||
static const Type tAVX512_VP2INTERSECT = uint64(1) << 58;
|
||||
static const Type tAVX512_4VNNIW = uint64_t(1) << 44;
|
||||
static const Type tAVX512_4FMAPS = uint64_t(1) << 45;
|
||||
static const Type tPREFETCHWT1 = uint64_t(1) << 46;
|
||||
static const Type tPREFETCHW = uint64_t(1) << 47;
|
||||
static const Type tSHA = uint64_t(1) << 48;
|
||||
static const Type tMPX = uint64_t(1) << 49;
|
||||
static const Type tAVX512_VBMI2 = uint64_t(1) << 50;
|
||||
static const Type tGFNI = uint64_t(1) << 51;
|
||||
static const Type tVAES = uint64_t(1) << 52;
|
||||
static const Type tVPCLMULQDQ = uint64_t(1) << 53;
|
||||
static const Type tAVX512_VNNI = uint64_t(1) << 54;
|
||||
static const Type tAVX512_BITALG = uint64_t(1) << 55;
|
||||
static const Type tAVX512_VPOPCNTDQ = uint64_t(1) << 56;
|
||||
static const Type tAVX512_BF16 = uint64_t(1) << 57;
|
||||
static const Type tAVX512_VP2INTERSECT = uint64_t(1) << 58;
|
||||
static const Type tAMX_TILE = uint64_t(1) << 59;
|
||||
static const Type tAMX_INT8 = uint64_t(1) << 60;
|
||||
static const Type tAMX_BF16 = uint64_t(1) << 61;
|
||||
static const Type tAVX_VNNI = uint64_t(1) << 62;
|
||||
|
||||
Cpu()
|
||||
: type_(NONE)
|
||||
|
@ -374,19 +390,35 @@ public:
|
|||
if (ECX == get32bitAsBE(amd)) {
|
||||
type_ |= tAMD;
|
||||
getCpuid(0x80000001, data);
|
||||
if (EDX & (1U << 31)) type_ |= t3DN;
|
||||
if (EDX & (1U << 15)) type_ |= tCMOV;
|
||||
if (EDX & (1U << 30)) type_ |= tE3DN;
|
||||
if (EDX & (1U << 22)) type_ |= tMMX2;
|
||||
if (EDX & (1U << 27)) type_ |= tRDTSCP;
|
||||
if (EDX & (1U << 31)) {
|
||||
type_ |= t3DN;
|
||||
// 3DNow! implies support for PREFETCHW on AMD
|
||||
type_ |= tPREFETCHW;
|
||||
}
|
||||
|
||||
if (EDX & (1U << 29)) {
|
||||
// Long mode implies support for PREFETCHW on AMD
|
||||
type_ |= tPREFETCHW;
|
||||
}
|
||||
}
|
||||
if (ECX == get32bitAsBE(intel)) {
|
||||
type_ |= tINTEL;
|
||||
}
|
||||
|
||||
// Extended flags information
|
||||
getCpuid(0x80000000, data);
|
||||
if (EAX >= 0x80000001) {
|
||||
getCpuid(0x80000001, data);
|
||||
|
||||
if (EDX & (1U << 31)) type_ |= t3DN;
|
||||
if (EDX & (1U << 30)) type_ |= tE3DN;
|
||||
if (EDX & (1U << 27)) type_ |= tRDTSCP;
|
||||
if (EDX & (1U << 22)) type_ |= tMMX2;
|
||||
if (EDX & (1U << 15)) type_ |= tCMOV;
|
||||
if (ECX & (1U << 5)) type_ |= tLZCNT;
|
||||
if (ECX & (1U << 8)) type_ |= tPREFETCHW;
|
||||
}
|
||||
|
||||
getCpuid(1, data);
|
||||
if (ECX & (1U << 0)) type_ |= tSSE3;
|
||||
if (ECX & (1U << 9)) type_ |= tSSSE3;
|
||||
|
@ -407,11 +439,15 @@ public:
|
|||
|
||||
if (type_ & tOSXSAVE) {
|
||||
// check XFEATURE_ENABLED_MASK[2:1] = '11b'
|
||||
uint64 bv = getXfeature();
|
||||
uint64_t bv = getXfeature();
|
||||
if ((bv & 6) == 6) {
|
||||
if (ECX & (1U << 28)) type_ |= tAVX;
|
||||
if (ECX & (1U << 12)) type_ |= tFMA;
|
||||
if (((bv >> 5) & 7) == 7) {
|
||||
// do *not* check AVX-512 state on macOS because it has on-demand AVX-512 support
|
||||
#if !defined(__APPLE__)
|
||||
if (((bv >> 5) & 7) == 7)
|
||||
#endif
|
||||
{
|
||||
getCpuidEx(7, 0, data);
|
||||
if (EBX & (1U << 16)) type_ |= tAVX512F;
|
||||
if (type_ & tAVX512F) {
|
||||
|
@ -434,16 +470,12 @@ public:
|
|||
if (EDX & (1U << 3)) type_ |= tAVX512_4FMAPS;
|
||||
if (EDX & (1U << 8)) type_ |= tAVX512_VP2INTERSECT;
|
||||
}
|
||||
// EAX=07H, ECX=1
|
||||
getCpuidEx(7, 1, data);
|
||||
if (type_ & tAVX512F) {
|
||||
if (EAX & (1U << 5)) type_ |= tAVX512_BF16;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (maxNum >= 7) {
|
||||
getCpuidEx(7, 0, data);
|
||||
const uint32_t maxNumSubLeaves = EAX;
|
||||
if (type_ & tAVX && (EBX & (1U << 5))) type_ |= tAVX2;
|
||||
if (EBX & (1U << 3)) type_ |= tBMI1;
|
||||
if (EBX & (1U << 8)) type_ |= tBMI2;
|
||||
|
@ -456,6 +488,16 @@ public:
|
|||
if (EBX & (1U << 14)) type_ |= tMPX;
|
||||
if (EBX & (1U << 29)) type_ |= tSHA;
|
||||
if (ECX & (1U << 0)) type_ |= tPREFETCHWT1;
|
||||
if (EDX & (1U << 24)) type_ |= tAMX_TILE;
|
||||
if (EDX & (1U << 25)) type_ |= tAMX_INT8;
|
||||
if (EDX & (1U << 22)) type_ |= tAMX_BF16;
|
||||
if (maxNumSubLeaves >= 1) {
|
||||
getCpuidEx(7, 1, data);
|
||||
if (EAX & (1U << 4)) type_ |= tAVX_VNNI;
|
||||
if (type_ & tAVX512F) {
|
||||
if (EAX & (1U << 5)) type_ |= tAVX512_BF16;
|
||||
}
|
||||
}
|
||||
}
|
||||
setFamily();
|
||||
setNumCores();
|
||||
|
@ -463,9 +505,11 @@ public:
|
|||
}
|
||||
void putFamily() const
|
||||
{
|
||||
#ifndef XBYAK_ONLY_CLASS_CPU
|
||||
printf("family=%d, model=%X, stepping=%d, extFamily=%d, extModel=%X\n",
|
||||
family, model, stepping, extFamily, extModel);
|
||||
printf("display:family=%X, model=%X\n", displayFamily, displayModel);
|
||||
#endif
|
||||
}
|
||||
bool has(Type type) const
|
||||
{
|
||||
|
@ -473,9 +517,10 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
#ifndef XBYAK_ONLY_CLASS_CPU
|
||||
class Clock {
|
||||
public:
|
||||
static inline uint64 getRdtsc()
|
||||
static inline uint64_t getRdtsc()
|
||||
{
|
||||
#ifdef XBYAK_INTEL_CPU_SPECIFIC
|
||||
#ifdef _MSC_VER
|
||||
|
@ -483,7 +528,7 @@ public:
|
|||
#else
|
||||
unsigned int eax, edx;
|
||||
__asm__ volatile("rdtsc" : "=a"(eax), "=d"(edx));
|
||||
return ((uint64)edx << 32) | eax;
|
||||
return ((uint64_t)edx << 32) | eax;
|
||||
#endif
|
||||
#else
|
||||
// TODO: Need another impl of Clock or rdtsc-equivalent for non-x86 cpu
|
||||
|
@ -505,10 +550,10 @@ public:
|
|||
count_++;
|
||||
}
|
||||
int getCount() const { return count_; }
|
||||
uint64 getClock() const { return clock_; }
|
||||
uint64_t getClock() const { return clock_; }
|
||||
void clear() { count_ = 0; clock_ = 0; }
|
||||
private:
|
||||
uint64 clock_;
|
||||
uint64_t clock_;
|
||||
int count_;
|
||||
};
|
||||
|
||||
|
@ -558,7 +603,7 @@ public:
|
|||
{
|
||||
if (n_ == maxTblNum) {
|
||||
fprintf(stderr, "ERR Pack::can't append\n");
|
||||
throw Error(ERR_BAD_PARAMETER);
|
||||
XBYAK_THROW_RET(ERR_BAD_PARAMETER, *this)
|
||||
}
|
||||
tbl_[n_++] = &t;
|
||||
return *this;
|
||||
|
@ -567,7 +612,7 @@ public:
|
|||
{
|
||||
if (n > maxTblNum) {
|
||||
fprintf(stderr, "ERR Pack::init bad n=%d\n", (int)n);
|
||||
throw Error(ERR_BAD_PARAMETER);
|
||||
XBYAK_THROW(ERR_BAD_PARAMETER)
|
||||
}
|
||||
n_ = n;
|
||||
for (size_t i = 0; i < n; i++) {
|
||||
|
@ -578,7 +623,7 @@ public:
|
|||
{
|
||||
if (n >= n_) {
|
||||
fprintf(stderr, "ERR Pack bad n=%d(%d)\n", (int)n, (int)n_);
|
||||
throw Error(ERR_BAD_PARAMETER);
|
||||
XBYAK_THROW_RET(ERR_BAD_PARAMETER, rax)
|
||||
}
|
||||
return *tbl_[n];
|
||||
}
|
||||
|
@ -591,7 +636,7 @@ public:
|
|||
if (num == size_t(-1)) num = n_ - pos;
|
||||
if (pos + num > n_) {
|
||||
fprintf(stderr, "ERR Pack::sub bad pos=%d, num=%d\n", (int)pos, (int)num);
|
||||
throw Error(ERR_BAD_PARAMETER);
|
||||
XBYAK_THROW_RET(ERR_BAD_PARAMETER, Pack())
|
||||
}
|
||||
Pack pack;
|
||||
pack.n_ = num;
|
||||
|
@ -666,9 +711,9 @@ public:
|
|||
, t(t_)
|
||||
{
|
||||
using namespace Xbyak;
|
||||
if (pNum < 0 || pNum > 4) throw Error(ERR_BAD_PNUM);
|
||||
if (pNum < 0 || pNum > 4) XBYAK_THROW(ERR_BAD_PNUM)
|
||||
const int allRegNum = pNum + tNum_ + (useRcx_ ? 1 : 0) + (useRdx_ ? 1 : 0);
|
||||
if (tNum_ < 0 || allRegNum > maxRegNum) throw Error(ERR_BAD_TNUM);
|
||||
if (tNum_ < 0 || allRegNum > maxRegNum) XBYAK_THROW(ERR_BAD_TNUM)
|
||||
const Reg64& _rsp = code->rsp;
|
||||
saveNum_ = (std::max)(0, allRegNum - noSaveNum);
|
||||
const int *tbl = getOrderTbl() + noSaveNum;
|
||||
|
@ -874,6 +919,8 @@ public:
|
|||
startAddr_ = endAddr;
|
||||
}
|
||||
};
|
||||
#endif // XBYAK_ONLY_CLASS_CPU
|
||||
|
||||
} } // end of util
|
||||
|
||||
#endif
|
||||
|
|
|
@ -4,7 +4,6 @@
|
|||
|
||||
//#define CANONICAL_TEST
|
||||
|
||||
#define XBYAK_NO_OP_NAMES
|
||||
#include <xbyak/xbyak.h>
|
||||
#include <xbyak/xbyak_util.h>
|
||||
using namespace Xbyak::util;
|
||||
|
@ -38,7 +37,6 @@ static void (*mainloop)();
|
|||
static void (*handleException)();
|
||||
|
||||
u32 mem_writes, mem_reads;
|
||||
u32 mem_rewrites_w, mem_rewrites_r;
|
||||
|
||||
static u64 jmp_rsp;
|
||||
|
||||
|
|
|
@ -16,9 +16,7 @@
|
|||
You should have received a copy of the GNU General Public License
|
||||
along with reicast. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef CORE_REC_X64_X64_REGALLOC_H_
|
||||
#define CORE_REC_X64_X64_REGALLOC_H_
|
||||
#pragma once
|
||||
|
||||
//#define OLD_REGALLOC
|
||||
|
||||
|
@ -91,5 +89,3 @@ struct X64RegAlloc : RegAlloc<Xbyak::Operand::Code, s8>
|
|||
|
||||
BlockCompiler *compiler;
|
||||
};
|
||||
|
||||
#endif /* CORE_REC_X64_X64_REGALLOC_H_ */
|
||||
|
|
|
@ -67,9 +67,9 @@ void X86RegAlloc::Writeback_FPU(u32 reg, s8 nreg)
|
|||
|
||||
struct DynaRBI : RuntimeBlockInfo
|
||||
{
|
||||
virtual u32 Relink() override;
|
||||
u32 Relink() override;
|
||||
|
||||
virtual void Relocate(void* dst) override {
|
||||
void Relocate(void* dst) override {
|
||||
verify(false);
|
||||
}
|
||||
};
|
||||
|
|
|
@ -16,10 +16,8 @@
|
|||
You should have received a copy of the GNU General Public License
|
||||
along with Flycast. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#include "build.h"
|
||||
#pragma once
|
||||
|
||||
#define XBYAK32
|
||||
#define XBYAK_NO_OP_NAMES
|
||||
#include <xbyak/xbyak.h>
|
||||
#include <xbyak/xbyak_util.h>
|
||||
|
||||
|
|
|
@ -32,10 +32,10 @@ struct X86RegAlloc : RegAlloc<Xbyak::Operand::Code, s8>
|
|||
{
|
||||
X86RegAlloc(X86Compiler *compiler) : compiler(compiler) {}
|
||||
|
||||
virtual void Preload(u32 reg, Xbyak::Operand::Code nreg) override;
|
||||
virtual void Writeback(u32 reg, Xbyak::Operand::Code nreg) override;
|
||||
virtual void Preload_FPU(u32 reg, s8 nreg) override;
|
||||
virtual void Writeback_FPU(u32 reg, s8 nreg) override;
|
||||
void Preload(u32 reg, Xbyak::Operand::Code nreg) override;
|
||||
void Writeback(u32 reg, Xbyak::Operand::Code nreg) override;
|
||||
void Preload_FPU(u32 reg, s8 nreg) override;
|
||||
void Writeback_FPU(u32 reg, s8 nreg) override;
|
||||
|
||||
void doAlloc(RuntimeBlockInfo* block);
|
||||
|
||||
|
|
Loading…
Reference in New Issue