Merge pull request #246 from scribam/xbyak

deps: update xbyak
This commit is contained in:
flyinghead 2021-05-22 15:11:32 +02:00 committed by GitHub
commit c44493cd8a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
45 changed files with 4630 additions and 1094 deletions

View File

@ -82,7 +82,7 @@ target_compile_definitions(${PROJECT_NAME} PRIVATE
$<$<BOOL:${TEST_AUTOMATION}>:TEST_AUTOMATION>
$<$<BOOL:${ENABLE_LOG}>:DEBUGFAST>)
target_include_directories(${PROJECT_NAME} PRIVATE core core/deps core/deps/stb core/deps/xbyak core/khronos)
target_include_directories(${PROJECT_NAME} PRIVATE core core/deps core/deps/stb core/khronos)
find_package(OpenMP)
if(OpenMP_CXX_FOUND AND NOT APPLE AND USE_OPENMP)
@ -912,6 +912,8 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)")
core/deps/vixl/utils-vixl.h)
target_sources(${PROJECT_NAME} PRIVATE core/rec-ARM64/rec_arm64.cpp core/rec-ARM64/arm64_regalloc.h)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "i686.*|i386.*|x86.*|amd64.*|x86_64.*|AMD64.*")
add_subdirectory(core/deps/xbyak)
target_link_libraries(${PROJECT_NAME} PRIVATE xbyak::xbyak)
if(CMAKE_SIZEOF_VOID_P EQUAL 4)
target_sources(${PROJECT_NAME} PRIVATE
core/rec-x64/xbyak_base.h

View File

@ -0,0 +1,11 @@
name: test
on: [push]
jobs:
build:
name: test
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- run: sudo apt install nasm yasm g++-multilib tcsh
- run: make test

1
core/deps/xbyak/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/build* # cmake

View File

@ -1,12 +0,0 @@
sudo: true
dist: bionic
language: cpp
compiler:
- gcc
- clang
addons:
apt:
packages:
- nasm yasm g++-multilib tcsh
script:
- make test

View File

@ -1,6 +1,46 @@
cmake_minimum_required(VERSION 2.6)
project(xbyak)
cmake_minimum_required(VERSION 2.6...3.0.2)
project(xbyak CXX)
file(GLOB headers xbyak/*.h)
install(FILES ${headers} DESTINATION include/xbyak)
if (DEFINED CMAKE_VERSION AND CMAKE_VERSION VERSION_GREATER_EQUAL 3.0.2)
include(GNUInstallDirs)
add_library(${PROJECT_NAME} INTERFACE)
add_library(${PROJECT_NAME}::${PROJECT_NAME} ALIAS ${PROJECT_NAME})
target_include_directories(
${PROJECT_NAME} INTERFACE
"$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>"
"$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>"
)
install(
TARGETS ${PROJECT_NAME}
EXPORT ${PROJECT_NAME}-targets
)
configure_file(
cmake/config.cmake.in
${PROJECT_NAME}Config.cmake
@ONLY
)
install(
FILES ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}
)
install(
EXPORT ${PROJECT_NAME}-targets
NAMESPACE ${PROJECT_NAME}::
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}
)
elseif(NOT DEFINED CMAKE_INSTALL_INCLUDEDIR)
set(CMAKE_INSTALL_INCLUDEDIR "include")
endif()
install(
FILES ${headers}
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/xbyak
)

View File

@ -1,4 +1,4 @@
PREFIX=/usr/local
PREFIX?=/usr/local
INSTALL_DIR=$(PREFIX)/include/xbyak
all:

View File

@ -0,0 +1 @@
include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@-targets.cmake")

View File

@ -0,0 +1,26 @@
TARGET=../xbyak/xbyak_mnemonic.h
BIN=sortline gen_code gen_avx512
CFLAGS=-I../ -O2 -DXBYAK_NO_OP_NAMES -Wall -Wextra -Wno-missing-field-initializers
all: $(TARGET)
sortline: sortline.cpp
$(CXX) $(CFLAGS) $< -o $@
gen_code: gen_code.cpp ../xbyak/xbyak.h avx_type.hpp
$(CXX) $(CFLAGS) $< -o $@
gen_avx512: gen_avx512.cpp ../xbyak/xbyak.h avx_type.hpp
$(CXX) $(CFLAGS) $< -o $@
$(TARGET): $(BIN)
./gen_code | ./sortline > $@
echo "#ifdef XBYAK_ENABLE_OMITTED_OPERAND" >> $@
./gen_code omit | ./sortline >> $@
echo "#endif" >>$@
./gen_code fixed >> $@
echo "#ifndef XBYAK_DISABLE_AVX512" >> $@
./gen_avx512 | ./sortline >> $@
echo "#ifdef XBYAK64" >> $@
./gen_avx512 64 | ./sortline >> $@
echo "#endif" >> $@
echo "#endif" >> $@
clean:
$(RM) $(BIN) $(TARGET)

View File

@ -0,0 +1,170 @@
#include <assert.h>
// copy CodeGenerator::AVXtype
enum AVXtype {
// low 3 bit
T_N1 = 1,
T_N2 = 2,
T_N4 = 3,
T_N8 = 4,
T_N16 = 5,
T_N32 = 6,
T_NX_MASK = 7,
//
T_N_VL = 1 << 3, // N * (1, 2, 4) for VL
T_DUP = 1 << 4, // N = (8, 32, 64)
T_66 = 1 << 5,
T_F3 = 1 << 6,
T_F2 = 1 << 7,
T_0F = 1 << 8,
T_0F38 = 1 << 9,
T_0F3A = 1 << 10,
T_L0 = 1 << 11,
T_L1 = 1 << 12,
T_W0 = 1 << 13,
T_W1 = 1 << 14,
T_EW0 = 1 << 15,
T_EW1 = 1 << 16,
T_YMM = 1 << 17, // support YMM, ZMM
T_EVEX = 1 << 18,
T_ER_X = 1 << 19, // xmm{er}
T_ER_Y = 1 << 20, // ymm{er}
T_ER_Z = 1 << 21, // zmm{er}
T_SAE_X = 1 << 22, // xmm{sae}
T_SAE_Y = 1 << 23, // ymm{sae}
T_SAE_Z = 1 << 24, // zmm{sae}
T_MUST_EVEX = 1 << 25, // contains T_EVEX
T_B32 = 1 << 26, // m32bcst
T_B64 = 1 << 27, // m64bcst
T_M_K = 1 << 28, // mem{k}
T_VSIB = 1 << 29,
T_MEM_EVEX = 1 << 30, // use evex if mem
T_XXX
};
const int NONE = 256; // same as Xbyak::CodeGenerator::NONE
std::string type2String(int type)
{
std::string str;
int low = type & T_NX_MASK;
if (0 < low) {
const char *tbl[8] = {
"T_N1", "T_N2", "T_N4", "T_N8", "T_N16", "T_N32"
};
assert(low < int(sizeof(tbl) / sizeof(tbl[0])));
str = tbl[low - 1];
}
if (type & T_N_VL) {
if (!str.empty()) str += " | ";
str += "T_N_VL";
}
if (type & T_DUP) {
if (!str.empty()) str += " | ";
str += "T_DUP";
}
if (type & T_66) {
if (!str.empty()) str += " | ";
str += "T_66";
}
if (type & T_F3) {
if (!str.empty()) str += " | ";
str += "T_F3";
}
if (type & T_F2) {
if (!str.empty()) str += " | ";
str += "T_F2";
}
if (type & T_0F) {
if (!str.empty()) str += " | ";
str += "T_0F";
}
if (type & T_0F38) {
if (!str.empty()) str += " | ";
str += "T_0F38";
}
if (type & T_0F3A) {
if (!str.empty()) str += " | ";
str += "T_0F3A";
}
if (type & T_L0) {
if (!str.empty()) str += " | ";
str += "VEZ_L0";
}
if (type & T_L1) {
if (!str.empty()) str += " | ";
str += "VEZ_L1";
}
if (type & T_W0) {
if (!str.empty()) str += " | ";
str += "T_W0";
}
if (type & T_W1) {
if (!str.empty()) str += " | ";
str += "T_W1";
}
if (type & T_EW0) {
if (!str.empty()) str += " | ";
str += "T_EW0";
}
if (type & T_EW1) {
if (!str.empty()) str += " | ";
str += "T_EW1";
}
if (type & T_YMM) {
if (!str.empty()) str += " | ";
str += "T_YMM";
}
if (type & T_EVEX) {
if (!str.empty()) str += " | ";
str += "T_EVEX";
}
if (type & T_ER_X) {
if (!str.empty()) str += " | ";
str += "T_ER_X";
}
if (type & T_ER_Y) {
if (!str.empty()) str += " | ";
str += "T_ER_Y";
}
if (type & T_ER_Z) {
if (!str.empty()) str += " | ";
str += "T_ER_Z";
}
if (type & T_SAE_X) {
if (!str.empty()) str += " | ";
str += "T_SAE_X";
}
if (type & T_SAE_Y) {
if (!str.empty()) str += " | ";
str += "T_SAE_Y";
}
if (type & T_SAE_Z) {
if (!str.empty()) str += " | ";
str += "T_SAE_Z";
}
if (type & T_MUST_EVEX) {
if (!str.empty()) str += " | ";
str += "T_MUST_EVEX";
}
if (type & T_B32) {
if (!str.empty()) str += " | ";
str += "T_B32";
}
if (type & T_B64) {
if (!str.empty()) str += " | ";
str += "T_B64";
}
if (type & T_M_K) {
if (!str.empty()) str += " | ";
str += "T_M_K";
}
if (type & T_VSIB) {
if (!str.empty()) str += " | ";
str += "T_VSIB";
}
if (type & T_MEM_EVEX) {
if (!str.empty()) str += " | ";
str += "T_MEM_EVEX";
}
return str;
}

View File

@ -0,0 +1,17 @@
#include <stdio.h>
int main()
{
puts("enum {");
for (int i = 0; i < 256; i++) {
printf(" B");
for (int j = 0; j < 8; j++) {
putchar(i & (1 << (7 - j)) ? '1' : '0');
}
printf("= %d", i);
if (i < 255) putchar(',');
putchar('\n');
}
puts("};");
return 0;
}

View File

@ -0,0 +1,750 @@
#define XBYAK_DONT_READ_LIST
#include <stdio.h>
#include <string.h>
#include "../xbyak/xbyak.h"
#define NUM_OF_ARRAY(x) (sizeof(x) / sizeof(x[0]))
using namespace Xbyak;
#ifdef _MSC_VER
#pragma warning(disable : 4996) // scanf
#define snprintf _snprintf_s
#endif
#include "avx_type.hpp"
void putOpmask(bool only64bit)
{
if (only64bit) {
puts("void kmovq(const Opmask& k, const Reg64& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W1, 0x92); }");
puts("void kmovq(const Reg64& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W1, 0x93); }");
return;
}
{
const struct Tbl {
const char *name;
uint8_t code;
} tbl[] = {
{ "kadd", 0x4A },
{ "kand", 0x41 },
{ "kandn", 0x42 },
{ "kor", 0x45 },
{ "kxnor", 0x46 },
{ "kxor", 0x47 },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
printf("void %sw(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W0, 0x%02X); }\n", p.name, p.code);
printf("void %sq(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W1, 0x%02X); }\n", p.name, p.code);
printf("void %sb(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_66 | T_W0, 0x%02X); }\n", p.name, p.code);
printf("void %sd(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_66 | T_W1, 0x%02X); }\n", p.name, p.code);
}
printf("void kunpckbw(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_66 | T_W0, 0x4B); }\n");
printf("void kunpckwd(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W0, 0x4B); }\n");
printf("void kunpckdq(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W1, 0x4B); }\n");
}
{
const struct Tbl {
const char *name;
uint8_t code;
} tbl[] = {
{ "knot", 0x44 },
{ "kortest", 0x98 },
{ "ktest", 0x99 },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
printf("void %sw(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_W0, 0x%02X); }\n", p.name, p.code);
printf("void %sq(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_W1, 0x%02X); }\n", p.name, p.code);
printf("void %sb(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_66 | T_W0, 0x%02X); }\n", p.name, p.code);
printf("void %sd(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_66 | T_W1, 0x%02X); }\n", p.name, p.code);
}
}
{
const struct Tbl {
const char *name;
uint8_t code;
} tbl[] = {
{ "kshiftl", 0x32 },
{ "kshiftr", 0x30 },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
printf("void %sw(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x%02X, imm); }\n", p.name, p.code);
printf("void %sq(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x%02X, imm); }\n", p.name, p.code + 1);
printf("void %sb(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x%02X, imm); }\n", p.name, p.code);
printf("void %sd(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x%02X, imm); }\n", p.name, p.code + 1);
}
}
puts("void kmovw(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_W0, 0x90); }");
puts("void kmovq(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_W1, 0x90); }");
puts("void kmovb(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W0, 0x90); }");
puts("void kmovd(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W1, 0x90); }");
puts("void kmovw(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_W0, 0x91); }");
puts("void kmovq(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_W1, 0x91); }");
puts("void kmovb(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_66 | T_W0, 0x91); }");
puts("void kmovd(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_66 | T_W1, 0x91); }");
puts("void kmovw(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_W0, 0x92); }");
puts("void kmovw(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_W0, 0x93); }");
puts("void kmovb(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_66 | T_W0, 0x92); }");
puts("void kmovb(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_66 | T_W0, 0x93); }");
puts("void kmovd(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W0, 0x92); }");
puts("void kmovd(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W0, 0x93); }");
}
// vcmppd(k, x, op)
void putVcmp()
{
const struct Tbl {
uint8_t code;
const char *name;
int type;
bool hasIMM;
} tbl[] = {
{ 0xC2, "vcmppd", T_0F | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_YMM | T_66 | T_B64, true },
{ 0xC2, "vcmpps", T_0F | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_YMM | T_B32, true },
{ 0xC2, "vcmpsd", T_0F | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_F2 | T_N8, true },
{ 0xC2, "vcmpss", T_0F | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_F3 | T_N4, true },
{ 0x74, "vpcmpeqb", T_66 | T_0F | T_MUST_EVEX | T_YMM, false },
{ 0x75, "vpcmpeqw", T_66 | T_0F | T_MUST_EVEX | T_YMM, false },
{ 0x76, "vpcmpeqd", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_B32, false },
{ 0x29, "vpcmpeqq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
{ 0x64, "vpcmpgtb", T_66 | T_0F | T_MUST_EVEX | T_YMM, false },
{ 0x65, "vpcmpgtw", T_66 | T_0F | T_MUST_EVEX | T_YMM, false },
{ 0x66, "vpcmpgtd", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false },
{ 0x37, "vpcmpgtq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
{ 0x3F, "vpcmpb", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0, true },
{ 0x3E, "vpcmpub", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0, true },
{ 0x3F, "vpcmpw", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1, true },
{ 0x3E, "vpcmpuw", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1, true },
{ 0x1F, "vpcmpd", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, true },
{ 0x1E, "vpcmpud", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, true },
{ 0x1F, "vpcmpq", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, true },
{ 0x1E, "vpcmpuq", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, true },
{ 0x26, "vptestmb", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, false },
{ 0x26, "vptestmw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false },
{ 0x27, "vptestmd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false },
{ 0x27, "vptestmq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
{ 0x26, "vptestnmb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, false },
{ 0x26, "vptestnmw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false },
{ 0x27, "vptestnmd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false },
{ 0x27, "vptestnmq", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
std::string type = type2String(p->type);
printf("void %s(const Opmask& k, const Xmm& x, const Operand& op%s) { opAVX_K_X_XM(k, x, op, %s, 0x%02X%s); }\n"
, p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : "");
}
}
// XM_X
void putX_XM()
{
const struct Tbl {
uint8_t code;
const char *name;
int type;
} tbl[] = {
{ 0x6F, "vmovdqa32", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z },
{ 0x6F, "vmovdqa64", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z },
{ 0x6F, "vmovdqu8", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z },
{ 0x6F, "vmovdqu16", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z },
{ 0x6F, "vmovdqu32", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z },
{ 0x6F, "vmovdqu64", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z },
{ 0x7B, "vcvtpd2qq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_ER_Z },
// putCvt
{ 0x79, "vcvtpd2uqq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_ER_Z },
{ 0x79, "vcvtps2udq", T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_ER_Z },
{ 0xE6, "vcvtqq2pd", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_ER_Z },
{ 0x7A, "vcvttpd2qq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z },
{ 0x78, "vcvttpd2uqq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z },
{ 0x78, "vcvttps2udq", T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z },
{ 0x7A, "vcvtudq2ps", T_F2 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_ER_Z },
{ 0x7A, "vcvtuqq2pd", T_F3 | T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z },
{ 0x88, "vexpandpd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N8 },
{ 0x88, "vexpandps", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 },
{ 0x89, "vpexpandd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 },
{ 0x89, "vpexpandq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N8 },
{ 0x42, "vgetexppd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z },
{ 0x42, "vgetexpps", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
std::string type = type2String(p->type);
printf("void %s(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p->name, type.c_str(), p->code);
}
puts("void vpabsq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_B64 | T_YMM, 0x1F); }");
puts("void vexp2pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, 0xC8); }");
puts("void vexp2ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xC8); }");
puts("void vrcp28pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, 0xCA); }");
puts("void vrcp28ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xCA); }");
puts("void vrsqrt28pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, 0xCC); }");
puts("void vrsqrt28ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xCC); }");
}
void putM_X()
{
const struct Tbl {
uint8_t code;
const char *name;
int type;
} tbl[] = {
{ 0x7F, "vmovdqa32", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
{ 0x7F, "vmovdqa64", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
{ 0x7F, "vmovdqu8", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
{ 0x7F, "vmovdqu16", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
{ 0x7F, "vmovdqu32", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
{ 0x7F, "vmovdqu64", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
std::string type = type2String(p->type);
printf("void %s(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, %s, 0x%02X); }\n", p->name, type.c_str(), p->code);
}
}
void putXM_X()
{
const struct Tbl {
uint8_t code;
const char *name;
int type;
} tbl[] = {
{ 0x8A, "vcompresspd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N8 },
{ 0x8A, "vcompressps", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 },
{ 0x8B, "vpcompressd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 },
{ 0x8B, "vpcompressq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N8 },
{ 0x63, "vcompressb", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N1 },
{ 0x63, "vcompressw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N2 },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
std::string type = type2String(p->type);
printf("void %s(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p->name, type.c_str(), p->code);
}
}
void putX_X_XM_IMM()
{
const struct Tbl {
uint8_t code;
const char *name;
int type;
bool hasIMM;
} tbl[] = {
{ 0x03, "valignd", T_MUST_EVEX | T_66 | T_0F3A | T_EW0 | T_YMM, true },
{ 0x03, "valignq", T_MUST_EVEX | T_66 | T_0F3A | T_EW1 | T_YMM, true },
{ 0xDB, "vpandd", T_MUST_EVEX | T_YMM | T_66 | T_0F | T_EW0 | T_B32, false },
{ 0xDB, "vpandq", T_MUST_EVEX | T_YMM | T_66 | T_0F | T_EW1 | T_B64, false },
{ 0xDF, "vpandnd", T_MUST_EVEX | T_YMM | T_66 | T_0F | T_EW0 | T_B32, false },
{ 0xDF, "vpandnq", T_MUST_EVEX | T_YMM | T_66 | T_0F | T_EW1 | T_B64, false },
{ 0x3D, "vpmaxsq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
{ 0x3F, "vpmaxuq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
{ 0x39, "vpminsq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
{ 0x3B, "vpminuq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
{ 0xE2, "vpsraq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_N16, false },
{ 0x46, "vpsravq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
{ 0x11, "vpsravw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false },
{ 0x12, "vpsllvw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false },
{ 0x10, "vpsrlvw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false },
{ 0xEB, "vpord", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false },
{ 0xEB, "vporq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
{ 0xEF, "vpxord", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false },
{ 0xEF, "vpxorq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
{ 0x40, "vpmullq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
{ 0x8D, "vpermb", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, false },
{ 0x8D, "vpermw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false },
{ 0x65, "vblendmpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
{ 0x65, "vblendmps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
{ 0x66, "vpblendmb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, false },
{ 0x66, "vpblendmw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, false },
{ 0x64, "vpblendmd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
{ 0x64, "vpblendmq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
{ 0x7D, "vpermt2b", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, false },
{ 0x7D, "vpermt2w", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, false },
{ 0x7E, "vpermt2d", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
{ 0x7E, "vpermt2q", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
{ 0x7F, "vpermt2ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
{ 0x7F, "vpermt2pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
{ 0x75, "vpermi2b", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, false },
{ 0x75, "vpermi2w", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, false },
{ 0x76, "vpermi2d", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
{ 0x76, "vpermi2q", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
{ 0x77, "vpermi2ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
{ 0x77, "vpermi2pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
{ 0x25, "vpternlogd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, true },
{ 0x25, "vpternlogq", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, true },
{ 0x43, "vgetexpsd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_SAE_X | T_N8, false },
{ 0x43, "vgetexpss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, false },
{ 0x27, "vgetmantsd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_SAE_X | T_N8, true },
{ 0x27, "vgetmantss", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, true },
{ 0x54, "vfixupimmpd", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, true },
{ 0x54, "vfixupimmps", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, true },
{ 0x55, "vfixupimmsd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_N8, true },
{ 0x55, "vfixupimmss", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_N4, true },
{ 0x4D, "vrcp14sd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_N8, false },
{ 0x4D, "vrcp14ss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_N4, false },
{ 0x4F, "vrsqrt14sd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, false },
{ 0x4F, "vrsqrt14ss", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, false },
{ 0x0B, "vrndscalesd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, true },
{ 0x0A, "vrndscaless", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, true },
{ 0x2C, "vscalefpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, false },
{ 0x2C, "vscalefps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_ER_Z, false },
{ 0x2D, "vscalefsd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_ER_X | T_N8, false },
{ 0x2D, "vscalefss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_ER_X | T_N4, false },
{ 0x42, "vdbpsadbw", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0, true },
{ 0x83, "vpmultishiftqb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
{ 0x15, "vprolvd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
{ 0x15, "vprolvq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
{ 0x14, "vprorvd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
{ 0x14, "vprorvq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
{ 0xCB, "vrcp28sd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_N8 | T_SAE_X, false },
{ 0xCB, "vrcp28ss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_N4 | T_SAE_X, false },
{ 0xCD, "vrsqrt28sd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_N8 | T_SAE_X, false },
{ 0xCD, "vrsqrt28ss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_N4 | T_SAE_X, false },
{ 0x50, "vrangepd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, true },
{ 0x50, "vrangeps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_SAE_Z, true },
{ 0x51, "vrangesd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_SAE_X | T_N8, true },
{ 0x51, "vrangess", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, true },
{ 0x57, "vreducesd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_SAE_X | T_N8, true },
{ 0x57, "vreducess", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, true },
{ 0xB4, "vpmadd52luq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
{ 0xB5, "vpmadd52huq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
{ 0x70, "vpshldw", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, true },
{ 0x71, "vpshldd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, true },
{ 0x71, "vpshldq", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_B64, true },
{ 0x70, "vpshldvw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, false },
{ 0x71, "vpshldvd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false },
{ 0x71, "vpshldvq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_B64, false },
{ 0x72, "vpshrdw", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, true },
{ 0x73, "vpshrdd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, true },
{ 0x73, "vpshrdq", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_B64, true },
{ 0x72, "vpshrdvw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, false },
{ 0x73, "vpshrdvd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false },
{ 0x73, "vpshrdvq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_B64, false },
{ 0x72, "vcvtne2ps2bf16", T_F2 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false },
{ 0x52, "vdpbf16ps", T_F3 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
std::string type = type2String(p->type);
printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op%s) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X%s); }\n"
, p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : "");
}
}
void putShift()
{
const struct Tbl {
const char *name;
uint8_t code;
int idx;
int type;
} tbl[] = {
{ "vpsraq", 0x72, 4, T_0F | T_66 | T_YMM | T_MUST_EVEX |T_EW1 | T_B64 },
{ "vprold", 0x72, 1, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 },
{ "vprolq", 0x72, 1, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 },
{ "vprord", 0x72, 0, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 },
{ "vprorq", 0x72, 0, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
std::string type = type2String(p.type);
printf("void %s(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), %d), x, op, %s, 0x%02X, imm); }\n", p.name, p.idx, type.c_str(), p.code);
}
}
void putExtractInsert()
{
{
const struct Tbl {
const char *name;
uint8_t code;
int type;
bool isZMM;
} tbl[] = {
{ "vextractf32x4", 0x19, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N16, false },
{ "vextractf64x2", 0x19, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N16, false },
{ "vextractf32x8", 0x1B, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N32, true },
{ "vextractf64x4", 0x1B, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N32, true },
{ "vextracti32x4", 0x39, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N16, false },
{ "vextracti64x2", 0x39, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N16, false },
{ "vextracti32x8", 0x3B, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N32, true },
{ "vextracti64x4", 0x3B, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N32, true },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
std::string type = type2String(p.type);
const char *kind = p.isZMM ? "Operand::MEM | Operand::YMM" : "Operand::MEM | Operand::XMM";
printf("void %s(const Operand& op, const %s& r, uint8_t imm) { if (!op.is(%s)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, %s, 0x%2X, imm); }\n", p.name, p.isZMM ? "Zmm" : "Ymm", kind, type.c_str(), p.code);
}
}
{
const struct Tbl {
const char *name;
uint8_t code;
int type;
bool isZMM;
} tbl[] = {
{ "vinsertf32x4", 0x18, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N16, false },
{ "vinsertf64x2", 0x18, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N16, false },
{ "vinsertf32x8", 0x1A, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N32, true },
{ "vinsertf64x4", 0x1A, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N32, true },
{ "vinserti32x4", 0x38, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N16, false },
{ "vinserti64x2", 0x38, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N16, false },
{ "vinserti32x8", 0x3A, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N32, true },
{ "vinserti64x4", 0x3A, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N32, true },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
std::string type = type2String(p.type);
const char *x = p.isZMM ? "Zmm" : "Ymm";
const char *cond = p.isZMM ? "op.is(Operand::MEM | Operand::YMM)" : "(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))";
printf("void %s(const %s& r1, const %s& r2, const Operand& op, uint8_t imm) {"
"if (!%s) XBYAK_THROW(ERR_BAD_COMBINATION) "
"opVex(r1, &r2, op, %s, 0x%2X, imm); }\n", p.name, x, x, cond, type.c_str(), p.code);
}
}
}
void putBroadcast(bool only64bit)
{
{
const struct Tbl {
uint8_t code;
const char *name;
int type;
int reg;
} tbl[] = {
{ 0x7A, "vpbroadcastb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 8 },
{ 0x7B, "vpbroadcastw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 16 },
{ 0x7C, "vpbroadcastd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 32 },
{ 0x7C, "vpbroadcastq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, 64},
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
std::string type = type2String(p.type);
if ((only64bit && p.reg == 64) || (!only64bit && p.reg != 64)) {
printf("void %s(const Xmm& x, const Reg%d& r) { opVex(x, 0, r, %s, 0x%02X); }\n", p.name, p.reg, type.c_str(), p.code);
}
}
}
if (only64bit) return;
puts("void vbroadcastf32x2(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N8, 0x19); }");
puts("void vbroadcastf32x4(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N16, 0x1A); }");
puts("void vbroadcastf64x2(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N16, 0x1A); }");
puts("void vbroadcastf64x4(const Zmm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N32, 0x1B); }");
puts("void vbroadcastf32x8(const Zmm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N32, 0x1B); }");
puts("void vbroadcasti32x2(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N8, 0x59); }");
puts("void vbroadcasti32x4(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N16, 0x5A); }");
puts("void vbroadcasti64x2(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N16, 0x5A); }");
puts("void vbroadcasti32x8(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N32, 0x5B); }");
puts("void vbroadcasti64x4(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N32, 0x5B); }");
}
void putCvt()
{
puts("void vcvtpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x79); }");
puts("void vcvtps2qq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_ER_Y, 0x7B); }");
puts("void vcvtps2uqq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_ER_Y, 0x79); }");
puts("void vcvtqq2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x5B); }");
puts("void vcvttpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, 0x78); }");
puts("void vcvttps2qq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_SAE_Y, 0x7A); }");
puts("void vcvttps2uqq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_SAE_Y, 0x78); }");
puts("void vcvtudq2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_F3 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL, 0x7A); }");
puts("void vcvtsd2usi(const Reg32e& r, const Operand& op) { int type = (T_F2 | T_0F | T_MUST_EVEX | T_N8 | T_ER_X) | (r.isREG(64) ? T_EW1 : T_EW0); opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, type, 0x79); }");
puts("void vcvtss2usi(const Reg32e& r, const Operand& op) { int type = (T_F3 | T_0F | T_MUST_EVEX | T_N4 | T_ER_X) | (r.isREG(64) ? T_EW1 : T_EW0); opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, type, 0x79); }");
puts("void vcvttsd2usi(const Reg32e& r, const Operand& op) { int type = (T_F2 | T_0F | T_MUST_EVEX | T_N8 | T_SAE_X) | (r.isREG(64) ? T_EW1 : T_EW0); opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, type, 0x78); }");
puts("void vcvttss2usi(const Reg32e& r, const Operand& op) { int type = (T_F3 | T_0F | T_MUST_EVEX | T_N4 | T_SAE_X) | (r.isREG(64) ? T_EW1 : T_EW0); opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, type, 0x78); }");
puts("void vcvtuqq2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F2 | T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x7A); }");
puts("void vcvtusi2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F2 | T_0F | T_MUST_EVEX, T_W1 | T_EW1 | T_ER_X | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); }");
puts("void vcvtusi2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F3 | T_0F | T_MUST_EVEX | T_ER_X, T_W1 | T_EW1 | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); }");
}
enum { // same as xbyak.h
xx_yy_zz = 0,
xx_yx_zy = 1,
xx_xy_yz = 2,
};
void putGather()
{
const struct Tbl {
const char *name;
int type;
uint8_t code;
int mode;
} tbl[] = {
{ "vpgatherdd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, 0x90, xx_yy_zz },
{ "vpgatherdq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, 0x90, xx_yx_zy },
{ "vpgatherqd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, 0x91, xx_xy_yz },
{ "vpgatherqq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, 0x91, xx_yy_zz },
{ "vgatherdps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, 0x92, xx_yy_zz },
{ "vgatherdpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, 0x92, xx_yx_zy },
{ "vgatherqps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, 0x93, xx_xy_yz },
{ "vgatherqpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, 0x93, xx_yy_zz },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
std::string type = type2String(p.type | T_VSIB);
printf("void %s(const Xmm& x, const Address& addr) { opGather2(x, addr, %s, 0x%02X, %d); }\n", p.name, type.c_str(), p.code, p.mode);
}
}
void putScatter()
{
const struct Tbl {
const char *name;
int type;
uint8_t code;
int mode; // reverse of gather
} tbl[] = {
{ "vpscatterdd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4 | T_M_K, 0xA0, xx_yy_zz },
{ "vpscatterdq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8 | T_M_K, 0xA0, xx_yx_zy },
{ "vpscatterqd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4 | T_M_K, 0xA1, xx_xy_yz },
{ "vpscatterqq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8 | T_M_K, 0xA1, xx_yy_zz },
{ "vscatterdps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4 | T_M_K, 0xA2, xx_yy_zz },
{ "vscatterdpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8 | T_M_K, 0xA2, xx_yx_zy },
{ "vscatterqps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4 | T_M_K, 0xA3, xx_xy_yz },
{ "vscatterqpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8 | T_M_K, 0xA3, xx_yy_zz },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
std::string type = type2String(p.type | T_VSIB);
printf("void %s(const Address& addr, const Xmm& x) { opGather2(x, addr, %s, 0x%02X, %d); }\n", p.name, type.c_str(), p.code, p.mode);
}
}
void putShuff()
{
puts("void vshuff32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x23, imm); }");
puts("void vshuff64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x23, imm); }");
puts("void vshufi32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x43, imm); }");
puts("void vshufi64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x43, imm); }");
}
void putMov()
{
puts("void vpmovm2b(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x28); }");
puts("void vpmovm2w(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x28); }");
puts("void vpmovm2d(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x38); }");
puts("void vpmovm2q(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x38); }");
puts("void vpmovb2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x29); }");
puts("void vpmovw2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x29); }");
puts("void vpmovd2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x39); }");
puts("void vpmovq2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x39); }");
{
const struct Tbl {
uint8_t code;
const char *name;
int type;
int mode;
} tbl[] = {
{ 0x32, "vpmovqb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL, false },
{ 0x22, "vpmovsqb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL, false },
{ 0x12, "vpmovusqb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL, false },
{ 0x34, "vpmovqw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false },
{ 0x24, "vpmovsqw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false },
{ 0x14, "vpmovusqw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false },
{ 0x35, "vpmovqd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
{ 0x25, "vpmovsqd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
{ 0x15, "vpmovusqd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
{ 0x31, "vpmovdb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false },
{ 0x21, "vpmovsdb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false },
{ 0x11, "vpmovusdb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false },
{ 0x33, "vpmovdw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
{ 0x23, "vpmovsdw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
{ 0x13, "vpmovusdw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
{ 0x30, "vpmovwb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
{ 0x20, "vpmovswb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
{ 0x10, "vpmovuswb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
std::string type = type2String(p.type);
printf("void %s(const Operand& op, const Xmm& x) { opVmov(op, x, %s, 0x%02X, %s); }\n", p.name, type.c_str(), p.code, p.mode ? "true" : "false");
}
}
}
void putX_XM_IMM()
{
const struct Tbl {
uint8_t code;
const char *name;
int type;
bool hasIMM;
} tbl[] = {
{ 0x26, "vgetmantpd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, true },
{ 0x26, "vgetmantps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_SAE_Z, true },
{ 0x4C, "vrcp14pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
{ 0x4C, "vrcp14ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
{ 0x4E, "vrsqrt14pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
{ 0x4E, "vrsqrt14ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
{ 0x09, "vrndscalepd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, true },
{ 0x08, "vrndscaleps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, true },
{ 0xC4, "vpconflictd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
{ 0xC4, "vpconflictq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
{ 0x44, "vplzcntd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
{ 0x44, "vplzcntq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
{ 0x56, "vreducepd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, true },
{ 0x56, "vreduceps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_SAE_Z, true },
{ 0x54, "vpopcntb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z, false },
{ 0x54, "vpopcntw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, false },
{ 0x55, "vpopcntd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false },
{ 0x55, "vpopcntq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_B64, false },
{ 0x62, "vpexpandb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_N1, false },
{ 0x62, "vpexpandw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_N2, false },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
std::string type = type2String(p->type);
printf("void %s(const Xmm& x, const Operand& op%s) { opAVX_X_XM_IMM(x, op, %s, 0x%02X%s); }\n"
, p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : "");
}
}
void putMisc()
{
puts("void vpbroadcastmb2q(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, 0x2A); }");
puts("void vpbroadcastmw2d(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 0x3A); }");
{
const struct Tbl {
const char *name;
int zm;
int type;
uint8_t code;
bool isZmm;
} tbl[] = {
{ "vgatherpf0dps", 1, T_EW0 | T_N4, 0xC6, true },
{ "vgatherpf0qps", 1, T_EW0 | T_N4, 0xC7, true },
{ "vgatherpf0dpd", 1, T_EW1 | T_N8, 0xC6, false },
{ "vgatherpf0qpd", 1, T_EW1 | T_N8, 0xC7, true },
{ "vgatherpf1dps", 2, T_EW0 | T_N4, 0xC6, true },
{ "vgatherpf1qps", 2, T_EW0 | T_N4, 0xC7, true },
{ "vgatherpf1dpd", 2, T_EW1 | T_N8, 0xC6, false },
{ "vgatherpf1qpd", 2, T_EW1 | T_N8, 0xC7, true },
{ "vscatterpf0dps", 5, T_EW0 | T_N4, 0xC6, true },
{ "vscatterpf0qps", 5, T_EW0 | T_N4, 0xC7, true },
{ "vscatterpf0dpd", 5, T_EW1 | T_N8, 0xC6, false },
{ "vscatterpf0qpd", 5, T_EW1 | T_N8, 0xC7, true },
{ "vscatterpf1dps", 6, T_EW0 | T_N4, 0xC6, true },
{ "vscatterpf1qps", 6, T_EW0 | T_N4, 0xC7, true },
{ "vscatterpf1dpd", 6, T_EW1 | T_N8, 0xC6, false },
{ "vscatterpf1qpd", 6, T_EW1 | T_N8, 0xC7, true },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
std::string type = type2String(p.type | T_66 | T_0F38 | T_MUST_EVEX | T_M_K | T_VSIB);
printf("void %s(const Address& addr) { opGatherFetch(addr, zm%d, %s, 0x%2X, Operand::%s); }\n"
, p.name, p.zm, type.c_str(), p.code, p.isZmm ? "ZMM" : "YMM");
}
}
puts("void vfpclasspd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); }");
puts("void vfpclassps(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }");
puts("void vfpclasssd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, 0x67, imm); }");
puts("void vfpclassss(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, 0x67, imm); }");
puts("void vpshufbitqmb(const Opmask& k, const Xmm& x, const Operand& op) { opVex(k, &x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8F); }");
puts("void vcvtneps2bf16(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); }");
puts("void vp2intersectd(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW0 | T_B32, 0x68); }");
puts("void vp2intersectq(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW1 | T_B64, 0x68); }");
}
void putV4FMA()
{
puts("void v4fmaddps(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x9A); }");
puts("void v4fnmaddps(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0xAA); }");
puts("void v4fmaddss(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_F2 | T_EW0 | T_MUST_EVEX | T_N16, 0x9B); }");
puts("void v4fnmaddss(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_F2 | T_EW0 | T_MUST_EVEX | T_N16, 0xAB); }");
puts("void vp4dpwssd(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x52); }");
puts("void vp4dpwssds(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x53); }");
}
int main(int argc, char *[])
{
bool only64bit = argc == 2;
putOpmask(only64bit);
putBroadcast(only64bit);
if (only64bit) {
return 0;
}
putVcmp();
putX_XM();
putM_X();
putXM_X();
putX_X_XM_IMM();
putShift();
putExtractInsert();
putCvt();
putGather();
putShuff();
putMov();
putX_XM_IMM();
putMisc();
putScatter();
putV4FMA();
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,23 @@
#include <iostream>
#include <fstream>
#include <string>
#include <set>
typedef std::set<std::string> StrSet;
int main()
{
StrSet ss;
std::string line;
while (std::getline(std::cin, line)) {
if (!line.empty() && line[line.size() - 1] == '\n') {
line.resize(line.size() - 1);
}
if (!line.empty()) {
ss.insert(line);
}
}
for (StrSet::const_iterator i = ss.begin(), ie = ss.end(); i != ie; ++i) {
std::cout << *i << std::endl;
}
}

View File

@ -0,0 +1,17 @@
@echo off
set OPT=/EHsc -I../ /W4 -D_CRT_SECURE_NO_WARNINGS
set TARGET=..\\xbyak\\xbyak_mnemonic.h
set SORT=sortline
cl gen_code.cpp %OPT%
gen_code | %SORT% > %TARGET%
echo #ifdef XBYAK_ENABLE_OMITTED_OPERAND>> %TARGET%
gen_code omit | %SORT% >> %TARGET%
echo #endif>>%TARGET%
gen_code fixed >> %TARGET%
cl gen_avx512.cpp %OPT%
echo #ifndef XBYAK_DISABLE_AVX512>> %TARGET%
gen_avx512 | %SORT% >> %TARGET%
echo #ifdef XBYAK64>> %TARGET%
gen_avx512 64 | %SORT% >> %TARGET%
echo #endif>> %TARGET%
echo #endif>> %TARGET%

View File

@ -1,11 +1,14 @@
[![Build Status](https://travis-ci.org/herumi/xbyak.png)](https://travis-ci.org/herumi/xbyak)
[![Build Status](https://github.com/herumi/xbyak/actions/workflows/main.yml/badge.svg)](https://github.com/herumi/xbyak/actions/workflows/main.yml)
# Xbyak 5.891 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
# Xbyak 5.992 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
## Abstract
Xbyak is a C++ header library that enables dynamically to assemble x86(IA32), x64(AMD64, x86-64) mnemonic.
The pronunciation of Xbyak is `kəi-bja-k`.
It is named from a Japanese word [開闢](https://translate.google.com/?hl=ja&sl=ja&tl=en&text=%E9%96%8B%E9%97%A2&op=translate), which means the beginning of the world.
## Feature
* header file only
* Intel/MASM like syntax
@ -15,6 +18,13 @@ Xbyak is a C++ header library that enables dynamically to assemble x86(IA32), x6
Use `and_()`, `or_()`, ... instead of `and()`, `or()`.
If you want to use them, then specify `-fno-operator-names` option to gcc/clang.
### News
- vnni instructions such as vpdpbusd supports vex encoding.
- (break backward compatibility) `push(byte, imm)` (resp. `push(word, imm)`) forces to cast `imm` to 8(resp. 16) bit.
- (Windows) `#include <winsock2.h>` has been removed from xbyak.h, so add it explicitly if you need it.
- support exception-less mode see. [Exception-less mode](#exception-less-mode)
- `XBYAK_USE_MMAP_ALLOCATOR` will be defined on Linux/macOS unless `XBYAK_DONT_USE_MMAP_ALLOCATOR` is defined.
### Supported OS
* Windows Xp, Vista, Windows 7, Windows 10(32bit, 64bit)
@ -148,9 +158,15 @@ vcvtpd2dq xmm19, [eax+32]{1to4} --> vcvtpd2dq(xmm19, yword_b [eax+32]);
vfpclassps k5{k3}, zword [rax+64], 5 --> vfpclassps(k5|k3, zword [rax+64], 5); // specify m512
vfpclasspd k5{k3}, [rax+64]{1to2}, 5 --> vfpclasspd(k5|k3, xword_b [rax+64], 5); // broadcast 64-bit to 128-bit
vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, yword_b [rax+64], 5); // broadcast 64-bit to 256-bit
vpdpbusd(xm0, xm1, xm2); // default encoding is EVEX
vpdpbusd(xm0, xm1, xm2, EvexEncoding); // same as the above
vpdpbusd(xm0, xm1, xm2, VexEncoding); // VEX encoding
```
### Remark
* `k1`, ..., `k7` are opmask registers.
- `k0` is dealt as no mask.
- e.g. `vmovaps(zmm0|k0, ptr[rax]);` and `vmovaps(zmm0|T_z, ptr[rax]);` are same to `vmovaps(zmm0, ptr[rax]);`.
* use `| T_z`, `| T_sae`, `| T_rn_sae`, `| T_rd_sae`, `| T_ru_sae`, `| T_rz_sae` instead of `,{z}`, `,{sae}`, `,{rn-sae}`, `,{rd-sae}`, `,{ru-sae}`, `,{rz-sae}` respectively.
* `k4 | k3` is different from `k3 | k4`.
* use `ptr_b` for broadcast `{1toX}`. X is automatically determined.
@ -331,9 +347,9 @@ public:
## User allocated memory
You can make jit code on prepaired memory.
You can make jit code on prepared memory.
Call `setProtectModeRE` yourself to change memory mode if using the prepaired memory.
Call `setProtectModeRE` yourself to change memory mode if using the prepared memory.
```
uint8_t alignas(4096) buf[8192]; // C++11 or later
@ -400,15 +416,22 @@ c.setProtectModeRE();
Call `readyRE()` instead of `ready()` when using `AutoGrow` mode.
See [protect-re.cpp](sample/protect-re.cpp).
## Exception-less mode
If `XBYAK_NO_EXCEPTION` is defined, then gcc/clang can compile xbyak with `-fno-exceptions`.
In stead of throwing an exception, `Xbyak::GetError()` returns non-zero value (e.g. `ERR_BAD_ADDRESSING`) if there is something wrong.
The status will not be changed automatically, then you should reset it by `Xbyak::ClearError()`.
`CodeGenerator::reset()` calls `ClearError()`.
## Macro
* **XBYAK32** is defined on 32bit.
* **XBYAK64** is defined on 64bit.
* **XBYAK64_WIN** is defined on 64bit Windows(VC)
* **XBYAK64_GCC** is defined on 64bit gcc, cygwin
* **XBYAK64_WIN** is defined on 64bit Windows(VC).
* **XBYAK64_GCC** is defined on 64bit gcc, cygwin.
* define **XBYAK_USE_OP_NAMES** on gcc with `-fno-operator-names` if you want to use `and()`, ....
* define **XBYAK_ENABLE_OMITTED_OPERAND** if you use omitted destination such as `vaddps(xmm2, xmm3);`(deprecated in the future)
* define **XBYAK_UNDEF_JNL** if Bessel function jnl is defined as macro
* define **XBYAK_ENABLE_OMITTED_OPERAND** if you use omitted destination such as `vaddps(xmm2, xmm3);`(deprecated in the future).
* define **XBYAK_UNDEF_JNL** if Bessel function jnl is defined as macro.
* define **XBYAK_NO_EXCEPTION** for a compiler option `-fno-exceptions`.
## Sample
@ -423,6 +446,21 @@ modified new BSD License
http://opensource.org/licenses/BSD-3-Clause
## History
* 2021/May/09 ver 5.992 support endbr32 and endbr64
* 2020/Nov/16 ver 5.991 disable constexpr for gcc-5 with -std=c++-14
* 2020/Oct/19 ver 5.99 support VNNI instructions(Thanks to akharito)
* 2020/Oct/17 ver 5.98 support the form of [scale * reg]
* 2020/Sep/08 ver 5.97 replace uint32 with uint32_t etc.
* 2020/Aug/28 ver 5.95 some constructors of register classes support constexpr if C++14 or later
* 2020/Aug/04 ver 5.941 `CodeGenerator::reset()` calls `ClearError()`.
* 2020/Jul/28 ver 5.94 remove #include <winsock2.h> (only windows)
* 2020/Jul/21 ver 5.93 support exception-less mode
* 2020/Jun/30 ver 5.92 support Intel AMX instruction set (Thanks to nshustrov)
* 2020/Jun/22 ver 5.913 fix mov(r64, imm64) on 32-bit env with XBYAK64
* 2020/Jun/19 ver 5.912 define MAP_JIT on macOS regardless of Xcode version (Thanks to rsdubtso)
* 2020/May/10 ver 5.911 XBYAK_USE_MMAP_ALLOCATOR is defined unless XBYAK_DONT_USE_MMAP_ALLOCATOR is defined.
* 2020/Apr/20 ver 5.91 accept mask register k0 (it means no mask)
* 2020/Apr/09 ver 5.90 kmov{b,d,w,q} throws exception for an unsupported register
* 2020/Feb/26 ver 5.891 fix typo of type
* 2020/Jan/03 ver 5.89 fix error of vfpclasspd
* 2019/Dec/20 ver 5.88 fix compile error on Windows
@ -575,3 +613,5 @@ http://opensource.org/licenses/BSD-3-Clause
## Author
MITSUNARI Shigeo(herumi@nifty.com)
## Sponsors welcome
[GitHub Sponsor](https://github.com/sponsors/herumi)

View File

@ -1,5 +1,5 @@
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.891
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.992
-----------------------------------------------------------------------------
◎概要
@ -29,12 +29,14 @@ and, orなどを使いたい場合は-fno-operator-namesをgcc/clangに指定し
◎準備
xbyak.h
xbyak_bin2hex.h
xbyak_mnemonic.h
これらを同一のパスに入れてインクルードパスに追加してください。
Linuxではmake installで/usr/local/include/xbyakにコピーされます。
-----------------------------------------------------------------------------
◎下位互換性の破れ
* push byte, immまたはpush word, immが下位8bit, 16bitにキャストした値を使うように変更。
* (Windows) `<winsock2.h>`をincludeしなくなったので必要なら明示的にincludeしてください。
* XBYAK_USE_MMAP_ALLOCATORがデフォルトで有効になりました。従来の方式にする場合はXBYAK_DONT_USE_MMAP_ALLOCATORを定義してください。
* Xbyak::Errorの型をenumからclassに変更
** 従来のenumの値をとるにはintにキャストしてください。
* (古い)Reg32eクラスを(新しい)Reg32eとRegExpに分ける。
@ -44,6 +46,13 @@ Linuxではmake installで/usr/local/include/xbyakにコピーされます。
-----------------------------------------------------------------------------
◎新機能
例外なしモード追加
XBYAK_NO_EXCEPTIONを定義してコンパイルするとgcc/clangで-fno-exceptionsオプションでコンパイルできます。
エラーは例外の代わりに`Xbyak::GetError()`で通達されます。
この値が0でなければ何か問題が発生しています。
この値は自動的に変更されないので`Xbyak::ClearError()`でリセットしてください。
`CodeGenerator::reset()`は`ClearError()`を呼びます。
MmapAllocator追加
これはUnix系OSでのみの仕様です。XBYAK_USE_MMAP_ALLOCATORを使うと利用できます。
デフォルトのAllocatorはメモリ確保時にposix_memalignを使います。
@ -52,7 +61,6 @@ map countの最大値は/proc/sys/vm/max_map_countに書かれています。
デフォルトでは3万個ほどのXbyak::CodeGeneratorインスタンスを生成するとエラーになります。
test/mprotect_test.cppで確認できます。
これを避けるためにはmmapを使うMmapAllocatorを使ってください。
将来この挙動がデフォルトになるかもしれません。
AutoGrowモード追加
@ -155,6 +163,9 @@ vfpclassps k5{k3}, zword [rax+64], 5 --> vfpclassps(k5|k3, zword [rax+64], 5)
vfpclasspd k5{k3}, [rax+64]{1to2}, 5 --> vfpclasspd(k5|k3, xword_b [rax+64], 5); // broadcast 64-bit to 128-bit
vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, xword_b [rax+64], 5); // broadcast 64-bit to 256-bit
vpdpbusd(xm0, xm1, xm2); // default encoding is EVEX
vpdpbusd(xm0, xm1, xm2, EvexEncoding); // same as the above
vpdpbusd(xm0, xm1, xm2, VexEncoding); // VEX encoding
注意
* k1, ..., k7 は新しいopmaskレジスタです。
@ -371,6 +382,21 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から
-----------------------------------------------------------------------------
◎履歴
2021/05/09 ver 5.992 endbr32とendbr64のサポート
2020/11/16 ver 5.991 g++-5のC++14でconstexpr機能の抑制
2020/10/19 ver 5.99 VNNI命令サポート(Thanks to akharito)
2020/10/17 ver 5.98 [scale * reg]のサポート
2020/09/08 ver 5.97 uint32などをuint32_tに置換
2020/08/28 ver 5.95 レジスタクラスのコンストラクタがconstexprに対応(C++14以降)
2020/08/04 ver 5.941 `CodeGenerator::reset()`が`ClearError()`を呼ぶように変更
2020/07/28 ver 5.94 #include <winsock2.h>の削除 (only windows)
2020/07/21 ver 5.93 例外なしモード追加
2020/06/30 ver 5.92 Intel AMX命令サポート (Thanks to nshustrov)
2020/06/19 ver 5.913 32ビット環境でXBYAK64を定義したときのmov(r64, imm64)を修正
2020/06/19 ver 5.912 macOSの古いXcodeでもMAP_JITを有効にする(Thanks to rsdubtso)
2020/05/10 ver 5.911 Linux/macOSでXBYAK_USE_MMAP_ALLOCATORがデフォルト有効になる
2020/04/20 ver 5.91 マスクレジスタk0を受け入れる(マスクをしない)
2020/04/09 ver 5.90 kmov{b,w,d,q}がサポートされないレジスタを受けると例外を投げる
2020/02/26 ver 5.891 zm0のtype修正
2020/01/03 ver 5.89 vfpclasspdの処理エラー修正
2019/12/20 ver 5.88 Windowsでのコンパイルエラー修正

View File

@ -1,10 +1,12 @@
TARGET = test quantize bf toyvm test_util memfunc static_buf jmp_table
XBYAK_INC=../xbyak/xbyak.h
BOOST_EXIST=$(shell echo "\#include <boost/spirit/core.hpp>" | (gcc -E - 2>/dev/null) | grep "boost/spirit/core.hpp" >/dev/null && echo "1")
UNAME_M=$(shell uname -m)
ONLY_64BIT=0
ifeq ($(shell uname -s),Darwin)
ONLY_64BIT=1
OS=mac
ifeq ($(UNAME_M),x86_64)
BIT=64
endif
@ -27,19 +29,27 @@ else
endif
ifeq ($(BIT),64)
TARGET += test64 bf64 memfunc64 test_util64 static_buf64 jmp_table64
TARGET += test64 bf64 memfunc64 test_util64 jmp_table64
ifeq ($(BOOST_EXIST),1)
TARGET += calc64 #calc2_64
endif
endif
ifneq ($(OS),mac)
TARGET += static_buf64
endif
ifneq ($(ONLY_64BIT),1)
TARGET += test quantize bf toyvm test_util memfunc static_buf jmp_table
ifeq ($(BOOST_EXIST),1)
TARGET += calc #calc2
TARGET += calc #calc2
endif
endif
all: $(TARGET)
CFLAGS_WARN=-Wall -Wextra -Wformat=2 -Wcast-qual -Wcast-align -Wwrite-strings -Wfloat-equal -Wpointer-arith -pedantic
CFLAGS_WARN=-Wall -Wextra -Wformat=2 -Wcast-qual -Wcast-align -Wwrite-strings -Wfloat-equal -Wpointer-arith #-pedantic
CFLAGS=-g -O2 -fomit-frame-pointer -Wall -I../ $(CFLAGS_WARN)

View File

@ -148,7 +148,7 @@ public:
}
};
void dump(const Xbyak::uint8 *code, size_t size)
void dump(const uint8_t *code, size_t size)
{
puts("#include <stdio.h>\nstatic int stack[128 * 1024];");
#ifdef _MSC_VER

View File

@ -102,7 +102,7 @@ private:
MAX_CONST_NUM = 32
};
MIE_ALIGN(16) double constTbl_[MAX_CONST_NUM];
Xbyak::uint64 negConst_;
Xbyak::uint64_t negConst_;
size_t constTblPos_;
#ifdef XBYAK32
const Xbyak::Reg32& varTbl_;
@ -118,7 +118,7 @@ public:
64bit: x [rcx](win), xmm0(gcc), return xmm0
*/
Jit()
: negConst_(Xbyak::uint64(1) << 63)
: negConst_(Xbyak::uint64_t(1) << 63)
, constTblPos_(0)
#ifdef XBYAK32
, varTbl_(eax)

View File

@ -48,9 +48,6 @@
#pragma warning(disable : 4996) // scanf
#endif
typedef Xbyak::uint64 uint64;
typedef Xbyak::uint32 uint32;
const int N = 64;
class Quantize : public Xbyak::CodeGenerator {
@ -66,7 +63,7 @@ public:
output : eax = [esi+offset] / dividend
destroy : edx
*/
void udiv(uint32 dividend, int offset)
void udiv(uint32_t dividend, int offset)
{
mov(eax, ptr[esi + offset]);
@ -83,11 +80,11 @@ public:
return;
}
uint64 mLow, mHigh;
uint64_t mLow, mHigh;
int len = ilog2(odd) + 1;
{
uint64 roundUp = uint64(1) << (32 + len);
uint64 k = roundUp / (0xFFFFFFFFL - (0xFFFFFFFFL % odd));
uint64_t roundUp = uint64_t(1) << (32 + len);
uint64_t k = roundUp / (0xFFFFFFFFL - (0xFFFFFFFFL % odd));
mLow = roundUp / odd;
mHigh = (roundUp + k) / odd;
}
@ -96,12 +93,12 @@ public:
mLow >>= 1; mHigh >>= 1; len--;
}
uint64 m; int a;
uint64_t m; int a;
if ((mHigh >> 32) == 0) {
m = mHigh; a = 0;
} else {
len = ilog2(odd);
uint64 roundDown = uint64(1) << (32 + len);
uint64_t roundDown = uint64_t(1) << (32 + len);
mLow = roundDown / odd;
int r = (int)(roundDown % odd);
m = (r <= (odd >> 1)) ? mLow : mLow + 1;
@ -124,9 +121,9 @@ public:
mov(eax, edx);
}
/*
quantize(uint32 dest[64], const uint32 src[64]);
quantize(uint32_t dest[64], const uint32_t src[64]);
*/
Quantize(const uint32 qTbl[64])
Quantize(const uint32_t qTbl[64])
{
push(esi);
push(edi);
@ -143,7 +140,7 @@ public:
}
};
void quantize(uint32 dest[64], const uint32 src[64], const uint32 qTbl[64])
void quantize(uint32_t dest[64], const uint32_t src[64], const uint32_t qTbl[64])
{
for (int i = 0; i < N; i++) {
dest[i] = src[i] / qTbl[i];
@ -170,7 +167,7 @@ int main(int argc, char *argv[])
}
}
printf("q=%d\n", q);
uint32 qTbl[] = {
uint32_t qTbl[] = {
16, 11, 10, 16, 24, 40, 51, 61,
12, 12, 14, 19, 26, 58, 60, 55,
14, 13, 16, 24, 40, 57, 69, 56,
@ -187,16 +184,16 @@ int main(int argc, char *argv[])
}
try {
uint32 src[N];
uint32 dest[N];
uint32 dest2[N];
uint32_t src[N];
uint32_t dest[N];
uint32_t dest2[N];
for (int i = 0; i < N; i++) {
src[i] = rand() % 2048;
}
Quantize jit(qTbl);
//printf("jit size=%d, ptr=%p\n", jit.getSize(), jit.getCode());
void (*quantize2)(uint32*, const uint32*, const uint32 *) = jit.getCode<void (*)(uint32*, const uint32*, const uint32 *)>();
void (*quantize2)(uint32_t*, const uint32_t*, const uint32_t *) = jit.getCode<void (*)(uint32_t*, const uint32_t*, const uint32_t *)>();
quantize(dest, src, qTbl);
quantize2(dest2, src, qTbl);

View File

@ -163,15 +163,15 @@ int main()
// use memory allocated by user
using namespace Xbyak;
const size_t codeSize = 4096;
uint8 buf[codeSize + 16];
uint8 *p = CodeArray::getAlignedAddress(buf);
uint8_t buf[codeSize + 16];
uint8_t *p = CodeArray::getAlignedAddress(buf);
Sample s(p, codeSize);
if (!CodeArray::protect(p, codeSize, CodeArray::PROTECT_RWE)) {
fprintf(stderr, "can't protect\n");
return 1;
}
int (*func)(int) = s.getCode<int (*)(int)>();
const uint8 *funcp = reinterpret_cast<const uint8*>(func);
const uint8_t *funcp = reinterpret_cast<const uint8_t*>(func);
if (funcp != p) {
fprintf(stderr, "internal error %p %p\n", p, funcp);
return 1;

View File

@ -1,12 +1,13 @@
#include <stdio.h>
#define XBYAK_NO_OP_NAMES
#include "xbyak/xbyak_util.h"
#define NUM_OF_ARRAY(x) (sizeof(x) / sizeof(x[0]))
struct PopCountTest : public Xbyak::CodeGenerator {
PopCountTest(int n)
: Xbyak::CodeGenerator(4096, Xbyak::DontSetProtectRWE)
{
ret();
mov(eax, n);
popcnt(eax, eax);
ret();
@ -80,6 +81,10 @@ void putCPUinfo()
{ Cpu::tAVX512_VPOPCNTDQ, "avx512_vpopcntdq" },
{ Cpu::tAVX512_BF16, "avx512_bf16" },
{ Cpu::tAVX512_VP2INTERSECT, "avx512_vp2intersect" },
{ Cpu::tAMX_TILE, "amx(tile)" },
{ Cpu::tAMX_INT8, "amx(int8)" },
{ Cpu::tAMX_BF16, "amx(bf16)" },
{ Cpu::tAVX_VNNI, "avx_vnni" },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
if (cpu.has(tbl[i].type)) printf(" %s", tbl[i].str);
@ -88,12 +93,16 @@ void putCPUinfo()
if (cpu.has(Cpu::tPOPCNT)) {
const int n = 0x12345678; // bitcount = 13
const int ok = 13;
int r = PopCountTest(n).getCode<int (*)()>()();
PopCountTest code(n);
code.setProtectModeRE();
int (*f)() = code.getCode<int (*)()>();
int r = f();
if (r == ok) {
puts("popcnt ok");
} else {
printf("popcnt ng %d %d\n", r, ok);
}
code.setProtectModeRW();
}
/*
displayFamily displayModel

View File

@ -39,7 +39,7 @@
using namespace Xbyak;
class ToyVm : public Xbyak::CodeGenerator {
typedef std::vector<uint32> Buffer;
typedef std::vector<uint32_t> Buffer;
public:
enum Reg {
A, B
@ -53,14 +53,14 @@ public:
{
::memset(mem_, 0, sizeof(mem_));
}
void vldi(Reg r, uint16 imm) { encode(LDI, r, imm); }
void vld(Reg r, uint16 idx) { encode(LD, r, idx); }
void vst(Reg r, uint16 idx) { encode(ST, r, idx); }
void vadd(Reg r, uint16 idx) { encode(ADD, r, idx); }
void vaddi(Reg r, uint16 imm) { encode(ADDI, r, imm); }
void vsub(Reg r, uint16 idx) { encode(SUB, r, idx); }
void vsubi(Reg r, uint16 imm) { encode(SUBI, r, imm); }
void vjnz(Reg r, int offset) { encode(JNZ, r, static_cast<uint16>(offset)); }
void vldi(Reg r, uint16_t imm) { encode(LDI, r, imm); }
void vld(Reg r, uint16_t idx) { encode(LD, r, idx); }
void vst(Reg r, uint16_t idx) { encode(ST, r, idx); }
void vadd(Reg r, uint16_t idx) { encode(ADD, r, idx); }
void vaddi(Reg r, uint16_t imm) { encode(ADDI, r, imm); }
void vsub(Reg r, uint16_t idx) { encode(SUB, r, idx); }
void vsubi(Reg r, uint16_t imm) { encode(SUBI, r, imm); }
void vjnz(Reg r, int offset) { encode(JNZ, r, static_cast<uint16_t>(offset)); }
void vput(Reg r) { encode(PUT, r); }
void setMark()
{
@ -73,12 +73,12 @@ public:
void run()
{
bool debug = false;//true;
uint32 reg[2] = { 0, 0 };
uint32_t reg[2] = { 0, 0 };
const size_t end = code_.size();
uint32 pc = 0;
uint32_t pc = 0;
for (;;) {
uint32 x = code_[pc];
uint32 code, r, imm;
uint32_t x = code_[pc];
uint32_t code, r, imm;
decode(code, r, imm, x);
if (debug) {
printf("---\n");
@ -149,11 +149,11 @@ public:
xor_(edi, edi);
mov(mem, (size_t)mem_);
const size_t end = code_.size();
uint32 pc = 0;
uint32 labelNum = 0;
uint32_t pc = 0;
uint32_t labelNum = 0;
for (;;) {
uint32 x = code_[pc];
uint32 code, r, imm;
uint32_t x = code_[pc];
uint32_t code, r, imm;
decode(code, r, imm, x);
L(Label::toStr(labelNum++));
switch (code) {
@ -229,18 +229,18 @@ public:
ret();
}
private:
uint32 mem_[65536];
uint32_t mem_[65536];
Buffer code_;
int mark_;
void decode(uint32& code, uint32& r, uint32& imm, uint32 x)
void decode(uint32_t& code, uint32_t& r, uint32_t& imm, uint32_t x)
{
code = x >> 24;
r = (x >> 16) & 0xff;
imm = x & 0xffff;
}
void encode(Code code, Reg r, uint16 imm = 0)
void encode(Code code, Reg r, uint16_t imm = 0)
{
uint32 x = (code << 24) | (r << 16) | imm;
uint32_t x = (code << 24) | (r << 16) | imm;
code_.push_back(x);
}
};
@ -262,7 +262,7 @@ public:
*/
vldi(A, 1); // c
vst(A, 0); // p(1)
vldi(B, static_cast<uint16>(n));
vldi(B, static_cast<uint16_t>(n));
vst(B, 2); // n
// lp
setMark();
@ -283,9 +283,9 @@ public:
}
};
void fibC(uint32 n)
void fibC(uint32_t n)
{
uint32 p, c, t;
uint32_t p, c, t;
p = 1;
c = 1;
lp:

View File

@ -1,9 +1,18 @@
TARGET = make_nm normalize_prefix jmp address bad_address misc cvt_test cvt_test32
TARGET = make_nm normalize_prefix bad_address misc cvt_test cvt_test32 noexception
XBYAK_INC=../xbyak/xbyak.h
UNAME_S=$(shell uname -s)
BIT=32
ifeq ($(shell uname -m),x86_64)
BIT=64
endif
ONLY_64BIT=0
ifeq ($(UNAME_S),Darwin)
# 32-bit binary is not supported
ONLY_64BIT=1
endif
ifeq ($(ONLY_64BIT),0)
TARGET += jmp address
endif
ifeq ($(BIT),64)
TARGET += jmp64 address64
@ -36,18 +45,24 @@ cvt_test: cvt_test.cpp ../xbyak/xbyak.h
$(CXX) $(CFLAGS) $< -o $@
cvt_test32: cvt_test.cpp ../xbyak/xbyak.h
$(CXX) $(CFLAGS) $< -o $@ -DXBYAK32
noexception: noexception.cpp ../xbyak/xbyak.h
$(CXX) $(CFLAGS) $< -o $@ -fno-exceptions
test_nm: normalize_prefix jmp bad_address $(TARGET)
test_nm: normalize_prefix $(TARGET)
$(MAKE) -C ../gen
ifneq ($(ONLY_64BIT),1)
./test_nm.sh
./test_nm.sh noexcept
./noexception
./test_nm.sh Y
./test_nm.sh avx512
./test_address.sh
./jmp
./cvt_test32
endif
./bad_address
./misc
./cvt_test
./cvt_test32
ifeq ($(BIT),64)
./test_address.sh 64
./test_nm.sh 64
@ -56,8 +71,10 @@ ifeq ($(BIT),64)
endif
test_avx: normalize_prefix
ifneq ($(ONLY_64BIT),0)
./test_avx.sh
./test_avx.sh Y
endif
ifeq ($(BIT),64)
./test_address.sh 64
./test_avx.sh 64
@ -65,7 +82,9 @@ ifeq ($(BIT),64)
endif
test_avx512: normalize_prefix
ifneq ($(ONLY_64BIT),0)
./test_avx512.sh
endif
ifeq ($(BIT),64)
./test_avx512.sh 64
endif

View File

@ -117,7 +117,7 @@ CYBOZU_TEST_AUTO(test1)
int offset;
bool isBack;
bool isShort;
uint8 result[6];
uint8_t result[6];
int size;
} tbl[] = {
{ 0, true, true, { 0xeb, 0xfe }, 2 },
@ -133,7 +133,7 @@ CYBOZU_TEST_AUTO(test1)
const Tbl *p = &tbl[i];
for (int k = 0; k < 2; k++) {
TestJmp jmp(p->offset, p->isBack, p->isShort, k == 0);
const uint8 *q = (const uint8*)jmp.getCode();
const uint8_t *q = (const uint8_t*)jmp.getCode();
if (p->isBack) q += p->offset; /* skip nop */
for (int j = 0; j < p->size; j++) {
CYBOZU_TEST_EQUAL(q[j], p->result[j]);
@ -207,7 +207,7 @@ CYBOZU_TEST_AUTO(testJmpCx)
CYBOZU_TEST_AUTO(loop)
{
const uint8 ok[] = {
const uint8_t ok[] = {
// lp:
0x31, 0xC0, // xor eax, eax
0xE2, 0xFC, // loop lp
@ -372,11 +372,11 @@ CYBOZU_TEST_AUTO(test3)
}
#endif
Xbyak::uint8 bufL[4096 * 32];
Xbyak::uint8 bufS[4096 * 2];
uint8_t bufL[4096 * 32];
uint8_t bufS[4096 * 2];
struct MyAllocator : Xbyak::Allocator {
Xbyak::uint8 *alloc(size_t size)
uint8_t *alloc(size_t size)
{
if (size < sizeof(bufS)) {
printf("test use bufS(%d)\n", (int)size);
@ -389,7 +389,7 @@ struct MyAllocator : Xbyak::Allocator {
fprintf(stderr, "no memory %d\n", (int)size);
exit(1);
}
void free(Xbyak::uint8 *)
void free(uint8_t *)
{
}
} myAlloc;
@ -428,6 +428,7 @@ CYBOZU_TEST_AUTO(test4)
}
}
#ifndef __APPLE__
CYBOZU_TEST_AUTO(test5)
{
struct Test5 : Xbyak::CodeGenerator {
@ -475,8 +476,9 @@ CYBOZU_TEST_AUTO(test5)
gm.assign((const char*)gc.getCode(), gc.getSize());
CYBOZU_TEST_EQUAL(fm, gm);
}
#endif
size_t getValue(const uint8* p)
size_t getValue(const uint8_t* p)
{
size_t v = 0;
for (size_t i = 0; i < sizeof(size_t); i++) {
@ -485,7 +487,7 @@ size_t getValue(const uint8* p)
return v;
}
void checkAddr(const uint8 *p, size_t offset, size_t expect)
void checkAddr(const uint8_t *p, size_t offset, size_t expect)
{
size_t v = getValue(p + offset);
CYBOZU_TEST_EQUAL(v, size_t(p) + expect);
@ -533,7 +535,7 @@ CYBOZU_TEST_AUTO(MovLabel)
const struct {
int pos;
uint8 ok;
uint8_t ok;
} tbl[] = {
#ifdef XBYAK32
{ 0x00, 0x90 },
@ -567,11 +569,11 @@ CYBOZU_TEST_AUTO(MovLabel)
const bool useNewLabel = k == 0;
MovLabelCode code(grow, useNewLabel);
if (grow) code.ready();
const uint8* const p = code.getCode();
const uint8_t* const p = code.getCode();
for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
int pos = tbl[i].pos;
uint8 x = p[pos];
uint8 ok = tbl[i].ok;
uint8_t x = p[pos];
uint8_t ok = tbl[i].ok;
CYBOZU_TEST_EQUAL(x, ok);
}
#ifdef XBYAK32
@ -1217,11 +1219,11 @@ CYBOZU_TEST_AUTO(rip_jmp)
CYBOZU_TEST_EQUAL(ret, ret1234() + ret9999());
}
#ifdef XBYAK64_GCC
#if 0
CYBOZU_TEST_AUTO(rip_addr)
{
/*
assume |&x - &code| < 2GiB
we can't assume |&x - &code| < 2GiB anymore
*/
static int x = 5;
struct Code : Xbyak::CodeGenerator {
@ -1236,6 +1238,8 @@ CYBOZU_TEST_AUTO(rip_addr)
CYBOZU_TEST_EQUAL(x, 123);
}
#endif
#ifndef __APPLE__
CYBOZU_TEST_AUTO(rip_addr_with_fixed_buf)
{
MIE_ALIGN(4096) static char buf[8192];
@ -1260,6 +1264,7 @@ CYBOZU_TEST_AUTO(rip_addr_with_fixed_buf)
code.setProtectModeRW();
}
#endif
#endif
struct ReleaseTestCode : Xbyak::CodeGenerator {
ReleaseTestCode(Label& L1, Label& L2, Label& L3)

View File

@ -9,111 +9,111 @@ using namespace Xbyak;
const int bitEnd = 64;
const uint64 YMM_SAE = 1ULL << 0;
const uint64 _XMM = 1ULL << 1;
const uint64 _MEM = 1ULL << 2;
const uint64 _REG32 = 1ULL << 3;
const uint64 EAX = 1ULL << 4;
const uint64 IMM32 = 1ULL << 5;
const uint64 IMM8 = 1ULL << 6;
const uint64 _REG8 = 1ULL << 7;
const uint64 _REG16 = 1ULL << 8;
const uint64 XMM_K = 1ULL << 9;
const uint64 YMM_K = 1ULL << 10;
const uint64 ZMM_K = 1ULL << 11;
const uint64 AX = 1ULL << 12;
const uint64 AL = 1ULL << 13;
const uint64 IMM_1 = 1ULL << 14;
const uint64 MEM8 = 1ULL << 15;
const uint64 MEM16 = 1ULL << 16;
const uint64 MEM32 = 1ULL << 17;
const uint64 VM32Z = 1ULL << 19;
const uint64 K_K = 1ULL << 20;
const uint64 MEM_ONLY_DISP = 1ULL << 21;
const uint64 VM32X_K = 1ULL << 23;
const uint64 _YMM = 1ULL << 24;
const uint64 VM32X_32 = 1ULL << 39;
const uint64 VM32X_64 = 1ULL << 40;
const uint64 VM32Y_32 = 1ULL << 41;
const uint64 VM32Y_64 = 1ULL << 42;
const uint64 VM32Z_K = 1ULL << 32;
const uint64_t YMM_SAE = 1ULL << 0;
const uint64_t _XMM = 1ULL << 1;
const uint64_t _MEM = 1ULL << 2;
const uint64_t _REG32 = 1ULL << 3;
const uint64_t EAX = 1ULL << 4;
const uint64_t IMM32 = 1ULL << 5;
const uint64_t IMM8 = 1ULL << 6;
const uint64_t _REG8 = 1ULL << 7;
const uint64_t _REG16 = 1ULL << 8;
const uint64_t XMM_K = 1ULL << 9;
const uint64_t YMM_K = 1ULL << 10;
const uint64_t ZMM_K = 1ULL << 11;
const uint64_t AX = 1ULL << 12;
const uint64_t AL = 1ULL << 13;
const uint64_t IMM_1 = 1ULL << 14;
const uint64_t MEM8 = 1ULL << 15;
const uint64_t MEM16 = 1ULL << 16;
const uint64_t MEM32 = 1ULL << 17;
const uint64_t VM32Z = 1ULL << 19;
const uint64_t K_K = 1ULL << 20;
const uint64_t MEM_ONLY_DISP = 1ULL << 21;
const uint64_t VM32X_K = 1ULL << 23;
const uint64_t _YMM = 1ULL << 24;
const uint64_t VM32X_32 = 1ULL << 39;
const uint64_t VM32X_64 = 1ULL << 40;
const uint64_t VM32Y_32 = 1ULL << 41;
const uint64_t VM32Y_64 = 1ULL << 42;
const uint64_t VM32Z_K = 1ULL << 32;
#ifdef XBYAK64
const uint64 _MEMe = 1ULL << 25;
const uint64 REG32_2 = 1ULL << 26; // r8d, ...
const uint64 REG16_2 = 1ULL << 27; // r8w, ...
const uint64 REG8_2 = 1ULL << 28; // r8b, ...
const uint64 REG8_3 = 1ULL << 29; // spl, ...
const uint64 _REG64 = 1ULL << 30; // rax, ...
const uint64 _REG64_2 = 1ULL << 31; // r8, ...
const uint64 _XMM2 = 1ULL << 33;
const uint64 _YMM2 = 1ULL << 34;
const uint64 VM32X = VM32X_32 | VM32X_64;
const uint64 VM32Y = VM32Y_32 | VM32Y_64;
const uint64_t _MEMe = 1ULL << 25;
const uint64_t REG32_2 = 1ULL << 26; // r8d, ...
const uint64_t REG16_2 = 1ULL << 27; // r8w, ...
const uint64_t REG8_2 = 1ULL << 28; // r8b, ...
const uint64_t REG8_3 = 1ULL << 29; // spl, ...
const uint64_t _REG64 = 1ULL << 30; // rax, ...
const uint64_t _REG64_2 = 1ULL << 31; // r8, ...
const uint64_t _XMM2 = 1ULL << 33;
const uint64_t _YMM2 = 1ULL << 34;
const uint64_t VM32X = VM32X_32 | VM32X_64;
const uint64_t VM32Y = VM32Y_32 | VM32Y_64;
#else
const uint64 _MEMe = 0;
const uint64 REG32_2 = 0;
const uint64 REG16_2 = 0;
const uint64 REG8_2 = 0;
const uint64 REG8_3 = 0;
const uint64 _REG64 = 0;
const uint64 _REG64_2 = 0;
const uint64 _XMM2 = 0;
const uint64 _YMM2 = 0;
const uint64 VM32X = VM32X_32;
const uint64 VM32Y = VM32Y_32;
const uint64_t _MEMe = 0;
const uint64_t REG32_2 = 0;
const uint64_t REG16_2 = 0;
const uint64_t REG8_2 = 0;
const uint64_t REG8_3 = 0;
const uint64_t _REG64 = 0;
const uint64_t _REG64_2 = 0;
const uint64_t _XMM2 = 0;
const uint64_t _YMM2 = 0;
const uint64_t VM32X = VM32X_32;
const uint64_t VM32Y = VM32Y_32;
#endif
const uint64 REG64 = _REG64 | _REG64_2;
const uint64 REG32 = _REG32 | REG32_2 | EAX;
const uint64 REG16 = _REG16 | REG16_2 | AX;
const uint64 REG32e = REG32 | REG64;
const uint64 REG8 = _REG8 | REG8_2|AL;
const uint64 MEM = _MEM | _MEMe;
const uint64 MEM64 = 1ULL << 35;
const uint64 YMM_ER = 1ULL << 36;
const uint64 VM32Y_K = 1ULL << 37;
const uint64 IMM_2 = 1ULL << 38;
const uint64 IMM = IMM_1 | IMM_2;
const uint64 YMM = _YMM | _YMM2;
const uint64 K = 1ULL << 43;
const uint64 _ZMM = 1ULL << 44;
const uint64 _ZMM2 = 1ULL << 45;
const uint64_t REG64 = _REG64 | _REG64_2;
const uint64_t REG32 = _REG32 | REG32_2 | EAX;
const uint64_t REG16 = _REG16 | REG16_2 | AX;
const uint64_t REG32e = REG32 | REG64;
const uint64_t REG8 = _REG8 | REG8_2|AL;
const uint64_t MEM = _MEM | _MEMe;
const uint64_t MEM64 = 1ULL << 35;
const uint64_t YMM_ER = 1ULL << 36;
const uint64_t VM32Y_K = 1ULL << 37;
const uint64_t IMM_2 = 1ULL << 38;
const uint64_t IMM = IMM_1 | IMM_2;
const uint64_t YMM = _YMM | _YMM2;
const uint64_t K = 1ULL << 43;
const uint64_t _ZMM = 1ULL << 44;
const uint64_t _ZMM2 = 1ULL << 45;
#ifdef XBYAK64
const uint64 ZMM = _ZMM | _ZMM2;
const uint64 _YMM3 = 1ULL << 46;
const uint64_t ZMM = _ZMM | _ZMM2;
const uint64_t _YMM3 = 1ULL << 46;
#else
const uint64 ZMM = _ZMM;
const uint64 _YMM3 = 0;
const uint64_t ZMM = _ZMM;
const uint64_t _YMM3 = 0;
#endif
const uint64 K2 = 1ULL << 47;
const uint64 ZMM_SAE = 1ULL << 48;
const uint64 ZMM_ER = 1ULL << 49;
const uint64_t K2 = 1ULL << 47;
const uint64_t ZMM_SAE = 1ULL << 48;
const uint64_t ZMM_ER = 1ULL << 49;
#ifdef XBYAK64
const uint64 _XMM3 = 1ULL << 50;
const uint64_t _XMM3 = 1ULL << 50;
#else
const uint64 _XMM3 = 0;
const uint64_t _XMM3 = 0;
#endif
const uint64 XMM = _XMM | _XMM2 | _XMM3;
const uint64 XMM_SAE = 1ULL << 51;
const uint64_t XMM = _XMM | _XMM2 | _XMM3;
const uint64_t XMM_SAE = 1ULL << 51;
#ifdef XBYAK64
const uint64 XMM_KZ = 1ULL << 52;
const uint64 YMM_KZ = 1ULL << 53;
const uint64 ZMM_KZ = 1ULL << 54;
const uint64_t XMM_KZ = 1ULL << 52;
const uint64_t YMM_KZ = 1ULL << 53;
const uint64_t ZMM_KZ = 1ULL << 54;
#else
const uint64 XMM_KZ = 0;
const uint64 YMM_KZ = 0;
const uint64 ZMM_KZ = 0;
const uint64_t XMM_KZ = 0;
const uint64_t YMM_KZ = 0;
const uint64_t ZMM_KZ = 0;
#endif
const uint64 MEM_K = 1ULL << 55;
const uint64 M_1to2 = 1ULL << 56;
const uint64 M_1to4 = 1ULL << 57;
const uint64 M_1to8 = 1ULL << 58;
const uint64 M_1to16 = 1ULL << 59;
const uint64 XMM_ER = 1ULL << 60;
const uint64 M_xword = 1ULL << 61;
const uint64 M_yword = 1ULL << 62;
const uint64 MY_1to4 = 1ULL << 18;
const uint64_t MEM_K = 1ULL << 55;
const uint64_t M_1to2 = 1ULL << 56;
const uint64_t M_1to4 = 1ULL << 57;
const uint64_t M_1to8 = 1ULL << 58;
const uint64_t M_1to16 = 1ULL << 59;
const uint64_t XMM_ER = 1ULL << 60;
const uint64_t M_xword = 1ULL << 61;
const uint64_t M_yword = 1ULL << 62;
const uint64_t MY_1to4 = 1ULL << 18;
const uint64 NOPARA = 1ULL << (bitEnd - 1);
const uint64_t NOPARA = 1ULL << (bitEnd - 1);
class Test {
Test(const Test&);
@ -121,7 +121,7 @@ class Test {
const bool isXbyak_;
int funcNum_;
// check all op1, op2, op3
void put(const std::string& nm, uint64 op1 = NOPARA, uint64 op2 = NOPARA, uint64 op3 = NOPARA, uint64 op4 = NOPARA) const
void put(const std::string& nm, uint64_t op1 = NOPARA, uint64_t op2 = NOPARA, uint64_t op3 = NOPARA, uint64_t op4 = NOPARA) const
{
for (int i = 0; i < bitEnd; i++) {
if ((op1 & (1ULL << i)) == 0) continue;
@ -144,7 +144,7 @@ class Test {
}
}
}
void put(const char *nm, uint64 op, const char *xbyak, const char *nasm) const
void put(const char *nm, uint64_t op, const char *xbyak, const char *nasm) const
{
for (int i = 0; i < bitEnd; i++) {
if ((op & (1ULL << i)) == 0) continue;
@ -156,7 +156,7 @@ class Test {
printf("\n");
}
}
void put(const char *nm, const char *xbyak, const char *nasm = 0, uint64 op = NOPARA) const
void put(const char *nm, const char *xbyak, const char *nasm = 0, uint64_t op = NOPARA) const
{
if (nasm == 0) nasm = xbyak;
for (int i = 0; i < bitEnd; i++) {
@ -169,7 +169,7 @@ class Test {
printf("\n");
}
}
const char *get(uint64 type) const
const char *get(uint64_t type) const
{
int idx = (rand() / 31) & 7;
switch (type) {
@ -537,7 +537,7 @@ public:
printf("vaddpd(%s%s%s, %s, %s%s); dump();\n", r1, pk, pz, r2, r3, saeTblXbyak[sae]);
} else {
if (kIdx) CYBOZU_SNPRINTF(pk, sizeof(pk), "{k%d}", kIdx);
if (z) pz = "{z}";
if (z && kIdx) pz = "{z}";
printf("vaddpd %s%s%s, %s, %s%s\n", r1, pk, pz, r2, r3, saeTblNASM[sae]);
}
}

View File

@ -10,111 +10,111 @@ using namespace Xbyak;
const int bitEnd = 64;
const uint64 MMX = 1ULL << 0;
const uint64 _XMM = 1ULL << 1;
const uint64 _MEM = 1ULL << 2;
const uint64 _REG32 = 1ULL << 3;
const uint64 EAX = 1ULL << 4;
const uint64 IMM32 = 1ULL << 5;
const uint64 IMM8 = 1ULL << 6;
const uint64 _REG8 = 1ULL << 7;
const uint64 _REG16 = 1ULL << 8;
const uint64 NEG8 = 1ULL << 9;
const uint64 IMM16 = 1ULL << 10;
const uint64 NEG16 = 1ULL << 11;
const uint64 AX = 1ULL << 12;
const uint64 AL = 1ULL << 13;
const uint64 IMM_1 = 1ULL << 14;
const uint64 MEM8 = 1ULL << 15;
const uint64 MEM16 = 1ULL << 16;
const uint64 MEM32 = 1ULL << 17;
const uint64 ONE = 1ULL << 19;
const uint64 CL = 1ULL << 20;
const uint64 MEM_ONLY_DISP = 1ULL << 21;
const uint64 NEG32 = 1ULL << 23;
const uint64 _YMM = 1ULL << 24;
const uint64 VM32X_32 = 1ULL << 39;
const uint64 VM32X_64 = 1ULL << 40;
const uint64 VM32Y_32 = 1ULL << 41;
const uint64 VM32Y_64 = 1ULL << 42;
const uint64_t MMX = 1ULL << 0;
const uint64_t _XMM = 1ULL << 1;
const uint64_t _MEM = 1ULL << 2;
const uint64_t _REG32 = 1ULL << 3;
const uint64_t EAX = 1ULL << 4;
const uint64_t IMM32 = 1ULL << 5;
const uint64_t IMM8 = 1ULL << 6;
const uint64_t _REG8 = 1ULL << 7;
const uint64_t _REG16 = 1ULL << 8;
const uint64_t NEG8 = 1ULL << 9;
const uint64_t IMM16 = 1ULL << 10;
const uint64_t NEG16 = 1ULL << 11;
const uint64_t AX = 1ULL << 12;
const uint64_t AL = 1ULL << 13;
const uint64_t IMM_1 = 1ULL << 14;
const uint64_t MEM8 = 1ULL << 15;
const uint64_t MEM16 = 1ULL << 16;
const uint64_t MEM32 = 1ULL << 17;
const uint64_t ONE = 1ULL << 19;
const uint64_t CL = 1ULL << 20;
const uint64_t MEM_ONLY_DISP = 1ULL << 21;
const uint64_t NEG32 = 1ULL << 23;
const uint64_t _YMM = 1ULL << 24;
const uint64_t VM32X_32 = 1ULL << 39;
const uint64_t VM32X_64 = 1ULL << 40;
const uint64_t VM32Y_32 = 1ULL << 41;
const uint64_t VM32Y_64 = 1ULL << 42;
#ifdef XBYAK64
const uint64 _MEMe = 1ULL << 25;
const uint64 REG32_2 = 1ULL << 26; // r8d, ...
const uint64 REG16_2 = 1ULL << 27; // r8w, ...
const uint64 REG8_2 = 1ULL << 28; // r8b, ...
const uint64 REG8_3 = 1ULL << 29; // spl, ...
const uint64 _REG64 = 1ULL << 30; // rax, ...
const uint64 _REG64_2 = 1ULL << 31; // r8, ...
const uint64 RAX = 1ULL << 32;
const uint64 _XMM2 = 1ULL << 33;
const uint64 _YMM2 = 1ULL << 34;
const uint64 VM32X = VM32X_32 | VM32X_64;
const uint64 VM32Y = VM32Y_32 | VM32Y_64;
const uint64_t _MEMe = 1ULL << 25;
const uint64_t REG32_2 = 1ULL << 26; // r8d, ...
const uint64_t REG16_2 = 1ULL << 27; // r8w, ...
const uint64_t REG8_2 = 1ULL << 28; // r8b, ...
const uint64_t REG8_3 = 1ULL << 29; // spl, ...
const uint64_t _REG64 = 1ULL << 30; // rax, ...
const uint64_t _REG64_2 = 1ULL << 31; // r8, ...
const uint64_t RAX = 1ULL << 32;
const uint64_t _XMM2 = 1ULL << 33;
const uint64_t _YMM2 = 1ULL << 34;
const uint64_t VM32X = VM32X_32 | VM32X_64;
const uint64_t VM32Y = VM32Y_32 | VM32Y_64;
#else
const uint64 _MEMe = 0;
const uint64 REG32_2 = 0;
const uint64 REG16_2 = 0;
const uint64 REG8_2 = 0;
const uint64 REG8_3 = 0;
const uint64 _REG64 = 0;
const uint64 _REG64_2 = 0;
const uint64 RAX = 0;
const uint64 _XMM2 = 0;
const uint64 _YMM2 = 0;
const uint64 VM32X = VM32X_32;
const uint64 VM32Y = VM32Y_32;
const uint64_t _MEMe = 0;
const uint64_t REG32_2 = 0;
const uint64_t REG16_2 = 0;
const uint64_t REG8_2 = 0;
const uint64_t REG8_3 = 0;
const uint64_t _REG64 = 0;
const uint64_t _REG64_2 = 0;
const uint64_t RAX = 0;
const uint64_t _XMM2 = 0;
const uint64_t _YMM2 = 0;
const uint64_t VM32X = VM32X_32;
const uint64_t VM32Y = VM32Y_32;
#endif
const uint64 REG64 = _REG64 | _REG64_2 | RAX;
const uint64 REG32 = _REG32 | REG32_2 | EAX;
const uint64 REG16 = _REG16 | REG16_2 | AX;
const uint64 REG32e = REG32 | REG64;
const uint64 REG8 = _REG8 | REG8_2|AL;
const uint64 MEM = _MEM | _MEMe;
const uint64 MEM64 = 1ULL << 35;
const uint64 ST0 = 1ULL << 36;
const uint64 STi = 1ULL << 37;
const uint64 IMM_2 = 1ULL << 38;
const uint64 IMM = IMM_1 | IMM_2;
const uint64 XMM = _XMM | _XMM2;
const uint64 YMM = _YMM | _YMM2;
const uint64 K = 1ULL << 43;
const uint64 _ZMM = 1ULL << 44;
const uint64 _ZMM2 = 1ULL << 45;
const uint64_t REG64 = _REG64 | _REG64_2 | RAX;
const uint64_t REG32 = _REG32 | REG32_2 | EAX;
const uint64_t REG16 = _REG16 | REG16_2 | AX;
const uint64_t REG32e = REG32 | REG64;
const uint64_t REG8 = _REG8 | REG8_2|AL;
const uint64_t MEM = _MEM | _MEMe;
const uint64_t MEM64 = 1ULL << 35;
const uint64_t ST0 = 1ULL << 36;
const uint64_t STi = 1ULL << 37;
const uint64_t IMM_2 = 1ULL << 38;
const uint64_t IMM = IMM_1 | IMM_2;
const uint64_t XMM = _XMM | _XMM2;
const uint64_t YMM = _YMM | _YMM2;
const uint64_t K = 1ULL << 43;
const uint64_t _ZMM = 1ULL << 44;
const uint64_t _ZMM2 = 1ULL << 45;
#ifdef XBYAK64
const uint64 ZMM = _ZMM | _ZMM2;
const uint64 _YMM3 = 1ULL << 46;
const uint64_t ZMM = _ZMM | _ZMM2;
const uint64_t _YMM3 = 1ULL << 46;
#else
const uint64 ZMM = _ZMM;
const uint64 _YMM3 = 0;
const uint64_t ZMM = _ZMM;
const uint64_t _YMM3 = 0;
#endif
const uint64 K2 = 1ULL << 47;
const uint64 ZMM_SAE = 1ULL << 48;
const uint64 ZMM_ER = 1ULL << 49;
const uint64_t K2 = 1ULL << 47;
const uint64_t ZMM_SAE = 1ULL << 48;
const uint64_t ZMM_ER = 1ULL << 49;
#ifdef XBYAK64
const uint64 _XMM3 = 1ULL << 50;
const uint64_t _XMM3 = 1ULL << 50;
#endif
const uint64 XMM_SAE = 1ULL << 51;
const uint64_t XMM_SAE = 1ULL << 51;
#ifdef XBYAK64
const uint64 XMM_KZ = 1ULL << 52;
const uint64 YMM_KZ = 1ULL << 53;
const uint64 ZMM_KZ = 1ULL << 54;
const uint64_t XMM_KZ = 1ULL << 52;
const uint64_t YMM_KZ = 1ULL << 53;
const uint64_t ZMM_KZ = 1ULL << 54;
#else
const uint64 XMM_KZ = 0;
const uint64 YMM_KZ = 0;
const uint64 ZMM_KZ = 0;
const uint64_t XMM_KZ = 0;
const uint64_t YMM_KZ = 0;
const uint64_t ZMM_KZ = 0;
#endif
const uint64 MEM_K = 1ULL << 55;
const uint64 M_1to2 = 1ULL << 56;
const uint64 M_1to4 = 1ULL << 57;
const uint64 M_1to8 = 1ULL << 58;
const uint64 M_1to16 = 1ULL << 59;
const uint64 XMM_ER = 1ULL << 60;
const uint64 M_xword = 1ULL << 61;
const uint64 M_yword = 1ULL << 62;
const uint64 MY_1to4 = 1ULL << 18;
const uint64 BNDREG = 1ULL << 22;
const uint64_t MEM_K = 1ULL << 55;
const uint64_t M_1to2 = 1ULL << 56;
const uint64_t M_1to4 = 1ULL << 57;
const uint64_t M_1to8 = 1ULL << 58;
const uint64_t M_1to16 = 1ULL << 59;
const uint64_t XMM_ER = 1ULL << 60;
const uint64_t M_xword = 1ULL << 61;
const uint64_t M_yword = 1ULL << 62;
const uint64_t MY_1to4 = 1ULL << 18;
const uint64_t BNDREG = 1ULL << 22;
const uint64 NOPARA = 1ULL << (bitEnd - 1);
const uint64_t NOPARA = 1ULL << (bitEnd - 1);
class Test {
Test(const Test&);
@ -131,7 +131,7 @@ class Test {
}
// check all op1, op2, op3
void put(const std::string& nm, uint64 op1 = NOPARA, uint64 op2 = NOPARA, uint64 op3 = NOPARA, uint64 op4 = NOPARA) const
void put(const std::string& nm, uint64_t op1 = NOPARA, uint64_t op2 = NOPARA, uint64_t op3 = NOPARA, uint64_t op4 = NOPARA) const
{
for (int i = 0; i < bitEnd; i++) {
if ((op1 & (1ULL << i)) == 0) continue;
@ -154,7 +154,7 @@ class Test {
}
}
}
void put(const char *nm, uint64 op, const char *xbyak, const char *nasm) const
void put(const char *nm, uint64_t op, const char *xbyak, const char *nasm) const
{
for (int i = 0; i < bitEnd; i++) {
if ((op & (1ULL << i)) == 0) continue;
@ -166,7 +166,7 @@ class Test {
printf("\n");
}
}
void put(const char *nm, const char *xbyak, const char *nasm = 0, uint64 op = NOPARA) const
void put(const char *nm, const char *xbyak, const char *nasm = 0, uint64_t op = NOPARA) const
{
if (nasm == 0) nasm = xbyak;
for (int i = 0; i < bitEnd; i++) {
@ -179,7 +179,7 @@ class Test {
printf("\n");
}
}
const char *get(uint64 type) const
const char *get(uint64_t type) const
{
int idx = (rand() / 31) & 7;
if (type == ST0) {
@ -643,6 +643,7 @@ class Test {
puts(isXbyak_ ? "out_(dx, al); dump();" : "out dx, al");
puts(isXbyak_ ? "out_(dx, ax); dump();" : "out dx, ax");
puts(isXbyak_ ? "out_(dx, eax); dump();" : "out dx, eax");
puts(isXbyak_ ? "lea(eax, ptr[edi + 4 * eax]); dump();" : "lea eax, [edi + 4 * eax]");
}
void putJmp() const
{
@ -858,7 +859,7 @@ class Test {
SD = 1 << 3
};
const struct {
uint8 code;
uint8_t code;
const char *name;
} sufTbl[] = {
{ 0, "ps" },
@ -867,7 +868,7 @@ class Test {
{ 0xF2, "sd" },
};
static const struct XmmTbl1 {
uint8 code;
uint8_t code;
int mode;
const char *name;
bool hasImm;
@ -946,8 +947,8 @@ class Test {
{
static const struct Tbl {
const char *name;
uint64 op1;
uint64 op2;
uint64_t op1;
uint64_t op2;
} tbl[] = {
{ "cvtpi2ps", XMM, MMX|MEM },
{ "cvtps2pi", MMX, XMM|MEM },
@ -1145,6 +1146,33 @@ class Test {
put("pop", REG32|MEM32);
#endif
}
void putPushPop8_16() const
{
const struct {
int b;
uint32_t v;
} tbl[] = {
{ 8, 0x7f },
{ 8, 0x80 },
{ 8, 0xff },
{ 8, 0x100 },
{ 8, 0x12345 },
{ 16, 0x7fff },
{ 16, 0x8000 },
{ 16, 0xffff },
{ 16, 0x10000 },
{ 16, 0x12345 },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const char *b = tbl[i].b == 8 ? "byte" : "word";
uint32_t v = tbl[i].v;
if (isXbyak_) {
printf("push(%s, 0x%x);dump();\n", b, v);
} else {
printf("push %s 0x%x\n", b, v);
}
}
}
void putTest() const
{
const char *p = "test";
@ -1466,9 +1494,9 @@ class Test {
void putMPX() const
{
#ifdef XBYAK64
const uint64 reg = REG64;
const uint64_t reg = REG64;
#else
const uint64 reg = REG32;
const uint64_t reg = REG32;
#endif
put("bndcl", BNDREG, reg|MEM);
put("bndcu", BNDREG, reg|MEM);
@ -2496,6 +2524,7 @@ public:
separateFunc();
putSSE4_2();
putSeg(); // same behavior as yasm for mov rax, cx
putPushPop8_16();
#else
putSIMPLE();
putReg1();
@ -2628,7 +2657,7 @@ public:
printf("vaddpd(%s%s%s, %s, %s%s); dump();\n", r1, pk, pz, r2, r3, saeTblXbyak[sae]);
} else {
if (kIdx) CYBOZU_SNPRINTF(pk, sizeof(pk), "{k%d}", kIdx);
if (z) pz = "{z}";
if (z && kIdx) pz = "{z}";
printf("vaddpd %s%s%s, %s, %s%s\n", r1, pk, pz, r2, r3, saeTblNASM[sae]);
}
}

View File

@ -97,13 +97,43 @@ CYBOZU_TEST_AUTO(align)
CYBOZU_TEST_EQUAL(size_t(getCurr()) % alignSize, 0u);
}
align(alignSize);
const uint8 *p = getCurr();
const uint8_t *p = getCurr();
// do nothing if aligned
align(alignSize);
CYBOZU_TEST_EQUAL(p, getCurr());
}
} c;
}
CYBOZU_TEST_AUTO(kmask)
{
struct Code : Xbyak::CodeGenerator {
Code()
{
CYBOZU_TEST_EXCEPTION(kmovb(k1, ax), std::exception);
CYBOZU_TEST_EXCEPTION(kmovw(k1, ax), std::exception);
CYBOZU_TEST_EXCEPTION(kmovd(k1, ax), std::exception);
CYBOZU_TEST_EXCEPTION(kmovq(k1, eax), std::exception);
#ifdef XBYAK64
CYBOZU_TEST_EXCEPTION(kmovb(k1, rax), std::exception);
CYBOZU_TEST_EXCEPTION(kmovw(k1, rax), std::exception);
CYBOZU_TEST_EXCEPTION(kmovd(k1, rax), std::exception);
CYBOZU_TEST_NO_EXCEPTION(kmovq(k1, rax));
#endif
CYBOZU_TEST_NO_EXCEPTION(vmovaps(xm0|k0, ptr[eax]));
checkT_z();
}
void checkT_z()
{
const uint8_t *p1 = getCurr();
vmovaps(zm0, ptr[eax]);
const uint8_t *p2 = getCurr();
vmovaps(zm0|T_z, ptr[eax]);
const uint8_t *end = getCurr();
CYBOZU_TEST_EQUAL(p2 - p1, end - p2);
CYBOZU_TEST_EQUAL_ARRAY(p1, p2, end - p2);
}
} c;
}
#ifdef XBYAK64
CYBOZU_TEST_AUTO(vfmaddps)
@ -721,4 +751,96 @@ CYBOZU_TEST_AUTO(bf16)
CYBOZU_TEST_EQUAL(c.getSize(), n);
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
CYBOZU_TEST_AUTO(AMX)
{
struct Code : Xbyak::CodeGenerator {
Code()
{
ldtilecfg(ptr[rax + rcx * 4 + 64]);
sttilecfg(ptr[rsp + rax * 8 + 128]);
tileloadd(tmm3, ptr[rdi + rdx * 2 + 8]);
tileloaddt1(tmm4, ptr[r8 + r9 + 32]);
tilerelease();
tilestored(ptr[r10 + r11 * 2 + 32], tmm2);
tilezero(tmm7);
tdpbssd(tmm1, tmm2, tmm3);
tdpbsud(tmm2, tmm3, tmm4);
tdpbusd(tmm3, tmm4, tmm5);
tdpbuud(tmm4, tmm5, tmm6);
tdpbf16ps(tmm5, tmm6, tmm7);
}
} c;
// generated code by patch
const uint8_t tbl[] = {
0xc4, 0xe2, 0x78, 0x49, 0x44, 0x88, 0x40, 0xc4, 0xe2, 0x79, 0x49, 0x84, 0xc4, 0x80, 0x00, 0x00,
0x00, 0xc4, 0xe2, 0x7b, 0x4b, 0x5c, 0x57, 0x08, 0xc4, 0x82, 0x79, 0x4b, 0x64, 0x08, 0x20, 0xc4,
0xe2, 0x78, 0x49, 0xc0, 0xc4, 0x82, 0x7a, 0x4b, 0x54, 0x5a, 0x20, 0xc4, 0xe2, 0x7b, 0x49, 0xf8,
0xc4, 0xe2, 0x63, 0x5e, 0xca, 0xc4, 0xe2, 0x5a, 0x5e, 0xd3, 0xc4, 0xe2, 0x51, 0x5e, 0xdc, 0xc4,
0xe2, 0x48, 0x5e, 0xe5, 0xc4, 0xe2, 0x42, 0x5c, 0xee,
};
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
CYBOZU_TEST_EQUAL(c.getSize(), n);
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
CYBOZU_TEST_AUTO(tileloadd)
{
struct Code : Xbyak::CodeGenerator {
Code()
{
tileloadd(tmm1, ptr[r8+r8]);
tileloadd(tmm1, ptr[rax+rcx*4]);
tileloadd(tmm1, ptr[r8+r9*1+0x40]);
}
void notSupported()
{
tileloadd(tmm1, ptr[r8]);
}
void notSupported2()
{
tileloadd(tmm1, ptr[r8*2]);
}
} c;
const uint8_t tbl[] = {
0xC4, 0x82, 0x7B, 0x4B, 0x0C, 0x00,
0xC4, 0xE2, 0x7B, 0x4B, 0x0C, 0x88,
0xC4, 0x82, 0x7B, 0x4B, 0x4C, 0x08, 0x40,
};
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
CYBOZU_TEST_EQUAL(c.getSize(), n);
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
// current version does not support this sibmem format
CYBOZU_TEST_EXCEPTION(c.notSupported(), std::exception);
CYBOZU_TEST_EXCEPTION(c.notSupported2(), std::exception);
}
CYBOZU_TEST_AUTO(vnni)
{
struct Code : Xbyak::CodeGenerator {
Code()
{
// default encoding is EVEX
vpdpbusd(xm0, xm1, xm2);
vpdpbusd(xm0, xm1, xm2, EvexEncoding); // EVEX
vpdpbusd(xm0, xm1, xm2, VexEncoding); // VEX
}
void badVex()
{
vpdpbusd(xm0, xm1, xm31, VexEncoding);
}
} c;
const uint8_t tbl[] = {
0x62, 0xF2, 0x75, 0x08, 0x50, 0xC2,
0x62, 0xF2, 0x75, 0x08, 0x50, 0xC2,
0xC4, 0xE2, 0x71, 0x50, 0xC2,
};
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
CYBOZU_TEST_EQUAL(c.getSize(), n);
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
CYBOZU_TEST_EXCEPTION(c.badVex(), std::exception);
}
#endif

View File

@ -22,16 +22,19 @@ class ErrorSample : public CodeGenerator {
public:
void gen()
{
#ifndef XBYAK_NO_EXCEPTION
CYBOZU_TEST_EXCEPTION(mov(ptr[eax],1), std::exception);
CYBOZU_TEST_EXCEPTION(test(ptr[eax],1), std::exception);
CYBOZU_TEST_EXCEPTION(adc(ptr[eax],1), std::exception);
CYBOZU_TEST_EXCEPTION(setz(eax), std::exception);
#endif
}
};
int main()
{
CYBOZU_TEST_EQUAL(sizeof(Xbyak::Operand), 4u);
// the size of Operand exceeds 32 bit.
CYBOZU_TEST_EQUAL(sizeof(Xbyak::Operand), 8u);
Sample s;
s.gen();
ErrorSample es;

View File

@ -0,0 +1,111 @@
#define XBYAK_NO_EXCEPTION
#include <xbyak/xbyak.h>
using namespace Xbyak;
int g_err = 0;
int g_test = 0;
void assertEq(int x, int y)
{
if (x != y) {
printf("ERR x=%d y=%d\n", x, y);
g_err++;
}
g_test++;
}
void assertBool(bool b)
{
if (!b) {
printf("ERR assertBool\n");
g_err++;
}
g_test++;
}
void test1()
{
const int v = 123;
struct Code : CodeGenerator {
Code()
{
mov(eax, v);
ret();
}
} c;
int (*f)() = c.getCode<int (*)()>();
assertEq(f(), v);
assertEq(Xbyak::GetError(), ERR_NONE);
}
void test2()
{
struct Code : CodeGenerator {
Code()
{
Label lp;
L(lp);
L(lp);
}
} c;
assertEq(Xbyak::GetError(), ERR_LABEL_IS_REDEFINED);
Xbyak::ClearError();
}
void test3()
{
static struct EmptyAllocator : Xbyak::Allocator {
uint8_t *alloc() { return 0; }
} emptyAllocator;
struct Code : CodeGenerator {
Code() : CodeGenerator(8, 0, &emptyAllocator)
{
mov(eax, 3);
assertBool(Xbyak::GetError() == 0);
mov(eax, 3);
mov(eax, 3);
assertBool(Xbyak::GetError() != 0);
Xbyak::ClearError();
assertBool(Xbyak::GetError() == 0);
}
} c;
}
void test4()
{
struct Code : CodeGenerator {
Code()
{
mov(ptr[eax], 1);
assertBool(Xbyak::GetError() != 0);
Xbyak::ClearError();
test(ptr[eax], 1);
assertBool(Xbyak::GetError() != 0);
Xbyak::ClearError();
adc(ptr[eax], 1);
assertBool(Xbyak::GetError() != 0);
Xbyak::ClearError();
setz(eax);
assertBool(Xbyak::GetError() != 0);
Xbyak::ClearError();
}
};
}
int main()
{
test1();
test2();
test3();
test4();
if (g_err) {
printf("err %d/%d\n", g_err, g_test);
} else {
printf("all ok %d\n", g_test);
}
return g_err != 0;
}

View File

@ -6,7 +6,7 @@
#include <iostream>
#include <memory.h>
typedef unsigned char uint8;
typedef unsigned char uint8_t;
std::string normalize(const std::string& line)
{

View File

@ -218,7 +218,7 @@ void check(int x, int y)
}
}
void verify(const Xbyak::uint8 *f, int pNum)
void verify(const Xbyak::uint8_t *f, int pNum)
{
switch (pNum) {
case 0:
@ -264,7 +264,7 @@ void testAll()
}
for (int tNum = 0; tNum < maxNum; tNum++) {
// printf("pNum=%d, tNum=%d, stackSize=%d\n", pNum, tNum | opt, stackSize);
const Xbyak::uint8 *f = code.getCurr();
const Xbyak::uint8_t *f = code.getCurr();
code.gen(pNum, tNum | opt, stackSize);
verify(f, pNum);
/*

View File

@ -12,7 +12,7 @@ g++ $CFLAGS address.cpp -o address
./address $1 > a.asm
echo "asm"
$EXE -f$OPT3 a.asm -l a.lst
awk '{if (index($3, "-")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = "" }} ' < a.lst | $FILTER > ok.lst
awk '{printf "%s", sub(/-$/, "", $3) ? $3 : $3 ORS}' a.lst | $FILTER > ok.lst
echo "xbyak"
./address $1 jit > nm.cpp

View File

@ -1,39 +1,44 @@
#!/bin/tcsh
#!/bin/sh
set FILTER="grep -v warning"
FILTER="grep -v warning"
if ($1 == "Y") then
case $1 in
Y)
echo "yasm(32bit)"
set EXE=yasm
set OPT2="-DUSE_YASM -DXBYAK32"
set OPT3=win32
else if ($1 == "64") then
EXE=yasm
OPT2="-DUSE_YASM -DXBYAK32"
OPT3=win32
;;
64)
echo "nasm(64bit)"
set EXE=nasm
set OPT2=-DXBYAK64
set OPT3=win64
set FILTER=./normalize_prefix
else if ($1 == "Y64") then
EXE=nasm
OPT2=-DXBYAK64
OPT3=win64
FILTER=./normalize_prefix
;;
Y64)
echo "yasm(64bit)"
set EXE=yasm
set OPT2="-DUSE_YASM -DXBYAK64"
set OPT3=win64
set FILTER=./normalize_prefix
else
EXE=yasm
OPT2="-DUSE_YASM -DXBYAK64"
OPT3=win64
FILTER=./normalize_prefix
;;
*)
echo "nasm(32bit)"
set EXE=nasm
set OPT2=-DXBYAK32
set OPT3=win32
endif
EXE=nasm
OPT2=-DXBYAK32
OPT3=win32
;;
esac
set CFLAGS="-Wall -fno-operator-names -I../ $OPT2 -DUSE_AVX"
CFLAGS="-Wall -fno-operator-names -I../ $OPT2 -DUSE_AVX"
echo "compile make_nm.cpp"
g++ $CFLAGS make_nm.cpp -o make_nm
./make_nm > a.asm
echo "asm"
$EXE -f$OPT3 a.asm -l a.lst
awk '{if (index($3, "-")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = "" }} ' < a.lst | $FILTER | grep -v "1+1" > ok.lst
awk '$3 != "1+1" {printf "%s", sub(/-$/, "", $3) ? $3 : $3 ORS}' a.lst | $FILTER > ok.lst
echo "xbyak"
./make_nm jit > nm.cpp

View File

@ -1,28 +1,31 @@
#!/bin/tcsh
#!/bin/sh
set FILTER="grep -v warning"
FILTER="grep -v warning"
if ($1 == "64") then
case $1 in
64)
echo "nasm(64bit)"
set EXE=nasm
set OPT2=-DXBYAK64
set OPT3=win64
set FILTER=./normalize_prefix
else
EXE=nasm
OPT2=-DXBYAK64
OPT3=win64
FILTER=./normalize_prefix
;;
*)
echo "nasm(32bit)"
set EXE=nasm
set OPT2=-DXBYAK32
set OPT3=win32
endif
EXE=nasm
OPT2=-DXBYAK32
OPT3=win32
;;
esac
set CFLAGS="-Wall -fno-operator-names -I../ $OPT2 -DUSE_AVX512"
CFLAGS="-Wall -fno-operator-names -I../ $OPT2 -DUSE_AVX512"
echo "compile make_512.cpp"
g++ $CFLAGS make_512.cpp -o make_512
./make_512 > a.asm
echo "asm"
$EXE -f$OPT3 a.asm -l a.lst
awk '{if (index($3, "-")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = "" }} ' < a.lst | $FILTER > ok.lst
awk '{printf "%s", sub(/-$/, "", $3) ? $3 : $3 ORS}' a.lst | $FILTER > ok.lst
echo "xbyak"
./make_512 jit > nm.cpp

View File

@ -17,6 +17,10 @@ if /i "%1"=="Y" (
set OPT2=-DUSE_YASM -DXBYAK64
set OPT3=win64
set FILTER=normalize_prefix
) else if /i "%1"=="noexcept" (
set EXE=nasm.exe
set OPT2=-DXBYAK32 -DXBYAK_NO_EXCEPTION
set OPT3=win32
) else (
set EXE=nasm.exe
set OPT2=-DXBYAK32
@ -39,4 +43,4 @@ if /i "%Y%"=="1" (
make_nm jit > nm.cpp
cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2%
nm_frame |%FILTER% > x.lst
diff -wb x.lst ok.lst && echo "ok"
diff -wb x.lst ok.lst && echo "ok"

View File

@ -1,45 +1,57 @@
#!/bin/tcsh
#!/bin/sh
set FILTER=cat
FILTER=cat
if ($1 == "Y") then
case $1 in
Y)
echo "yasm(32bit)"
set EXE=yasm
set OPT2="-DUSE_YASM -DXBYAK32"
set OPT3=win32
else if ($1 == "64") then
EXE=yasm
OPT2="-DUSE_YASM -DXBYAK32"
OPT3=win32
;;
64)
echo "nasm(64bit)"
set EXE=nasm
set OPT2=-DXBYAK64
set OPT3=win64
set FILTER=./normalize_prefix
else if ($1 == "Y64") then
EXE=nasm
OPT2=-DXBYAK64
OPT3=win64
FILTER=./normalize_prefix
;;
Y64)
echo "yasm(64bit)"
set EXE=yasm
set OPT2="-DUSE_YASM -DXBYAK64"
set OPT3=win64
set FILTER=./normalize_prefix
else if ($1 == "avx512") then
EXE=yasm
OPT2="-DUSE_YASM -DXBYAK64"
OPT3=win64
FILTER=./normalize_prefix
;;
avx512)
echo "nasm(64bit) + avx512"
set EXE=nasm
set OPT2="-DXBYAK64 -DUSE_AVX512"
set OPT3=win64
set FILTER=./normalize_prefix
else
EXE=nasm
OPT2="-DXBYAK64 -DUSE_AVX512"
OPT3=win64
FILTER=./normalize_prefix
;;
noexcept)
echo "nasm(32bit) without exception"
EXE=nasm
OPT2="-DXBYAK32 -DXBYAK_NO_EXCEPTION"
OPT3=win32
;;
*)
echo "nasm(32bit)"
set EXE=nasm
set OPT2=-DXBYAK32
set OPT3=win32
endif
EXE=nasm
OPT2=-DXBYAK32
OPT3=win32
;;
esac
set CFLAGS="-Wall -fno-operator-names -I../ $OPT2"
echo "compile make_nm.cpp"
CFLAGS="-Wall -fno-operator-names -I../ $OPT2"
echo "compile make_nm.cpp with $CFLAGS"
g++ $CFLAGS make_nm.cpp -o make_nm
./make_nm > a.asm
echo "asm"
$EXE -f$OPT3 a.asm -l a.lst
awk '{if (index($3, "-")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = "" }} ' < a.lst | $FILTER | grep -v "1+1" > ok.lst
awk '$3 != "1+1" {printf "%s", sub(/-$/, "", $3) ? $3 : $3 ORS}' a.lst | $FILTER > ok.lst
echo "xbyak"
./make_nm jit > nm.cpp

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,16 @@
#ifndef XBYAK_XBYAK_UTIL_H_
#define XBYAK_XBYAK_UTIL_H_
#ifdef XBYAK_ONLY_CLASS_CPU
#include <stdint.h>
#include <stdlib.h>
#include <algorithm>
#include <assert.h>
#ifndef XBYAK_THROW
#define XBYAK_THROW(x) ;
#define XBYAK_THROW_RET(x, y) return y;
#endif
#else
#include <string.h>
/**
@ -9,6 +20,7 @@
@note this header is UNDER CONSTRUCTION!
*/
#include "xbyak.h"
#endif // XBYAK_ONLY_CLASS_CPU
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
#define XBYAK_INTEL_CPU_SPECIFIC
@ -80,7 +92,7 @@ typedef enum {
CPU detection class
*/
class Cpu {
uint64 type_;
uint64_t type_;
//system topology
bool x2APIC_supported_;
static const size_t maxTopologyLevels = 2;
@ -219,24 +231,24 @@ public:
int displayFamily; // family + extFamily
int displayModel; // model + extModel
unsigned int getNumCores(IntelCpuTopologyLevel level) {
if (!x2APIC_supported_) throw Error(ERR_X2APIC_IS_NOT_SUPPORTED);
unsigned int getNumCores(IntelCpuTopologyLevel level) const {
if (!x2APIC_supported_) XBYAK_THROW_RET(ERR_X2APIC_IS_NOT_SUPPORTED, 0)
switch (level) {
case SmtLevel: return numCores_[level - 1];
case CoreLevel: return numCores_[level - 1] / numCores_[SmtLevel - 1];
default: throw Error(ERR_X2APIC_IS_NOT_SUPPORTED);
default: XBYAK_THROW_RET(ERR_X2APIC_IS_NOT_SUPPORTED, 0)
}
}
unsigned int getDataCacheLevels() const { return dataCacheLevels_; }
unsigned int getCoresSharingDataCache(unsigned int i) const
{
if (i >= dataCacheLevels_) throw Error(ERR_BAD_PARAMETER);
if (i >= dataCacheLevels_) XBYAK_THROW_RET(ERR_BAD_PARAMETER, 0)
return coresSharignDataCache_[i];
}
unsigned int getDataCacheSize(unsigned int i) const
{
if (i >= dataCacheLevels_) throw Error(ERR_BAD_PARAMETER);
if (i >= dataCacheLevels_) XBYAK_THROW_RET(ERR_BAD_PARAMETER, 0)
return dataCacheSize_[i];
}
@ -270,7 +282,7 @@ public:
(void)data;
#endif
}
static inline uint64 getXfeature()
static inline uint64_t getXfeature()
{
#ifdef XBYAK_INTEL_CPU_SPECIFIC
#ifdef _MSC_VER
@ -280,13 +292,13 @@ public:
// xgetvb is not support on gcc 4.2
// __asm__ volatile("xgetbv" : "=a"(eax), "=d"(edx) : "c"(0));
__asm__ volatile(".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(0));
return ((uint64)edx << 32) | eax;
return ((uint64_t)edx << 32) | eax;
#endif
#else
return 0;
#endif
}
typedef uint64 Type;
typedef uint64_t Type;
static const Type NONE = 0;
static const Type tMMX = 1 << 0;
@ -323,36 +335,40 @@ public:
static const Type tADX = 1 << 28; // adcx, adox
static const Type tRDSEED = 1 << 29; // rdseed
static const Type tSMAP = 1 << 30; // stac
static const Type tHLE = uint64(1) << 31; // xacquire, xrelease, xtest
static const Type tRTM = uint64(1) << 32; // xbegin, xend, xabort
static const Type tF16C = uint64(1) << 33; // vcvtph2ps, vcvtps2ph
static const Type tMOVBE = uint64(1) << 34; // mobve
static const Type tAVX512F = uint64(1) << 35;
static const Type tAVX512DQ = uint64(1) << 36;
static const Type tAVX512_IFMA = uint64(1) << 37;
static const Type tHLE = uint64_t(1) << 31; // xacquire, xrelease, xtest
static const Type tRTM = uint64_t(1) << 32; // xbegin, xend, xabort
static const Type tF16C = uint64_t(1) << 33; // vcvtph2ps, vcvtps2ph
static const Type tMOVBE = uint64_t(1) << 34; // mobve
static const Type tAVX512F = uint64_t(1) << 35;
static const Type tAVX512DQ = uint64_t(1) << 36;
static const Type tAVX512_IFMA = uint64_t(1) << 37;
static const Type tAVX512IFMA = tAVX512_IFMA;
static const Type tAVX512PF = uint64(1) << 38;
static const Type tAVX512ER = uint64(1) << 39;
static const Type tAVX512CD = uint64(1) << 40;
static const Type tAVX512BW = uint64(1) << 41;
static const Type tAVX512VL = uint64(1) << 42;
static const Type tAVX512_VBMI = uint64(1) << 43;
static const Type tAVX512PF = uint64_t(1) << 38;
static const Type tAVX512ER = uint64_t(1) << 39;
static const Type tAVX512CD = uint64_t(1) << 40;
static const Type tAVX512BW = uint64_t(1) << 41;
static const Type tAVX512VL = uint64_t(1) << 42;
static const Type tAVX512_VBMI = uint64_t(1) << 43;
static const Type tAVX512VBMI = tAVX512_VBMI; // changed by Intel's manual
static const Type tAVX512_4VNNIW = uint64(1) << 44;
static const Type tAVX512_4FMAPS = uint64(1) << 45;
static const Type tPREFETCHWT1 = uint64(1) << 46;
static const Type tPREFETCHW = uint64(1) << 47;
static const Type tSHA = uint64(1) << 48;
static const Type tMPX = uint64(1) << 49;
static const Type tAVX512_VBMI2 = uint64(1) << 50;
static const Type tGFNI = uint64(1) << 51;
static const Type tVAES = uint64(1) << 52;
static const Type tVPCLMULQDQ = uint64(1) << 53;
static const Type tAVX512_VNNI = uint64(1) << 54;
static const Type tAVX512_BITALG = uint64(1) << 55;
static const Type tAVX512_VPOPCNTDQ = uint64(1) << 56;
static const Type tAVX512_BF16 = uint64(1) << 57;
static const Type tAVX512_VP2INTERSECT = uint64(1) << 58;
static const Type tAVX512_4VNNIW = uint64_t(1) << 44;
static const Type tAVX512_4FMAPS = uint64_t(1) << 45;
static const Type tPREFETCHWT1 = uint64_t(1) << 46;
static const Type tPREFETCHW = uint64_t(1) << 47;
static const Type tSHA = uint64_t(1) << 48;
static const Type tMPX = uint64_t(1) << 49;
static const Type tAVX512_VBMI2 = uint64_t(1) << 50;
static const Type tGFNI = uint64_t(1) << 51;
static const Type tVAES = uint64_t(1) << 52;
static const Type tVPCLMULQDQ = uint64_t(1) << 53;
static const Type tAVX512_VNNI = uint64_t(1) << 54;
static const Type tAVX512_BITALG = uint64_t(1) << 55;
static const Type tAVX512_VPOPCNTDQ = uint64_t(1) << 56;
static const Type tAVX512_BF16 = uint64_t(1) << 57;
static const Type tAVX512_VP2INTERSECT = uint64_t(1) << 58;
static const Type tAMX_TILE = uint64_t(1) << 59;
static const Type tAMX_INT8 = uint64_t(1) << 60;
static const Type tAMX_BF16 = uint64_t(1) << 61;
static const Type tAVX_VNNI = uint64_t(1) << 62;
Cpu()
: type_(NONE)
@ -374,19 +390,35 @@ public:
if (ECX == get32bitAsBE(amd)) {
type_ |= tAMD;
getCpuid(0x80000001, data);
if (EDX & (1U << 31)) type_ |= t3DN;
if (EDX & (1U << 15)) type_ |= tCMOV;
if (EDX & (1U << 30)) type_ |= tE3DN;
if (EDX & (1U << 22)) type_ |= tMMX2;
if (EDX & (1U << 27)) type_ |= tRDTSCP;
if (EDX & (1U << 31)) {
type_ |= t3DN;
// 3DNow! implies support for PREFETCHW on AMD
type_ |= tPREFETCHW;
}
if (EDX & (1U << 29)) {
// Long mode implies support for PREFETCHW on AMD
type_ |= tPREFETCHW;
}
}
if (ECX == get32bitAsBE(intel)) {
type_ |= tINTEL;
}
// Extended flags information
getCpuid(0x80000000, data);
if (EAX >= 0x80000001) {
getCpuid(0x80000001, data);
if (EDX & (1U << 31)) type_ |= t3DN;
if (EDX & (1U << 30)) type_ |= tE3DN;
if (EDX & (1U << 27)) type_ |= tRDTSCP;
if (EDX & (1U << 22)) type_ |= tMMX2;
if (EDX & (1U << 15)) type_ |= tCMOV;
if (ECX & (1U << 5)) type_ |= tLZCNT;
if (ECX & (1U << 8)) type_ |= tPREFETCHW;
}
getCpuid(1, data);
if (ECX & (1U << 0)) type_ |= tSSE3;
if (ECX & (1U << 9)) type_ |= tSSSE3;
@ -407,11 +439,15 @@ public:
if (type_ & tOSXSAVE) {
// check XFEATURE_ENABLED_MASK[2:1] = '11b'
uint64 bv = getXfeature();
uint64_t bv = getXfeature();
if ((bv & 6) == 6) {
if (ECX & (1U << 28)) type_ |= tAVX;
if (ECX & (1U << 12)) type_ |= tFMA;
if (((bv >> 5) & 7) == 7) {
// do *not* check AVX-512 state on macOS because it has on-demand AVX-512 support
#if !defined(__APPLE__)
if (((bv >> 5) & 7) == 7)
#endif
{
getCpuidEx(7, 0, data);
if (EBX & (1U << 16)) type_ |= tAVX512F;
if (type_ & tAVX512F) {
@ -434,16 +470,12 @@ public:
if (EDX & (1U << 3)) type_ |= tAVX512_4FMAPS;
if (EDX & (1U << 8)) type_ |= tAVX512_VP2INTERSECT;
}
// EAX=07H, ECX=1
getCpuidEx(7, 1, data);
if (type_ & tAVX512F) {
if (EAX & (1U << 5)) type_ |= tAVX512_BF16;
}
}
}
}
if (maxNum >= 7) {
getCpuidEx(7, 0, data);
const uint32_t maxNumSubLeaves = EAX;
if (type_ & tAVX && (EBX & (1U << 5))) type_ |= tAVX2;
if (EBX & (1U << 3)) type_ |= tBMI1;
if (EBX & (1U << 8)) type_ |= tBMI2;
@ -456,6 +488,16 @@ public:
if (EBX & (1U << 14)) type_ |= tMPX;
if (EBX & (1U << 29)) type_ |= tSHA;
if (ECX & (1U << 0)) type_ |= tPREFETCHWT1;
if (EDX & (1U << 24)) type_ |= tAMX_TILE;
if (EDX & (1U << 25)) type_ |= tAMX_INT8;
if (EDX & (1U << 22)) type_ |= tAMX_BF16;
if (maxNumSubLeaves >= 1) {
getCpuidEx(7, 1, data);
if (EAX & (1U << 4)) type_ |= tAVX_VNNI;
if (type_ & tAVX512F) {
if (EAX & (1U << 5)) type_ |= tAVX512_BF16;
}
}
}
setFamily();
setNumCores();
@ -463,9 +505,11 @@ public:
}
void putFamily() const
{
#ifndef XBYAK_ONLY_CLASS_CPU
printf("family=%d, model=%X, stepping=%d, extFamily=%d, extModel=%X\n",
family, model, stepping, extFamily, extModel);
printf("display:family=%X, model=%X\n", displayFamily, displayModel);
#endif
}
bool has(Type type) const
{
@ -473,9 +517,10 @@ public:
}
};
#ifndef XBYAK_ONLY_CLASS_CPU
class Clock {
public:
static inline uint64 getRdtsc()
static inline uint64_t getRdtsc()
{
#ifdef XBYAK_INTEL_CPU_SPECIFIC
#ifdef _MSC_VER
@ -483,7 +528,7 @@ public:
#else
unsigned int eax, edx;
__asm__ volatile("rdtsc" : "=a"(eax), "=d"(edx));
return ((uint64)edx << 32) | eax;
return ((uint64_t)edx << 32) | eax;
#endif
#else
// TODO: Need another impl of Clock or rdtsc-equivalent for non-x86 cpu
@ -505,10 +550,10 @@ public:
count_++;
}
int getCount() const { return count_; }
uint64 getClock() const { return clock_; }
uint64_t getClock() const { return clock_; }
void clear() { count_ = 0; clock_ = 0; }
private:
uint64 clock_;
uint64_t clock_;
int count_;
};
@ -558,7 +603,7 @@ public:
{
if (n_ == maxTblNum) {
fprintf(stderr, "ERR Pack::can't append\n");
throw Error(ERR_BAD_PARAMETER);
XBYAK_THROW_RET(ERR_BAD_PARAMETER, *this)
}
tbl_[n_++] = &t;
return *this;
@ -567,7 +612,7 @@ public:
{
if (n > maxTblNum) {
fprintf(stderr, "ERR Pack::init bad n=%d\n", (int)n);
throw Error(ERR_BAD_PARAMETER);
XBYAK_THROW(ERR_BAD_PARAMETER)
}
n_ = n;
for (size_t i = 0; i < n; i++) {
@ -578,7 +623,7 @@ public:
{
if (n >= n_) {
fprintf(stderr, "ERR Pack bad n=%d(%d)\n", (int)n, (int)n_);
throw Error(ERR_BAD_PARAMETER);
XBYAK_THROW_RET(ERR_BAD_PARAMETER, rax)
}
return *tbl_[n];
}
@ -591,7 +636,7 @@ public:
if (num == size_t(-1)) num = n_ - pos;
if (pos + num > n_) {
fprintf(stderr, "ERR Pack::sub bad pos=%d, num=%d\n", (int)pos, (int)num);
throw Error(ERR_BAD_PARAMETER);
XBYAK_THROW_RET(ERR_BAD_PARAMETER, Pack())
}
Pack pack;
pack.n_ = num;
@ -666,9 +711,9 @@ public:
, t(t_)
{
using namespace Xbyak;
if (pNum < 0 || pNum > 4) throw Error(ERR_BAD_PNUM);
if (pNum < 0 || pNum > 4) XBYAK_THROW(ERR_BAD_PNUM)
const int allRegNum = pNum + tNum_ + (useRcx_ ? 1 : 0) + (useRdx_ ? 1 : 0);
if (tNum_ < 0 || allRegNum > maxRegNum) throw Error(ERR_BAD_TNUM);
if (tNum_ < 0 || allRegNum > maxRegNum) XBYAK_THROW(ERR_BAD_TNUM)
const Reg64& _rsp = code->rsp;
saveNum_ = (std::max)(0, allRegNum - noSaveNum);
const int *tbl = getOrderTbl() + noSaveNum;
@ -874,6 +919,8 @@ public:
startAddr_ = endAddr;
}
};
#endif // XBYAK_ONLY_CLASS_CPU
} } // end of util
#endif

View File

@ -4,7 +4,6 @@
//#define CANONICAL_TEST
#define XBYAK_NO_OP_NAMES
#include <xbyak/xbyak.h>
#include <xbyak/xbyak_util.h>
using namespace Xbyak::util;
@ -38,7 +37,6 @@ static void (*mainloop)();
static void (*handleException)();
u32 mem_writes, mem_reads;
u32 mem_rewrites_w, mem_rewrites_r;
static u64 jmp_rsp;

View File

@ -16,9 +16,7 @@
You should have received a copy of the GNU General Public License
along with reicast. If not, see <https://www.gnu.org/licenses/>.
*/
#ifndef CORE_REC_X64_X64_REGALLOC_H_
#define CORE_REC_X64_X64_REGALLOC_H_
#pragma once
//#define OLD_REGALLOC
@ -91,5 +89,3 @@ struct X64RegAlloc : RegAlloc<Xbyak::Operand::Code, s8>
BlockCompiler *compiler;
};
#endif /* CORE_REC_X64_X64_REGALLOC_H_ */

View File

@ -67,9 +67,9 @@ void X86RegAlloc::Writeback_FPU(u32 reg, s8 nreg)
struct DynaRBI : RuntimeBlockInfo
{
virtual u32 Relink() override;
u32 Relink() override;
virtual void Relocate(void* dst) override {
void Relocate(void* dst) override {
verify(false);
}
};

View File

@ -16,10 +16,8 @@
You should have received a copy of the GNU General Public License
along with Flycast. If not, see <https://www.gnu.org/licenses/>.
*/
#include "build.h"
#pragma once
#define XBYAK32
#define XBYAK_NO_OP_NAMES
#include <xbyak/xbyak.h>
#include <xbyak/xbyak_util.h>

View File

@ -32,10 +32,10 @@ struct X86RegAlloc : RegAlloc<Xbyak::Operand::Code, s8>
{
X86RegAlloc(X86Compiler *compiler) : compiler(compiler) {}
virtual void Preload(u32 reg, Xbyak::Operand::Code nreg) override;
virtual void Writeback(u32 reg, Xbyak::Operand::Code nreg) override;
virtual void Preload_FPU(u32 reg, s8 nreg) override;
virtual void Writeback_FPU(u32 reg, s8 nreg) override;
void Preload(u32 reg, Xbyak::Operand::Code nreg) override;
void Writeback(u32 reg, Xbyak::Operand::Code nreg) override;
void Preload_FPU(u32 reg, s8 nreg) override;
void Writeback_FPU(u32 reg, s8 nreg) override;
void doAlloc(RuntimeBlockInfo* block);