deps: Update xbyak to version 5.891

This commit is contained in:
scribam 2020-03-22 10:08:05 +01:00
parent 484a7cdd10
commit fef96c125a
79 changed files with 15766 additions and 277 deletions

View File

@ -139,7 +139,7 @@ endif
RZDCY_CFLAGS += -I$(RZDCY_SRC_DIR) -I$(RZDCY_SRC_DIR)/rend/gles -I$(RZDCY_SRC_DIR)/deps \
-I$(RZDCY_SRC_DIR)/deps/vixl -I$(RZDCY_SRC_DIR)/khronos -I$(RZDCY_SRC_DIR)/deps/glslang \
-I$(RZDCY_SRC_DIR)/deps/glm
-I$(RZDCY_SRC_DIR)/deps/glm -I$(RZDCY_SRC_DIR)/deps/xbyak
ifdef USE_MODEM
RZDCY_CFLAGS += -DENABLE_MODEM -I$(RZDCY_SRC_DIR)/deps/picotcp/include -I$(RZDCY_SRC_DIR)/deps/picotcp/modules

View File

@ -0,0 +1,12 @@
# Travis CI configuration: runs `make test` once per listed compiler.
sudo: true
dist: bionic
language: cpp
compiler:
  - gcc
  - clang
addons:
  apt:
    packages:
      # One package per list entry, so each name is a separate YAML item
      # instead of a single space-separated string.
      - nasm
      - yasm
      - g++-multilib
      - tcsh
script:
  - make test

View File

@ -0,0 +1,6 @@
# Install-only project for the header-only xbyak library: nothing is compiled;
# the install step copies the headers under xbyak/ to <prefix>/include/xbyak.
#
# The <min>...<max> form keeps 2.6 as the minimum required version while
# letting newer CMake releases apply policies up to 3.16, so the file is not
# rejected by releases that dropped compatibility with pre-3.5 policy versions.
cmake_minimum_required(VERSION 2.6...3.16)
project(xbyak)
# The header list is collected at configure time; re-run CMake after adding
# or removing a header so the install list is refreshed.
file(GLOB headers "${CMAKE_CURRENT_SOURCE_DIR}/xbyak/*.h")
install(FILES ${headers} DESTINATION include/xbyak)

View File

@ -1,47 +1,47 @@
Copyright (c) 2007 MITSUNARI Shigeo
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
Neither the name of the copyright owner nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
ソースコード形式かバイナリ形式か、変更するかしないかを問わず、以下の条件を満た
す場合に限り、再頒布および使用が許可されます。
ソースコードを再頒布する場合、上記の著作権表示、本条件一覧、および下記免責条項
を含めること。
バイナリ形式で再頒布する場合、頒布物に付属のドキュメント等の資料に、上記の著作
権表示、本条件一覧、および下記免責条項を含めること。
書面による特別の許可なしに、本ソフトウェアから派生した製品の宣伝または販売促進
に、著作権者の名前またはコントリビューターの名前を使用してはならない。
本ソフトウェアは、著作権者およびコントリビューターによって「現状のまま」提供さ
れており、明示黙示を問わず、商業的な使用可能性、および特定の目的に対する適合性
に関する暗黙の保証も含め、またそれに限定されない、いかなる保証もありません。
著作権者もコントリビューターも、事由のいかんを問わず、 損害発生の原因いかんを
問わず、かつ責任の根拠が契約であるか厳格責任であるか(過失その他の)不法行為で
あるかを問わず、仮にそのような損害が発生する可能性を知らされていたとしても、
本ソフトウェアの使用によって発生した(代替品または代用サービスの調達、使用の
喪失、データの喪失、利益の喪失、業務の中断も含め、またそれに限定されない)直接
損害、間接損害、偶発的な損害、特別損害、懲罰的損害、または結果損害について、
一切責任を負わないものとします。
Copyright (c) 2007 MITSUNARI Shigeo
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
Neither the name of the copyright owner nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
ソースコード形式かバイナリ形式か、変更するかしないかを問わず、以下の条件を満た
す場合に限り、再頒布および使用が許可されます。
ソースコードを再頒布する場合、上記の著作権表示、本条件一覧、および下記免責条項
を含めること。
バイナリ形式で再頒布する場合、頒布物に付属のドキュメント等の資料に、上記の著作
権表示、本条件一覧、および下記免責条項を含めること。
書面による特別の許可なしに、本ソフトウェアから派生した製品の宣伝または販売促進
に、著作権者の名前またはコントリビューターの名前を使用してはならない。
本ソフトウェアは、著作権者およびコントリビューターによって「現状のまま」提供さ
れており、明示黙示を問わず、商業的な使用可能性、および特定の目的に対する適合性
に関する暗黙の保証も含め、またそれに限定されない、いかなる保証もありません。
著作権者もコントリビューターも、事由のいかんを問わず、 損害発生の原因いかんを
問わず、かつ責任の根拠が契約であるか厳格責任であるか(過失その他の)不法行為で
あるかを問わず、仮にそのような損害が発生する可能性を知らされていたとしても、
本ソフトウェアの使用によって発生した(代替品または代用サービスの調達、使用の
喪失、データの喪失、利益の喪失、業務の中断も含め、またそれに限定されない)直接
損害、間接損害、偶発的な損害、特別損害、懲罰的損害、または結果損害について、
一切責任を負わないものとします。

24
core/deps/xbyak/Makefile Normal file
View File

@ -0,0 +1,24 @@
# Installation prefix; override on the command line, e.g. `make install PREFIX=/opt/xbyak`.
PREFIX=/usr/local
INSTALL_DIR=$(PREFIX)/include/xbyak

# Default target: delegates to the Makefile in sample/.
all:
	$(MAKE) -C sample

clean:
	$(MAKE) -C sample clean

# Copy the headers under xbyak/ into $(INSTALL_DIR), creating it if needed.
install:
	mkdir -p $(INSTALL_DIR)
	cp -pR xbyak/*.h $(INSTALL_DIR)

# `rm -i` asks for confirmation before each header is removed; rmdir then
# removes the directory only if it is empty.
uninstall:
	rm -i $(INSTALL_DIR)/*.h
	rmdir $(INSTALL_DIR)

# Delegates to the Makefile in gen/.
update:
	$(MAKE) -C gen

# Delegates to the `test` target of the Makefile in test/.
test:
	$(MAKE) -C test test

# None of these targets creates a file named after itself. Declaring them all
# phony keeps a same-named file or directory (test/ is one) from making a
# target look up to date.
.PHONY: all clean install uninstall test update

577
core/deps/xbyak/readme.md Normal file
View File

@ -0,0 +1,577 @@
[![Build Status](https://travis-ci.org/herumi/xbyak.png)](https://travis-ci.org/herumi/xbyak)
# Xbyak 5.891 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
## Abstract
Xbyak is a C++ header-only library that dynamically assembles x86 (IA32) and x64 (AMD64, x86-64) mnemonics at run time.
## Feature
* header file only
* Intel/MASM like syntax
* fully support AVX-512
**Note**:
Use `and_()`, `or_()`, ... instead of `and()`, `or()`.
If you want to use them, then specify `-fno-operator-names` option to gcc/clang.
### Supported OS
* Windows Xp, Vista, Windows 7, Windows 10(32bit, 64bit)
* Linux(32bit, 64bit)
* Intel macOS
### Supported Compilers
Almost all C++03 or later compilers for x86/x64, such as Visual Studio, g++, clang++, the Intel C++ compiler, and g++ on mingw/cygwin.
## Install
The following files are necessary. Please add the path to your compile directory.
* xbyak.h
* xbyak_mnemonic.h
* xbyak_util.h
Linux:
```
make install
```
These files are copied into `/usr/local/include/xbyak`.
## How to use it
Inherit `Xbyak::CodeGenerator` class and make the class method.
```
#include <xbyak/xbyak.h>
struct Code : Xbyak::CodeGenerator {
Code(int x)
{
mov(eax, x);
ret();
}
};
```
Or you can pass the instance of CodeGenerator without inheriting.
```
void genCode(Xbyak::CodeGenerator& code, int x) {
using namespace Xbyak::util;
code.mov(eax, x);
code.ret();
}
```
Make an instance of the class and get the function
pointer by calling `getCode()` and call it.
```
Code c(5);
int (*f)() = c.getCode<int (*)()>();
printf("ret=%d\n", f()); // ret = 5
```
## Syntax
Similar to MASM/NASM syntax with parentheses.
```
NASM Xbyak
mov eax, ebx --> mov(eax, ebx);
inc ecx --> inc(ecx);
ret --> ret();
```
## Addressing
Use `qword`, `dword`, `word` and `byte` if it is necessary to specify the size of memory,
otherwise use `ptr`.
```
(ptr|qword|dword|word|byte) [base + index * (1|2|4|8) + displacement]
[rip + 32bit disp] ; x64 only
NASM Xbyak
mov eax, [ebx+ecx] --> mov(eax, ptr [ebx+ecx]);
mov al, [ebx+ecx] --> mov(al, ptr [ebx + ecx]);
test byte [esp], 4 --> test(byte [esp], 4);
inc qword [rax] --> inc(qword [rax]);
```
**Note**: `qword`, `dword`, ... are member variables, so do not define `dword` as a type name (e.g. as a typedef for unsigned int).
### How to use Selector (Segment Register)
```
mov eax, [fs:eax] --> putSeg(fs);
mov(eax, ptr [eax]);
mov ax, cs --> mov(ax, cs);
```
**Note**: Segment class is not derived from `Operand`.
## AVX
```
vaddps(xmm1, xmm2, xmm3); // xmm1 <- xmm2 + xmm3
vaddps(xmm2, xmm3, ptr [rax]); // use ptr to access memory
vgatherdpd(xmm1, ptr [ebp + 256 + xmm2*4], xmm3);
```
**Note**:
If `XBYAK_ENABLE_OMITTED_OPERAND` is defined, then you can use two operand version for backward compatibility.
But the newer version will not support it.
```
vaddps(xmm2, xmm3); // xmm2 <- xmm2 + xmm3
```
## AVX-512
```
vaddpd zmm2, zmm5, zmm30 --> vaddpd(zmm2, zmm5, zmm30);
vaddpd xmm30, xmm20, [rax] --> vaddpd(xmm30, xmm20, ptr [rax]);
vaddps xmm30, xmm20, [rax] --> vaddps(xmm30, xmm20, ptr [rax]);
vaddpd zmm2{k5}, zmm4, zmm2 --> vaddpd(zmm2 | k5, zmm4, zmm2);
vaddpd zmm2{k5}{z}, zmm4, zmm2 --> vaddpd(zmm2 | k5 | T_z, zmm4, zmm2);
vaddpd zmm2{k5}{z}, zmm4, zmm2,{rd-sae} --> vaddpd(zmm2 | k5 | T_z, zmm4, zmm2 | T_rd_sae);
vaddpd(zmm2 | k5 | T_z | T_rd_sae, zmm4, zmm2); // the position of `|` is arbitrary.
vcmppd k4{k3}, zmm1, zmm2, {sae}, 5 --> vcmppd(k4 | k3, zmm1, zmm2 | T_sae, 5);
vaddpd xmm1, xmm2, [rax+256] --> vaddpd(xmm1, xmm2, ptr [rax+256]);
vaddpd xmm1, xmm2, [rax+256]{1to2} --> vaddpd(xmm1, xmm2, ptr_b [rax+256]);
vaddpd ymm1, ymm2, [rax+256]{1to4} --> vaddpd(ymm1, ymm2, ptr_b [rax+256]);
vaddpd zmm1, zmm2, [rax+256]{1to8} --> vaddpd(zmm1, zmm2, ptr_b [rax+256]);
vaddps zmm1, zmm2, [rax+rcx*8+8]{1to16} --> vaddps(zmm1, zmm2, ptr_b [rax+rcx*8+8]);
vmovsd [rax]{k1}, xmm4 --> vmovsd(ptr [rax] | k1, xmm4);
vcvtpd2dq xmm16, oword [eax+33] --> vcvtpd2dq(xmm16, xword [eax+33]); // use xword for m128 instead of oword
vcvtpd2dq(xmm16, ptr [eax+33]); // default xword
vcvtpd2dq xmm21, [eax+32]{1to2} --> vcvtpd2dq(xmm21, ptr_b [eax+32]);
vcvtpd2dq xmm0, yword [eax+33] --> vcvtpd2dq(xmm0, yword [eax+33]); // use yword for m256
vcvtpd2dq xmm19, [eax+32]{1to4} --> vcvtpd2dq(xmm19, yword_b [eax+32]); // use yword_b to broadcast
vfpclassps k5{k3}, zword [rax+64], 5 --> vfpclassps(k5|k3, zword [rax+64], 5); // specify m512
vfpclasspd k5{k3}, [rax+64]{1to2}, 5 --> vfpclasspd(k5|k3, xword_b [rax+64], 5); // broadcast 64-bit to 128-bit
vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, yword_b [rax+64], 5); // broadcast 64-bit to 256-bit
```
### Remark
* `k1`, ..., `k7` are opmask registers.
* use `| T_z`, `| T_sae`, `| T_rn_sae`, `| T_rd_sae`, `| T_ru_sae`, `| T_rz_sae` instead of `,{z}`, `,{sae}`, `,{rn-sae}`, `,{rd-sae}`, `,{ru-sae}`, `,{rz-sae}` respectively.
* `k4 | k3` is different from `k3 | k4`.
* use `ptr_b` for broadcast `{1toX}`. X is automatically determined.
* specify `xword`/`yword`/`zword(_b)` for m128/m256/m512 if necessary.
## Label
Two kinds of Label are supported. (String literal and Label class).
### String literal
```
L("L1");
jmp("L1");
jmp("L2");
...
a few mnemonics (8-bit displacement jmp)
...
L("L2");
jmp("L3", T_NEAR);
...
a lot of mnemonics (32-bit displacement jmp)
...
L("L3");
```
* Call `hasUndefinedLabel()` to verify your code has no undefined label.
* You can use a label as the immediate value of `mov`, e.g. `mov(eax, "L2")`.
### Support `@@`, `@f`, `@b` like MASM
```
L("@@"); // <A>
jmp("@b"); // jmp to <A>
jmp("@f"); // jmp to <B>
L("@@"); // <B>
jmp("@b"); // jmp to <B>
mov(eax, "@b");
jmp(eax); // jmp to <B>
```
### Local label
Label symbols beginning with a period between `inLocalLabel()` and `outLocalLabel()`
are treated as a local label.
`inLocalLabel()` and `outLocalLabel()` can be nested.
```
void func1()
{
inLocalLabel();
L(".lp"); // <A> ; local label
...
jmp(".lp"); // jmp to <A>
L("aaa"); // global label <C>
outLocalLabel();
inLocalLabel();
L(".lp"); // <B> ; local label
func1();
jmp(".lp"); // jmp to <B>
outLocalLabel();
jmp("aaa"); // jmp to <C>
}
```
### short and long jump
Xbyak deals with jump mnemonics of an undefined label as short jump if no type is specified.
So if the distance between the jmp and the label is larger than 127 bytes, xbyak throws an exception.
```
jmp("short-jmp"); // short jmp
// small code
L("short-jmp");
jmp("long-jmp");
// long code
L("long-jmp"); // throw exception
```
Then specify T_NEAR for jmp.
```
jmp("long-jmp", T_NEAR); // long jmp
// long code
L("long-jmp");
```
Or call `setDefaultJmpNEAR(true);` once, then the default type is set to T_NEAR.
```
jmp("long-jmp"); // long jmp
// long code
L("long-jmp");
```
### Label class
`L()` and `jxx()` support Label class.
```
Xbyak::Label label1, label2;
L(label1);
...
jmp(label1);
...
jmp(label2);
...
L(label2);
```
Use `putL` for jmp table
```
Label labelTbl, L0, L1, L2;
mov(rax, labelTbl);
// rdx is an index of jump table
jmp(ptr [rax + rdx * sizeof(void*)]);
L(labelTbl);
putL(L0);
putL(L1);
putL(L2);
L(L0);
....
L(L1);
....
```
`assignL(dstLabel, srcLabel)` binds dstLabel with srcLabel.
```
Label label2;
Label label1 = L(); // make label1 ; same as Label label1; L(label1);
...
jmp(label2); // label2 is not determined here
...
assignL(label2, label1); // label2 <- label1
```
The `jmp` in the above code jumps to label1 assigned by `assignL`.
**Note**:
* srcLabel must be used in `L()`.
* dstLabel must not be used in `L()`.
`Label::getAddress()` returns the address specified by the label instance and 0 if not specified.
```
// not AutoGrow mode
Label label;
assert(label.getAddress() == 0);
L(label);
assert(label.getAddress() == getCurr());
```
### Rip ; relative addressing
```
Label label;
mov(eax, ptr [rip + label]); // eax = 4
...
L(label);
dd(4);
```
```
int x;
...
mov(eax, ptr[rip + &x]); // throw exception if the difference between &x and current position is larger than 2GiB
```
## Code size
The default max code size is 4096 bytes.
Specify the size in constructor of `CodeGenerator()` if necessary.
```
class Quantize : public Xbyak::CodeGenerator {
public:
Quantize()
: CodeGenerator(8192)
{
}
...
};
```
## User allocated memory
You can generate jit code in memory you have prepared yourself.
Call `setProtectModeRE` yourself to change the memory mode when using such prepared memory.
```
alignas(4096) uint8_t buf[8192]; // C++11 or later
struct Code : Xbyak::CodeGenerator {
Code() : Xbyak::CodeGenerator(sizeof(buf), buf)
{
mov(rax, 123);
ret();
}
};
int main()
{
Code c;
c.setProtectModeRE(); // set memory to Read/Exec
printf("%d\n", c.getCode<int(*)()>()());
}
```
**Note**: See [sample/test0.cpp](sample/test0.cpp).
### AutoGrow
The memory region for jit is automatically extended if necessary when `AutoGrow` is specified in a constructor of `CodeGenerator`.
Call `ready()` or `readyRE()` before calling `getCode()` to fix jump address.
```
struct Code : Xbyak::CodeGenerator {
Code()
: Xbyak::CodeGenerator(<default memory size>, Xbyak::AutoGrow)
{
...
}
};
Code c;
// generate code for jit
c.ready(); // mode = Read/Write/Exec
```
**Note**:
* Don't use the address returned by `getCurr()` before calling `ready()` because it may be an invalid address.
### Read/Exec mode
Xbyak sets the memory to Read/Write/Exec mode to run jit code.
If you want to use Read/Exec mode for security, then specify `DontSetProtectRWE` for `CodeGenerator` and
call `setProtectModeRE()` after generating jit code.
```
struct Code : Xbyak::CodeGenerator {
Code()
: Xbyak::CodeGenerator(4096, Xbyak::DontSetProtectRWE)
{
mov(eax, 123);
ret();
}
};
Code c;
c.setProtectModeRE();
...
```
Call `readyRE()` instead of `ready()` when using `AutoGrow` mode.
See [protect-re.cpp](sample/protect-re.cpp).
## Macro
* **XBYAK32** is defined on 32bit.
* **XBYAK64** is defined on 64bit.
* **XBYAK64_WIN** is defined on 64bit Windows(VC)
* **XBYAK64_GCC** is defined on 64bit gcc, cygwin
* define **XBYAK_USE_OP_NAMES** on gcc with `-fno-operator-names` if you want to use `and()`, ....
* define **XBYAK_ENABLE_OMITTED_OPERAND** if you use omitted destination such as `vaddps(xmm2, xmm3);`(deprecated in the future)
* define **XBYAK_UNDEF_JNL** if Bessel function jnl is defined as macro
## Sample
* [test0.cpp](sample/test0.cpp) ; tiny sample (x86, x64)
* [quantize.cpp](sample/quantize.cpp) ; JIT optimized quantization by fast division (x86 only)
* [calc.cpp](sample/calc.cpp) ; assemble and estimate a given polynomial (x86, x64)
* [bf.cpp](sample/bf.cpp) ; JIT brainfuck (x86, x64)
## License
modified new BSD License
http://opensource.org/licenses/BSD-3-Clause
## History
* 2020/Feb/26 ver 5.891 fix typo of type
* 2020/Jan/03 ver 5.89 fix error of vfpclasspd
* 2019/Dec/20 ver 5.88 fix compile error on Windows
* 2019/Dec/19 ver 5.87 add setDefaultJmpNEAR(), which deals with `jmp` of an undefined label as T_NEAR if no type is specified.
* 2019/Dec/13 ver 5.86 [changed] revert to the behavior before v5.84 if -fno-operator-names is defined (and() is available)
* 2019/Dec/07 ver 5.85 append MAP_JIT flag to mmap for macOS mojave or later
* 2019/Nov/29 ver 5.84 [changed] XBYAK_NO_OP_NAMES is defined unless XBYAK_USE_OP_NAMES is defined
* 2019/Oct/12 ver 5.83 exit(1) was removed
* 2019/Sep/23 ver 5.82 support monitorx, mwaitx, clzero (thanks to @MagurosanTeam)
* 2019/Sep/14 ver 5.81 support some generic mnemonics.
* 2019/Aug/01 ver 5.802 fix detection of AVX512_BF16 (thanks to vpirogov)
* 2019/May/27 support vp2intersectd, vp2intersectq (not tested)
* 2019/May/26 ver 5.80 support vcvtne2ps2bf16, vcvtneps2bf16, vdpbf16ps
* 2019/Apr/27 ver 5.79 vcmppd/vcmpps supports ptr_b(thanks to jkopinsky)
* 2019/Apr/15 ver 5.78 rewrite Reg::changeBit() (thanks to MerryMage)
* 2019/Mar/06 ver 5.77 fix number of cores that share LLC cache by densamoilov
* 2019/Jan/17 ver 5.76 add Cpu::getNumCores() by shelleygoel
* 2018/Oct/31 ver 5.751 recover Xbyak::CastTo for compatibility
* 2018/Oct/29 ver 5.75 unlink LabelManager from Label when msg is destroyed
* 2018/Oct/21 ver 5.74 support RegRip +/- int. Xbyak::CastTo is removed
* 2018/Oct/15 util::AddressFrame uses push/pop instead of mov
* 2018/Sep/19 ver 5.73 fix evex encoding of vpslld, vpslldq, vpsllw, etc for (reg, mem, imm8)
* 2018/Sep/19 ver 5.72 fix the encoding of vinsertps for disp8N(Thanks to petercaday)
* 2018/Sep/04 ver 5.71 L() returns a new label instance
* 2018/Aug/27 ver 5.70 support setProtectMode() and DontUseProtect for read/exec setting
* 2018/Aug/24 ver 5.68 fix wrong VSIB encoding with vector index >= 16(thanks to petercaday)
* 2018/Aug/14 ver 5.67 remove mutable in Address ; fix setCacheHierarchy for cloud vm
* 2018/Jul/26 ver 5.661 support mingw64
* 2018/Jul/24 ver 5.66 add CodeArray::PROTECT_RE to mode of protect()
* 2018/Jun/26 ver 5.65 fix push(qword [mem])
* 2018/Mar/07 ver 5.64 fix zero division in Cpu() on some cpu
* 2018/Feb/14 ver 5.63 fix Cpu::setCacheHierarchy() and fix EvexModifierZero for clang<3.9(thanks to mgouicem)
* 2018/Feb/13 ver 5.62 Cpu::setCacheHierarchy() by mgouicem and rsdubtso
* 2018/Feb/07 ver 5.61 vmov* supports mem{k}{z}(I forgot it)
* 2018/Jan/24 ver 5.601 add xword, yword, etc. into Xbyak::util namespace
* 2018/Jan/05 ver 5.60 support AVX-512 for Ice lake(319433-030.pdf)
* 2017/Aug/22 ver 5.53 fix mpx encoding, add bnd() prefix
* 2017/Aug/18 ver 5.52 fix align (thanks to MerryMage)
* 2017/Aug/17 ver 5.51 add multi-byte nop and align() uses it(thanks to inolen)
* 2017/Aug/08 ver 5.50 add mpx(thanks to magurosan)
* 2017/Aug/08 ver 5.45 add sha(thanks to magurosan)
* 2017/Aug/08 ver 5.44 add prefetchw(thanks to rsdubtso)
* 2017/Jul/12 ver 5.432 reduce warnings of PVS studio
* 2017/Jul/09 ver 5.431 fix hasRex() (no effect) (thanks to drillsar)
* 2017/May/14 ver 5.43 fix CodeGenerator::resetSize() (thanks to gibbed)
* 2017/May/13 ver 5.42 add movs{b,w,d,q}
* 2017/Jan/26 ver 5.41 add prefetchwt1 and support for scale == 0(thanks to rsdubtso)
* 2016/Dec/14 ver 5.40 add Label::getAddress() method to get the pointer specified by the label
* 2016/Dec/09 ver 5.34 fix handling of negative offsets when encoding disp8N(thanks to rsdubtso)
* 2016/Dec/08 ver 5.33 fix encoding of vpbroadcast{b,w,d,q}, vpinsr{b,w}, vpextr{b,w} for disp8N
* 2016/Dec/01 ver 5.32 rename __xgetbv() to _xgetbv() to support clang for Visual Studio(thanks to freiro)
* 2016/Nov/27 ver 5.31 rename AVX512_4VNNI to AVX512_4VNNIW
* 2016/Nov/27 ver 5.30 add AVX512_4VNNI, AVX512_4FMAPS instructions(thanks to rsdubtso)
* 2016/Nov/26 ver 5.20 add detection of AVX512_4VNNI and AVX512_4FMAPS(thanks to rsdubtso)
* 2016/Nov/20 ver 5.11 lost vptest for ymm(thanks to gregory38)
* 2016/Nov/20 ver 5.10 add addressing [rip+&var]
* 2016/Sep/29 ver 5.03 fix detection ERR_INVALID_OPMASK_WITH_MEMORY(thanks to PVS-Studio)
* 2016/Aug/15 ver 5.02 xbyak does not include xbyak_bin2hex.h
* 2016/Aug/15 ver 5.011 fix detection of version of gcc 5.4
* 2016/Aug/03 ver 5.01 disable omitted operand
* 2016/Jun/24 ver 5.00 support avx-512 instruction set
* 2016/Jun/13 avx-512 add mask instructions
* 2016/May/05 ver 4.91 add detection of AVX-512 to Xbyak::util::Cpu
* 2016/Mar/14 ver 4.901 comment to ready() function(thanks to skmp)
* 2016/Feb/04 ver 4.90 add jcc(const void *addr);
* 2016/Jan/30 ver 4.89 vpblendvb supports ymm reg(thanks to John Funnell)
* 2016/Jan/24 ver 4.88 lea, cmov supports 16-bit register(thanks to whyisthisfieldhere)
* 2015/Oct/05 ver 4.87 support segment selectors
* 2015/Aug/18 ver 4.86 fix [rip + label] addressing with immediate value(thanks to whyisthisfieldhere)
* 2015/Aug/10 ver 4.85 Address::operator==() is not correct(thanks to inolen)
* 2015/Jun/22 ver 4.84 call() support variadic template if available(thanks to randomstuff)
* 2015/Jun/16 ver 4.83 support movbe(thanks to benvanik)
* 2015/May/24 ver 4.82 support detection of F16C
* 2015/Apr/25 ver 4.81 fix the condition to throw exception for setSize(thanks to whyisthisfieldhere)
* 2015/Apr/22 ver 4.80 rip supports label(thanks to whyisthisfieldhere)
* 2015/Jan/28 ver 4.71 support adcx, adox, cmpxchg, rdseed, stac
* 2014/Oct/14 ver 4.70 support MmapAllocator
* 2014/Jun/13 ver 4.62 disable warning of VC2014
* 2014/May/30 ver 4.61 support bt, bts, btr, btc
* 2014/May/28 ver 4.60 support vcvtph2ps, vcvtps2ph
* 2014/Apr/11 ver 4.52 add detection of rdrand
* 2014/Mar/25 ver 4.51 remove state information of unreferenced labels
* 2014/Mar/16 ver 4.50 support new Label
* 2014/Mar/05 ver 4.40 fix wrong detection of BMI/enhanced rep on VirtualBox
* 2013/Dec/03 ver 4.30 support Reg::cvt8(), cvt16(), cvt32(), cvt64()
* 2013/Oct/16 ver 4.21 label support std::string
* 2013/Jul/30 ver 4.20 [break backward compatibility] split Reg32e class into RegExp(base+index*scale+disp) and Reg32e(means Reg32 or Reg64)
* 2013/Jul/04 ver 4.10 [break backward compatibility] change the type of Xbyak::Error from enum to a class
* 2013/Jun/21 ver 4.02 add putL(LABEL) function to put the address of the label
* 2013/Jun/21 ver 4.01 vpsllw, vpslld, vpsllq, vpsraw, vpsrad, vpsrlw, vpsrld, vpsrlq support (ymm, ymm, xmm). support vpbroadcastb, vpbroadcastw, vpbroadcastd, vpbroadcastq(thanks to Gabest).
* 2013/May/30 ver 4.00 support AVX2, VEX-encoded GPR-instructions
* 2013/Mar/27 ver 3.80 support mov(reg, "label");
* 2013/Mar/13 ver 3.76 add cqo(), jcxz(), jecxz(), jrcxz()
* 2013/Jan/15 ver 3.75 add setSize() to modify generated code
* 2013/Jan/12 ver 3.74 add CodeGenerator::reset() ; add Allocator::useProtect()
* 2013/Jan/06 ver 3.73 use unordered_map if possible
* 2012/Dec/04 ver 3.72 eax, ebx, ... are member variables of CodeGenerator(revert), Xbyak::util::eax, ... are static const.
* 2012/Nov/17 ver 3.71 and_(), or_(), xor_(), not_() are available if XBYAK_NO_OP_NAMES is not defined.
* 2012/Nov/17 change eax, ebx, ptr and so on in CodeGenerator as static member and alias of them are defined in Xbyak::util.
* 2012/Nov/09 ver 3.70 XBYAK_NO_OP_NAMES macro is added to use and_() instead of and() (thanks to Mattias)
* 2012/Nov/01 ver 3.62 add fwait/fnwait/finit/fninit
* 2012/Nov/01 ver 3.61 add fldcw/fstcw
* 2012/May/03 ver 3.60 change interface of Allocator
* 2012/Mar/23 ver 3.51 fix userPtr mode
* 2012/Mar/19 ver 3.50 support AutoGrow mode
* 2011/Nov/09 ver 3.05 fix bit property of rip addressing / support movsxd
* 2011/Aug/15 ver 3.04 fix dealing with imm8 such as add(dword [ebp-8], 0xda); (thanks to lolcat)
* 2011/Jun/16 ver 3.03 fix __GNUC_PREREQ macro for Mac gcc(thanks to t_teruya)
* 2011/Apr/28 ver 3.02 do not use xgetbv on Mac gcc
* 2011/May/24 ver 3.01 fix typo of OSXSAVE
* 2011/May/23 ver 3.00 add vcmpeqps and so on
* 2011/Feb/16 ver 2.994 beta add vmovq for 32-bit mode(I forgot it)
* 2011/Feb/16 ver 2.993 beta remove cvtReg to avoid thread unsafe
* 2011/Feb/10 ver 2.992 beta support one argument syntax for fadd like nasm
* 2011/Feb/07 ver 2.991 beta fix pextrw reg, xmm, imm(Thanks to Gabest)
* 2011/Feb/04 ver 2.99 beta support AVX
* 2010/Dec/08 ver 2.31 fix ptr [rip + 32bit offset], support rdtscp
* 2010/Oct/19 ver 2.30 support pclmulqdq, aesdec, aesdeclast, aesenc, aesenclast, aesimc, aeskeygenassist
* 2010/Jun/07 ver 2.29 fix call(<label>)
* 2010/Jun/17 ver 2.28 move some member functions to public
* 2010/Jun/01 ver 2.27 support encoding of mov(reg64, imm) like yasm(not nasm)
* 2010/May/24 ver 2.26 fix sub(rsp, 1000)
* 2010/Apr/26 ver 2.25 add jc/jnc(I forgot to implement them...)
* 2010/Apr/16 ver 2.24 change the prototype of rewrite() method
* 2010/Apr/15 ver 2.23 fix align() and xbyak_util.h for Mac
* 2010/Feb/16 ver 2.22 fix inLocalLabel()/outLocalLabel()
* 2009/Dec/09 ver 2.21 support cygwin(gcc 4.3.2)
* 2009/Nov/28 support a part of FPU
* 2009/Jun/25 fix mov(qword[rax], imm); (thanks to Martin)
* 2009/Mar/10 fix redundant REX.W prefix on jmp/call reg64
* 2009/Feb/24 add movq reg64, mmx/xmm; movq mmx/xmm, reg64
* 2009/Feb/13 movd(xmm7, dword[eax]) drops 0x66 prefix (thanks to Gabest)
* 2008/Dec/30 fix call in short relative address(thanks to kato san)
* 2008/Sep/18 support @@, @f, @b and localization of label(thanks to nobu-q san)
* 2008/Sep/18 support (ptr[rip + 32bit offset]) (thanks to Dango-Chu san)
* 2008/Jun/03 fix align(). mov(ptr[eax],1) throws ERR_MEM_SIZE_IS_NOT_SPECIFIED.
* 2008/Jun/02 support memory interface allocated by user
* 2008/May/26 fix protect() to avoid invalid setting(thanks to shinichiro_h san)
* 2008/Apr/30 add cmpxchg16b, cdqe
* 2008/Apr/29 support x64
* 2008/Apr/14 code refactoring
* 2008/Mar/12 add bsr/bsf
* 2008/Feb/14 fix output of sub eax, 1234 (thanks to Robert)
* 2007/Nov/5 support lock, xadd, xchg
* 2007/Nov/2 support SSSE3/SSE4 (thanks to Dango-Chu san)
* 2007/Feb/4 fix the bug that exception doesn't occur under the condition which the offset of jmp mnemonic without T_NEAR is over 127.
* 2007/Jan/21 fix the bug to create address like [disp] select smaller representation for mov (eax|ax|al, [disp])
* 2007/Jan/4 first version
## Author
MITSUNARI Shigeo(herumi@nifty.com)

534
core/deps/xbyak/readme.txt Normal file
View File

@ -0,0 +1,534 @@
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.891
-----------------------------------------------------------------------------
◎概要
これはx86, x64(AMD64, x86-64)のマシン語命令を生成するC++のクラスライブラリです。
プログラム実行時に動的にアセンブルすることが可能です。
-----------------------------------------------------------------------------
◎特徴
・ヘッダファイルオンリー
xbyak.hをインクルードするだけですぐ利用することができます。
C++の枠組み内で閉じているため、外部アセンブラは不要です。
32bit/64bit両対応です。
対応ニーモニック:特権命令除くx86, MMX/MMX2/SSE/SSE2/SSE3/SSSE3/SSE4/FPU(一部)/AVX/AVX2/FMA/VEX-encoded GPR
・Windows Xp(32bit, 64bit), Windows 7/Linux(32bit, 64bit)/Intel Mac対応
Windows Xp, Windows 7上ではVC2008, VC2010, VC2012
Linux (kernel 3.8)上ではgcc 4.7.3, clang 3.3
Intel Mac
などで動作確認をしています。
※ and, orなどの代わりにand_, or_を使用してください。
and, orなどを使いたい場合は-fno-operator-namesをgcc/clangに指定してください。
-----------------------------------------------------------------------------
◎準備
xbyak.h
xbyak_bin2hex.h
xbyak_mnemonic.h
これらを同一のパスに入れてインクルードパスに追加してください。
Linuxではmake installで/usr/local/include/xbyakにコピーされます。
-----------------------------------------------------------------------------
◎下位互換性の破れ
* Xbyak::Errorの型をenumからclassに変更
** 従来のenumの値をとるにはintにキャストしてください。
* (古い)Reg32eクラスを(新しい)Reg32eとRegExpに分ける。
** (新しい)Reg32eはReg32かReg64
** (新しい)RegExpは'Reg32e + (Reg32e|Xmm|Ymm) * scale + disp'の型
-----------------------------------------------------------------------------
◎新機能
MmapAllocator追加
これはUnix系OSでのみの仕様です。XBYAK_USE_MMAP_ALLOCATORを使うと利用できます。
デフォルトのAllocatorはメモリ確保時にposix_memalignを使います。
この領域に対するmprotectはmap countを減らします。
map countの最大値は/proc/sys/vm/max_map_countに書かれています。
デフォルトでは3万個ほどのXbyak::CodeGeneratorインスタンスを生成するとエラーになります。
test/mprotect_test.cppで確認できます。
これを避けるためにはmmapを使うMmapAllocatorを使ってください。
将来この挙動がデフォルトになるかもしれません。
AutoGrowモード追加
これはメモリ伸長を動的に行うモードです。
今まではXbyak::CodeGenerator()に渡したメモリサイズを超えると例外が発生して
いましたが、このモードでは内部でメモリを再確保して伸長します。
ただし、getCode()を呼び出す前にジャンプ命令のアドレス解決をするためにready()
関数を呼ぶ必要があります。
次のように使います。
struct Code : Xbyak::CodeGenerator {
Code()
: Xbyak::CodeGenerator(<default memory size>, Xbyak::AutoGrow)
{
...
}
};
Code c;
c.ready(); // この呼び出しを忘れてはいけない
注意1. ready()を呼んで確定するまではgetCurr()で得たポインタは無効化されている
可能性があります。getSize()でoffsetを保持しておきready()のあとにgetCode()を
呼び出してからgetCode() + offsetで新しいポインタを取得してください。
注意2. AutoGrowモードでは64bitモードの相対アドレッシング[rip]は非サポートです。
-----------------------------------------------------------------------------
◎文法
Xbyak::CodeGeneratorクラスを継承し、そのクラスメソッド内でx86, x64アセンブラを
記述します。そのメソッドを呼び出した後、getCode()メソッドを呼び出し、その戻
り値を自分が使いたい関数ポインタに変換して利用します。アセンブルエラーは例外
により通知されます(cf. main.cpp)。
・基本的にnasmの命令で括弧をつければよいです。
mov eax, ebx --> mov(eax, ebx);
inc ecx --> inc(ecx);
ret --> ret();
・アドレッシング
(ptr|dword|word|byte) [base + index * (1|2|4|8) + displacement]
[rip + 32bit disp] ; x64 only
という形で指定します。サイズを指定する必要がない限りptrを使えばよいです。
セレクター(セグメントレジスタ)をサポートしました。
(注意)セグメントレジスタはOperandを継承していません。
mov eax, [fs:eax] --> putSeg(fs); mov(eax, ptr [eax]);
mov ax, cs --> mov(ax, cs);
mov eax, [ebx+ecx] --> mov (eax, ptr[ebx+ecx]);
test byte [esp], 4 --> test (byte [esp], 4);
(注意) dword, word, byteはメンバ変数です。従ってたとえばunsigned intの
つもりでdwordをtypedefしないでください。
・AVX
FMAについては簡略表記を導入するか検討中です(アイデア募集中)。
vaddps(xmm1, xmm2, xmm3); // xmm1 <- xmm2 + xmm3
vaddps(xmm2, xmm3, ptr [rax]); // メモリアクセスはptrで
vfmadd231pd(xmm1, xmm2, xmm3); // xmm1 <- (xmm2 * xmm3) + xmm1
*注意*
デスティネーションの省略形はサポートされなくなりました。
vaddps(xmm2, xmm3); // xmm2 <- xmm2 + xmm3
XBYAK_ENABLE_OMITTED_OPERANDを定義すると使えますが、将来はそれも非サポートになるでしょう。
・AVX-512
vaddpd zmm2, zmm5, zmm30 --> vaddpd(zmm2, zmm5, zmm30);
vaddpd xmm30, xmm20, [rax] --> vaddpd(xmm30, xmm20, ptr [rax]);
vaddps xmm30, xmm20, [rax] --> vaddps(xmm30, xmm20, ptr [rax]);
vaddpd zmm2{k5}, zmm4, zmm2 --> vaddpd(zmm2 | k5, zmm4, zmm2);
vaddpd zmm2{k5}{z}, zmm4, zmm2 --> vaddpd(zmm2 | k5 | T_z, zmm4, zmm2);
vaddpd zmm2{k5}{z}, zmm4, zmm2,{rd-sae} --> vaddpd(zmm2 | k5 | T_z, zmm4, zmm2 | T_rd_sae);
vaddpd(zmm2 | k5 | T_z | T_rd_sae, zmm4, zmm2); // the position of `|` is arbitrary.
vcmppd k4{k3}, zmm1, zmm2, {sae}, 5 --> vcmppd(k4 | k3, zmm1, zmm2 | T_sae, 5);
vaddpd xmm1, xmm2, [rax+256]{1to2} --> vaddpd(xmm1, xmm2, ptr_b [rax+256]);
vaddpd ymm1, ymm2, [rax+256]{1to4} --> vaddpd(ymm1, ymm2, ptr_b [rax+256]);
vaddpd zmm1, zmm2, [rax+256]{1to8} --> vaddpd(zmm1, zmm2, ptr_b [rax+256]);
vaddps zmm1, zmm2, [rax+rcx*8+8]{1to16} --> vaddps(zmm1, zmm2, ptr_b [rax+rcx*8+8]);
vmovsd [rax]{k1}, xmm4 --> vmovsd(ptr [rax] | k1, xmm4);
vcvtpd2dq xmm16, oword [eax+33] --> vcvtpd2dq(xmm16, xword [eax+33]); // use xword for m128 instead of oword
vcvtpd2dq(xmm16, ptr [eax+33]); // default xword
vcvtpd2dq xmm21, [eax+32]{1to2} --> vcvtpd2dq(xmm21, ptr_b [eax+32]);
vcvtpd2dq xmm0, yword [eax+33] --> vcvtpd2dq(xmm0, yword [eax+33]); // use yword for m256
vcvtpd2dq xmm19, [eax+32]{1to4} --> vcvtpd2dq(xmm19, yword_b [eax+32]); // use yword_b to broadcast
vfpclassps k5{k3}, zword [rax+64], 5 --> vfpclassps(k5|k3, zword [rax+64], 5); // specify m512
vfpclasspd k5{k3}, [rax+64]{1to2}, 5 --> vfpclasspd(k5|k3, xword_b [rax+64], 5); // broadcast 64-bit to 128-bit
vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, xword_b [rax+64], 5); // broadcast 64-bit to 256-bit
注意
* k1, ..., k7 は新しいopmaskレジスタです。
* z, sae, rn-sae, rd-sae, ru-sae, rz-saeの代わりにT_z, T_sae, T_rn_sae, T_rd_sae, T_ru_sae, T_rz_saeを使ってください。
* `k4 | k3`と`k3 | k4`は意味が異なります。
* {1toX}の代わりにptr_bを使ってください。Xは自動的に決まります。
* 一部の命令はメモリサイズを指定するためにxword/yword/zword(_b)を使ってください。
・ラベル
L(文字列);
で定義します。ジャンプするときはその文字列を指定します。後方参照も可能ですが、
相対アドレスが8ビットに収まらない場合はT_NEARをつけないと実行時に例外が発生
します。
mov(eax, "L2");の様にラベルが表すアドレスをmovの即値として使えます。
・hasUndefinedLabel()を呼び出して真ならジャンプ先が存在しないことを示します。
コードを見直してください。
L("L1");
jmp ("L1");
jmp ("L2");
...
少しの命令の場合。
...
L("L2");
jmp ("L3", T_NEAR);
...
沢山の命令がある場合
...
L("L3");
<応用編>
1. MASMライクな@@, @f, @bをサポート
L("@@"); // <A>
jmp("@b"); // jmp to <A>
jmp("@f"); // jmp to <B>
L("@@"); // <B>
jmp("@b"); // jmp to <B>
mov(eax, "@b");
jmp(eax); // jmp to <B>
2. ラベルの局所化
ピリオドで始まるラベルをinLocalLabel(), outLocalLabel()で挟むことで局所化できます。
inLocalLabel(), outLocalLabel()は入れ子にすることができます。
void func1()
{
inLocalLabel();
L(".lp"); // <A> ; ローカルラベル
...
jmp(".lp"); // jmp to <A>
L("aaa"); // グローバルラベル
outLocalLabel();
}
void func2()
{
inLocalLabel();
L(".lp"); // <B> ; ローカルラベル
func1();
jmp(".lp"); // jmp to <B>
outLocalLabel();
}
上記サンプルではinLocalLabel(), outLocalLabel()が無いと、
".lp"ラベルの二重定義エラーになります。
3. 新しいLabelクラスによるジャンプ命令
ジャンプ先を文字列による指定だけでなくラベルクラスを使えるようになりました。
Label label1, label2;
L(label1);
...
jmp(label1);
...
jmp(label2);
...
L(label2);
更にラベルの割り当てを行うassignL(dstLabel, srcLabel)という命令も追加されました。
Label label2;
Label label1 = L(); // Label label1; L(label1);と同じ意味
...
jmp(label2);
...
assignL(label2, label1);
上記jmp命令はlabel1にジャンプします。
制限
* srcLabelはL()により飛び先が確定していないといけません。
* dstLabelはL()により飛び先が確定していてはいけません。
ラベルは`getAddress()`によりそのアドレスを取得できます。
未定義のときは0が返ります。
```
// not AutoGrow mode
Label label;
assert(label.getAddress() == 0);
L(label);
assert(label.getAddress() == getCurr());
```
・Xbyak::CodeGenerator()コンストラクタインタフェース
@param maxSize [in] コード生成最大サイズ(デフォルト4096byte)
@param userPtr [in] ユーザ指定メモリ
CodeGenerator(size_t maxSize = DEFAULT_MAX_CODE_SIZE, void *userPtr = 0);
デフォルトコードサイズは4096(=DEFAULT_MAX_CODE_SIZE)バイトです。
それより大きなコードを生成する場合はCodeGenerator()のコンストラクタに指定してください。
class Quantize : public Xbyak::CodeGenerator {
public:
Quantize()
: CodeGenerator(8192)
{
}
...
};
またユーザ指定メモリをコード生成最大サイズと共に指定すると、CodeGeneratorは
指定されたメモリ上にバイト列を生成します。
補助関数として指定されたアドレスの実行属性を変更するCodeArray::protect()と
与えられたポインタからアライメントされたポインタを取得するCodeArray::getAlignedAddress()
も用意しました。詳細はsample/test0.cppのuse memory allocated by userを参考に
してください。
/**
change exec permission of memory
@param addr [in] buffer address
@param size [in] buffer size
@param canExec [in] true(enable to exec), false(disable to exec)
@return true(success), false(failure)
*/
bool CodeArray::protect(const void *addr, size_t size, bool canExec);
/**
get aligned memory pointer
*/
uint8 *CodeArray::getAlignedAddress(uint8 *addr, size_t alignedSize = ALIGN_SIZE);
・read/execモード
デフォルトのCodeGeneratorはコンストラクト時にJIT用の領域をread/write/execモードに設定して利用します。
コード生成時はread/writeでコード実行時にはread/execにしたい場合、次のようにしてください。
struct Code : Xbyak::CodeGenerator {
Code()
: Xbyak::CodeGenerator(4096, Xbyak::DontUseProtect) // JIT領域をread/writeのままコード生成
{
mov(eax, 123);
ret();
}
};
Code c;
c.setProtectModeRE(); // read/execモードに変更
// JIT領域を実行
AutoGrowの場合はready()の代わりにreadyRE()を呼んでください。
struct Code : Xbyak::CodeGenerator {
Code()
: Xbyak::CodeGenerator(4096, Xbyak::AutoGrow) // JIT領域をread/writeのままコード生成
{
mov(eax, 123);
ret();
}
};
Code c;
c.readyRE(); // read/execモードに変更
// JIT領域を実行
setProtectModeRW()を呼ぶと領域が元のread/writeモードに戻ります。
その他詳細は各種サンプルを参照してください。
-----------------------------------------------------------------------------
◎マクロ
32bit環境上でコンパイルするとXBYAK32が、64bit環境上でコンパイルするとXBYAK64が
定義されます。さらに64bit環境上ではWindows(VC)ならXBYAK64_WIN、cygwin, gcc上では
XBYAK64_GCCが定義されます。
-----------------------------------------------------------------------------
◎使用例
test0.cpp ; 簡単な例(x86, x64)
quantize.cpp ; 割り算のJITアセンブルによる量子化の高速化(x86)
calc.cpp ; 与えられた多項式をアセンブルして実行(x86, x64)
boost(http://www.boost.org/)が必要
bf.cpp ; JIT Brainfuck(x86, x64)
-----------------------------------------------------------------------------
◎ライセンス
修正された新しいBSDライセンスに従います。
http://opensource.org/licenses/BSD-3-Clause
sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から
いただきました。
-----------------------------------------------------------------------------
◎履歴
2020/02/26 ver 5.891 zm0のtype修正
2020/01/03 ver 5.89 vfpclasspdの処理エラー修正
2019/12/20 ver 5.88 Windowsでのコンパイルエラー修正
2019/12/19 ver 5.87 未定義ラベルへのjmp命令のデフォルト挙動をT_NEARにするsetDefaultJmpNEAR()を追加
2019/12/13 ver 5.86 [変更] -fno-operator-namesが指定されたときは5.84以前の挙動に戻す
2019/12/07 ver 5.85 mmapにMAP_JITフラグを追加(macOS mojave以上)
2019/11/29 ver 5.84 [変更] XBYAK_USE_OP_NAMESが定義されていない限りXBYAK_NO_OP_NAMESが定義されるように変更
2019/10/12 ver 5.83 exit(1)の除去
2019/09/23 ver 5.82 monitorx, mwaitx, clzero対応 (thanks to MagurosanTeam)
2019/09/14 ver 5.81 いくつかの一般命令をサポート
2019/08/01 ver 5.802 AVX512_BF16判定修正 (thanks to vpirogov)
2019/05/27 support vp2intersectd, vp2intersectq (not tested)
2019/05/26 ver 5.80 support vcvtne2ps2bf16, vcvtneps2bf16, vdpbf16ps
2019/04/27 ver 5.79 vcmppd/vcmppsのptr_b対応忘れ(thanks to jkopinsky)
2019/04/15 ver 5.78 Reg::changeBit()のリファクタリング(thanks to MerryMage)
2019/03/06 ver 5.77 LLCキャッシュを共有するCPU数の修正(by densamoilov)
2019/01/17 ver 5.76 Cpu::getNumCores()追加(by shelleygoel)
2018/10/31 ver 5.751 互換性のためにXbyak::CastToの復元
2018/10/29 ver 5.75 LabelManagerのデストラクタでLabelから参照を切り離す
2018/10/21 ver 5.74 RegRip +/intの形をサポート Xbyak::CastToを削除
2018/10/15 util::StackFrameでmovの代わりにpush/popを使う
2018/09/19 ver 5.73 vpslld, vpslldq, vpsllwなどの(reg, mem, imm8)に対するevexエンコーディング修整
2018/09/19 ver 5.72 fix the encoding of vinsertps for disp8N(Thanks to petercaday)
2018/08/27 ver 5.71 新しいlabelインスタンスを返すL()を追加
2018/08/27 ver 5.70 read/exec設定のためのsetProtectMode()とDontUseProtectの追加
2018/08/24 ver 5.68 indexが16以上のVSIBエンコーディングのバグ修正(thanks to petercaday)
2018/08/14 ver 5.67 Addressクラス内のmutableを削除 ; fix setCacheHierarchy for cloud vm
2018/07/26 ver 5.661 mingw64対応
2018/07/24 ver 5.66 protect()のmodeにCodeArray::PROTECT_REを追加
2018/06/26 ver 5.65 fix push(qword [mem])
2018/03/07 ver 5.64 Cpu()の中でzero divisionが出ることがあるのを修正
2018/02/14 ver 5.63 Cpu::setCacheHierarchy()の修正とclang<3.9のためのEvexModifierZero修正(thanks to mgouicem)
2018/02/13 ver 5.62 Cpu::setCacheHierarchy() by mgouicem and rsdubtso
2018/02/07 ver 5.61 vmov*がmem{k}{z}形式対応(忘れてた)
2018/01/24 ver 5.601 xword, ywordなどをXbyak::util名前空間に追加
2018/01/05 ver 5.60 Ice lake系命令対応(319433-030.pdf)
2017/08/22 ver 5.53 mpxエンコーディングバグ修正, bnd()プレフィクス追加
2017/08/18 ver 5.52 align修正(thanks to MerryMage)
2017/08/17 ver 5.51 multi-byte nop追加 align()はそれを使用する(thanks to inolen)
2017/08/08 ver 5.50 mpx追加(thanks to magurosan)
2017/08/08 ver 5.45 sha追加(thanks to magurosan)
2017/08/08 ver 5.44 prefetchw追加(thanks to rsdubtso)
2017/07/12 ver 5.432 PVS-studioの警告を減らす
2017/07/09 ver 5.431 hasRex()修正 (影響なし) (thanks to drillsar)
2017/05/14 ver 5.43 CodeGenerator::resetSize()修正(thanks to gibbed)
2017/05/13 ver 5.42 movs{b,w,d,q}追加
2017/01/26 ver 5.41 prefetchwt1追加とscale == 0対応(thanks to rsdubtso)
2016/12/14 ver 5.40 Labelが示すアドレスを取得するLabel::getAddress()追加
2016/12/07 ver 5.34 disp8N時の負のオフセット処理の修正(thanks to rsdubtso)
2016/12/06 ver 5.33 disp8N時のvpbroadcast{b,w,d,q}, vpinsr{b,w}, vpextr{b,w}のバグ修正
2016/12/01 ver 5.32 clang for Visual Studioサポートのために__xgetbv()を_xgetbv()に変更(thanks to freiro)
2016/11/27 ver 5.31 AVX512_4VNNIをAVX512_4VNNIWに変更
2016/11/27 ver 5.30 AVX512_4VNNI, AVX512_4FMAPS命令の追加(thanks to rsdubtso)
2016/11/26 ver 5.20 AVX512_4VNNIとAVX512_4FMAPSの判定追加(thanks to rsdubtso)
2016/11/20 ver 5.11 何故か消えていたvptest for ymm追加(thanks to gregory38)
2016/11/20 ver 5.10 [rip+&var]の形のアドレッシング追加
2016/09/29 ver 5.03 ERR_INVALID_OPMASK_WITH_MEMORYの判定ミス修正(thanks to PVS-Studio)
2016/08/15 ver 5.02 xbyak_bin2hex.hをincludeしない
2016/08/15 ver 5.011 gcc 5.4のバージョン取得ミスの修正
2016/08/03 ver 5.01 AVXの省略表記非サポート
2016/07/24 ver 5.00 avx-512フルサポート
2016/06/13 avx-512 opmask命令サポート
2016/05/05 ver 4.91 AVX-512命令の検出サポート
2016/03/14 ver 4.901 ready()関数にコメント加筆(thanks to skmp)
2016/02/04 ver 4.90 条件分岐命令にjcc(const void *addr);のタイプを追加
2016/01/30 ver 4.89 vpblendvbがymmレジスタをサポートしていなかった(thanks to John Funnell)
2016/01/24 ver 4.88 lea, cmovの16bitレジスタ対応(thanks to whyisthisfieldhere)
2015/08/16 ver 4.87 セグメントセレクタに対応
2015/08/16 ver 4.86 [rip + label]アドレッシングで即値を使うと壊れる(thanks to whyisthisfieldhere)
2015/08/10 ver 4.85 Address::operator==()が間違っている(thanks to inolen)
2015/07/22 ver 4.84 call()がvariadic template対応
2015/05/24 ver 4.83 movbeサポート(thanks to benvanik)
2015/05/24 ver 4.82 F16Cが使えるかどうかの判定追加
2015/04/25 ver 4.81 setSizeが例外を投げる条件を修正(thanks to whyisthisfieldhere)
2015/04/22 ver 4.80 rip相対でLabelのサポート(thanks to whyisthisfieldhere)
2015/01/28 ver 4.71 adcx, adox, cmpxchg, rdseed, stacのサポート
2014/10/14 ver 4.70 MmapAllocatorのサポート
2014/06/13 ver 4.62 VC2014で警告抑制
2014/05/30 ver 4.61 bt, bts, btr, btcのサポート
2014/05/28 ver 4.60 vcvtph2ps, vcvtps2phのサポート
2014/04/11 ver 4.52 rdrandの判定追加
2014/03/25 ver 4.51 参照されなくなったラベルの状態を削除する
2014/03/16 ver 4.50 新しいラベルクラスのサポート
2014/03/05 ver 4.40 VirtualBox上でBMI/enhanced repのサポート判定を間違うことがあるのを修正
2013/12/03 ver 4.30 Reg::cvt8(), cvt16(), cvt32()のサポート
2013/10/16 ver 4.21 ラベルでstd::stringを受け付ける。
2013/07/30 ver 4.20 [break backward compatibility] 従来のReg32eクラスをアドレッシング用のRegExpとReg32, Reg64を表すReg32eに分離
2013/07/04 ver 4.10 [break backward compatibility] Xbyak::Errorの型をenumからclassに変更
2013/06/21 ver 4.02 LABELの指すアドレスを書き込むputL(LABEL)関数の追加。
2013/06/21 ver 4.01 vpsllw, vpslld, vpsllq, vpsraw, vpsrad, vpsrlw, vpsrld, vpsrlq support (ymm, ymm, xmm)
support vpbroadcastb, vpbroadcastw, vpbroadcastd, vpbroadcastq(thanks to Gabest)
2013/05/30 ver 4.00 AVX2, VEX-encoded GPR-instructionをサポート
2013/03/27 ver 3.80 mov(reg, "label");をサポート
2013/03/13 ver 3.76 cqo, jcxz, jecxz, jrcxz追加
2013/01/15 ver 3.75 生成されたコードを修正するためにsetSize()を追加
2013/01/12 ver 3.74 CodeGenerator::reset()とAllocator::useProtect()を追加
2013/01/06 ver 3.73 可能ならunordered_mapを使う
2012/12/04 ver 3.72 eaxなどをCodeGeneratorのメンバ変数に戻す. Xbyak::util::eaxはstatic const変数
2012/11/17 ver 3.71 and_(), or_(), xor_(), not_()をXBYAK_NO_OP_NAMESが定義されていないときでも使えるようにした
2012/11/17 CodeGeneratorのeax, ecx, ptrなどのメンバ変数をstaticにし、const参照をXbyak::utilにも定義
2012/11/09 ver 3.70 and()をand_()にするためのマクロXBYAK_NO_OP_NAMESを追加(thanks to Mattias)
2012/11/01 ver 3.62 add fwait/fnwait/finit/fninit
2012/11/01 ver 3.61 add fldcw/fstcw
2012/05/03 ver 3.60 Allocatorクラスのインタフェースを変更
2012/03/23 ver 3.51 userPtrモードがバグったのを修正
2012/03/19 ver 3.50 AutoGrowモードサポート
2011/11/09 ver 3.05 rip相対の64bitサイズ以外の扱いのバグ修正 / movsxdサポート
2011/08/15 ver 3.04 add(dword [ebp-8], 0xda);などにおけるimm8の扱いのバグ修正(thanks to lolcat)
2011/06/16 ver 3.03 Macのgcc上での__GNUC_PREREQがミスってたのを修正(thanks to t_teruya)
2011/04/28 ver 3.02 Macのgcc上ではxgetbvをdisable
2011/03/24 ver 3.01 fix typo of OSXSAVE
2011/03/23 ver 3.00 vcmpeqpsなどを追加
2011/02/16 ver 2.994 beta add vmovq for 32-bit mode(I forgot it)
2011/02/16 ver 2.993 beta remove cvtReg to avoid thread unsafe
2011/02/10 ver 2.992 beta support one argument syntax for fadd like nasm
2011/02/07 ver 2.991 beta fix pextrw reg, xmm, imm(Thanks to Gabest)
2011/02/04 ver 2.99 beta support AVX
2010/12/08 ver 2.31 fix ptr [rip + 32bit offset], support rdtscp
2010/10/19 ver 2.30 support pclmulqdq, aesdec, aesdeclast, aesenc, aesenclast, aesimc, aeskeygenassist
2010/07/07 ver 2.29 fix call(<label>)
2010/06/17 ver 2.28 move some member functions to public
2010/06/01 ver 2.27 support encoding of mov(reg64, imm) like yasm(not nasm)
2010/05/24 ver 2.26 fix sub(rsp, 1000)
2010/04/26 ver 2.25 add jc/jnc(I forgot to implement them...)
2010/04/16 ver 2.24 change the prototype of rewrite() method
2010/04/15 ver 2.23 fix align() and xbyak_util.h for Mac
2010/02/16 ver 2.22 fix inLocalLabel()/outLocalLabel()
2009/12/09 ver 2.21 support cygwin(gcc 4.3.2)
2009/11/28 ver 2.20 FPUの一部命令サポート
2009/06/25 ver 2.11 64bitモードでの mov(qword[rax], imm); 修正(thanks to Martinさん)
2009/03/10 ver 2.10 jmp/call reg64の冗長なREG.W削除
2009/02/24 ver 2.09 movq reg64, mmx/xmm; movq mmx/xmm, reg64追加
2009/02/13 ver 2.08 movd(xmm7, dword[eax])が0x66を落とすバグ修正(thanks to Gabestさん)
2008/12/30 ver 2.07 call()の相対アドレスが8bit以下のときのバグ修正(thanks to katoさん)
2008/09/18 ver 2.06 @@, @f, @bとラベルの局所化機能追加(thanks to nobu-qさん)
2008/09/18 ver 2.05 ptr [rip + 32bit offset]サポート(thanks to 団子厨(Dango-Chu)さん)
2008/06/03 ver 2.04 align()のポカミス修正。mov(ptr[eax],1);などをエラーに
2008/06/02 ver 2.03 ユーザ定義メモリインタフェースサポート
2008/05/26 ver 2.02 protect()(on Linux)で不正な設定になることがあるのを修正(thanks to sinichiro_hさん)
2008/04/30 ver 2.01 cmpxchg16b, cdqe追加
2008/04/29 ver 2.00 x86/x64-64版公開
2008/04/25 ver 1.90 x64版β公開
2008/04/18 ver 1.12 コード整理
2008/04/14 ver 1.11 コード整理
2008/03/12 ver 1.10 bsf/bsr追加(忘れていた)
2008/02/14 ver 1.09 sub eax, 1234が16bitモードで出力されていたのを修正(thanks to Robertさん)
2007/11/05 ver 1.08 lock, xadd, xchg追加
2007/11/02 ver 1.07 SSSE3/SSE4対応(thanks to 団子厨(Dango-Chu)さん)
2007/09/25 ver 1.06 call((int)関数ポインタ); jmp((int)関数ポインタ);のサポート
2007/08/04 ver 1.05 細かい修正
2007/02/04 後方へのジャンプでT_NEARをつけないときに8bit相対アドレスに入らない
場合に例外が発生しないバグの修正
2007/01/21 [disp]の形のアドレス生成のバグ修正
mov (eax|ax|al, [disp]); mov([disp], eax|ax|al);の短い表現選択
2007/01/17 webページ作成
2007/01/04 公開開始
-----------------------------------------------------------------------------
◎著作権者
光成滋生(MITSUNARI Shigeo, herumi@nifty.com)

View File

@ -0,0 +1,113 @@
# Samples that are always built; each of these is compiled with -m32 by its rule.
TARGET = test quantize bf toyvm test_util memfunc static_buf jmp_table
# Main xbyak header, listed as a prerequisite of every sample.
XBYAK_INC=../xbyak/xbyak.h
# "1" when gcc's preprocessor can resolve <boost/spirit/core.hpp>, empty otherwise.
# The calc samples are only added to TARGET when this is "1".
BOOST_EXIST=$(shell echo "\#include <boost/spirit/core.hpp>" | (gcc -E - 2>/dev/null) | grep "boost/spirit/core.hpp" >/dev/null && echo "1")
# Machine hardware name reported by `uname -m`.
UNAME_M=$(shell uname -m)
# Decide BIT (32 or 64). On Darwin BIT stays unset unless one of the checks
# below matches; elsewhere it defaults to 32 and is raised to 64 for
# x86_64/amd64 machines.
ifeq ($(shell uname -s),Darwin)
ifeq ($(UNAME_M),x86_64)
BIT=64
endif
ifeq ($(UNAME_M),i386)
BIT=32
endif
# Product versions whose first four characters are "10.5": trust the
# hw.cpu64bit_capable sysctl instead of `uname -m`.
ifeq ($(shell sw_vers -productVersion | cut -c1-4 | sed 's/\.//'),105)
ifeq ($(shell sysctl -n hw.cpu64bit_capable),1)
BIT=64
endif
endif
else
BIT=32
ifeq ($(UNAME_M),x86_64)
BIT=64
endif
ifeq ($(UNAME_M),amd64)
BIT=64
endif
endif
# 64-bit hosts additionally build the -m64 variant of each sample.
ifeq ($(BIT),64)
TARGET += test64 bf64 memfunc64 test_util64 static_buf64 jmp_table64
ifeq ($(BOOST_EXIST),1)
TARGET += calc64 #calc2_64
endif
endif
# calc needs Boost.Spirit, so it is only built when the probe above succeeded.
ifeq ($(BOOST_EXIST),1)
TARGET += calc #calc2
endif
# `all` and `clean` do not produce files of those names; declaring them phony
# keeps them working even if a file called "all" or "clean" appears here.
.PHONY: all clean

# Default goal: build every sample selected in TARGET.
all: $(TARGET)

# Warning set shared by all samples (-Wall is supplied here, once).
CFLAGS_WARN=-Wall -Wextra -Wformat=2 -Wcast-qual -Wcast-align -Wwrite-strings -Wfloat-equal -Wpointer-arith -pedantic
# -I../ lets the samples include "xbyak/xbyak.h".
CFLAGS=-g -O2 -fomit-frame-pointer -I../ $(CFLAGS_WARN)

# One rule per binary. Targets without a suffix are built with -m32, the
# *64 targets with -m64. Prerequisites are declared separately further down.
test:
	$(CXX) $(CFLAGS) test0.cpp -o $@ -m32
quantize:
	$(CXX) $(CFLAGS) quantize.cpp -o $@ -m32
calc:
	$(CXX) $(CFLAGS) calc.cpp -o $@ -m32
calc64:
	$(CXX) $(CFLAGS) calc.cpp -o $@ -m64
calc2:
	$(CXX) $(CFLAGS) calc2.cpp -o $@ -m32
calc2_64:
	$(CXX) $(CFLAGS) calc2.cpp -o $@ -m64
bf:
	$(CXX) $(CFLAGS) bf.cpp -o $@ -m32
bf64:
	$(CXX) $(CFLAGS) bf.cpp -o $@ -m64
memfunc:
	$(CXX) $(CFLAGS) memfunc.cpp -o $@ -m32
memfunc64:
	$(CXX) $(CFLAGS) memfunc.cpp -o $@ -m64
toyvm:
	$(CXX) $(CFLAGS) toyvm.cpp -o $@ -m32
test64:
	$(CXX) $(CFLAGS) test0.cpp -o $@ -m64
test_util:
	$(CXX) $(CFLAGS) test_util.cpp -o $@ -m32
test_util64:
	$(CXX) $(CFLAGS) test_util.cpp -o $@ -m64
static_buf:
	$(CXX) $(CFLAGS) static_buf.cpp -o $@ -m32
static_buf64:
	$(CXX) $(CFLAGS) static_buf.cpp -o $@ -m64
jmp_table:
	$(CXX) $(CFLAGS) jmp_table.cpp -o $@ -m32
jmp_table64:
	$(CXX) $(CFLAGS) jmp_table.cpp -o $@ -m64

# The profiler samples are not part of TARGET; build them explicitly.
profiler: profiler.cpp ../xbyak/xbyak_util.h
	$(CXX) $(CFLAGS) profiler.cpp -o $@
# Same source, built with XBYAK_USE_VTUNE and linked against libjitprofiling
# from the hard-coded /opt/intel/vtune_amplifier install location.
profiler-vtune: profiler.cpp ../xbyak/xbyak_util.h
	$(CXX) $(CFLAGS) profiler.cpp -o $@ -DXBYAK_USE_VTUNE -I /opt/intel/vtune_amplifier/include/ -L /opt/intel/vtune_amplifier/lib64 -ljitprofiling -ldl

clean:
	rm -rf *.o $(TARGET) *.exe profiler profiler-vtune
# Prerequisites of each sample binary, so that editing a source file or the
# xbyak header triggers a rebuild. Every target named here must match a rule
# defined in this Makefile.
test : test0.cpp $(XBYAK_INC)
test64: test0.cpp $(XBYAK_INC)
quantize : quantize.cpp $(XBYAK_INC)
calc : calc.cpp $(XBYAK_INC)
calc64 : calc.cpp $(XBYAK_INC)
calc2 : calc2.cpp $(XBYAK_INC)
calc2_64 : calc2.cpp $(XBYAK_INC)
bf : bf.cpp $(XBYAK_INC)
bf64 : bf.cpp $(XBYAK_INC)
memfunc : memfunc.cpp $(XBYAK_INC)
memfunc64 : memfunc.cpp $(XBYAK_INC)
toyvm : toyvm.cpp $(XBYAK_INC)
static_buf: static_buf.cpp $(XBYAK_INC)
static_buf64: static_buf.cpp $(XBYAK_INC)
test_util : test_util.cpp $(XBYAK_INC) ../xbyak/xbyak_util.h
# The 64-bit binary is called test_util64 (there is no test_util2 rule).
test_util64 : test_util.cpp $(XBYAK_INC) ../xbyak/xbyak_util.h
jmp_table: jmp_table.cpp $(XBYAK_INC)
jmp_table64: jmp_table.cpp $(XBYAK_INC)

View File

@ -0,0 +1,211 @@
#define XBYAK_NO_OP_NAMES
#include "xbyak/xbyak.h"
#include <stdio.h>
#include <stdlib.h>
#include <stack>
#include <fstream>
#ifdef _MSC_VER
#pragma warning(disable : 4996) // scanf
#define snprintf _snprintf_s
#endif
/*
	Brainfuck JIT compiler.

	The constructor reads a Brainfuck program from a stream and emits a
	native function with the signature
		void (*)(void* putchar, void* getchar, int *stack)
	into a 100000-byte code buffer. Each Brainfuck cell is one int of the
	caller-supplied stack array.

	@throw Xbyak::Error(ERR_LABEL_IS_NOT_FOUND) when '[' and ']' are not
	       balanced in the input program
*/
class Brainfuck : public Xbyak::CodeGenerator {
public:
	/*
		Count a run of the character c.
		The caller has already consumed the first c; this reads further
		characters with operator>> (so whitespace is skipped by default)
		while they equal c, and puts back the last character read with
		unget().
		@return length of the run, including the already consumed c
	*/
	int getContinuousChar(std::istream& is, char c)
	{
		int count = 1;
		char p;
		while (is >> p) {
			if (p != c) break;
			count++;
		}
		is.unget();
		return count;
	}
	Brainfuck(std::istream& is) : CodeGenerator(100000)
	{
		// void (*)(void* putchar, void* getchar, int *stack)
		using namespace Xbyak;
		// Prologue: save the registers that will hold the three arguments
		// for the whole function, then load the arguments into them.
#ifdef XBYAK32
		const Reg32& pPutchar(esi);
		const Reg32& pGetchar(edi);
		const Reg32& stack(ebp);
		const Address cur = dword [stack];
		push(ebp); // stack
		push(esi);
		push(edi);
		const int P_ = 4 * 3; // bytes pushed above; arguments follow the return address
		mov(pPutchar, ptr[esp + P_ + 4]); // putchar
		mov(pGetchar, ptr[esp + P_ + 8]); // getchar
		mov(stack, ptr[esp + P_ + 12]); // stack
#elif defined(XBYAK64_WIN)
		const Reg64& pPutchar(rsi);
		const Reg64& pGetchar(rdi);
		const Reg64& stack(rbp); // stack
		const Address cur = dword [stack];
		push(rsi);
		push(rdi);
		push(rbp);
		mov(pPutchar, rcx); // putchar
		mov(pGetchar, rdx); // getchar
		mov(stack, r8); // stack
#else
		const Reg64& pPutchar(rbx);
		const Reg64& pGetchar(rbp);
		const Reg64& stack(r12); // stack
		const Address cur = dword [stack];
		push(rbx);
		push(rbp);
		push(r12);
		mov(pPutchar, rdi); // putchar
		mov(pGetchar, rsi); // getchar
		mov(stack, rdx); // stack
#endif
		// labelB holds the loop-head labels and labelF the loop-exit labels
		// of the currently open '[' ... ']' pairs; both always have the
		// same depth because they are pushed and popped together.
		std::stack<Label> labelF, labelB;
		char c;
		while (is >> c) {
			switch (c) {
			case '+':
			case '-':
				{
					// Fold a run of identical +/- into one inc/dec/add.
					int count = getContinuousChar(is, c);
					if (count == 1) {
						c == '+' ? inc(cur) : dec(cur);
					} else {
						add(cur, (c == '+' ? count : -count));
					}
				}
				break;
			case '>':
			case '<':
				{
					// Cells are 4-byte ints, so moving by `count` cells
					// adjusts the cell pointer by 4 * count bytes.
					int count = getContinuousChar(is, c);
					add(stack, 4 * (c == '>' ? count : -count));
				}
				break;
			case '.':
#ifdef XBYAK32
				push(cur);
				call(pPutchar);
				pop(eax);
#elif defined(XBYAK64_WIN)
				mov(ecx, cur);
				sub(rsp, 32); // 32 bytes reserved around the call
				call(pPutchar);
				add(rsp, 32);
#else
				mov(edi, cur);
				call(pPutchar);
#endif
				break;
			case ',':
#if defined(XBYAK32) || defined(XBYAK64_GCC)
				call(pGetchar);
#elif defined(XBYAK64_WIN)
				sub(rsp, 32);
				call(pGetchar);
				add(rsp, 32);
#endif
				mov(cur, eax);
				break;
			case '[':
				{
					Label B = L();
					labelB.push(B);
					mov(eax, cur);
					test(eax, eax);
					Label F;
					// F is defined only when the matching ']' is reached.
					jz(F, T_NEAR);
					labelF.push(F);
				}
				break;
			case ']':
				{
					// A ']' without an open '[' would call top() on an
					// empty stack, which is undefined behaviour.
					if (labelB.empty()) throw Xbyak::Error(Xbyak::ERR_LABEL_IS_NOT_FOUND);
					Label B = labelB.top(); labelB.pop();
					jmp(B);
					Label F = labelF.top(); labelF.pop();
					L(F);
				}
				break;
			default:
				break;
			}
		}
		// Any label still on the stack belongs to a '[' that was never
		// closed; its jz target would stay undefined.
		if (!labelF.empty()) throw Xbyak::Error(Xbyak::ERR_LABEL_IS_NOT_FOUND);
		// Epilogue: restore the saved registers in reverse push order.
#ifdef XBYAK32
		pop(edi);
		pop(esi);
		pop(ebp);
#elif defined(XBYAK64_WIN)
		pop(rbp);
		pop(rdi);
		pop(rsi);
#else
		pop(r12);
		pop(rbp);
		pop(rbx);
#endif
		ret();
	}
};
/*
	Write a self-contained C program to stdout that embeds the given
	machine code as a 4096-byte aligned array and runs it.
	The emitted main() marks the array executable (VirtualProtect on MSVC
	builds, mprotect otherwise) and calls it with putchar, getchar and a
	static int stack.
	@param code [in] generated machine code
	@param size [in] number of bytes in code
*/
void dump(const Xbyak::uint8 *code, size_t size)
{
	puts("#include <stdio.h>\nstatic int stack[128 * 1024];");
#ifndef _MSC_VER
	printf("static __attribute__((aligned(4096)))");
#else
	printf("static __declspec(align(4096)) ");
#endif
	puts("const unsigned char code[] = {");
	// Emit the bytes as hex literals, 16 per output row.
	size_t column = 0;
	for (const Xbyak::uint8 *p = code, *last = code + size; p != last; ++p) {
		printf("0x%02x,", *p);
		if (++column == 16) {
			putchar('\n');
			column = 0;
		}
	}
	puts("\n};");
#ifndef _MSC_VER
	puts("#include <unistd.h>");
	puts("#include <sys/mman.h>");
#else
	puts("#include <windows.h>");
#endif
	puts("int main()\n{");
#ifndef _MSC_VER
	puts("\tlong pageSize = sysconf(_SC_PAGESIZE) - 1;");
	puts("\tmprotect((void*)code, (sizeof(code) + pageSize) & ~pageSize, PROT_READ | PROT_EXEC);");
#else
	puts("\tDWORD oldProtect;");
	puts("\tVirtualProtect((void*)code, sizeof(code), PAGE_EXECUTE_READWRITE, &oldProtect);");
#endif
	puts(
		"\t((void (*)(void*, void*, int *))code)((void*)putchar, (void*)getchar, stack);\n"
		"}"
	);
}
/*
	bf filename.bf [0|1]
	mode 0 (default): JIT-compile the program and run it immediately
	other modes     : print a C program embedding the generated code
	@return 0 on success, 1 on bad usage, unreadable input or a caught error
*/
int main(int argc, char *argv[])
{
#ifdef XBYAK32
	fprintf(stderr, "32bit mode\n");
#else
	fprintf(stderr, "64bit mode\n");
#endif
	if (argc == 1) {
		fprintf(stderr, "bf filename.bf [0|1]\n");
		return 1;
	}
	std::ifstream ifs(argv[1]);
	// Without this check a missing file silently compiles to an empty program.
	if (!ifs) {
		fprintf(stderr, "can't open %s\n", argv[1]);
		return 1;
	}
	const int mode = argc == 3 ? atoi(argv[2]) : 0;
	try {
		Brainfuck bf(ifs);
		if (mode == 0) {
			static int stack[128 * 1024];
			bf.getCode<void (*)(const void*, const void*, int *)>()(reinterpret_cast<const void*>(putchar), reinterpret_cast<const void*>(getchar), stack);
		} else {
			dump(bf.getCode(), bf.getSize());
		}
		return 0;
	} catch (std::exception& e) {
		printf("ERR:%s\n", e.what());
	} catch (...) {
		printf("unknown error\n");
	}
	// Reached only through one of the catch handlers.
	return 1;
}

View File

@ -0,0 +1,228 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{654BD79B-59D3-4B10-BBAA-158BAB272828}</ProjectGuid>
<WindowsTargetPlatformVersion>10.0.17763.0</WindowsTargetPlatformVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<PlatformToolset>v141</PlatformToolset>
<UseOfMfc>false</UseOfMfc>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<PlatformToolset>v141</PlatformToolset>
<UseOfMfc>false</UseOfMfc>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<PlatformToolset>v141</PlatformToolset>
<UseOfMfc>false</UseOfMfc>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<PlatformToolset>v141</PlatformToolset>
<UseOfMfc>false</UseOfMfc>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup>
<_ProjectFileVersion>15.0.27924.0</_ProjectFileVersion>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<LinkIncremental>false</LinkIncremental>
<IntDir>$(ProjectName)\$(Configuration)\</IntDir>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<LinkIncremental>true</LinkIncremental>
<IntDir>$(ProjectName)\$(Configuration)\</IntDir>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<LinkIncremental>false</LinkIncremental>
<IntDir>$(ProjectName)\$(Platform)\$(Configuration)\</IntDir>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<LinkIncremental>true</LinkIncremental>
<IntDir>$(ProjectName)\$(Platform)\$(Configuration)\</IntDir>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Midl>
<TypeLibraryName>.\Release/bf.tlb</TypeLibraryName>
<HeaderFileName />
</Midl>
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<StringPooling>true</StringPooling>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<FunctionLevelLinking>true</FunctionLevelLinking>
<PrecompiledHeaderOutputFile>.\Release/bf.pch</PrecompiledHeaderOutputFile>
<WarningLevel>Level4</WarningLevel>
<SuppressStartupBanner>true</SuppressStartupBanner>
</ClCompile>
<ResourceCompile>
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<Culture>0x0411</Culture>
</ResourceCompile>
<Link>
<SuppressStartupBanner>true</SuppressStartupBanner>
<SubSystem>Console</SubSystem>
<RandomizedBaseAddress>false</RandomizedBaseAddress>
<DataExecutionPrevention />
<TargetMachine>MachineX86</TargetMachine>
</Link>
<Bscmake>
<SuppressStartupBanner>true</SuppressStartupBanner>
</Bscmake>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Midl>
<TypeLibraryName>.\Debug/bf.tlb</TypeLibraryName>
<HeaderFileName />
</Midl>
<ClCompile>
<Optimization>Disabled</Optimization>
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<MinimalRebuild>true</MinimalRebuild>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
<PrecompiledHeaderOutputFile>.\Debug/bf.pch</PrecompiledHeaderOutputFile>
<WarningLevel>Level4</WarningLevel>
<SuppressStartupBanner>true</SuppressStartupBanner>
</ClCompile>
<ResourceCompile>
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<Culture>0x0411</Culture>
</ResourceCompile>
<Link>
<SuppressStartupBanner>true</SuppressStartupBanner>
<GenerateDebugInformation>true</GenerateDebugInformation>
<ProgramDatabaseFile>.\Debug/bf.pdb</ProgramDatabaseFile>
<SubSystem>Console</SubSystem>
<RandomizedBaseAddress>false</RandomizedBaseAddress>
<DataExecutionPrevention />
<TargetMachine>MachineX86</TargetMachine>
</Link>
<Bscmake>
<SuppressStartupBanner>true</SuppressStartupBanner>
</Bscmake>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<Midl>
<TargetEnvironment>X64</TargetEnvironment>
<TypeLibraryName>.\Release/bf.tlb</TypeLibraryName>
<HeaderFileName />
</Midl>
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<StringPooling>true</StringPooling>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<FunctionLevelLinking>true</FunctionLevelLinking>
<PrecompiledHeaderOutputFile>.\Release/bf.pch</PrecompiledHeaderOutputFile>
<WarningLevel>Level4</WarningLevel>
<SuppressStartupBanner>true</SuppressStartupBanner>
</ClCompile>
<ResourceCompile>
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<Culture>0x0411</Culture>
</ResourceCompile>
<Link>
<SuppressStartupBanner>true</SuppressStartupBanner>
<SubSystem>Console</SubSystem>
<RandomizedBaseAddress>false</RandomizedBaseAddress>
<DataExecutionPrevention />
<TargetMachine>MachineX64</TargetMachine>
</Link>
<Bscmake>
<SuppressStartupBanner>true</SuppressStartupBanner>
</Bscmake>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<Midl>
<TargetEnvironment>X64</TargetEnvironment>
<TypeLibraryName>.\Debug/bf.tlb</TypeLibraryName>
<HeaderFileName />
</Midl>
<ClCompile>
<Optimization>Disabled</Optimization>
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<MinimalRebuild>true</MinimalRebuild>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
<PrecompiledHeaderOutputFile>.\Debug/bf.pch</PrecompiledHeaderOutputFile>
<WarningLevel>Level4</WarningLevel>
<SuppressStartupBanner>true</SuppressStartupBanner>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
</ClCompile>
<ResourceCompile>
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<Culture>0x0411</Culture>
</ResourceCompile>
<Link>
<SuppressStartupBanner>true</SuppressStartupBanner>
<GenerateDebugInformation>true</GenerateDebugInformation>
<ProgramDatabaseFile>.\Debug/bf.pdb</ProgramDatabaseFile>
<SubSystem>Console</SubSystem>
<RandomizedBaseAddress>false</RandomizedBaseAddress>
<DataExecutionPrevention />
<TargetMachine>MachineX64</TargetMachine>
</Link>
<Bscmake>
<SuppressStartupBanner>true</SuppressStartupBanner>
</Bscmake>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="bf.cpp" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>

View File

@ -0,0 +1,229 @@
/*
@author herumi
tiny calculator
This program generates a function to calc the value of
polynomial given by user in run-time.
use boost::spirit::classic
see calc2.cpp for new version of boost::spirit
*/
#include <stdio.h>
#include <map>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
#define XBYAK_NO_OP_NAMES
#include "xbyak/xbyak.h"
#ifdef _MSC_VER
#pragma warning(disable : 4127) // for boost(constant condition)
#pragma warning(disable : 4512) // for boost
#endif
#include <boost/spirit/include/classic_file_iterator.hpp>
#include <boost/spirit/include/classic_core.hpp>
#include <boost/bind.hpp>
// Error codes of the calculator; values of this enum are thrown directly.
enum Error {
// thrown by FuncGen::genVal() when the expression names a variable that
// is not in the variable table given to the FuncGen constructor
UNDEFINED_VARIABLE = 1
};
/*
JIT assemble of given polynomial for VC or gcc
*/
class FuncGen : public Xbyak::CodeGenerator {
public:
typedef std::map<std::string, int> Map;
private:
enum {
MAX_CONST_NUM = 32
};
double constTbl_[MAX_CONST_NUM];
size_t constTblPos_;
int regIdx_;
Map varMap_; // map var name to index
#ifdef XBYAK32
const Xbyak::Reg32& valTbl_;
const Xbyak::Reg32& tbl_;
#else
const Xbyak::Reg64& valTbl_;
const Xbyak::Reg64& tbl_;
#endif
public:
/*
@param y [out] the value of f(var)
@param var [in] table of input variables
func(double *y, const double var[]);
@note func does not return double to avoid difference of compiler
*/
FuncGen(const std::vector<std::string>& varTbl)
: constTblPos_(0)
, regIdx_(-1)
#ifdef XBYAK32
, valTbl_(eax)
, tbl_(edx)
#elif defined(XBYAK64_WIN)
, valTbl_(rcx)
, tbl_(rdx)
#else
, valTbl_(rdi)
, tbl_(rsi)
#endif
{
#ifdef XBYAK32
mov(valTbl_, ptr[esp+8]); // eax == varTbl
mov(tbl_, (size_t)constTbl_);
#else
#ifdef XBYAK64_WIN
movaps(ptr [rsp + 8], xm6); // save xm6, xm7
movaps(ptr [rsp + 8 + 16], xm7);
#endif
mov(tbl_, (size_t)constTbl_);
#endif
for (int i = 0, n = static_cast<int>(varTbl.size()); i < n; i++) {
varMap_[varTbl[i]] = i;
}
}
// use edx
void genPush(double n)
{
if (constTblPos_ >= MAX_CONST_NUM) throw;
constTbl_[constTblPos_] = n;
if (regIdx_ == 7) throw;
movsd(Xbyak::Xmm(++regIdx_), ptr[tbl_ + (int)(constTblPos_ * sizeof(double))]);
constTblPos_++;
}
// use eax
void genVal(const char *begin, const char *end)
{
std::string var(begin, end);
if (varMap_.find(var) == varMap_.end()) throw UNDEFINED_VARIABLE;
if (regIdx_ == 7) throw;
movsd(Xbyak::Xmm(++regIdx_), ptr[valTbl_ + varMap_[var] * sizeof(double)]);
}
void genAdd(const char*, const char*)
{
addsd(Xbyak::Xmm(regIdx_ - 1), Xbyak::Xmm(regIdx_)); regIdx_--;
}
void genSub(const char*, const char*)
{
subsd(Xbyak::Xmm(regIdx_ - 1), Xbyak::Xmm(regIdx_)); regIdx_--;
}
void genMul(const char*, const char*)
{
mulsd(Xbyak::Xmm(regIdx_ - 1), Xbyak::Xmm(regIdx_)); regIdx_--;
}
void genDiv(const char*, const char*)
{
divsd(Xbyak::Xmm(regIdx_ - 1), Xbyak::Xmm(regIdx_)); regIdx_--;
}
// Emit the epilogue of the generated function.
// 32bit: xm0 is stored to the double pointed to by the first stack argument.
// 64bit: nothing is stored, xm0 is left as it is; in the Win64 build xm6 and xm7
// are reloaded from the slots the constructor saved them to.
void complete()
{
#ifdef XBYAK32
mov(eax, ptr [esp + 4]); // eax = first stack argument (pointer to the output value)
movsd(ptr [eax], xm0);
#else
#ifdef XBYAK64_WIN
movaps(xm6, ptr [rsp + 8]); // restore xm6, xm7
movaps(xm7, ptr [rsp + 8 + 16]);
#endif
#endif
ret();
}
};
// Boost.Spirit (classic) grammar for arithmetic expressions.
// Every semantic action calls a member of the FuncGen given to the constructor,
// so the code is generated while the input is parsed.
struct Grammar : public boost::spirit::classic::grammar<Grammar> {
FuncGen& f_; // code generator called by the semantic actions
Grammar(FuncGen& f) : f_(f) { }
template<typename ScannerT>
struct definition {
boost::spirit::classic::rule<ScannerT> poly0, poly1, poly2, var;
definition(const Grammar& self)
{
using namespace boost;
using namespace boost::spirit::classic;
// poly0 : poly1 followed by any number of ('+' poly1) or ('-' poly1);
// genAdd/genSub is called after the right-hand poly1 has matched
poly0 = poly1 >> *(('+' >> poly1)[bind(&FuncGen::genAdd, ref(self.f_), _1, _2)]
| ('-' >> poly1)[bind(&FuncGen::genSub, ref(self.f_), _1, _2)]);
// poly1 : poly2 followed by any number of ('*' poly2) or ('/' poly2)
poly1 = poly2 >> *(('*' >> poly2)[bind(&FuncGen::genMul, ref(self.f_), _1, _2)]
| ('/' >> poly2)[bind(&FuncGen::genDiv, ref(self.f_), _1, _2)]);
// var : one or more alphabetic characters, passed to genVal as [begin, end)
var = (+alpha_p)[bind(&FuncGen::genVal, ref(self.f_), _1, _2)];
// poly2 : a real number (passed to genPush), a variable, or a parenthesized poly0
poly2 = real_p[bind(&FuncGen::genPush, ref(self.f_), _1)]
| var
| '(' >> poly0 >> ')';
}
// poly0 is the start rule of the grammar
const boost::spirit::classic::rule<ScannerT>& start() const { return poly0; }
};
};
// Print every element of x to stdout with "%f", separated by ", ".
// Nothing is printed for an empty vector and no newline is appended.
void put(const std::vector<double>& x)
{
    std::vector<double>::const_iterator it = x.begin();
    const std::vector<double>::const_iterator last = x.end();
    if (it == last) return;
    printf("%f", *it);
    for (++it; it != last; ++it) {
        printf(", ");
        printf("%f", *it);
    }
}
/*
    calc "var1 var2 ..." "function of var"
    Parses the formula, generates a function for it and evaluates that function
    for 10 sets of pseudo-random variable values.
    @return 1 for bad usage or a formula that does not parse, otherwise 0
*/
int main(int argc, char *argv[])
{
    if (argc <= 2) {
        fprintf(stderr, "calc \"var1 var2 ...\" \"function of var\"\n");
        fprintf(stderr, "eg. calc x \"x*x\"\n");
        fprintf(stderr, "eg. calc \"x y z\" \"x*x + y - z\"\n");
        return 1;
    }
    const char *poly = argv[2];
    try {
        std::vector<std::string> varTbl;
        // get varTbl from argv[1] (whitespace separated names)
        {
            std::istringstream is(argv[1]);
            printf("varTbl = { ");
            std::string var;
            while (is >> var) {
                printf("%s:%d, ", var.c_str(), static_cast<int>(varTbl.size()));
                varTbl.push_back(var);
            }
            printf("}\n");
        }
        FuncGen funcGen(varTbl);
        Grammar calc(funcGen);
        boost::spirit::classic::parse_info<> r = parse(poly, calc, boost::spirit::classic::space_p);
        if (!r.full) {
            printf("err poly=%s\n", poly);
            return 1;
        }
        funcGen.complete();
        std::vector<double> valTbl(varTbl.size());
        // &valTbl[0] is undefined for an empty vector (a formula without variables),
        // so pass a null pointer in that case; the generated code only reads the
        // table for variables that exist.
        const double *valPtr = valTbl.empty() ? 0 : &valTbl[0];
#ifdef XBYAK32
        puts("32bit mode");
        void (*func)(double *ret, const double *valTbl) = funcGen.getCode<void (*)(double *, const double*)>();
#else
        puts("64bit mode");
        double (*func)(const double *valTbl) = funcGen.getCode<double (*)(const double*)>();
#endif
        for (int i = 0; i < 10; i++) {
            for (size_t j = 0, n = valTbl.size(); j < n; j++) {
                valTbl[j] = rand() % 7;
            }
            double y;
#ifdef XBYAK32
            func(&y, valPtr);
#else
            y = func(valPtr);
#endif
            printf("f("); put(valTbl); printf(")=%f\n", y);
        }
    } catch (std::exception& e) {
        printf("ERR:%s\n", e.what());
    } catch (Error err) {
        printf("ERR:%d\n", err);
    } catch (...) {
        printf("unknown error\n");
    }
    return 0;
}

View File

@ -0,0 +1,228 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{5FDDFAA6-B947-491D-A17E-BBD863846579}</ProjectGuid>
<WindowsTargetPlatformVersion>10.0.17763.0</WindowsTargetPlatformVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<PlatformToolset>v141</PlatformToolset>
<UseOfMfc>false</UseOfMfc>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<PlatformToolset>v141</PlatformToolset>
<UseOfMfc>false</UseOfMfc>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<PlatformToolset>v141</PlatformToolset>
<UseOfMfc>false</UseOfMfc>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<PlatformToolset>v141</PlatformToolset>
<UseOfMfc>false</UseOfMfc>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup>
<_ProjectFileVersion>15.0.27924.0</_ProjectFileVersion>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<LinkIncremental>false</LinkIncremental>
<IntDir>$(ProjectName)\$(Configuration)\</IntDir>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<LinkIncremental>true</LinkIncremental>
<IntDir>$(ProjectName)\$(Configuration)\</IntDir>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<LinkIncremental>false</LinkIncremental>
<IntDir>$(ProjectName)\$(Platform)\$(Configuration)\</IntDir>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<LinkIncremental>true</LinkIncremental>
<IntDir>$(ProjectName)\$(Platform)\$(Configuration)\</IntDir>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Midl>
<TypeLibraryName>.\Release/calc.tlb</TypeLibraryName>
<HeaderFileName />
</Midl>
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<StringPooling>true</StringPooling>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<FunctionLevelLinking>true</FunctionLevelLinking>
<PrecompiledHeaderOutputFile>.\Release/calc.pch</PrecompiledHeaderOutputFile>
<WarningLevel>Level4</WarningLevel>
<SuppressStartupBanner>true</SuppressStartupBanner>
</ClCompile>
<ResourceCompile>
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<Culture>0x0411</Culture>
</ResourceCompile>
<Link>
<SuppressStartupBanner>true</SuppressStartupBanner>
<SubSystem>Console</SubSystem>
<RandomizedBaseAddress>false</RandomizedBaseAddress>
<DataExecutionPrevention />
<TargetMachine>MachineX86</TargetMachine>
</Link>
<Bscmake>
<SuppressStartupBanner>true</SuppressStartupBanner>
</Bscmake>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Midl>
<TypeLibraryName>.\Debug/calc.tlb</TypeLibraryName>
<HeaderFileName />
</Midl>
<ClCompile>
<Optimization>Disabled</Optimization>
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<MinimalRebuild>true</MinimalRebuild>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
<PrecompiledHeaderOutputFile>.\Debug/calc.pch</PrecompiledHeaderOutputFile>
<WarningLevel>Level4</WarningLevel>
<SuppressStartupBanner>true</SuppressStartupBanner>
</ClCompile>
<ResourceCompile>
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<Culture>0x0411</Culture>
</ResourceCompile>
<Link>
<SuppressStartupBanner>true</SuppressStartupBanner>
<GenerateDebugInformation>true</GenerateDebugInformation>
<ProgramDatabaseFile>.\Debug/calc.pdb</ProgramDatabaseFile>
<SubSystem>Console</SubSystem>
<RandomizedBaseAddress>false</RandomizedBaseAddress>
<DataExecutionPrevention />
<TargetMachine>MachineX86</TargetMachine>
</Link>
<Bscmake>
<SuppressStartupBanner>true</SuppressStartupBanner>
</Bscmake>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<Midl>
<TargetEnvironment>X64</TargetEnvironment>
<TypeLibraryName>.\Release/calc.tlb</TypeLibraryName>
<HeaderFileName />
</Midl>
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<StringPooling>true</StringPooling>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<FunctionLevelLinking>true</FunctionLevelLinking>
<PrecompiledHeaderOutputFile>.\Release/calc.pch</PrecompiledHeaderOutputFile>
<WarningLevel>Level4</WarningLevel>
<SuppressStartupBanner>true</SuppressStartupBanner>
</ClCompile>
<ResourceCompile>
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<Culture>0x0411</Culture>
</ResourceCompile>
<Link>
<SuppressStartupBanner>true</SuppressStartupBanner>
<SubSystem>Console</SubSystem>
<RandomizedBaseAddress>false</RandomizedBaseAddress>
<DataExecutionPrevention />
<TargetMachine>MachineX64</TargetMachine>
</Link>
<Bscmake>
<SuppressStartupBanner>true</SuppressStartupBanner>
</Bscmake>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<Midl>
<TargetEnvironment>X64</TargetEnvironment>
<TypeLibraryName>.\Debug/calc.tlb</TypeLibraryName>
<HeaderFileName />
</Midl>
<ClCompile>
<Optimization>Disabled</Optimization>
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<MinimalRebuild>true</MinimalRebuild>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
<PrecompiledHeaderOutputFile>.\Debug/calc.pch</PrecompiledHeaderOutputFile>
<WarningLevel>Level4</WarningLevel>
<SuppressStartupBanner>true</SuppressStartupBanner>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
</ClCompile>
<ResourceCompile>
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<Culture>0x0411</Culture>
</ResourceCompile>
<Link>
<SuppressStartupBanner>true</SuppressStartupBanner>
<GenerateDebugInformation>true</GenerateDebugInformation>
<ProgramDatabaseFile>.\Debug/calc.pdb</ProgramDatabaseFile>
<SubSystem>Console</SubSystem>
<RandomizedBaseAddress>false</RandomizedBaseAddress>
<DataExecutionPrevention />
<TargetMachine>MachineX64</TargetMachine>
</Link>
<Bscmake>
<SuppressStartupBanner>true</SuppressStartupBanner>
</Bscmake>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="calc.cpp" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>

View File

@ -0,0 +1,302 @@
/*
@author herumi
tiny calculator 2
This program generates a function to calc the value of
polynomial given by user in run-time.
use boost::spirit::qi
*/
#ifdef _WIN32
#pragma warning(disable : 4127) // for boost(constant condition)
#pragma warning(disable : 4512) // for boost
#pragma warning(disable : 4819)
#endif
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_container.hpp>
#include <boost/spirit/include/phoenix_bind.hpp>
#include <boost/timer.hpp>
#include <stdio.h>
#include <assert.h>
#include <string>
#include <vector>
#define XBYAK_NO_OP_NAMES
#include "xbyak/xbyak.h"
// Instruction kinds of the stack machine used as the reference implementation.
enum Operand {
    OpAdd,
    OpSub,
    OpMul,
    OpDiv,
    OpNeg,
    OpImm,
    OpVarX
};
// One instruction: an operation, plus the constant to push when op_ is OpImm.
struct Code {
    Operand op_;
    double val_;
    // instruction without an immediate value
    Code(Operand op) : op_(op), val_(0) {}
    // "push constant" instruction
    Code(double val) : op_(OpImm), val_(val) {}
};
typedef std::vector<Code> CodeSet;
// Interpreter that evaluates a CodeSet given in postfix order.
struct Vm {
    CodeSet code_;
    // Run code_ with the variable bound to x and return the value on top of the stack.
    double operator()(double x) const
    {
        const size_t maxStack = 16;
        double stack[maxStack];
        double *top = stack; // points one past the topmost value
        for (CodeSet::const_iterator pc = code_.begin(); pc != code_.end(); ++pc) {
            const Operand op = pc->op_;
            if (op == OpVarX) {
                *top++ = x;
            } else if (op == OpImm) {
                *top++ = pc->val_;
            } else if (op == OpNeg) {
                top[-1] = -top[-1];
            } else if (op == OpAdd || op == OpSub || op == OpMul || op == OpDiv) {
                // pop the right operand and combine it into the left one
                --top;
                const double rhs = top[0];
                double& lhs = top[-1];
                if (op == OpAdd) {
                    lhs += rhs;
                } else if (op == OpSub) {
                    lhs -= rhs;
                } else if (op == OpMul) {
                    lhs *= rhs;
                } else {
                    lhs /= rhs;
                }
            }
            assert(top < stack + maxStack);
        }
        return top[-1];
    }
};
class Jit : public Xbyak::CodeGenerator {
private:
enum {
MAX_CONST_NUM = 32
};
MIE_ALIGN(16) double constTbl_[MAX_CONST_NUM];
Xbyak::uint64 negConst_;
size_t constTblPos_;
#ifdef XBYAK32
const Xbyak::Reg32& varTbl_;
const Xbyak::Reg32& tbl_;
#else
const Xbyak::Reg64& tbl_;
#endif
int regIdx_;
public:
/*
double jit(double x);
@note 32bit: x : [esp+4], return fp0
64bit: x [rcx](win), xmm0(gcc), return xmm0
*/
Jit()
: negConst_(Xbyak::uint64(1) << 63)
, constTblPos_(0)
#ifdef XBYAK32
, varTbl_(eax)
, tbl_(edx)
#elif defined(XBYAK64_WIN)
, tbl_(rcx)
#else
, tbl_(rdi)
#endif
, regIdx_(-1)
{
#ifdef XBYAK32
lea(varTbl_, ptr [esp+4]);
#else
#ifdef XBYAK64_WIN
movaps(ptr [rsp + 8], xm6); // save xm6, xm7
movaps(ptr [rsp + 8 + 16], xm7);
#endif
movaps(xm7, xm0); // save xm0
#endif
mov(tbl_, (size_t)constTbl_);
}
void genPush(double n)
{
if (constTblPos_ >= MAX_CONST_NUM) throw;
constTbl_[constTblPos_] = n;
if (regIdx_ == 7) throw;
movsd(Xbyak::Xmm(++regIdx_), ptr[tbl_ + constTblPos_ * sizeof(double)]);
constTblPos_++;
}
void genVarX()
{
#ifdef XBYAK32
if (regIdx_ == 7) throw;
movsd(Xbyak::Xmm(++regIdx_), ptr[varTbl_]);
#else
if (regIdx_ == 6) throw;
movsd(Xbyak::Xmm(++regIdx_), xm7);
#endif
}
void genAdd()
{
addsd(Xbyak::Xmm(regIdx_ - 1), Xbyak::Xmm(regIdx_)); regIdx_--;
}
void genSub()
{
subsd(Xbyak::Xmm(regIdx_ - 1), Xbyak::Xmm(regIdx_)); regIdx_--;
}
void genMul()
{
mulsd(Xbyak::Xmm(regIdx_ - 1), Xbyak::Xmm(regIdx_)); regIdx_--;
}
void genDiv()
{
divsd(Xbyak::Xmm(regIdx_ - 1), Xbyak::Xmm(regIdx_)); regIdx_--;
}
void genNeg()
{
xorpd(Xbyak::Xmm(regIdx_), ptr [tbl_ + MAX_CONST_NUM * sizeof(double)]);
}
void complete()
{
#ifdef XBYAK32
sub(esp, 8);
movsd(ptr [esp], xm0);
fld(qword [esp]);
add(esp, 8);
#else
#ifdef XBYAK64_WIN
movaps(xm6, ptr [rsp + 8]);
movaps(xm7, ptr [rsp + 8 + 16]);
#endif
#endif
ret();
}
};
// Boost.Spirit Qi grammar that translates a formula into instructions for Vm.
// Whitespace is skipped (ascii::space_type). An operator instruction is appended
// after its operand(s) have been parsed, so the CodeSet ends up in postfix order.
template<typename Iterator>
struct Parser : boost::spirit::qi::grammar<Iterator, boost::spirit::ascii::space_type> {
boost::spirit::qi::rule<Iterator, boost::spirit::ascii::space_type> expression, term, factor;
CodeSet& code_; // output; owned by the caller
Parser(CodeSet& code)
: Parser::base_type(expression)
, code_(code)
{
namespace qi = boost::spirit::qi;
using namespace qi::labels;
using boost::phoenix::ref;
using boost::phoenix::push_back;
// expression : term followed by any number of ('+' term) or ('-' term)
expression = term >> *(('+' > term[push_back(ref(code_), OpAdd)])
| ('-' > term[push_back(ref(code_), OpSub)]));
// term : factor followed by any number of ('*' factor) or ('/' factor)
term = factor >> *(('*' > factor[push_back(ref(code_), OpMul)])
| ('/' > factor[push_back(ref(code_), OpDiv)]));
// factor : number | 'x' | '(' expression ')' | '-' factor | '+' factor
// a number is appended as Code(double), i.e. an OpImm instruction
factor = qi::double_[push_back(ref(code_), _1)]
| qi::lit('x')[push_back(ref(code_), OpVarX)]
| ('(' > expression > ')')
| ('-' > factor[push_back(ref(code_), OpNeg)])
| ('+' > factor);
}
};
// Boost.Spirit Qi grammar with the same rules as Parser, but the semantic
// actions call the code generator code_ directly, so machine code is emitted
// while the formula is parsed.
template<typename Iterator>
struct ParserJit : boost::spirit::qi::grammar<Iterator, boost::spirit::ascii::space_type> {
boost::spirit::qi::rule<Iterator, boost::spirit::ascii::space_type> expression, term, factor;
Jit code_; // receives the generated code; call code_.complete() after parsing
ParserJit()
: ParserJit::base_type(expression)
{
namespace qi = boost::spirit::qi;
using namespace qi::labels;
using boost::phoenix::ref;
using boost::phoenix::push_back;
using boost::phoenix::bind;
// expression : term followed by any number of ('+' term) or ('-' term)
expression = term >> *(('+' > term[bind(&Jit::genAdd, ref(code_))])
| ('-' > term[bind(&Jit::genSub, ref(code_))]));
// term : factor followed by any number of ('*' factor) or ('/' factor)
term = factor >> *(('*' > factor[bind(&Jit::genMul, ref(code_))])
| ('/' > factor[bind(&Jit::genDiv, ref(code_))]));
// factor : number | 'x' | '(' expression ')' | '-' factor | '+' factor
factor = qi::double_[bind(&Jit::genPush, ref(code_), _1)]
| qi::lit('x')[bind(&Jit::genVarX, ref(code_))]
| ('(' > expression > ')')
| ('-' > factor[bind(&Jit::genNeg, ref(code_))])
| ('+' > factor);
}
};
// Benchmark helper: sums f(x) for x = 0, 0.0001, 0.0002, ... while x < 1000 and
// prints msg, the sum and the elapsed time reported by boost::timer.
template<class Func>
void Test(const char *msg, const Func& f)
{
    printf("%s:", msg);
    boost::timer t;
    double sum = 0;
    double x = 0;
    while (x < 1000) {
        sum += f(x);
        x += 0.0001;
    }
    printf("sum=%f, %fsec\n", sum, t.elapsed());
}
// Parse argv[1] twice, once into Vm instructions and once into JIT code,
// print the interpreted value at x = 2.3 and benchmark both evaluators.
int main(int argc, char *argv[])
{
    if (argc < 2) {
        fprintf(stderr, "input formula\n");
        return 1;
    }
    const std::string str(argv[1]);
    try {
        typedef std::string::const_iterator Iter;
        Vm vm;
        Parser<Iter> parser(vm.code_);
        ParserJit<Iter> parserJit;
        const Iter end = str.end();

        // first pass: build the instructions for the interpreter
        Iter i = str.begin();
        const bool vmParsed = phrase_parse(i, end, parser, boost::spirit::ascii::space);
        if (!vmParsed || i != end) {
            puts("err 1");
            return 1;
        }
        printf("ret=%f\n", vm(2.3));

        // second pass: generate machine code
        i = str.begin();
        const bool jitParsed = phrase_parse(i, end, parserJit, boost::spirit::ascii::space);
        if (!jitParsed || i != end) {
            puts("err 2");
            return 1;
        }
        parserJit.code_.complete();
        double (*jit)(double) = parserJit.code_.getCode<double (*)(double)>();
        Test("VM ", vm);
        Test("JIT", jit);
    } catch (...) {
        fprintf(stderr, "err\n");
    }
}

View File

@ -0,0 +1,5 @@
>>++++++++[->++++++++<]>>>>+++++++++[->++++++++++<]>[<<,[->+<<+<<+>>>]<<<[
->>>+<<<]>>>>>[->+>>+<<<]>[<<[->+>>+<<<]>>>[-<<<+>>>]<<[[-]<->]>-]>>[-<<<+
>>>]<<<<<<<[-<+<<+>>>]<[>>[-<+<<+>>>]<<<[->>>+<<<]>>[[-]>-<]<-]<<[->>>+<<<
]>>>>><[[-]>++++++++++++++++++++++++++++++++>[[-]<------------------------
-------->]<<]>>[-]<.>>]

View File

@ -0,0 +1,19 @@
++++++[->++++>>+>+>-<<<<<]>
[<++++>>+++>++++>>+++>+++++>+++++>>>>>>++>>++<<<<<<<<<<<<<<-]
<++++>+++>-->+++>->>--->++>>>+++++[->++>++<<]<<<<<<<<<<
[->
-[>>>>>>>]>[<+++>.>.>>>>..>>>+<]<<<<<
-[>>>>]>[<+++++>.>.>..>>>+<]>>>>
+<-[<<<]<[
[-<<+>>]>>>+>+<<<<<<[->>+>+>-<<<<]<
]>>
[[-]<]>[
>>>[>.<<.<<<]<[.<<<<]>
]
>.<<<<<<<<<<<
]

View File

@ -0,0 +1,3 @@
>+++++++++[<++++++++>-]<.>+++++++[<++++>-]<+.+++++++..+++.[-]>++++++++[<++
++>-]<.>+++++++++++[<+++++>-]<.>++++++++[<+++>-]<.+++.------.--------.[-]>
++++++++[<++++>-]<+.[-]++++++++++.

View File

@ -0,0 +1,128 @@
/*
sample of mov(reg, LABEL), L(LABEL) and putL(LABEL)
*/
#include <stdio.h>
#define XBYAK_NO_OP_NAMES
#include <xbyak/xbyak.h>
const int expectTbl[] = {
5, 9, 12
};
// Generates int f(int i) that returns expectTbl[i], dispatching on i through
// a jump table. mode selects how the table is built:
// 0 : jump into a sequence of 8-byte aligned code fragments
// 1 : table of label addresses emitted with putL() after the labels are defined
// 2 : table of label addresses emitted with putL() before the labels are defined
// size and p are passed on to Xbyak::CodeGenerator.
struct Code : Xbyak::CodeGenerator {
explicit Code(int mode, size_t size, void *p)
: Xbyak::CodeGenerator(size, p)
{
inLocalLabel();
// a : the argument i at entry, later the return value; c : scratch register for addresses
#ifdef XBYAK64
const Xbyak::Reg64& a = rax;
const Xbyak::Reg64& c = rcx;
#ifdef XBYAK64_WIN
mov(rax, rcx);
#else
mov(rax, rdi);
#endif
#else
const Xbyak::Reg32& a = eax;
const Xbyak::Reg32& c = ecx;
mov(a, ptr [esp + 4]);
#endif
switch (mode) {
case 0:
// jump to .jmp_table + i * 8; every fragment starts on an 8-byte boundary
// NOTE(review): presumably each mov + ret pair fits into 8 bytes - confirm
mov(c, ".jmp_table");
lea(c, ptr [c + a * 8]);
jmp(c);
align(8);
L(".jmp_table");
mov(a, expectTbl[0]);
ret();
align(8);
mov(a, expectTbl[1]);
ret();
align(8);
mov(a, expectTbl[2]);
ret();
break;
case 1:
/*
the label for putL is defined when called
*/
// indirect jump through entry i of the address table emitted below
mov(c, ".jmp_table");
jmp(ptr [c + a * (int)sizeof(size_t)]);
L(".label1");
mov(a, expectTbl[0]);
jmp(".end");
L(".label2");
mov(a, expectTbl[1]);
jmp(".end");
L(".label3");
mov(a, expectTbl[2]);
jmp(".end");
L(".end");
ret();
ud2();
align(8);
L(".jmp_table");
putL(".label1");
putL(".label2");
putL(".label3");
break;
case 2:
/*
the label for putL is not defined when called
*/
// the table comes first, so execution jumps over it to .in
jmp(".in");
ud2();
align(8);
L(".jmp_table");
putL(".label1");
putL(".label2");
putL(".label3");
L(".in");
mov(c, ".jmp_table");
jmp(ptr [c + a * (int)sizeof(size_t)]);
L(".label1");
mov(a, expectTbl[0]);
jmp(".end");
L(".label2");
mov(a, expectTbl[1]);
jmp(".end");
L(".label3");
mov(a, expectTbl[2]);
jmp(".end");
L(".end");
ret();
break;
}
outLocalLabel();
}
};
int main()
try
{
for (int mode = 0; mode < 3; mode++) {
printf("mode=%d\n", mode);
for (int grow = 0; grow < 2; grow++) {
printf("auto grow=%s\n", grow ? "on" : "off");
Code c(mode, grow ? 30 : 4096, grow ? Xbyak::AutoGrow : 0);
int (*f)(int) = c.getCode<int (*)(int)>();
c.ready();
for (int i = 0; i < 3; i++) {
const int a = expectTbl[i];
const int b = f(i);
if (a != b) {
printf("ERR i=%d, a=%d, b=%d\n", i, a, b);
exit(1);
}
}
}
}
puts("ok");
} catch (std::exception& e) {
printf("ERR %s\n", e.what());
}

View File

@ -0,0 +1,111 @@
#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#define XBYAK_NO_OP_NAMES
#include <xbyak/xbyak.h>
// Plain class whose member function is re-implemented by the generated code.
// The generated code reads the object at offsets 0 and 4, so the order of the
// two int members must not change.
struct A {
    int x_;
    int y_;
    A() : x_(3), y_(5) {}
    // Returns x_ + y_ + a + b + c + d + e, accumulated from left to right.
    int func(int a, int b, int c, int d, int e) const
    {
        int sum = x_ + y_;
        sum = sum + a;
        sum = sum + b;
        sum = sum + c;
        sum = sum + d;
        sum = sum + e;
        return sum;
    }
};
#ifdef _MSC_VER
#pragma warning(disable : 4510 4512 4610)
#endif
// Generates code that can be called like A::func: it adds the two ints at
// [self] and [self + 4] and the five int arguments and returns the sum in eax.
// Where self and the arguments are found depends on the target:
// Win64       : rcx, edx, r8d, r9d, [rsp + 8 * 5], [rsp + 8 * 6]
// 64bit gcc   : rdi, esi, edx, ecx, r8d, r9d
// 32bit win   : self in ecx, arguments at [esp + 4]..[esp + 20], ret pops 5 * 4 bytes
// 32bit other : self at [esp + 4] (copied to ecx), arguments at [esp + 8]..[esp + 24]
// NOTE(review): these presumably match the member-call conventions of the
// respective compilers - confirm against the ABI documents
struct Code : public Xbyak::CodeGenerator {
Code()
{
using namespace Xbyak;
int RET_ADJ = 0; // number of argument bytes removed by ret
#ifdef XBYAK32
#ifdef _WIN32
const int PARA_ADJ = 0;
RET_ADJ = 5 * 4;
#else
const int PARA_ADJ = 4; // the arguments are shifted by the self pointer on the stack
mov(ecx, ptr [esp + 4]);
#endif
#endif
// location of the self pointer and of each argument for the current target
const struct {
#ifdef XBYAK32
const Reg32& self;
#else
const Reg64& self;
#endif
const Operand& a;
const Operand& b;
const Operand& c;
const Operand& d;
const Operand& e;
} para = {
#if defined(XBYAK64_WIN)
rcx,
edx,
r8d,
r9d,
ptr [rsp + 8 * 5],
ptr [rsp + 8 * 6],
#elif defined(XBYAK64_GCC)
rdi,
esi,
edx,
ecx,
r8d,
r9d,
#else
ecx,
ptr [esp + 4 + PARA_ADJ],
ptr [esp + 8 + PARA_ADJ],
ptr [esp + 12 + PARA_ADJ],
ptr [esp + 16 + PARA_ADJ],
ptr [esp + 20 + PARA_ADJ],
#endif
};
// eax = self->x_ + self->y_ + a + b + c + d + e
mov(eax, ptr [para.self]);
add(eax, ptr [para.self + 4]);
add(eax, para.a);
add(eax, para.b);
add(eax, para.c);
add(eax, para.d);
add(eax, para.e);
ret(RET_ADJ);
}
};
// Calls the generated code through a pointer to member function of A and
// compares the result with A::func for 10 sets of random values.
// Prints 'o' for a match and 'x' for a mismatch; returns 1 if a std::exception is caught.
int main()
{
#ifdef XBYAK64
printf("64bit");
#else
printf("32bit");
#endif
#ifdef _WIN32
puts(" win");
#else
puts(" linux");
#endif
try {
Code code;
int (A::*p)(int, int, int, int, int) const = 0;
const void *addr = code.getCode<void*>();
// overwrite the first sizeof(void*) bytes of the member pointer with the code address
// NOTE(review): relies on how the compiler represents pointers to member functions - confirm per compiler
memcpy(&p, &addr, sizeof(void*));
for (int i = 0; i < 10; i++) {
A a;
int t1, t2, t3, t4, t5, x, y;
a.x_ = rand(); a.y_ = rand();
t1 = rand(); t2 = rand(); t3 = rand();
t4 = rand(); t5 = rand();
x = a.func(t1, t2, t3, t4, t5); // reference result
y = (a.*p)(t1, t2, t3, t4, t5); // result of the generated code
printf("%c %d, %d\n", x == y ? 'o' : 'x', x, y);
}
} catch (std::exception& e) {
printf("err=%s\n", e.what());
return 1;
}
}

View File

@ -0,0 +1,90 @@
/*
How to profile JIT-code with perf or VTune
sudo perf record ./profiler 1
amplxe-cl -collect hotspots -result-dir r001hs -quiet ./profiler-vtune 2
*/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <xbyak/xbyak_util.h>
const int N = 3000000;
// Generated function f: sets eax to N, then loops over ten "sub eax, 1"
// instructions while the last subtraction leaves eax greater than zero,
// and finally returns 1.
struct Code : public Xbyak::CodeGenerator {
    Code()
    {
        mov(eax, N);
        Xbyak::Label lp = L();
        int remaining = 10;
        while (remaining > 0) {
            sub(eax, 1);
            --remaining;
        }
        jg(lp);
        mov(eax, 1);
        ret();
    }
};
// Generated function g: sets eax to N, then loops over ten "xorps xm0, xm0"
// instructions followed by "sub eax, 1" while eax stays greater than zero,
// and finally returns 1.
struct Code2 : public Xbyak::CodeGenerator {
    Code2()
    {
        mov(eax, N);
        Xbyak::Label lp = L();
        int remaining = 10;
        while (remaining > 0) {
            xorps(xm0, xm0);
            --remaining;
        }
        sub(eax, 1);
        jg(lp);
        mov(eax, 1);
        ret();
    }
};
// Returns 1/1 + 1/2 + ... + 1/n, added in that order (0 for n <= 0).
double s1(int n)
{
    double total = 0;
    int k = 0;
    while (k < n) {
        ++k;
        total += 1.0 / k;
    }
    return total;
}
// Returns the sum of 1 / (i * i + 1) + 2 / (i + 3) for i = 0 .. n - 1 (0 for n <= 0).
double s2(int n)
{
    double r = 0;
    for (int i = 0; i < n; i++) {
        // Do the arithmetic in double: the int product i * i overflows for
        // i >= 46341. For smaller i the values are exactly the same as before.
        const double x = i;
        r += 1.0 / (x * x + 1) + 2.0 / (x + 3);
    }
    return r;
}
// Registers the two generated functions with Xbyak::util::Profiler and then
// runs the compiled functions s1/s2 and the generated functions f/g in turn.
// The profiler mode is argv[1] (0 when no argument is given).
int main(int argc, char *argv[])
{
    int mode = 0;
    if (argc != 1) {
        mode = atoi(argv[1]);
    }
    Code c;
    Code2 c2;
    int (*f)() = (int (*)())c.getCode();
    int (*g)() = (int (*)())c2.getCode();
    printf("f:%p, %d\n", (const void*)f, (int)c.getSize());
    printf("g:%p, %d\n", (const void*)g, (int)c2.getSize());
    Xbyak::util::Profiler prof;
    printf("mode=%d\n", mode);
    prof.init(mode);
    prof.set("f", (const void*)f, c.getSize());
    prof.set("g", (const void*)g, c2.getSize());

    double sum = 0;
    // compiled code
    for (int i = 0; i < 20000; i++) {
        sum += s1(i);
        sum += s2(i);
    }
    printf("sum=%f\n", sum);
    // generated code
    const int callCount = 2000;
    for (int i = 0; i < callCount; i++) {
        sum += f();
    }
    printf("f=%f\n", sum);
    for (int i = 0; i < callCount; i++) {
        sum += g();
    }
    printf("g=%f\n", sum);
    puts("end");
}

View File

@ -0,0 +1,70 @@
#define XBYAK_NO_OP_NAMES
#include <xbyak/xbyak.h>
// Fixed-size (4096 bytes) generator constructed with Xbyak::DontSetProtectRWE;
// the caller changes the protection with setProtectModeRE()/setProtectModeRW().
// The generated function returns 123.
struct Code1 : Xbyak::CodeGenerator {
Code1()
: Xbyak::CodeGenerator(4096, Xbyak::DontSetProtectRWE)
{
mov(eax, 123);
ret();
}
// Append one zero byte to the code buffer, i.e. write to the code memory.
void update()
{
db(0);
}
};
// Switch the Code1 buffer to RE, run the generated function, then switch back
// to RW and write to the buffer.
// @param updateCode when true, the buffer is written while it is in RE mode,
// which is expected to crash (see the comment on that line)
void test1(bool updateCode)
{
Code1 c;
c.setProtectModeRE();
if (updateCode) c.update(); // segmentation fault
int (*f)() = c.getCode<int (*)()>();
printf("f=%d\n", f());
c.setProtectModeRW();
c.update(); // the buffer is writable again
puts("ok");
}
// Same generated function as Code1 (returns 123), but the generator is
// constructed with Xbyak::AutoGrow; test2() finalizes it with readyRE().
struct Code2 : Xbyak::CodeGenerator {
Code2()
: Xbyak::CodeGenerator(4096, Xbyak::AutoGrow)
{
mov(eax, 123);
ret();
}
// Append one zero byte to the code buffer, i.e. write to the code memory.
void update()
{
db(0);
}
};
// Same sequence as test1 for the AutoGrow generator: readyRE() is called
// instead of setProtectModeRE().
// @param updateCode when true, the buffer is written after readyRE(),
// which is expected to crash (see the comment on that line)
void test2(bool updateCode)
{
Code2 c;
c.readyRE();
if (updateCode) c.update(); // segmentation fault
int (*f)() = c.getCode<int (*)()>();
printf("f=%d\n", f());
c.setProtectModeRW();
c.update(); // the buffer is writable again
puts("ok");
}
// usage: <program> <testNum> [update]
// Runs test1 or test2; the write in RE mode is enabled when exactly two
// arguments are given.
int main(int argc, char *argv[])
{
    if (argc < 2) {
        fprintf(stderr, "%s <testNum> [update]\n", argv[0]);
        return 0;
    }
    const bool update = (argc == 3);
    const int n = atoi(argv[1]);
    printf("n=%d update=%d\n", n, update);
    if (n == 1) {
        test1(update);
    } else if (n == 2) {
        test2(update);
    } else {
        fprintf(stderr, "no test %d\n", n);
    }
}

View File

@ -0,0 +1,229 @@
/*
@author herumi
JPEG quantize sample
This program generates a quantization routine by using fast division algorithm in run-time.
time(sec)
quality 1(high) 10 50 100(low)
VC2005 8.0 8.0 8.0 8.0
Xbyak 1.6 0.8 0.5 0.5
; generated code at q = 1
push esi
push edi
mov edi,dword ptr [esp+0Ch]
mov esi,dword ptr [esp+10h]
mov eax,dword ptr [esi]
shr eax,4
mov dword ptr [edi],eax
mov eax,dword ptr [esi+4]
mov edx,0BA2E8BA3h
mul eax,edx
shr edx,3
...
; generated code at q = 100
push esi
push edi
mov edi,dword ptr [esp+0Ch]
mov esi,dword ptr [esp+10h]
mov eax,dword ptr [esi]
mov dword ptr [edi],eax
mov eax,dword ptr [esi+4]
mov dword ptr [edi+4],eax
mov eax,dword ptr [esi+8]
mov dword ptr [edi+8],eax
mov eax,dword ptr [esi+0Ch]
...
*/
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define XBYAK_NO_OP_NAMES
#include "xbyak/xbyak.h"
#ifdef _MSC_VER
#pragma warning(disable : 4996) // scanf
#endif
typedef Xbyak::uint64 uint64;
typedef Xbyak::uint32 uint32;
const int N = 64;
// Generates a 32bit function quantize(uint32 dest[64], const uint32 src[64])
// that divides src[i] by the constant qTbl[i] given to the constructor.
// The divisions are emitted as shifts and multiplications chosen at generation time.
class Quantize : public Xbyak::CodeGenerator {
// Returns the largest shift with (1 << shift) <= x, and -1 when x <= 0.
static int ilog2(int x)
{
int shift = 0;
while ((1 << shift) <= x) shift++;
return shift - 1;
}
public:
/*
input : esi
output : eax = [esi+offset] / dividend
destroy : edx
@note the parameter called dividend is the value to divide by
NOTE(review): dividend == 0 never leaves the first while loop - callers must pass a non-zero value
*/
void udiv(uint32 dividend, int offset)
{
mov(eax, ptr[esi + offset]);
/* dividend = odd x 2^exponent */
int exponent = 0, odd = dividend;
while ((odd & 1) == 0) {
odd >>= 1; exponent++;
}
if (odd == 1) { // trivial case
// a power of two: a shift is enough (nothing is emitted for dividend == 1)
if (exponent) {
shr(eax, exponent);
}
return;
}
// compute a multiplier m and a shift count len for the odd factor
// NOTE(review): this looks like the usual division-by-invariant-integer
// construction - confirm against its reference before changing any step
uint64 mLow, mHigh;
int len = ilog2(odd) + 1;
{
uint64 roundUp = uint64(1) << (32 + len);
uint64 k = roundUp / (0xFFFFFFFFL - (0xFFFFFFFFL % odd));
mLow = roundUp / odd;
mHigh = (roundUp + k) / odd;
}
while (((mLow >> 1) < (mHigh >> 1)) && (len > 0)) {
mLow >>= 1; mHigh >>= 1; len--;
}
// a selects the code sequence: 0 = multiply only, 1 = multiply, then add m to the product
uint64 m; int a;
if ((mHigh >> 32) == 0) {
m = mHigh; a = 0;
} else {
// mHigh does not fit into 32 bits: use a rounded multiplier and the add/adc sequence
len = ilog2(odd);
uint64 roundDown = uint64(1) << (32 + len);
mLow = roundDown / odd;
int r = (int)(roundDown % odd);
m = (r <= (odd >> 1)) ? mLow : mLow + 1;
a = 1;
}
// drop trailing zero bits of m together with the matching shift
while ((m & 1) == 0) {
m >>= 1; len--;
}
len += exponent; // fold the power-of-two part of the divisor into the final shift
mov(edx, int(m));
mul(edx); // edx:eax = eax * m
if (a) {
add(eax, int(m));
adc(edx, 0);
}
if (len) {
shr(edx, len);
}
mov(eax, edx);
}
/*
quantize(uint32 dest[64], const uint32 src[64]);
@param qTbl [in] divisor for each of the N elements; read at generation time only
*/
Quantize(const uint32 qTbl[64])
{
push(esi);
push(edi);
const int P_ = 4 * 2; // bytes pushed above, added to the stack offsets of the arguments
mov(edi, ptr [esp+P_+4]); // dest
mov(esi, ptr [esp+P_+8]); // src
for (int i = 0; i < N; i++) {
udiv(qTbl[i], i * 4);
mov(ptr[edi+i*4], eax);
}
pop(edi);
pop(esi);
ret();
}
};
// Reference implementation: dest[i] = src[i] / qTbl[i] for the N elements.
void quantize(uint32 dest[64], const uint32 src[64], const uint32 qTbl[64])
{
    uint32 *out = dest;
    const uint32 *in = src;
    const uint32 *divisor = qTbl;
    for (int left = N; left > 0; --left) {
        *out++ = *in++ / *divisor++;
    }
}
#ifdef XBYAK64
// 64bit build: the sample is not available, so only a message is printed
// and 1 is returned.
int main()
{
puts("not implemented for 64bit");
return 1;
}
#else
int main(int argc, char *argv[])
{
int q;
if (argc > 1) {
q = atoi(argv[1]);
} else {
printf("input quantize=");
if (scanf("%d", &q) != 1) {
fprintf(stderr, "bad number\n");
return 1;
}
}
printf("q=%d\n", q);
uint32 qTbl[] = {
16, 11, 10, 16, 24, 40, 51, 61,
12, 12, 14, 19, 26, 58, 60, 55,
14, 13, 16, 24, 40, 57, 69, 56,
14, 17, 22, 29, 51, 87, 80, 62,
18, 22, 37, 56, 68, 109, 103, 77,
24, 35, 55, 64, 81, 104, 113, 92,
49, 64, 78, 87, 103, 121, 120, 101,
72, 92, 95, 98, 112, 100, 103, 99
};
for (int i = 0; i < N; i++) {
qTbl[i] /= q;
if (qTbl[i] == 0) qTbl[i] = 1;
}
try {
uint32 src[N];
uint32 dest[N];
uint32 dest2[N];
for (int i = 0; i < N; i++) {
src[i] = rand() % 2048;
}
Quantize jit(qTbl);
//printf("jit size=%d, ptr=%p\n", jit.getSize(), jit.getCode());
void (*quantize2)(uint32*, const uint32*, const uint32 *) = jit.getCode<void (*)(uint32*, const uint32*, const uint32 *)>();
quantize(dest, src, qTbl);
quantize2(dest2, src, qTbl);
for (int i = 0; i < N; i++) {
if (dest[i] != dest2[i]) {
printf("err[%d] %d %d\n", i, dest[i], dest2[i]);
}
}
const int count = 10000000;
int begin;
begin = clock();
for (int i = 0; i < count; i++) {
quantize(dest, src, qTbl);
}
printf("time=%.1fsec\n", (clock() - begin) / double(CLOCKS_PER_SEC));
begin = clock();
for (int i = 0; i < count; i++) {
quantize2(dest, src, qTbl);
}
printf("time=%.1fsec\n", (clock() - begin) / double(CLOCKS_PER_SEC));
} catch (std::exception& e) {
printf("ERR:%s\n", e.what());
} catch (...) {
printf("unknown error\n");
}
}
#endif

View File

@ -0,0 +1,228 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<!-- Project identity: the GUID of this project and the Windows SDK version it is built against -->
<PropertyGroup Label="Globals">
<ProjectGuid>{D06753BF-E1F3-4578-9B18-08673327F77C}</ProjectGuid>
<WindowsTargetPlatformVersion>10.0.17763.0</WindowsTargetPlatformVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<PlatformToolset>v141</PlatformToolset>
<UseOfMfc>false</UseOfMfc>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<PlatformToolset>v141</PlatformToolset>
<UseOfMfc>false</UseOfMfc>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<PlatformToolset>v141</PlatformToolset>
<UseOfMfc>false</UseOfMfc>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<PlatformToolset>v141</PlatformToolset>
<UseOfMfc>false</UseOfMfc>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup>
<_ProjectFileVersion>15.0.27924.0</_ProjectFileVersion>
</PropertyGroup>
<!-- Per-configuration link mode and intermediate directory:
     incremental linking is on for Debug and off for Release;
     the x64 configurations add $(Platform) to the path so they do not share a directory with Win32 -->
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<LinkIncremental>true</LinkIncremental>
<IntDir>$(ProjectName)\$(Configuration)\</IntDir>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<LinkIncremental>false</LinkIncremental>
<IntDir>$(ProjectName)\$(Configuration)\</IntDir>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<LinkIncremental>true</LinkIncremental>
<IntDir>$(ProjectName)\$(Platform)\$(Configuration)\</IntDir>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<LinkIncremental>false</LinkIncremental>
<IntDir>$(ProjectName)\$(Platform)\$(Configuration)\</IntDir>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Midl>
<TypeLibraryName>.\Debug/quantize.tlb</TypeLibraryName>
<HeaderFileName />
</Midl>
<ClCompile>
<Optimization>Disabled</Optimization>
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<MinimalRebuild>true</MinimalRebuild>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
<PrecompiledHeaderOutputFile>.\Debug/quantize.pch</PrecompiledHeaderOutputFile>
<WarningLevel>Level4</WarningLevel>
<SuppressStartupBanner>true</SuppressStartupBanner>
</ClCompile>
<ResourceCompile>
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<Culture>0x0411</Culture>
</ResourceCompile>
<Link>
<SuppressStartupBanner>true</SuppressStartupBanner>
<GenerateDebugInformation>true</GenerateDebugInformation>
<ProgramDatabaseFile>.\Debug/quantize.pdb</ProgramDatabaseFile>
<SubSystem>Console</SubSystem>
<RandomizedBaseAddress>false</RandomizedBaseAddress>
<DataExecutionPrevention />
<TargetMachine>MachineX86</TargetMachine>
</Link>
<Bscmake>
<SuppressStartupBanner>true</SuppressStartupBanner>
</Bscmake>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Midl>
<TypeLibraryName>.\Release/quantize.tlb</TypeLibraryName>
<HeaderFileName />
</Midl>
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<StringPooling>true</StringPooling>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<FunctionLevelLinking>true</FunctionLevelLinking>
<PrecompiledHeaderOutputFile>.\Release/quantize.pch</PrecompiledHeaderOutputFile>
<WarningLevel>Level4</WarningLevel>
<SuppressStartupBanner>true</SuppressStartupBanner>
</ClCompile>
<ResourceCompile>
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<Culture>0x0411</Culture>
</ResourceCompile>
<Link>
<SuppressStartupBanner>true</SuppressStartupBanner>
<SubSystem>Console</SubSystem>
<RandomizedBaseAddress>false</RandomizedBaseAddress>
<DataExecutionPrevention />
<TargetMachine>MachineX86</TargetMachine>
</Link>
<Bscmake>
<SuppressStartupBanner>true</SuppressStartupBanner>
</Bscmake>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<Midl>
<TargetEnvironment>X64</TargetEnvironment>
<TypeLibraryName>.\Debug/quantize.tlb</TypeLibraryName>
<HeaderFileName />
</Midl>
<ClCompile>
<Optimization>Disabled</Optimization>
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<MinimalRebuild>true</MinimalRebuild>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
<PrecompiledHeaderOutputFile>.\Debug/quantize.pch</PrecompiledHeaderOutputFile>
<WarningLevel>Level4</WarningLevel>
<SuppressStartupBanner>true</SuppressStartupBanner>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
</ClCompile>
<ResourceCompile>
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<Culture>0x0411</Culture>
</ResourceCompile>
<Link>
<SuppressStartupBanner>true</SuppressStartupBanner>
<GenerateDebugInformation>true</GenerateDebugInformation>
<ProgramDatabaseFile>.\Debug/quantize.pdb</ProgramDatabaseFile>
<SubSystem>Console</SubSystem>
<RandomizedBaseAddress>false</RandomizedBaseAddress>
<DataExecutionPrevention />
<TargetMachine>MachineX64</TargetMachine>
</Link>
<Bscmake>
<SuppressStartupBanner>true</SuppressStartupBanner>
</Bscmake>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<Midl>
<TargetEnvironment>X64</TargetEnvironment>
<TypeLibraryName>.\Release/quantize.tlb</TypeLibraryName>
<HeaderFileName />
</Midl>
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<StringPooling>true</StringPooling>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<FunctionLevelLinking>true</FunctionLevelLinking>
<PrecompiledHeaderOutputFile>.\Release/quantize.pch</PrecompiledHeaderOutputFile>
<WarningLevel>Level4</WarningLevel>
<SuppressStartupBanner>true</SuppressStartupBanner>
</ClCompile>
<ResourceCompile>
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<Culture>0x0411</Culture>
</ResourceCompile>
<Link>
<SuppressStartupBanner>true</SuppressStartupBanner>
<SubSystem>Console</SubSystem>
<RandomizedBaseAddress>false</RandomizedBaseAddress>
<DataExecutionPrevention />
<TargetMachine>MachineX64</TargetMachine>
</Link>
<Bscmake>
<SuppressStartupBanner>true</SuppressStartupBanner>
</Bscmake>
</ItemDefinitionGroup>
<!-- Source files compiled into this sample: a single translation unit -->
<ItemGroup>
<ClCompile Include="quantize.cpp" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>

View File

@ -0,0 +1,29 @@
#define XBYAK_NO_OP_NAMES
#include <xbyak/xbyak_util.h>
#ifdef XBYAK32
#error "this sample is for only 64-bit mode"
#endif
/*
	Generates a 64-bit function that adds its three arguments;
	main() below calls it as int (*)(int, int, int).
*/
struct Code : public Xbyak::CodeGenerator {
Code()
{
// see xbyak/sample/sf_test.cpp for how to use other parameter
// StackFrame is asked for 3 parameters; sf.p[0], sf.p[1] and sf.p[2] are used as the three arguments
Xbyak::util::StackFrame sf(this, 3);
mov(rax, sf.p[0]);
add(rax, sf.p[1]);
add(rax, sf.p[2]);
// NOTE(review): no ret() is emitted here; presumably StackFrame emits the epilogue and return when sf goes out of scope - confirm in xbyak_util.h
}
};
int main()
{
Code c;
int (*f)(int, int, int) = c.getCode<int(*) (int, int, int)>();
int ret = f(3, 5, 2);
if (ret == 3 + 5 + 2) {
puts("ok");
} else {
puts("ng");
}
}

View File

@ -0,0 +1,45 @@
/*
sample showing how to use a statically allocated buffer as the code area
*/
#include <stdio.h>
#define XBYAK_NO_OP_NAMES
#include "xbyak/xbyak.h"
MIE_ALIGN(4096) char buf[4096];
/*
	Code generator that writes into the static buffer buf (passed to the
	CodeGenerator constructor as user memory) and emits a two-argument adder.
	s_code is a global instance, so the code is generated before main() runs.
*/
struct Code : Xbyak::CodeGenerator {
Code()
: Xbyak::CodeGenerator(sizeof(buf), buf)
{
puts("generate");
printf("ptr=%p, %p\n", getCode(), buf);
// emit "return a + b" using the argument locations of the current target
#ifdef XBYAK32
mov(eax, ptr [esp + 4]);
add(eax, ptr [esp + 8]);
#elif defined(XBYAK64_WIN)
lea(rax, ptr [rcx + rdx]);
#else
lea(rax, ptr [rdi + rsi]);
#endif
ret();
// generation is finished: switch buf to PROTECT_RE
// NOTE(review): the result of protect() is not checked here
Xbyak::CodeArray::protect(buf, sizeof(buf), Xbyak::CodeArray::PROTECT_RE);
}
~Code()
{
// put buf back to PROTECT_RW when the generator is destroyed
Xbyak::CodeArray::protect(buf, sizeof(buf), Xbyak::CodeArray::PROTECT_RW);
}
} s_code;
// Call the code that was generated into buf as int (*)(int, int).
inline int add(int a, int b)
{
	typedef int (*BinaryFunc)(int, int);
	const BinaryFunc generated = reinterpret_cast<BinaryFunc>(buf);
	return generated(a, b);
}
// Sum add(i, 5) for i = 0..9 through the generated code and print the total.
int main()
{
	int total = 0;
	int i = 0;
	while (i < 10) {
		total += add(i, 5);
		++i;
	}
	printf("sum=%d\n", total);
}

View File

@ -0,0 +1,190 @@
#if defined(_MSC_VER) && (_MSC_VER <= 1200)
#pragma warning(disable:4514)
#pragma warning(disable:4786)
#endif
#if defined(_MSC_VER) && (_MSC_VER >= 1900)
#pragma warning(disable:4456)
#endif
#include <stdio.h>
#include <stdlib.h>
#define XBYAK_NO_OP_NAMES
#include "xbyak/xbyak.h"
/*
	Generates int f(int n) that returns 0 + 1 + ... + n.
	userPtr, size : forwarded to Xbyak::CodeGenerator(size, userPtr)
*/
class Sample : public Xbyak::CodeGenerator {
// assignment is declared private and has no definition in this excerpt
void operator=(const Sample&);
public:
Sample(void *userPtr = 0, size_t size = Xbyak::DEFAULT_MAX_CODE_SIZE) : Xbyak::CodeGenerator(size, userPtr)
{
inLocalLabel(); // use local label for multiple instance
#ifdef XBYAK32
mov(ecx, ptr [esp + 4]); // n
#elif defined(XBYAK64_GCC)
mov(ecx, edi); // n
#else
// n = ecx
#endif
xor_(eax, eax); // sum
test(ecx, ecx);
jz(".exit"); // n == 0 : return 0
xor_(edx, edx); // i
L(".lp");
add(eax, edx); // sum += i
inc(edx);
cmp(edx, ecx);
jbe(".lp"); // repeat while i <= n (unsigned compare)
L(".exit");
ret();
outLocalLabel(); // end of local label
}
};
/*
	Generates int f(int x) that returns x + y.
	y is a constructor argument, so its value is embedded in the generated code.
*/
class AddFunc : public Xbyak::CodeGenerator {
// assignment is declared private and has no definition in this excerpt
void operator=(const AddFunc&);
public:
AddFunc(int y)
{
#ifdef XBYAK32
mov(eax, ptr [esp + 4]); // x is read from the stack
add(eax, y);
#elif defined(XBYAK64_WIN)
lea(rax, ptr [rcx + y]); // x is in rcx
#else
lea(eax, ptr [edi + y]); // x is in edi
#endif
ret();
}
// the generated code as a typed function pointer
int (*get() const)(int) { return getCode<int(*)(int)>(); }
};
/*
	Generates int f(const char *str) that returns atoi(str) by calling
	the C library function from the generated code.
*/
class CallAtoi : public Xbyak::CodeGenerator {
	// assignment is declared private and intentionally left undefined
	void operator=(const CallAtoi&);
public:
	CallAtoi()
	{
#ifdef XBYAK64
		/*
			The argument is already in the first argument register, so it is passed through as is.
			On entry rsp is 8 mod 16 (the caller's call pushed the return address),
			but rsp must be 16-byte aligned when call is executed.
			Reserve an odd multiple of 8 bytes to restore the alignment.
		*/
#ifdef XBYAK64_WIN
		const int stackAdj = 32 + 8; // 32-byte shadow space (the return address is destroyed without it in 64bit debug mode) + 8 for alignment
#else
		const int stackAdj = 8; // alignment only
#endif
		sub(rsp, stackAdj);
		mov(rax, (size_t)atoi);
		call(rax);
		add(rsp, stackAdj);
#else
		mov(eax, ptr [esp + 4]); // str
		push(eax);
#ifdef XBYAK_VARIADIC_TEMPLATE
		call(atoi);
#else
		call(reinterpret_cast<const void*>(atoi));
#endif
		add(esp, 4); // remove the pushed argument
#endif
		ret();
	}
	// the generated code as a typed function pointer
	int (*get() const)(const char *) { return getCode<int (*)(const char *)>(); }
};
/*
	Generates int f(const char *str) that jumps to atoi() instead of calling it.
	No ret() is emitted: atoi() returns directly to the caller of f.
*/
class JmpAtoi : public Xbyak::CodeGenerator {
// assignment is declared private and has no definition in this excerpt
void operator=(const JmpAtoi&);
public:
JmpAtoi()
{
/* already pushed "456" */
// the argument set up by the caller is left untouched for atoi()
#ifdef XBYAK64
mov(rax, (size_t)atoi);
jmp(rax);
#else
jmp(reinterpret_cast<const void*>(atoi));
#endif
}
// the generated code as a typed function pointer
int (*get() const)(const char *) { return getCode<int (*)(const char *)>(); }
};
/*
	Generator that emits nothing at construction; code is emitted by init().
*/
struct Reset : public Xbyak::CodeGenerator {
/*
	Emit a function whose result depends only on n (n is embedded as an immediate;
	the argument passed by the caller is not read):
	eax = 0; ecx = n; if ecx is 0 return;
	otherwise repeat { eax += ecx, (10 - n) times; ecx -= 1 } until ecx is 0, then return eax
*/
void init(int n)
{
xor_(eax, eax);
mov(ecx, n);
test(ecx, ecx);
jnz("@f"); // n != 0 : go to the loop
ret();
L("@@");
// generation-time loop: emits (10 - n) add instructions inside the run-time loop body
for (int i = 0; i < 10 - n; i++) {
add(eax, ecx);
}
sub(ecx, 1);
jnz("@b"); // loop until ecx reaches 0
ret();
}
};
void testReset()
{
puts("testReset");
Reset code;
int (*f)(int) = code.getCode<int(*)(int)>();
for (int i = 0; i < 10; i++) {
code.init(i);
int v = f(i);
printf("%d %d\n", i, v);
code.reset();
}
}
int main()
{
try {
Sample s;
printf("Xbyak version=%s\n", s.getVersionString());
#ifdef XBYAK64_GCC
puts("64bit mode(gcc)");
#elif defined(XBYAK64_WIN)
puts("64bit mode(win)");
#else
puts("32bit");
#endif
int (*func)(int) = s.getCode<int (*)(int)>();
for (int i = 0; i <= 10; i++) {
printf("0 + ... + %d = %d\n", i, func(i));
}
for (int i = 0; i < 10; i++) {
AddFunc a(i);
int (*add)(int) = a.get();
int y = add(i);
printf("%d + %d = %d\n", i, i, y);
}
CallAtoi c;
printf("call atoi(\"123\") = %d\n", c.get()("123"));
JmpAtoi j;
printf("jmp atoi(\"456\") = %d\n", j.get()("456"));
{
// use memory allocated by user
using namespace Xbyak;
const size_t codeSize = 4096;
uint8 buf[codeSize + 16];
uint8 *p = CodeArray::getAlignedAddress(buf);
Sample s(p, codeSize);
if (!CodeArray::protect(p, codeSize, CodeArray::PROTECT_RWE)) {
fprintf(stderr, "can't protect\n");
return 1;
}
int (*func)(int) = s.getCode<int (*)(int)>();
const uint8 *funcp = reinterpret_cast<const uint8*>(func);
if (funcp != p) {
fprintf(stderr, "internal error %p %p\n", p, funcp);
return 1;
}
printf("0 + ... + %d = %d\n", 100, func(100));
CodeArray::protect(p, codeSize, CodeArray::PROTECT_RW);
}
puts("OK");
testReset();
} catch (std::exception& e) {
printf("ERR:%s\n", e.what());
} catch (...) {
printf("unknown error\n");
}
}

View File

@ -0,0 +1,228 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<!-- Project identity: the GUID of this project and the Windows SDK version it is built against -->
<PropertyGroup Label="Globals">
<ProjectGuid>{1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E}</ProjectGuid>
<WindowsTargetPlatformVersion>10.0.17763.0</WindowsTargetPlatformVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<PlatformToolset>v141</PlatformToolset>
<UseOfMfc>false</UseOfMfc>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<PlatformToolset>v141</PlatformToolset>
<UseOfMfc>false</UseOfMfc>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<PlatformToolset>v141</PlatformToolset>
<UseOfMfc>false</UseOfMfc>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<PlatformToolset>v141</PlatformToolset>
<UseOfMfc>false</UseOfMfc>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup>
<_ProjectFileVersion>15.0.27924.0</_ProjectFileVersion>
</PropertyGroup>
<!-- Per-configuration link mode and intermediate directory:
     incremental linking is on for Debug and off for Release;
     the x64 configurations add $(Platform) to the path so they do not share a directory with Win32 -->
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<LinkIncremental>true</LinkIncremental>
<IntDir>$(ProjectName)\$(Configuration)\</IntDir>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<LinkIncremental>false</LinkIncremental>
<IntDir>$(ProjectName)\$(Configuration)\</IntDir>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<LinkIncremental>true</LinkIncremental>
<IntDir>$(ProjectName)\$(Platform)\$(Configuration)\</IntDir>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<LinkIncremental>false</LinkIncremental>
<IntDir>$(ProjectName)\$(Platform)\$(Configuration)\</IntDir>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Midl>
<TypeLibraryName>.\Debug/test0.tlb</TypeLibraryName>
<HeaderFileName />
</Midl>
<ClCompile>
<Optimization>Disabled</Optimization>
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<MinimalRebuild>true</MinimalRebuild>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
<PrecompiledHeaderOutputFile>.\Debug/test0.pch</PrecompiledHeaderOutputFile>
<WarningLevel>Level4</WarningLevel>
<SuppressStartupBanner>true</SuppressStartupBanner>
</ClCompile>
<ResourceCompile>
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<Culture>0x0411</Culture>
</ResourceCompile>
<Link>
<SuppressStartupBanner>true</SuppressStartupBanner>
<GenerateDebugInformation>true</GenerateDebugInformation>
<ProgramDatabaseFile>.\Debug/test0.pdb</ProgramDatabaseFile>
<SubSystem>Console</SubSystem>
<RandomizedBaseAddress>false</RandomizedBaseAddress>
<DataExecutionPrevention />
<TargetMachine>MachineX86</TargetMachine>
</Link>
<Bscmake>
<SuppressStartupBanner>true</SuppressStartupBanner>
</Bscmake>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Midl>
<TypeLibraryName>.\Release/test0.tlb</TypeLibraryName>
<HeaderFileName />
</Midl>
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<StringPooling>true</StringPooling>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<FunctionLevelLinking>true</FunctionLevelLinking>
<PrecompiledHeaderOutputFile>.\Release/test0.pch</PrecompiledHeaderOutputFile>
<WarningLevel>Level4</WarningLevel>
<SuppressStartupBanner>true</SuppressStartupBanner>
</ClCompile>
<ResourceCompile>
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<Culture>0x0411</Culture>
</ResourceCompile>
<Link>
<SuppressStartupBanner>true</SuppressStartupBanner>
<SubSystem>Console</SubSystem>
<RandomizedBaseAddress>false</RandomizedBaseAddress>
<DataExecutionPrevention />
<TargetMachine>MachineX86</TargetMachine>
</Link>
<Bscmake>
<SuppressStartupBanner>true</SuppressStartupBanner>
</Bscmake>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<Midl>
<TargetEnvironment>X64</TargetEnvironment>
<TypeLibraryName>.\Debug/test0.tlb</TypeLibraryName>
<HeaderFileName />
</Midl>
<ClCompile>
<Optimization>Disabled</Optimization>
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<MinimalRebuild>true</MinimalRebuild>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
<PrecompiledHeaderOutputFile>.\Debug/test0.pch</PrecompiledHeaderOutputFile>
<WarningLevel>Level4</WarningLevel>
<SuppressStartupBanner>true</SuppressStartupBanner>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
</ClCompile>
<ResourceCompile>
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<Culture>0x0411</Culture>
</ResourceCompile>
<Link>
<SuppressStartupBanner>true</SuppressStartupBanner>
<GenerateDebugInformation>true</GenerateDebugInformation>
<ProgramDatabaseFile>.\Debug/test0.pdb</ProgramDatabaseFile>
<SubSystem>Console</SubSystem>
<RandomizedBaseAddress>false</RandomizedBaseAddress>
<DataExecutionPrevention />
<TargetMachine>MachineX64</TargetMachine>
</Link>
<Bscmake>
<SuppressStartupBanner>true</SuppressStartupBanner>
</Bscmake>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<Midl>
<TargetEnvironment>X64</TargetEnvironment>
<TypeLibraryName>.\Release/test0.tlb</TypeLibraryName>
<HeaderFileName />
</Midl>
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<StringPooling>true</StringPooling>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<FunctionLevelLinking>true</FunctionLevelLinking>
<PrecompiledHeaderOutputFile>.\Release/test0.pch</PrecompiledHeaderOutputFile>
<WarningLevel>Level4</WarningLevel>
<SuppressStartupBanner>true</SuppressStartupBanner>
</ClCompile>
<ResourceCompile>
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<Culture>0x0411</Culture>
</ResourceCompile>
<Link>
<SuppressStartupBanner>true</SuppressStartupBanner>
<SubSystem>Console</SubSystem>
<RandomizedBaseAddress>false</RandomizedBaseAddress>
<DataExecutionPrevention />
<TargetMachine>MachineX64</TargetMachine>
</Link>
<Bscmake>
<SuppressStartupBanner>true</SuppressStartupBanner>
</Bscmake>
</ItemDefinitionGroup>
<!-- Source files compiled into this sample: a single translation unit -->
<ItemGroup>
<ClCompile Include="test0.cpp" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>

View File

@ -0,0 +1,125 @@
#include <stdio.h>
#define XBYAK_NO_OP_NAMES
#include "xbyak/xbyak_util.h"
#define NUM_OF_ARRAY(x) (sizeof(x) / sizeof(x[0]))
/*
	Generates a function without arguments that loads n into eax,
	executes popcnt(eax, eax) and returns; n is embedded in the generated code.
*/
struct PopCountTest : public Xbyak::CodeGenerator {
PopCountTest(int n)
{
mov(eax, n);
popcnt(eax, eax);
ret();
}
};
/*
	Print what Xbyak::util::Cpu reports about the running CPU:
	the vendor, every detected feature of the table below, a popcnt self-check
	(only when popcnt is available), the family information and,
	for Intel CPUs only, the data cache and core topology.
*/
void putCPUinfo()
{
	using namespace Xbyak::util;
	Cpu cpu;
	const char *vendor = cpu.has(Cpu::tINTEL) ? "intel" : "amd";
	printf("vendor %s\n", vendor);
	// feature bit -> name printed for it
	static const struct {
		Cpu::Type type;
		const char *str;
	} features[] = {
		{ Cpu::tMMX, "mmx" },
		{ Cpu::tMMX2, "mmx2" },
		{ Cpu::tCMOV, "cmov" },
		{ Cpu::tSSE, "sse" },
		{ Cpu::tSSE2, "sse2" },
		{ Cpu::tSSE3, "sse3" },
		{ Cpu::tSSSE3, "ssse3" },
		{ Cpu::tSSE41, "sse41" },
		{ Cpu::tSSE42, "sse42" },
		{ Cpu::tPOPCNT, "popcnt" },
		{ Cpu::t3DN, "3dn" },
		{ Cpu::tE3DN, "e3dn" },
		{ Cpu::tSSE4a, "sse4a" },
		{ Cpu::tSSE5, "sse5" },
		{ Cpu::tAESNI, "aesni" },
		{ Cpu::tRDTSCP, "rdtscp" },
		{ Cpu::tOSXSAVE, "osxsave(xgetvb)" },
		{ Cpu::tPCLMULQDQ, "pclmulqdq" },
		{ Cpu::tAVX, "avx" },
		{ Cpu::tFMA, "fma" },
		{ Cpu::tAVX2, "avx2" },
		{ Cpu::tBMI1, "bmi1" },
		{ Cpu::tBMI2, "bmi2" },
		{ Cpu::tLZCNT, "lzcnt" },
		{ Cpu::tPREFETCHW, "prefetchw" },
		{ Cpu::tENHANCED_REP, "enh_rep" },
		{ Cpu::tRDRAND, "rdrand" },
		{ Cpu::tADX, "adx" },
		{ Cpu::tRDSEED, "rdseed" },
		{ Cpu::tSMAP, "smap" },
		{ Cpu::tHLE, "hle" },
		{ Cpu::tRTM, "rtm" },
		{ Cpu::tMPX, "mpx" },
		{ Cpu::tSHA, "sha" },
		{ Cpu::tPREFETCHWT1, "prefetchwt1" },
		{ Cpu::tF16C, "f16c" },
		{ Cpu::tMOVBE, "movbe" },
		{ Cpu::tAVX512F, "avx512f" },
		{ Cpu::tAVX512DQ, "avx512dq" },
		{ Cpu::tAVX512IFMA, "avx512_ifma" },
		{ Cpu::tAVX512PF, "avx512pf" },
		{ Cpu::tAVX512ER, "avx512er" },
		{ Cpu::tAVX512CD, "avx512cd" },
		{ Cpu::tAVX512BW, "avx512bw" },
		{ Cpu::tAVX512VL, "avx512vl" },
		{ Cpu::tAVX512VBMI, "avx512_vbmi" },
		{ Cpu::tAVX512_4VNNIW, "avx512_4vnniw" },
		{ Cpu::tAVX512_4FMAPS, "avx512_4fmaps" },
		{ Cpu::tAVX512_VBMI2, "avx512_vbmi2" },
		{ Cpu::tGFNI, "gfni" },
		{ Cpu::tVAES, "vaes" },
		{ Cpu::tVPCLMULQDQ, "vpclmulqdq" },
		{ Cpu::tAVX512_VNNI, "avx512_vnni" },
		{ Cpu::tAVX512_BITALG, "avx512_bitalg" },
		{ Cpu::tAVX512_VPOPCNTDQ, "avx512_vpopcntdq" },
		{ Cpu::tAVX512_BF16, "avx512_bf16" },
		{ Cpu::tAVX512_VP2INTERSECT, "avx512_vp2intersect" },
	};
	const size_t featureNum = NUM_OF_ARRAY(features);
	for (size_t idx = 0; idx != featureNum; ++idx) {
		if (!cpu.has(features[idx].type)) continue;
		printf(" %s", features[idx].str);
	}
	printf("\n");
	if (cpu.has(Cpu::tPOPCNT)) {
		// run a generated popcnt and compare it with the known bit count
		const int n = 0x12345678; // bitcount = 13
		const int ok = 13;
		PopCountTest popcntCode(n);
		const int r = popcntCode.getCode<int (*)()>()();
		if (r != ok) {
			printf("popcnt ng %d %d\n", r, ok);
		} else {
			puts("popcnt ok");
		}
	}
	/*
		                displayFamily displayModel
		Opteron 2376        10            4
		Core2 Duo T7100      6            F
		Core i3-2120T        6           2A
		Core i7-2600         6           2A
		Xeon X5650           6           2C
		Core i7-3517         6           3A
		Core i7-3930K        6           2D
	*/
	cpu.putFamily();
	// the cache and topology queries below are made for Intel CPUs only
	if (cpu.has(Cpu::tINTEL)) {
		for (unsigned int level = 0; level < cpu.getDataCacheLevels(); level++) {
			printf("cache level=%u data cache size=%u cores sharing data cache=%u\n", level, cpu.getDataCacheSize(level), cpu.getCoresSharingDataCache(level));
		}
		printf("SmtLevel =%u\n", cpu.getNumCores(Xbyak::util::SmtLevel));
		printf("CoreLevel=%u\n", cpu.getNumCores(Xbyak::util::CoreLevel));
	}
}
// Print the mode this binary was built for, then the CPU information.
int main()
{
#if defined(XBYAK32)
	const char *mode = "32bit";
#else
	const char *mode = "64bit";
#endif
	puts(mode);
	putCPUinfo();
}

View File

@ -0,0 +1,228 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<!-- Project-wide settings: the GUID that identifies this project and the Windows SDK version it is built against. -->
<PropertyGroup Label="Globals">
<ProjectGuid>{CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD}</ProjectGuid>
<WindowsTargetPlatformVersion>10.0.17763.0</WindowsTargetPlatformVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<PlatformToolset>v141</PlatformToolset>
<UseOfMfc>false</UseOfMfc>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<PlatformToolset>v141</PlatformToolset>
<UseOfMfc>false</UseOfMfc>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<PlatformToolset>v141</PlatformToolset>
<UseOfMfc>false</UseOfMfc>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<PlatformToolset>v141</PlatformToolset>
<UseOfMfc>false</UseOfMfc>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup>
<_ProjectFileVersion>15.0.27924.0</_ProjectFileVersion>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<LinkIncremental>true</LinkIncremental>
<IntDir>$(ProjectName)\$(Configuration)\</IntDir>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<LinkIncremental>true</LinkIncremental>
<IntDir>$(ProjectName)\$(Platform)\$(Configuration)\</IntDir>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<LinkIncremental>false</LinkIncremental>
<IntDir>$(ProjectName)\$(Configuration)\</IntDir>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<LinkIncremental>false</LinkIncremental>
<IntDir>$(ProjectName)\$(Platform)\$(Configuration)\</IntDir>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Midl>
<TypeLibraryName>.\Debug/test_util.tlb</TypeLibraryName>
<HeaderFileName />
</Midl>
<ClCompile>
<Optimization>Disabled</Optimization>
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<MinimalRebuild>true</MinimalRebuild>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
<PrecompiledHeaderOutputFile>.\Debug/test_util.pch</PrecompiledHeaderOutputFile>
<WarningLevel>Level4</WarningLevel>
<SuppressStartupBanner>true</SuppressStartupBanner>
</ClCompile>
<ResourceCompile>
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<Culture>0x0411</Culture>
</ResourceCompile>
<Link>
<SuppressStartupBanner>true</SuppressStartupBanner>
<GenerateDebugInformation>true</GenerateDebugInformation>
<ProgramDatabaseFile>.\Debug/test_util.pdb</ProgramDatabaseFile>
<SubSystem>Console</SubSystem>
<RandomizedBaseAddress>false</RandomizedBaseAddress>
<DataExecutionPrevention />
<TargetMachine>MachineX86</TargetMachine>
</Link>
<Bscmake>
<SuppressStartupBanner>true</SuppressStartupBanner>
</Bscmake>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<Midl>
<TargetEnvironment>X64</TargetEnvironment>
<TypeLibraryName>.\Debug/test_util.tlb</TypeLibraryName>
<HeaderFileName />
</Midl>
<ClCompile>
<Optimization>Disabled</Optimization>
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<MinimalRebuild>true</MinimalRebuild>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
<PrecompiledHeaderOutputFile>.\Debug/test_util.pch</PrecompiledHeaderOutputFile>
<WarningLevel>Level4</WarningLevel>
<SuppressStartupBanner>true</SuppressStartupBanner>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
</ClCompile>
<ResourceCompile>
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<Culture>0x0411</Culture>
</ResourceCompile>
<Link>
<SuppressStartupBanner>true</SuppressStartupBanner>
<GenerateDebugInformation>true</GenerateDebugInformation>
<ProgramDatabaseFile>.\Debug/test_util.pdb</ProgramDatabaseFile>
<SubSystem>Console</SubSystem>
<RandomizedBaseAddress>false</RandomizedBaseAddress>
<DataExecutionPrevention />
<TargetMachine>MachineX64</TargetMachine>
</Link>
<Bscmake>
<SuppressStartupBanner>true</SuppressStartupBanner>
</Bscmake>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Midl>
<TypeLibraryName>.\Release/test_util.tlb</TypeLibraryName>
<HeaderFileName />
</Midl>
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<StringPooling>true</StringPooling>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<FunctionLevelLinking>true</FunctionLevelLinking>
<PrecompiledHeaderOutputFile>.\Release/test_util.pch</PrecompiledHeaderOutputFile>
<WarningLevel>Level4</WarningLevel>
<SuppressStartupBanner>true</SuppressStartupBanner>
</ClCompile>
<ResourceCompile>
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<Culture>0x0411</Culture>
</ResourceCompile>
<Link>
<SuppressStartupBanner>true</SuppressStartupBanner>
<SubSystem>Console</SubSystem>
<RandomizedBaseAddress>false</RandomizedBaseAddress>
<DataExecutionPrevention />
<TargetMachine>MachineX86</TargetMachine>
</Link>
<Bscmake>
<SuppressStartupBanner>true</SuppressStartupBanner>
</Bscmake>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<Midl>
<TargetEnvironment>X64</TargetEnvironment>
<TypeLibraryName>.\Release/test_util.tlb</TypeLibraryName>
<HeaderFileName />
</Midl>
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<StringPooling>true</StringPooling>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<FunctionLevelLinking>true</FunctionLevelLinking>
<PrecompiledHeaderOutputFile>.\Release/test_util.pch</PrecompiledHeaderOutputFile>
<WarningLevel>Level4</WarningLevel>
<SuppressStartupBanner>true</SuppressStartupBanner>
</ClCompile>
<ResourceCompile>
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<Culture>0x0411</Culture>
</ResourceCompile>
<Link>
<SuppressStartupBanner>true</SuppressStartupBanner>
<SubSystem>Console</SubSystem>
<RandomizedBaseAddress>false</RandomizedBaseAddress>
<DataExecutionPrevention />
<TargetMachine>MachineX64</TargetMachine>
</Link>
<Bscmake>
<SuppressStartupBanner>true</SuppressStartupBanner>
</Bscmake>
</ItemDefinitionGroup>
<!-- Source files compiled by this project: the single translation unit test_util.cpp. -->
<ItemGroup>
<ClCompile Include="test_util.cpp" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>

View File

@ -0,0 +1,380 @@
/*
toy vm
register A, B : 32bit
PC : program counter
mem_ 4byte x 65536
every instruction is a fixed 4 bytes
every immediate is 16bit
R = A or B
vldiR, imm ; R = imm
vldR, idx ; R = mem_[idx]
vstR, idx ; mem_[idx] = R
vaddiR, imm ; R += imm
vsubiR, imm ; R -= imm
vaddR, idx ; R += mem_[idx]
vsubR, idx ; R -= mem_[idx]
vputR ; print R
vjnzR, offset; if (R != 0) then jmp(PC += offset(signed))
*/
#if defined(_MSC_VER) && (_MSC_VER <= 1200)
#pragma warning(disable:4514)
#pragma warning(disable:4786)
#endif
#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include <vector>
#define XBYAK_NO_OP_NAMES
#include "xbyak/xbyak.h"
#include "xbyak/xbyak_util.h"
#define NUM_OF_ARRAY(x) (sizeof(x) / sizeof(x[0]))
#ifdef XBYAK64
#error "only 32bit"
#endif
using namespace Xbyak;
/*
	Toy virtual machine with two 32-bit registers (A, B) and 65536 words of memory.
	A program is built with the v*() emitters, then either interpreted by run()
	or translated to 32-bit x86 code by recompile().
	Each instruction is one 32-bit word: opcode in bits 31-24, register in
	bits 23-16 and a 16-bit immediate/index/offset in bits 15-0.
*/
class ToyVm : public Xbyak::CodeGenerator {
	typedef std::vector<uint32> Buffer;
public:
	// VM registers; the value doubles as an index into the register tables
	enum Reg {
		A, B
	};
	// opcodes stored in the top byte of an encoded instruction
	enum Code {
		LD, LDI, ST, ADD, ADDI, SUB, SUBI, PUT, JNZ,
		END_OF_CODE
	};
	ToyVm()
		: mark_(0)
	{
		::memset(mem_, 0, sizeof(mem_));
	}
	// emitters: each one appends a single encoded instruction to the program
	void vldi(Reg r, uint16 imm) { encode(LDI, r, imm); } // R = imm
	void vld(Reg r, uint16 idx) { encode(LD, r, idx); } // R = mem_[idx]
	void vst(Reg r, uint16 idx) { encode(ST, r, idx); } // mem_[idx] = R
	void vadd(Reg r, uint16 idx) { encode(ADD, r, idx); } // R += mem_[idx]
	void vaddi(Reg r, uint16 imm) { encode(ADDI, r, imm); } // R += imm
	void vsub(Reg r, uint16 idx) { encode(SUB, r, idx); } // R -= mem_[idx]
	void vsubi(Reg r, uint16 imm) { encode(SUBI, r, imm); } // R -= imm
	// if (R != 0) PC += offset; the offset is stored as a signed 16-bit value
	void vjnz(Reg r, int offset) { encode(JNZ, r, static_cast<uint16>(offset)); }
	void vput(Reg r) { encode(PUT, r); } // print R
	// remember the position of the next instruction as a jump target
	void setMark()
	{
		mark_ = (int)code_.size();
	}
	/*
		offset to pass to vjnz() so that the jump emitted next lands on the mark;
		the extra -1 compensates for the pc++ executed after every instruction
	*/
	int getMarkOffset()
	{
		return mark_ - (int)code_.size() - 1;
	}
	/*
		interpret the program from the first instruction until pc leaves it;
		an empty program is a no-op
	*/
	void run()
	{
		bool debug = false;//true;
		uint32 reg[2] = { 0, 0 };
		const size_t end = code_.size();
		uint32 pc = 0;
		// check the bound before fetching so an empty program never indexes code_
		while (pc < end) {
			uint32 x = code_[pc];
			uint32 code, r, imm;
			decode(code, r, imm, x);
			if (debug) {
				printf("---\n");
				printf("A %08x B %08x\n", reg[0], reg[1]);
				printf("mem_[] = %08x %08x %08x\n", mem_[0], mem_[1], mem_[2]);
				printf("pc=%4d, code=%02x, r=%d, imm=%04x\n", pc, code, r, imm);
			}
			switch (code) {
			case LDI:
				reg[r] = imm;
				break;
			case LD:
				reg[r] = mem_[imm];
				break;
			case ST:
				mem_[imm] = reg[r];
				break;
			case ADD:
				reg[r] += mem_[imm];
				break;
			case ADDI:
				reg[r] += imm;
				break;
			case SUB:
				reg[r] -= mem_[imm];
				break;
			case SUBI:
				reg[r] -= imm;
				break;
			case PUT:
				printf("%c %8d(0x%08x)\n", 'A' + r, reg[r], reg[r]);
				break;
			case JNZ:
				// imm holds a signed 16-bit offset; pc++ below completes the jump
				if (reg[r] != 0) pc += static_cast<signed short>(imm);
				break;
			default:
				assert(0);
				break;
			}
			pc++;
		}
	}
	/*
		translate the program into x86 code in this CodeGenerator's buffer;
		an empty program yields only the prologue and epilogue
	*/
	void recompile()
	{
		using namespace Xbyak;
		/*
			esi : A
			edi : B
			ebx : mem_
			for speed up
			mem_[0] : eax
			mem_[1] : ecx
			mem_[2] : edx
		*/
		push(ebx);
		push(esi);
		push(edi);
		const Reg32 reg[2] = { esi, edi };
		const Reg32 mem(ebx);
		const Reg32 memTbl[] = { eax, ecx, edx };
		const size_t memTblNum = NUM_OF_ARRAY(memTbl);
		for (size_t i = 0; i < memTblNum; i++) xor_(memTbl[i], memTbl[i]);
		xor_(esi, esi);
		xor_(edi, edi);
		mov(mem, (size_t)mem_);
		const size_t end = code_.size();
		uint32 pc = 0;
		uint32 labelNum = 0;
		// check the bound before fetching so an empty program never indexes code_
		while (pc < end) {
			uint32 x = code_[pc];
			uint32 code, r, imm;
			decode(code, r, imm, x);
			// one label per VM instruction; after this line labelNum == pc + 1
			L(Label::toStr(labelNum++));
			switch (code) {
			case LDI:
				mov(reg[r], imm);
				break;
			case LD:
				if (imm < memTblNum) {
					mov(reg[r], memTbl[imm]);
				} else {
					mov(reg[r], ptr[mem + imm * 4]);
				}
				break;
			case ST:
				if (imm < memTblNum) {
					mov(memTbl[imm], reg[r]);
				} else {
					mov(ptr [mem + imm * 4], reg[r]);
				}
				break;
			case ADD:
				if (imm < memTblNum) {
					add(reg[r], memTbl[imm]);
				} else {
					add(reg[r], ptr [mem + imm * 4]);
				}
				break;
			case ADDI:
				add(reg[r], imm);
				break;
			case SUB:
				if (imm < memTblNum) {
					sub(reg[r], memTbl[imm]);
				} else {
					sub(reg[r], ptr [mem + imm * 4]);
				}
				break;
			case SUBI:
				sub(reg[r], imm);
				break;
			case PUT:
				{
					static const char *str = "%c %8d(0x%08x)\n";
					// eax/edx/ecx hold mem_[0..2], so keep them across the call
					push(eax);
					push(edx);
					push(ecx);
					// printf arguments, pushed last to first
					push(reg[r]);
					push(reg[r]);
					push('A' + r);
					push((int)str);
					call(reinterpret_cast<const void*>(printf));
					// drop the four arguments pushed above
					add(esp, 4 * 4);
					pop(ecx);
					pop(edx);
					pop(eax);
				}
				break;
			case JNZ:
				test(reg[r], reg[r]);
				// labelNum is pc + 1 here, matching the interpreter's pc += offset; pc++
				jnz(Label::toStr(labelNum + static_cast<signed short>(imm)));
				break;
			default:
				assert(0);
				break;
			}
			pc++;
		}
		pop(edi);
		pop(esi);
		pop(ebx);
		ret();
	}
private:
	uint32 mem_[65536]; // VM data memory, addressed by a 16-bit index
	Buffer code_; // encoded program, one word per instruction
	int mark_; // instruction index recorded by setMark()
	// split an instruction word into opcode, register and 16-bit immediate
	void decode(uint32& code, uint32& r, uint32& imm, uint32 x)
	{
		code = x >> 24;
		r = (x >> 16) & 0xff;
		imm = x & 0xffff;
	}
	// pack opcode, register and immediate into one word and append it to the program
	void encode(Code code, Reg r, uint16 imm = 0)
	{
		uint32 x = (code << 24) | (r << 16) | imm;
		code_.push_back(x);
	}
};
/*
	ToyVm program that runs the Fibonacci-style update (t = c; c += p; p = t)
	n times and then prints register A.
*/
class Fib : public ToyVm {
public:
	/*
		build the program for n iterations
		n must fit in the 16-bit immediate; otherwise an error is printed to
		stderr and the program is left empty
	*/
	Fib(int n)
	{
		if (n >= 65536) {
			fprintf(stderr, "current version support only imm16\n");
			return;
		}
		/*
			A : c
			B : temporary
			mem_[0] : p
			mem_[1] : t
			mem_[2] : n
		*/
		vldi(A, 1); // c
		vst(A, 0); // p(1)
		vldi(B, static_cast<uint16>(n));
		vst(B, 2); // n
		// lp
		setMark();
		vst(A, 1); // t = c
		vadd(A, 0); // c += p
		vld(B, 1);
		vst(B, 0); // p = t
		// vput(A);
		vld(B, 2);
		vsubi(B, 1);
		vst(B, 2); // n--
		vjnz(B, getMarkOffset());
		vput(A);
	}
	/*
		execute the machine code generated by recompile()
		recompile() must have been called beforehand
	*/
	void runByJIT()
	{
		// getCode() only returns the entry point; the trailing () performs the call
		getCode<void (*)()>()();
	}
};
/*
	Native reference for the Fib VM program: apply t = c; c += p; p = t
	n times, then print c in decimal and hex.
	The body always runs at least once, and n is decremented before it is tested.
*/
void fibC(uint32 n)
{
	uint32 prev = 1;
	uint32 cur = 1;
	do {
		const uint32 saved = cur;
		cur += prev;
		prev = saved;
	} while (--n != 0);
	printf("c=%d(0x%08x)\n", cur, cur);
}
int main()
{
try {
const int n = 10000;
Fib fib(n);
fib.recompile();
{
Xbyak::util::Clock clk;
clk.begin();
fib.run();
clk.end();
printf("vm %.2fKclk\n", clk.getClock() * 1e-3);
}
{
Xbyak::util::Clock clk;
clk.begin();
fib.runByJIT();
clk.end();
printf("jit %.2fKclk\n", clk.getClock() * 1e-3);
}
{
Xbyak::util::Clock clk;
clk.begin();
fibC(n);
clk.end();
printf("native C %.2fKclk\n", clk.getClock() * 1e-3);
}
} catch (std::exception& e) {
printf("ERR:%s\n", e.what());
} catch (...) {
printf("unknown error\n");
}
return 0;
}
/*
the code generated by Xbyak
push ebx
push esi
push edi
xor eax,eax
xor ecx,ecx
xor edx,edx
xor esi,esi
xor edi,edi
mov ebx,0EFF58h
mov esi,1
mov eax,esi
mov edi,2710h
mov edx,edi
.lp:
mov ecx,esi
add esi,eax
mov edi,ecx
mov eax,edi
mov edi,edx
sub edi,1
mov edx,edi
test edi,edi
jne .lp
push eax
push edx
push ecx
push esi
push esi
push 41h
push 42C434h
call printf (409342h)
add esp,10h
pop ecx
pop edx
pop eax
pop edi
pop esi
pop ebx
ret
*/

View File

@ -0,0 +1,228 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<!-- Project-wide settings: the GUID that identifies this project and the Windows SDK version it is built against. -->
<PropertyGroup Label="Globals">
<ProjectGuid>{2E41C7AF-39FF-454C-B081-37445378DCB3}</ProjectGuid>
<WindowsTargetPlatformVersion>10.0.17763.0</WindowsTargetPlatformVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<PlatformToolset>v141</PlatformToolset>
<UseOfMfc>false</UseOfMfc>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<PlatformToolset>v141</PlatformToolset>
<UseOfMfc>false</UseOfMfc>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<PlatformToolset>v141</PlatformToolset>
<UseOfMfc>false</UseOfMfc>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<PlatformToolset>v141</PlatformToolset>
<UseOfMfc>false</UseOfMfc>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup>
<_ProjectFileVersion>15.0.27924.0</_ProjectFileVersion>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<LinkIncremental>true</LinkIncremental>
<IntDir>$(ProjectName)\$(Configuration)\</IntDir>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<LinkIncremental>false</LinkIncremental>
<IntDir>$(ProjectName)\$(Configuration)\</IntDir>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<LinkIncremental>true</LinkIncremental>
<IntDir>$(ProjectName)\$(Platform)\$(Configuration)\</IntDir>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<LinkIncremental>false</LinkIncremental>
<IntDir>$(ProjectName)\$(Platform)\$(Configuration)\</IntDir>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Midl>
<TypeLibraryName>.\Debug/toyvm.tlb</TypeLibraryName>
<HeaderFileName />
</Midl>
<ClCompile>
<Optimization>Disabled</Optimization>
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<MinimalRebuild>true</MinimalRebuild>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
<PrecompiledHeaderOutputFile>.\Debug/toyvm.pch</PrecompiledHeaderOutputFile>
<WarningLevel>Level4</WarningLevel>
<SuppressStartupBanner>true</SuppressStartupBanner>
</ClCompile>
<ResourceCompile>
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<Culture>0x0411</Culture>
</ResourceCompile>
<Link>
<SuppressStartupBanner>true</SuppressStartupBanner>
<GenerateDebugInformation>true</GenerateDebugInformation>
<ProgramDatabaseFile>.\Debug/toyvm.pdb</ProgramDatabaseFile>
<SubSystem>Console</SubSystem>
<RandomizedBaseAddress>false</RandomizedBaseAddress>
<DataExecutionPrevention />
<TargetMachine>MachineX86</TargetMachine>
</Link>
<Bscmake>
<SuppressStartupBanner>true</SuppressStartupBanner>
</Bscmake>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Midl>
<TypeLibraryName>.\Release/toyvm.tlb</TypeLibraryName>
<HeaderFileName />
</Midl>
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<StringPooling>true</StringPooling>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<FunctionLevelLinking>true</FunctionLevelLinking>
<PrecompiledHeaderOutputFile>.\Release/toyvm.pch</PrecompiledHeaderOutputFile>
<WarningLevel>Level4</WarningLevel>
<SuppressStartupBanner>true</SuppressStartupBanner>
</ClCompile>
<ResourceCompile>
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<Culture>0x0411</Culture>
</ResourceCompile>
<Link>
<SuppressStartupBanner>true</SuppressStartupBanner>
<SubSystem>Console</SubSystem>
<RandomizedBaseAddress>false</RandomizedBaseAddress>
<DataExecutionPrevention />
<TargetMachine>MachineX86</TargetMachine>
</Link>
<Bscmake>
<SuppressStartupBanner>true</SuppressStartupBanner>
</Bscmake>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<Midl>
<TargetEnvironment>X64</TargetEnvironment>
<TypeLibraryName>.\Debug/toyvm.tlb</TypeLibraryName>
<HeaderFileName />
</Midl>
<ClCompile>
<Optimization>Disabled</Optimization>
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<MinimalRebuild>true</MinimalRebuild>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
<PrecompiledHeaderOutputFile>.\Debug/toyvm.pch</PrecompiledHeaderOutputFile>
<WarningLevel>Level4</WarningLevel>
<SuppressStartupBanner>true</SuppressStartupBanner>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
</ClCompile>
<ResourceCompile>
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<Culture>0x0411</Culture>
</ResourceCompile>
<Link>
<SuppressStartupBanner>true</SuppressStartupBanner>
<GenerateDebugInformation>true</GenerateDebugInformation>
<ProgramDatabaseFile>.\Debug/toyvm.pdb</ProgramDatabaseFile>
<SubSystem>Console</SubSystem>
<RandomizedBaseAddress>false</RandomizedBaseAddress>
<DataExecutionPrevention />
<TargetMachine>MachineX64</TargetMachine>
</Link>
<Bscmake>
<SuppressStartupBanner>true</SuppressStartupBanner>
</Bscmake>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<Midl>
<TargetEnvironment>X64</TargetEnvironment>
<TypeLibraryName>.\Release/toyvm.tlb</TypeLibraryName>
<HeaderFileName />
</Midl>
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<StringPooling>true</StringPooling>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<FunctionLevelLinking>true</FunctionLevelLinking>
<PrecompiledHeaderOutputFile>.\Release/toyvm.pch</PrecompiledHeaderOutputFile>
<WarningLevel>Level4</WarningLevel>
<SuppressStartupBanner>true</SuppressStartupBanner>
</ClCompile>
<ResourceCompile>
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<Culture>0x0411</Culture>
</ResourceCompile>
<Link>
<SuppressStartupBanner>true</SuppressStartupBanner>
<SubSystem>Console</SubSystem>
<RandomizedBaseAddress>false</RandomizedBaseAddress>
<DataExecutionPrevention />
<TargetMachine>MachineX64</TargetMachine>
</Link>
<Bscmake>
<SuppressStartupBanner>true</SuppressStartupBanner>
</Bscmake>
</ItemDefinitionGroup>
<!-- Source files compiled by this project: the single translation unit toyvm.cpp. -->
<!-- NOTE(review): toyvm.cpp stops with '#error "only 32bit"' when XBYAK64 is defined, so the x64 configurations presumably cannot build; confirm. -->
<ItemGroup>
<ClCompile Include="toyvm.cpp" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>

View File

@ -0,0 +1,85 @@
# Builds the xbyak test programs and runs the encoder regression scripts.
TARGET = make_nm normalize_prefix jmp address bad_address misc cvt_test cvt_test32
XBYAK_INC=../xbyak/xbyak.h

# Default to 32 bit; on an x86_64 host the 64-bit variants are built and run as well.
BIT=32
ifeq ($(shell uname -m),x86_64)
BIT=64
endif
ifeq ($(BIT),64)
TARGET += jmp64 address64
endif

# These goals are commands, not files: always run them even if a file with
# the same name exists in this directory.
.PHONY: all test test_nm test_avx test_avx512 clean

all: $(TARGET)

CFLAGS_WARN=-Wall -Wextra -Wformat=2 -Wcast-qual -Wcast-align -Wwrite-strings -Wfloat-equal -Wpointer-arith
CFLAGS=-O2 -fomit-frame-pointer -Wall -fno-operator-names -I../ -I./ $(CFLAGS_WARN) #-std=c++0x

make_nm: make_nm.cpp $(XBYAK_INC)
	$(CXX) $(CFLAGS) make_nm.cpp -o $@
normalize_prefix: normalize_prefix.cpp ../xbyak/xbyak.h
	$(CXX) $(CFLAGS) normalize_prefix.cpp -o $@
test_mmx: test_mmx.cpp ../xbyak/xbyak.h
	$(CXX) $(CFLAGS) test_mmx.cpp -o $@ -lpthread
# jmp and address are built for 32 bit here; jmp64/address64 are the 64-bit builds.
jmp: jmp.cpp ../xbyak/xbyak.h
	$(CXX) $(CFLAGS) jmp.cpp -o $@ -m32
jmp64: jmp.cpp ../xbyak/xbyak.h
	$(CXX) $(CFLAGS) jmp.cpp -o $@ -m64
address: address.cpp ../xbyak/xbyak.h
	$(CXX) $(CFLAGS) address.cpp -o $@ -m32
address64: address.cpp ../xbyak/xbyak.h
	$(CXX) $(CFLAGS) address.cpp -o $@ -m64
bad_address: bad_address.cpp ../xbyak/xbyak.h
	$(CXX) $(CFLAGS) bad_address.cpp -o $@
misc: misc.cpp ../xbyak/xbyak.h
	$(CXX) $(CFLAGS) misc.cpp -o $@
cvt_test: cvt_test.cpp ../xbyak/xbyak.h
	$(CXX) $(CFLAGS) $< -o $@
cvt_test32: cvt_test.cpp ../xbyak/xbyak.h
	$(CXX) $(CFLAGS) $< -o $@ -DXBYAK32

# Regenerate the mnemonic header in ../gen, then run the script-driven and
# stand-alone tests.
test_nm: normalize_prefix jmp bad_address $(TARGET)
	$(MAKE) -C ../gen
	./test_nm.sh
	./test_nm.sh Y
	./test_nm.sh avx512
	./test_address.sh
	./jmp
	./bad_address
	./misc
	./cvt_test
	./cvt_test32
ifeq ($(BIT),64)
	./test_address.sh 64
	./test_nm.sh 64
	./test_nm.sh Y64
	./jmp64
endif

test_avx: normalize_prefix
	./test_avx.sh
	./test_avx.sh Y
ifeq ($(BIT),64)
	./test_address.sh 64
	./test_avx.sh 64
	./test_avx.sh Y64
endif

test_avx512: normalize_prefix
	./test_avx512.sh
ifeq ($(BIT),64)
	./test_avx512.sh 64
endif

# Run every test group, each in its own sub-make.
test:
	$(MAKE) test_nm
	$(MAKE) test_avx
	$(MAKE) test_avx512

clean:
	rm -rf *.o $(TARGET) lib_run nm.cpp nm_frame make_512

lib_run: lib_test.cpp lib_run.cpp lib.h
	$(CXX) $(CFLAGS) lib_run.cpp lib_test.cpp -o lib_run

View File

@ -0,0 +1,14 @@
# Regenerates ../xbyak/xbyak_mnemonic.h with the cl compiler.
# Compiler options shared by both generator programs.
OPT=/EHsc -I../xbyak /W4 -D_CRT_SECURE_NO_WARNINGS
# The header is the output of gen_code.exe followed by the output of gen_avx512.exe.
# This is the first rule in the file, so it is the default goal.
../xbyak/xbyak_mnemonic.h: ../gen/gen_code.exe ../gen/gen_avx512.exe
../gen/gen_code.exe > $@
../gen/gen_avx512.exe >> $@
# The generators depend only on their own source; the xbyak.h prerequisite is commented out.
../gen/gen_code.exe: ../gen/gen_code.cpp #../xbyak/xbyak.h
cl ../gen/gen_code.cpp $(OPT) /Fe:../gen/gen_code.exe
../gen/gen_avx512.exe: ../gen/gen_avx512.cpp #../xbyak/xbyak.h
cl ../gen/gen_avx512.cpp $(OPT) /Fe:../gen/gen_avx512.exe
SUB_HEADER=../xbyak/xbyak_mnemonic.h
# Convenience goal that builds the generated header.
all: $(SUB_HEADER)

View File

@ -0,0 +1,9 @@
@echo off
rem Assemble test.asm as a win32 object with nasm and then with yasm,
rem printing the listing file each assembler produces (a.lst and b.lst).
rem NOTE(review): rm and cat are not cmd.exe built-ins; presumably Unix-style tools are expected on PATH. Confirm.
echo 32bit
rm -rf a.lst b.lst
echo nasm
nasm -l a.lst -f win32 -DWIN32 test.asm
cat a.lst
echo yasm
yasm -l b.lst -f win32 -DWIN32 test.asm
cat b.lst

View File

@ -0,0 +1,155 @@
#include <stdio.h>
#include <string.h>
#define NUM_OF_ARRAY(x) (sizeof(x) / sizeof(x[0]))
/*
	Print one line per address expression in tbl, using it as the memory
	operand of instruction `name` with ymm7 and ymm4 as the other operands.
	isJIT selects Xbyak C++ call syntax (each followed by dump();) instead of
	assembler syntax.
*/
void genVsibSub(bool isJIT, const char *name, const char *tbl[], size_t tblSize)
{
	const char *const opening = isJIT ? " (ymm7, ptr[" : " ymm7, [";
	const char *const closing = isJIT ? "], ymm4); dump();\n" : "], ymm4\n";
	for (size_t idx = 0; idx != tblSize; ++idx) {
		printf("%s%s%s%s", name, opening, tbl[idx], closing);
	}
}
/*
	Print the vgatherdpd/vgatherqpd test lines for a fixed set of address
	expressions that use an xmm or ymm register as the index.
	In JIT mode the lines are wrapped in a C++ function named genVsib.
*/
void genVsib(bool isJIT)
{
	if (isJIT) {
		puts("void genVsib() {");
	}
	// address expressions with an xmm index register
	const char *xmmIndexTbl[] = {
		"xmm0",
		"xmm0 * 1",
		"xmm0 + 4",
		"xmm0 + eax",
		"xmm0 * 4 + ecx",
		"xmm3 * 8 + edi + 123",
		"xmm2 * 2 + 5",
		"eax + xmm0",
		"esp + xmm4",
	};
	// the same expressions with a ymm index register
	const char *ymmIndexTbl[] = {
		"ymm0",
		"ymm0 * 1",
		"ymm0 + 4",
		"ymm0 + eax",
		"ymm0 * 4 + ecx",
		"ymm3 * 8 + edi + 123",
		"ymm2 * 2 + 5",
		"eax + ymm0",
		"esp + ymm4",
	};
	genVsibSub(isJIT, "vgatherdpd", xmmIndexTbl, NUM_OF_ARRAY(xmmIndexTbl));
	genVsibSub(isJIT, "vgatherqpd", ymmIndexTbl, NUM_OF_ARRAY(ymmIndexTbl));
#ifdef XBYAK64
	// expressions that need 64-bit base registers or xmm15
	const char *xmmIndex64Tbl[] = {
		"xmm0 + r11",
		"r13 + xmm15",
		"123 + rsi + xmm2 * 4",
	};
	genVsibSub(isJIT, "vgatherdpd", xmmIndex64Tbl, NUM_OF_ARRAY(xmmIndex64Tbl));
#endif
	if (isJIT) {
		puts("}");
	}
}
/*
	Print a `mov ecx, [addr]` line for every combination of
	base register (or none) x index register (or none) x scale x displacement.
	In JIT mode the lines use Xbyak C++ syntax and are split into functions
	gen0, gen1, ... of 100 statements each, followed by a gen() that calls
	them all and genVsib(). The VSIB lines from genVsib() are printed in
	both modes.
*/
void genAddress(bool isJIT, const char regTbl[][5], size_t regTblNum)
{
	static const int scaleTbl[] = { 0, 1, 2, 4, 8 };
	static const int dispTbl[] = { 0, 1, 1000, -1, -1000 };
	const char *const opening = isJIT ? "mov (ecx, ptr[" : "mov ecx, [";
	const char *const closing = isJIT ? "]); dump();\n" : "]\n";
	// slot number regTblNum stands for "no register"
	const size_t slotNum = regTblNum + 1;
	int emitted = 0;
	int funcNum = 1;
	if (isJIT) {
		puts("void gen0(){");
	}
	for (size_t baseIdx = 0; baseIdx < slotNum; baseIdx++) {
		const bool hasBase = baseIdx < regTblNum;
		for (size_t indexIdx = 0; indexIdx < slotNum; indexIdx++) {
			if (indexIdx == 4) continue; /* esp is not index register */
			const bool hasIndex = indexIdx < regTblNum;
			for (size_t s = 0; s < NUM_OF_ARRAY(scaleTbl); s++) {
				const int scale = scaleTbl[s];
				for (size_t d = 0; d < NUM_OF_ARRAY(dispTbl); d++) {
					const int disp = dispTbl[d];
					printf("%s", opening);
					if (hasBase) {
						printf("%s", regTbl[baseIdx]);
					}
					if (hasIndex) {
						if (hasBase) putchar('+');
						printf("%s", regTbl[indexIdx]);
						if (scale != 0) printf("*%d", scale);
					}
					if (hasBase || hasIndex) {
						// signed displacement appended to the register part
						if (disp >= 0) putchar('+');
						printf("%d", disp);
					} else {
						// no register at all: the displacement is an absolute address
						if (isJIT) printf("(void*)");
						printf("0x%08X", disp);
					}
					printf("%s", closing);
					if (!isJIT) continue;
					// start a new generated function every 100 statements
					emitted++;
					if (emitted % 100 == 0) {
						printf("}\n void gen%d(){\n", funcNum);
						funcNum++;
					}
				}
			}
		}
	}
	if (isJIT) {
		puts("}");
	}
	genVsib(isJIT);
	if (!isJIT) return;
	printf("void gen(){\n");
	for (int f = 0; f < funcNum; f++) {
		printf(" gen%d();\n", f);
	}
	puts("genVsib();");
	printf("}\n");
}
/*
	usage: <prog> [phase [anything]]
	phase "1" generates addresses built from 32-bit registers; any other
	value (or none) selects 64-bit registers, which produces output only when
	XBYAK64 is defined.
	A second argument switches the output from assembler to Xbyak C++ syntax.
*/
int main(int argc, char *argv[])
{
	const bool phase = argc > 1 && strcmp(argv[1], "1") == 0;
	const bool isJIT = argc > 2;
	fprintf(stderr, "phase:%c %s\n", phase ? '1' : '2', isJIT ? "jit" : "asm");
	if (!phase) {
#ifdef XBYAK64
		fprintf(stderr, "64bit reg\n");
		static const char reg64Tbl[][5] = {
			"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
		};
		genAddress(isJIT, reg64Tbl, NUM_OF_ARRAY(reg64Tbl));
#endif
	} else {
		fprintf(stderr, "32bit reg\n");
		static const char reg32Tbl[][5] = {
			"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi",
#ifdef XBYAK64
			"r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d",
#endif
		};
		genAddress(isJIT, reg32Tbl, NUM_OF_ARRAY(reg32Tbl));
	}
}

View File

@ -0,0 +1,28 @@
#include <xbyak/xbyak.h>
#include <cybozu/test.hpp>
// Code generator whose constructor feeds address expressions to Xbyak and
// records, through CYBOZU_TEST_EXCEPTION, that each statement throws std::exception.
struct Code : Xbyak::CodeGenerator {
Code()
{
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [esp + esp]), std::exception);
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [ax]), std::exception); // not support
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [esp * 4]), std::exception);
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [eax * 16]), std::exception);
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [eax + eax + eax]), std::exception);
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [eax * 2 + ecx * 4]), std::exception);
// NOTE(review): the next check repeats the previous one verbatim - confirm whether another expression was intended
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [eax * 2 + ecx * 4]), std::exception);
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [xmm0]), std::exception);
CYBOZU_TEST_EXCEPTION(fld(dword [xmm0]), std::exception);
CYBOZU_TEST_EXCEPTION(vgatherdpd(xmm0, ptr [eax * 2], ymm3), std::exception);
CYBOZU_TEST_EXCEPTION(vgatherdpd(xmm0, ptr [xmm0 + xmm1], ymm3), std::exception);
// checks that are compiled only for the 64-bit build
#ifdef XBYAK64
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [rax + eax]), std::exception);
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [xmm0 + ymm0]), std::exception);
#endif
}
};
// Registered test "exception": constructing Code runs all checks in its constructor.
CYBOZU_TEST_AUTO(exception)
{
Code c;
}

View File

@ -0,0 +1,151 @@
#define XBYAK_NO_OP_NAMES
#include <xbyak/xbyak.h>
#include <cybozu/inttype.hpp>
#include <cybozu/test.hpp>
using namespace Xbyak;
using namespace Xbyak::util;
// Rows of register names that the cvt test expects to convert into one another
// with cvt8()/cvt16()/cvt32() (and cvt64() in the 64-bit build).
// reg8 is held by pointer so that a row can store 0 when it lists no 8-bit register.
#ifdef XBYAK64
const struct Ptn {
const Reg8 *reg8;
Reg16 reg16;
Reg32 reg32;
Reg64 reg64;
} tbl[] = {
{ &al, ax, eax, rax },
{ &bl, bx, ebx, rbx },
{ &cl, cx, ecx, rcx },
{ &dl, dx, edx, rdx },
{ &sil, si, esi, rsi },
{ &dil, di, edi, rdi },
{ &bpl, bp, ebp, rbp },
{ &spl, sp, esp, rsp },
{ &r8b, r8w, r8d, r8 },
{ &r9b, r9w, r9d, r9 },
{ &r10b, r10w, r10d, r10 },
{ &r11b, r11w, r11d, r11 },
{ &r12b, r12w, r12d, r12 },
{ &r13b, r13w, r13d, r13 },
{ &r14b, r14w, r14d, r14 },
{ &r15b, r15w, r15d, r15 },
};
#else
// 32-bit build: no 64-bit column, and si/di/bp/sp have no 8-bit entry (reg8 == 0)
const struct Ptn {
const Reg8 *reg8;
Reg16 reg16;
Reg32 reg32;
} tbl[] = {
{ &al, ax, eax },
{ &bl, bx, ebx },
{ &cl, cx, ecx },
{ &dl, dx, edx },
{ 0, si, esi },
{ 0, di, edi },
{ 0, bp, ebp },
{ 0, sp, esp },
};
#endif
// Test "cvt": for every row of tbl, each register converted with cvtN() must equal
// the N-bit register of the same row; conversions listed in the error tables must throw.
CYBOZU_TEST_AUTO(cvt)
{
for (size_t i = 0; i < sizeof(tbl) / sizeof(tbl[0]); i++) {
// 8-bit checks are skipped for rows without an 8-bit register
if (tbl[i].reg8) {
CYBOZU_TEST_ASSERT(tbl[i].reg8->cvt8() == *tbl[i].reg8);
CYBOZU_TEST_ASSERT(tbl[i].reg8->cvt16() == tbl[i].reg16);
CYBOZU_TEST_ASSERT(tbl[i].reg8->cvt32() == tbl[i].reg32);
CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt8() == *tbl[i].reg8);
CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt8() == *tbl[i].reg8);
}
CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt16() == tbl[i].reg16);
CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt32() == tbl[i].reg32);
CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt16() == tbl[i].reg16);
CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt32() == tbl[i].reg32);
#ifdef XBYAK64
if (tbl[i].reg8) {
CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt8() == *tbl[i].reg8);
CYBOZU_TEST_ASSERT(tbl[i].reg8->cvt64() == tbl[i].reg64);
}
CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt16() == tbl[i].reg16);
CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt32() == tbl[i].reg32);
CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt64() == tbl[i].reg64);
CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt64() == tbl[i].reg64);
CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt64() == tbl[i].reg64);
#endif
}
// cvt16() on ah/bh/ch/dh is expected to throw
{
const Reg8 errTbl[] = {
ah, bh, ch, dh
};
for (size_t i = 0; i < sizeof(errTbl) / sizeof(errTbl[0]); i++) {
CYBOZU_TEST_EXCEPTION(errTbl[i].cvt16(), std::exception);
}
}
// only when XBYAK32 is defined: cvt8() on si/di/bp/sp is expected to throw
#ifdef XBYAK32
{
const Reg16 errTbl[] = {
si, di, bp, sp
};
for (size_t i = 0; i < sizeof(errTbl) / sizeof(errTbl[0]); i++) {
CYBOZU_TEST_EXCEPTION(errTbl[i].cvt8(), std::exception);
}
}
#endif
}
// Test "changeBit": within one row of the local table, changeBit(bitTbl[k]) applied to any
// listed register must yield the register in column k; where column k is 0 the call must throw.
// The local tbl hides the file-level tbl inside this function.
CYBOZU_TEST_AUTO(changeBit)
{
using namespace Xbyak::util;
#ifdef XBYAK64
const size_t N = 7;
const Reg* tbl[][N] = {
{ &al, &ax, &eax, &rax, &xmm0, &ymm0, &zmm0 },
{ &cl, &cx, &ecx, &rcx, &xmm1, &ymm1, &zmm1 },
{ &dl, &dx, &edx, &rdx, &xmm2, &ymm2, &zmm2 },
{ &bl, &bx, &ebx, &rbx, &xmm3, &ymm3, &zmm3 },
{ &spl, &sp, &esp, &rsp, &xmm4, &ymm4, &zmm4 },
{ &bpl, &bp, &ebp, &rbp, &xmm5, &ymm5, &zmm5 },
{ &sil, &si, &esi, &rsi, &xmm6, &ymm6, &zmm6 },
{ &dil, &di, &edi, &rdi, &xmm7, &ymm7, &zmm7 },
{ &r8b, &r8w, &r8d, &r8, &xmm8, &ymm8, &zmm8 },
{ &r15b, &r15w, &r15d, &r15, &xmm15, &ymm15, &zmm15 },
{ 0, 0, 0, 0, &xmm16, &ymm16, &zmm16 },
{ 0, 0, 0, 0, &xmm31, &ymm31, &zmm31 },
};
// bit width requested for each column of tbl
const int bitTbl[N] = { 8, 16, 32, 64, 128, 256, 512 };
#else
const size_t N = 6;
const Reg* tbl[][N] = {
{ &al, &ax, &eax, &xmm0, &ymm0, &zmm0 },
{ &cl, &cx, &ecx, &xmm1, &ymm1, &zmm1 },
{ &dl, &dx, &edx, &xmm2, &ymm2, &zmm2 },
{ &bl, &bx, &ebx, &xmm3, &ymm3, &zmm3 },
{ 0, &sp, &esp, &xmm4, &ymm4, &zmm4 },
{ 0, &bp, &ebp, &xmm5, &ymm5, &zmm5 },
{ 0, &si, &esi, &xmm6, &ymm6, &zmm6 },
{ 0, &di, &edi, &xmm7, &ymm7, &zmm7 },
};
// bit width requested for each column of tbl
const int bitTbl[N] = { 8, 16, 32, 128, 256, 512 };
#endif
for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
for (size_t j = 0; j < N; j++) {
// r1 is the source register; empty cells cannot be a source
const Reg *r1 = tbl[i][j];
if (r1 == 0) continue;
for (size_t k = 0; k < N; k++) {
if (tbl[i][k]) {
CYBOZU_TEST_ASSERT(*tbl[i][k] == r1->changeBit(bitTbl[k]));
//				printf("%s->changeBit(%d)=%s %s\n", r1->toString(), bitTbl[k], r1->changeBit(bitTbl[k]).toString(), tbl[i][k]->toString());
} else {
CYBOZU_TEST_EXCEPTION(r1->changeBit(bitTbl[k]), std::exception);
}
}
}
}
// 64-bit build only: changeBit(16) on ah/bh/ch/dh is expected to throw
#ifdef XBYAK64
const Reg8 *special8bitTbl[] = { &ah, &bh, &ch, &dh };
for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(special8bitTbl); i++) {
CYBOZU_TEST_EXCEPTION(special8bitTbl[i]->changeBit(16), std::exception);
}
#endif
}

View File

@ -0,0 +1,163 @@
#pragma once
/**
@file
@brief int type definition and macros
@author MITSUNARI Shigeo(@herumi)
*/
// Fixed-width integer types.
// Only MSVC with _MSC_VER <= 1500 gets hand-written typedefs (guarded by
// CYBOZU_DEFINED_INTXX so they are emitted once); every other compiler uses <stdint.h>.
// The version test must read _MSC_VER: an undefined MSC_VER evaluates to 0 in #if,
// which made the condition true on every MSVC version.
#if defined(_MSC_VER) && (_MSC_VER <= 1500) && !defined(CYBOZU_DEFINED_INTXX)
	#define CYBOZU_DEFINED_INTXX
	typedef __int64 int64_t;
	typedef unsigned __int64 uint64_t;
	typedef unsigned int uint32_t;
	typedef int int32_t;
	typedef unsigned short uint16_t;
	typedef short int16_t;
	typedef unsigned char uint8_t;
	typedef signed char int8_t;
#else
	#include <stdint.h>
#endif
// ssize_t: on MSVC it is defined here once (guarded by CYBOZU_DEFINED_SSIZE_T) as
// int64_t when _WIN64 is defined and int32_t otherwise; other compilers take it from <unistd.h>.
#ifdef _MSC_VER
#ifndef CYBOZU_DEFINED_SSIZE_T
#define CYBOZU_DEFINED_SSIZE_T
#ifdef _WIN64
typedef int64_t ssize_t;
#else
typedef int32_t ssize_t;
#endif
#endif
#else
#include <unistd.h> // for ssize_t
#endif
// Each helper macro below is defined only if the user has not defined it already.
// CYBOZU_ALIGN(x): alignment specifier spelled for MSVC or for GCC-style compilers
#ifndef CYBOZU_ALIGN
#ifdef _MSC_VER
#define CYBOZU_ALIGN(x) __declspec(align(x))
#else
#define CYBOZU_ALIGN(x) __attribute__((aligned(x)))
#endif
#endif
// CYBOZU_FORCE_INLINE: compiler-specific "always inline" request
#ifndef CYBOZU_FORCE_INLINE
#ifdef _MSC_VER
#define CYBOZU_FORCE_INLINE __forceinline
#else
#define CYBOZU_FORCE_INLINE __attribute__((always_inline))
#endif
#endif
// CYBOZU_UNUSED: expands to the unused attribute on __GNUC__ compilers, to nothing elsewhere
#ifndef CYBOZU_UNUSED
#ifdef __GNUC__
#define CYBOZU_UNUSED __attribute__((unused))
#else
#define CYBOZU_UNUSED
#endif
#endif
// CYBOZU_ALLOCA(x): _malloca on MSVC, __builtin_alloca elsewhere
// NOTE(review): memory from _malloca is normally released with _freea - confirm how callers handle this
#ifndef CYBOZU_ALLOCA
#ifdef _MSC_VER
#include <malloc.h>
#define CYBOZU_ALLOCA(x) _malloca(x)
#else
#define CYBOZU_ALLOCA(x) __builtin_alloca(x)
#endif
#endif
// CYBOZU_NUM_OF_ARRAY(x): element count of an array; x is used unparenthesized, so pass a plain array name
#ifndef CYBOZU_NUM_OF_ARRAY
#define CYBOZU_NUM_OF_ARRAY(x) (sizeof(x) / sizeof(*x))
#endif
// CYBOZU_SNPRINTF(x, len, ...): formatted print into x with the result discarded;
// uses _snprintf_s for _MSC_VER < 1900 and snprintf otherwise
#ifndef CYBOZU_SNPRINTF
#if defined(_MSC_VER) && (_MSC_VER < 1900)
#define CYBOZU_SNPRINTF(x, len, ...) (void)_snprintf_s(x, len, len - 1, __VA_ARGS__)
#else
#define CYBOZU_SNPRINTF(x, len, ...) (void)snprintf(x, len, __VA_ARGS__)
#endif
#endif
// Ordered identifiers for the language level; CYBOZU_CPP_VERSION is set to one of them below.
#define CYBOZU_CPP_VERSION_CPP03 0
#define CYBOZU_CPP_VERSION_TR1 1
#define CYBOZU_CPP_VERSION_CPP11 2
#define CYBOZU_CPP_VERSION_CPP14 3
#define CYBOZU_CPP_VERSION_CPP17 4
// CYBOZU_GNUC_PREREQ(major, minor): true when __GNUC__.__GNUC_MINOR__ is at least major.minor;
// always 0 when __GNUC__ is not defined
#ifdef __GNUC__
#define CYBOZU_GNUC_PREREQ(major, minor) ((__GNUC__) * 100 + (__GNUC_MINOR__) >= (major) * 100 + (minor))
#else
#define CYBOZU_GNUC_PREREQ(major, minor) 0
#endif
// Pick CYBOZU_CPP_VERSION from __cplusplus first, then from compiler version macros.
// MSVC with _MSC_VER in [1500, 1600] is classified as TR1 rather than C++11.
#if (__cplusplus >= 201703)
#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_CPP17
#elif (__cplusplus >= 201402)
#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_CPP14
#elif (__cplusplus >= 201103) || (_MSC_VER >= 1500) || defined(__GXX_EXPERIMENTAL_CXX0X__)
#if defined(_MSC_VER) && (_MSC_VER <= 1600)
#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_TR1
#else
#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_CPP11
#endif
#elif CYBOZU_GNUC_PREREQ(4, 5) || (CYBOZU_GNUC_PREREQ(4, 2) && __GLIBCXX__ >= 20070719) || defined(__INTEL_COMPILER) || (__clang_major__ >= 3)
#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_TR1
#else
#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_CPP03
#endif
// CYBOZU_NAMESPACE_STD names the namespace to take library components from:
// boost when CYBOZU_USE_BOOST is defined, std::tr1 for the TR1 level (except on __APPLE__), std otherwise.
// CYBOZU_NAMESPACE_TR1_BEGIN/END expand to "namespace tr1 {" / "}" only in the std::tr1 case.
#ifdef CYBOZU_USE_BOOST
#define CYBOZU_NAMESPACE_STD boost
#define CYBOZU_NAMESPACE_TR1_BEGIN
#define CYBOZU_NAMESPACE_TR1_END
#elif (CYBOZU_CPP_VERSION == CYBOZU_CPP_VERSION_TR1) && !defined(__APPLE__)
#define CYBOZU_NAMESPACE_STD std::tr1
#define CYBOZU_NAMESPACE_TR1_BEGIN namespace tr1 {
#define CYBOZU_NAMESPACE_TR1_END }
#else
#define CYBOZU_NAMESPACE_STD std
#define CYBOZU_NAMESPACE_TR1_BEGIN
#define CYBOZU_NAMESPACE_TR1_END
#endif
// CYBOZU_OS_BIT: 64 when one of the listed 64-bit target macros is defined, otherwise 32.
// The user may predefine it to override the detection.
#ifndef CYBOZU_OS_BIT
#if defined(_WIN64) || defined(__x86_64__) || defined(__AARCH64EL__) || defined(__EMSCRIPTEN__)
#define CYBOZU_OS_BIT 64
#else
#define CYBOZU_OS_BIT 32
#endif
#endif
// CYBOZU_HOST: CPU family (INTEL, ARM or UNKNOWN) derived from compiler target macros.
// Note the CYBOZU_HOST_* constants are defined only when CYBOZU_HOST was not predefined.
#ifndef CYBOZU_HOST
#define CYBOZU_HOST_UNKNOWN 0
#define CYBOZU_HOST_INTEL 1
#define CYBOZU_HOST_ARM 2
#if defined(_M_IX86) || defined(_M_AMD64) || defined(__x86_64__) || defined(__i386__)
#define CYBOZU_HOST CYBOZU_HOST_INTEL
#elif defined(__arm__) || defined(__AARCH64EL__)
#define CYBOZU_HOST CYBOZU_HOST_ARM
#else
#define CYBOZU_HOST CYBOZU_HOST_UNKNOWN
#endif
#endif
// CYBOZU_ENDIAN: LITTLE for the Intel host and for ARM with __ARM_EABI__ or __AARCH64EL__,
// UNKNOWN otherwise; no branch here ever selects CYBOZU_ENDIAN_BIG.
#ifndef CYBOZU_ENDIAN
#define CYBOZU_ENDIAN_UNKNOWN 0
#define CYBOZU_ENDIAN_LITTLE 1
#define CYBOZU_ENDIAN_BIG 2
#if (CYBOZU_HOST == CYBOZU_HOST_INTEL)
#define CYBOZU_ENDIAN CYBOZU_ENDIAN_LITTLE
#elif (CYBOZU_HOST == CYBOZU_HOST_ARM) && (defined(__ARM_EABI__) || defined(__AARCH64EL__))
#define CYBOZU_ENDIAN CYBOZU_ENDIAN_LITTLE
#else
#define CYBOZU_ENDIAN CYBOZU_ENDIAN_UNKNOWN
#endif
#endif
// CYBOZU_NOEXCEPT: noexcept from the C++11 level upward, throw() before that
#if CYBOZU_CPP_VERSION >= CYBOZU_CPP_VERSION_CPP11
#define CYBOZU_NOEXCEPT noexcept
#else
#define CYBOZU_NOEXCEPT throw()
#endif
namespace cybozu {

/**
	Accepts any object by const reference and does nothing with it;
	passing a variable here counts as a use of that variable.
*/
template<class T>
void disable_warning_unused_variable(const T&)
{
}

/**
	Convert a pointer to const S into pointer type T by way of const void*.
	@param ptr [in] source pointer
	@return ptr converted to T
*/
template<class T, class S>
T cast(const S* ptr)
{
	const void *raw = ptr;
	return static_cast<T>(raw);
}

/**
	Convert a pointer to S into pointer type T by way of void*.
	@param ptr [in] source pointer
	@return ptr converted to T
*/
template<class T, class S>
T cast(S* ptr)
{
	void *raw = ptr;
	return static_cast<T>(raw);
}

} // cybozu

View File

@ -0,0 +1,373 @@
#pragma once
/**
@file
@brief unit test class
@author MITSUNARI Shigeo(@herumi)
*/
#include <stdio.h>
#include <string.h>
#include <string>
#include <list>
#include <iostream>
#include <utility>
// Fixed-width integer types: MSVC with _MSC_VER <= 1500 goes through cybozu/inttype.hpp,
// everything else uses <stdint.h>.
// The test must read _MSC_VER; an undefined MSC_VER evaluates to 0 in #if,
// which selected the first branch on every MSVC version.
#if defined(_MSC_VER) && (_MSC_VER <= 1500)
	#include <cybozu/inttype.hpp>
#else
	#include <stdint.h>
#endif
namespace cybozu { namespace test {
/**
	Registry and runner for unit tests.
	Tests are appended as (name, function) pairs, run in registration order by run(),
	and their results are accumulated in ok / ng / exception counters.
*/
class AutoRun {
	typedef void (*Func)();
	typedef std::list<std::pair<const char*, Func> > UnitTestList;
public:
	AutoRun()
		: init_(0)
		, term_(0)
		, okCount_(0)
		, ngCount_(0)
		, exceptionCount_(0)
	{
	}
	/**
		register functions called once before the first test and once after the last test
		@param init [in] called before the tests (may be 0)
		@param term [in] called after the tests (may be 0)
	*/
	void setup(Func init, Func term)
	{
		init_ = init;
		term_ = term;
	}
	/**
		register one test
		@param name [in] module name printed while running; the pointer is stored, not copied
		@param func [in] test body
	*/
	void append(const char *name, Func func)
	{
		list_.push_back(std::make_pair(name, func));
	}
	/**
		count one check result
		@param isOK [in] true increments the ok counter, false the ng counter
	*/
	void set(bool isOK)
	{
		if (isOK) {
			okCount_++;
		} else {
			ngCount_++;
		}
	}
	/**
		strip the directory part and everything from the first '.' of the file name
		@param name [in] path of the program
		@return base name; the separator is '\\' on _WIN32 and '/' elsewhere
	*/
	std::string getBaseName(const std::string& name) const
	{
#ifdef _WIN32
		const char sep = '\\';
#else
		const char sep = '/';
#endif
		// when no separator exists pos is npos and pos + 1 wraps to 0, i.e. the whole name is kept
		size_t pos = name.find_last_of(sep);
		std::string ret = name.substr(pos + 1);
		pos = ret.find('.');
		return ret.substr(0, pos);
	}
	/**
		run init, every registered test and term, then print a summary line
		@param argv [in] argument vector of main(); argv[0] is used for the name in the summary
		@return 0 if no check failed and no exception was caught, 1 otherwise
	*/
	int run(int, char *argv[])
	{
		std::string msg;
		try {
			if (init_) init_();
			for (UnitTestList::const_iterator i = list_.begin(), ie = list_.end(); i != ie; ++i) {
				std::cout << "ctest:module=" << i->first << std::endl;
				// an exception escaping a test stops that test only
				try {
					(i->second)();
				} catch (const std::exception& e) {
					exceptionCount_++;
					std::cout << "ctest:  " << i->first << " is stopped by exception " << e.what() << std::endl;
				} catch (...) {
					exceptionCount_++;
					std::cout << "ctest:  " << i->first << " is stopped by unknown exception" << std::endl;
				}
			}
			if (term_) term_();
		} catch (const std::exception& e) {
			// raised by init_ or term_
			msg = std::string("ctest:err:") + e.what();
		} catch (...) {
			msg = "ctest:err: catch unknown exception";
		}
		fflush(stdout);
		if (msg.empty()) {
			int err = ngCount_ + exceptionCount_;
			int total = okCount_ + err;
			// argv or argv[0] may be null; constructing std::string from a null pointer is not allowed
			const char *progName = (argv && argv[0]) ? argv[0] : "";
			std::cout << "ctest:name=" << getBaseName(progName)
					  << ", module=" << list_.size()
					  << ", total=" << total
					  << ", ok=" << okCount_
					  << ", ng=" << ngCount_
					  << ", exception=" << exceptionCount_ << std::endl;
			return err > 0 ? 1 : 0;
		} else {
			std::cout << msg << std::endl;
			return 1;
		}
	}
	/** @return the single function-local static instance */
	static inline AutoRun& getInstance()
	{
		static AutoRun instance;
		return instance;
	}
private:
	Func init_;
	Func term_;
	int okCount_;
	int ngCount_;
	int exceptionCount_;
	UnitTestList list_;
};
// shorthand for the AutoRun singleton, used by the test macros
static AutoRun& autoRun = AutoRun::getInstance();

/**
	record one check result and report it when it failed
	@param ret [in] result of the check
	@param msg [in] name of the macro that performed the check
	@param param [in] text of the checked expression(s)
	@param file [in] source file of the check
	@param line [in] source line of the check
*/
inline void test(bool ret, const std::string& msg, const std::string& param, const char *file, int line)
{
	autoRun.set(ret);
	if (ret) return;
	// only failures are printed
	printf("%s(%d):ctest:%s(%s);\n", file, line, msg.c_str(), param.c_str());
}
/**
	generic comparison
	@return result of lhs == rhs
*/
template<typename T, typename U>
bool isEqual(const T& lhs, const U& rhs)
{
	const bool same = (lhs == rhs);
	return same;
}
// size_t/int pairs: convert the int explicitly to avoid a warning about
// comparison of integers of different signs
inline bool isEqual(size_t lhs, int rhs)
{
	return lhs == static_cast<size_t>(rhs);
}
inline bool isEqual(int lhs, size_t rhs)
{
	return static_cast<size_t>(lhs) == rhs;
}
// C strings are compared by content, not by address
inline bool isEqual(const char *lhs, const char *rhs)
{
	return !strcmp(lhs, rhs);
}
// the remaining char* combinations forward to the (const char*, const char*) overload
inline bool isEqual(char *lhs, const char *rhs)
{
	return isEqual(static_cast<const char*>(lhs), rhs);
}
inline bool isEqual(const char *lhs, char *rhs)
{
	return isEqual(lhs, static_cast<const char*>(rhs));
}
inline bool isEqual(char *lhs, char *rhs)
{
	return isEqual(static_cast<const char*>(lhs), static_cast<const char*>(rhs));
}
/*
	Floating point values are compared by their object representation instead of with ==.
	As a consequence 0.0 and -0.0 differ, and a NaN equals a NaN with the same bit pattern.
	memcmp is used because reading a union member other than the one last written
	is not defined behavior in C++.
*/
inline bool isEqual(float lhs, float rhs)
{
	return memcmp(&lhs, &rhs, sizeof(lhs)) == 0;
}
// same byte-wise comparison for double
inline bool isEqual(double lhs, double rhs)
{
	return memcmp(&lhs, &rhs, sizeof(lhs)) == 0;
}
} } // cybozu::test
// Default entry point: runs every registered test and returns the result of AutoRun::run().
// Define CYBOZU_TEST_DISABLE_AUTO_RUN before including this header to supply your own main().
#ifndef CYBOZU_TEST_DISABLE_AUTO_RUN
int main(int argc, char *argv[])
{
return cybozu::test::autoRun.run(argc, argv);
}
#endif
/**
alert if !x
the text of x is reported together with file and line on failure
@param x [in] expression converted to bool with !!
*/
#define CYBOZU_TEST_ASSERT(x) cybozu::test::test(!!(x), "CYBOZU_TEST_ASSERT", #x, __FILE__, __LINE__)
/**
alert if x != y (compared with cybozu::test::isEqual)
on failure both values are also streamed to std::cout, so x and y are evaluated again
@param x [in]
@param y [in]
*/
#define CYBOZU_TEST_EQUAL(x, y) { \
bool _cybozu_eq = cybozu::test::isEqual(x, y); \
cybozu::test::test(_cybozu_eq, "CYBOZU_TEST_EQUAL", #x ", " #y, __FILE__, __LINE__); \
if (!_cybozu_eq) { \
std::cout << "ctest: lhs=" << (x) << std::endl; \
std::cout << "ctest: rhs=" << (y) << std::endl; \
} \
}
/**
	alert if fabs(x - y) >= eps
	this header does not include <math.h>; the including file must make fabs available
	on failure both values are also streamed to std::cout, so x and y are evaluated again
	@param x [in]
	@param y [in]
	@param eps [in] tolerance; parenthesized so that an expression such as a + b is compared as a whole
*/
#define CYBOZU_TEST_NEAR(x, y, eps) { \
	bool _cybozu_isNear = fabs((x) - (y)) < (eps); \
	cybozu::test::test(_cybozu_isNear, "CYBOZU_TEST_NEAR", #x ", " #y, __FILE__, __LINE__); \
	if (!_cybozu_isNear) { \
		std::cout << "ctest:  lhs=" << (x) << std::endl; \
		std::cout << "ctest:  rhs=" << (y) << std::endl; \
	} \
}
/**
	alert if pointer x != pointer y
	on failure both addresses are streamed to std::cout as const void*
	@param x [in]
	@param y [in]
*/
#define CYBOZU_TEST_EQUAL_POINTER(x, y) { \
	bool _cybozu_eq = (x) == (y); \
	cybozu::test::test(_cybozu_eq, "CYBOZU_TEST_EQUAL_POINTER", #x ", " #y, __FILE__, __LINE__); \
	if (!_cybozu_eq) { \
		std::cout << "ctest:  lhs=" << static_cast<const void*>(x) << std::endl; \
		std::cout << "ctest:  rhs=" << static_cast<const void*>(y) << std::endl; \
	} \
}
/**
alert if x[] != y[]
every index in [0, n) is checked and counted separately with cybozu::test::isEqual;
a mismatch prints the index and both elements
@param x [in]
@param y [in]
@param n [in] number of elements to compare (evaluated once, converted to size_t)
*/
#define CYBOZU_TEST_EQUAL_ARRAY(x, y, n) { \
for (size_t _cybozu_test_i = 0, _cybozu_ie = (size_t)(n); _cybozu_test_i < _cybozu_ie; _cybozu_test_i++) { \
bool _cybozu_eq = cybozu::test::isEqual((x)[_cybozu_test_i], (y)[_cybozu_test_i]); \
cybozu::test::test(_cybozu_eq, "CYBOZU_TEST_EQUAL_ARRAY", #x ", " #y ", " #n, __FILE__, __LINE__); \
if (!_cybozu_eq) { \
std::cout << "ctest: i=" << _cybozu_test_i << std::endl; \
std::cout << "ctest: lhs=" << (x)[_cybozu_test_i] << std::endl; \
std::cout << "ctest: rhs=" << (y)[_cybozu_test_i] << std::endl; \
} \
} \
}
/**
always alert
@param msg [in] text reported with the failure
*/
#define CYBOZU_TEST_FAIL(msg) cybozu::test::test(false, "CYBOZU_TEST_FAIL", msg, __FILE__, __LINE__)
/**
verify message in exception
passes only if statement throws Exception and its what() contains msg
internal result codes: 0 = passed, 1 = nothing thrown, 2 = message not found, 3 = other exception type
@param statement [in] code expected to throw
@param Exception [in] expected exception type (caught by const reference)
@param msg [in] text searched for in what()
*/
#define CYBOZU_TEST_EXCEPTION_MESSAGE(statement, Exception, msg) \
{ \
int _cybozu_ret = 0; \
std::string _cybozu_errMsg; \
try { \
statement; \
_cybozu_ret = 1; \
} catch (const Exception& _cybozu_e) { \
_cybozu_errMsg = _cybozu_e.what(); \
if (_cybozu_errMsg.find(msg) == std::string::npos) { \
_cybozu_ret = 2; \
} \
} catch (...) { \
_cybozu_ret = 3; \
} \
if (_cybozu_ret) { \
cybozu::test::test(false, "CYBOZU_TEST_EXCEPTION_MESSAGE", #statement ", " #Exception ", " #msg, __FILE__, __LINE__); \
if (_cybozu_ret == 1) { \
std::cout << "ctest: no exception" << std::endl; \
} else if (_cybozu_ret == 2) { \
std::cout << "ctest: bad exception msg:" << _cybozu_errMsg << std::endl; \
} else { \
std::cout << "ctest: unexpected exception" << std::endl; \
} \
} else { \
cybozu::test::autoRun.set(true); \
} \
}
/**
verify statement throws Exception
internal result codes: 0 = passed, 1 = nothing thrown, 2 = other exception type
@param statement [in] code expected to throw
@param Exception [in] expected exception type (caught by const reference)
*/
#define CYBOZU_TEST_EXCEPTION(statement, Exception) \
{ \
int _cybozu_ret = 0; \
try { \
statement; \
_cybozu_ret = 1; \
} catch (const Exception&) { \
} catch (...) { \
_cybozu_ret = 2; \
} \
if (_cybozu_ret) { \
cybozu::test::test(false, "CYBOZU_TEST_EXCEPTION", #statement ", " #Exception, __FILE__, __LINE__); \
if (_cybozu_ret == 1) { \
std::cout << "ctest: no exception" << std::endl; \
} else { \
std::cout << "ctest: unexpected exception" << std::endl; \
} \
} else { \
cybozu::test::autoRun.set(true); \
} \
}
/**
verify statement does not throw
unlike the macros above the expansion is a bare try/catch without enclosing braces
@param statement [in] code expected to complete without an exception
*/
#define CYBOZU_TEST_NO_EXCEPTION(statement) \
try { \
statement; \
cybozu::test::autoRun.set(true); \
} catch (...) { \
cybozu::test::test(false, "CYBOZU_TEST_NO_EXCEPTION", #statement, __FILE__, __LINE__); \
}
/**
append auto unit test
declares cybozu_test_<name>, registers it from the constructor of a namespace-scope
helper object, then opens the definition of cybozu_test_<name>; the test body follows the macro
@param name [in] module name
*/
#define CYBOZU_TEST_AUTO(name) \
void cybozu_test_ ## name(); \
struct cybozu_test_local_ ## name { \
cybozu_test_local_ ## name() \
{ \
cybozu::test::autoRun.append(#name, cybozu_test_ ## name); \
} \
} cybozu_test_local_instance_ ## name; \
void cybozu_test_ ## name()
/**
append auto unit test with fixture
the registered function creates a local Fixture object and then calls the test body,
so the fixture lives for the duration of that one test
@param name [in] module name
@param Fixture [in] class name of fixture (default constructed)
*/
#define CYBOZU_TEST_AUTO_WITH_FIXTURE(name, Fixture) \
void cybozu_test_ ## name(); \
void cybozu_test_real_ ## name() \
{ \
Fixture f; \
cybozu_test_ ## name(); \
} \
struct cybozu_test_local_ ## name { \
cybozu_test_local_ ## name() \
{ \
cybozu::test::autoRun.append(#name, cybozu_test_real_ ## name); \
} \
} cybozu_test_local_instance_ ## name; \
void cybozu_test_ ## name()
/**
setup fixture
the fixture is created with new in the init function and deleted in the term function
that are handed to AutoRun::setup; the generated names are fixed, so the macro can be
used once per translation unit
@param Fixture [in] class name of fixture
@note cstr of Fixture is called before test and dstr of Fixture is called after test
*/
#define CYBOZU_TEST_SETUP_FIXTURE(Fixture) \
Fixture *cybozu_test_local_fixture; \
void cybozu_test_local_init() \
{ \
cybozu_test_local_fixture = new Fixture(); \
} \
void cybozu_test_local_term() \
{ \
delete cybozu_test_local_fixture; \
} \
struct cybozu_test_local_fixture_setup_ { \
cybozu_test_local_fixture_setup_() \
{ \
cybozu::test::autoRun.setup(cybozu_test_local_init, cybozu_test_local_term); \
} \
} cybozu_test_local_fixture_setup_instance_;

1380
core/deps/xbyak/test/jmp.cpp Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,20 @@
þ½Ž¿
Microsoft Visual Studio Solution File, Format Version 10.00
# Visual C++ Express 2008
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "jmp", "jmp.vcproj", "{AC0B3317-E988-44F8-954A-BCBE4B3BB2BF}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Win32 = Debug|Win32
Release|Win32 = Release|Win32
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{AC0B3317-E988-44F8-954A-BCBE4B3BB2BF}.Debug|Win32.ActiveCfg = Debug|Win32
{AC0B3317-E988-44F8-954A-BCBE4B3BB2BF}.Debug|Win32.Build.0 = Debug|Win32
{AC0B3317-E988-44F8-954A-BCBE4B3BB2BF}.Release|Win32.ActiveCfg = Release|Win32
{AC0B3317-E988-44F8-954A-BCBE4B3BB2BF}.Release|Win32.Build.0 = Release|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal

View File

@ -0,0 +1,195 @@
<?xml version="1.0" encoding="shift_jis"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="9.00"
Name="jmp"
ProjectGUID="{AC0B3317-E988-44F8-954A-BCBE4B3BB2BF}"
RootNamespace="jmp"
Keyword="Win32Proj"
TargetFrameworkVersion="196613"
>
<Platforms>
<Platform
Name="Win32"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory="$(SolutionDir)$(ConfigurationName)"
IntermediateDirectory="$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="$(SolutionDir)/../"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
UsePrecompiledHeader="0"
WarningLevel="3"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="2"
GenerateDebugInformation="true"
SubSystem="1"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="$(SolutionDir)$(ConfigurationName)"
IntermediateDirectory="$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="1"
WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
EnableIntrinsicFunctions="true"
AdditionalIncludeDirectories="$(SolutionDir)/../"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
RuntimeLibrary="2"
EnableFunctionLevelLinking="true"
UsePrecompiledHeader="0"
WarningLevel="3"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="1"
GenerateDebugInformation="true"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<Filter
Name="ソース ファイル"
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
>
<File
RelativePath=".\jmp.cpp"
>
</File>
</Filter>
<Filter
Name="ヘッダー ファイル"
Filter="h;hpp;hxx;hm;inl;inc;xsd"
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
>
</Filter>
<Filter
Name="リソース ファイル"
Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
>
</Filter>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

View File

@ -0,0 +1,63 @@
#pragma once
#include <stdio.h>
// Minimal stand-in for a register object: it only carries its number.
struct Reg {
	int r_;
	Reg(int r)
		: r_(r)
	{
	}
};

// Each accessor owns a function-local static Reg, created the first time it is called.
inline const Reg& getReg0()
{
	static const Reg reg0(0);
	return reg0;
}
inline const Reg& getReg1()
{
	static const Reg reg1(1);
	return reg1;
}
inline const Reg& getReg2()
{
	static const Reg reg2(2);
	return reg2;
}

// Per-translation-unit references bound to the shared objects above.
static const Reg& r0 = getReg0();
static const Reg& r1 = getReg1();
static const Reg& r2 = getReg2();
/*
	Print the address and the number of r0, r1 and r2.
	Each line now shows the label and the address of the register whose number it prints
	(all three lines used to print "r0" and &r0).
*/
inline void putReg()
{
	puts("putReg");
	// %p expects a pointer to void
	printf("r0=%p, %d\n", static_cast<const void*>(&r0), r0.r_);
	printf("r1=%p, %d\n", static_cast<const void*>(&r1), r1.r_);
	printf("r2=%p, %d\n", static_cast<const void*>(&r2), r2.r_);
}
// Object that announces its construction and destruction on stdout.
struct A {
	int a;

	// sets a to 5
	A()
	{
		a = 5;
		puts("A cstr");
	}

	~A()
	{
		puts("A dstr");
	}

	// print the current value of a
	void put() const
	{
		printf("a=%d\n", this->a);
	}
};
// Class template that holds one static A; the int parameter is not used by the class itself.
// Defining the static member of a template in the header lets every file that includes it
// provide the definition.
template<int dummy = 0>
struct XT {
static A a;
};
// definition of the static member for every instantiation
template<int dummy>
A XT<dummy>::a;
// the instantiation used by the test code
typedef XT<0> X;
// init() is declared here and defined in another source file.
void init();
// Init's constructor prints "Init", then calls init() and putReg().
struct Init {
Init()
{
puts("Init");
init();
putReg();
}
};
// one Init object per file that includes this header; it is constructed during static initialization
static Init s_init;

View File

@ -0,0 +1,51 @@
#include <stdio.h>
// Static object whose constructor prints "XXX" when it is initialized.
static const struct XXX {
XXX() { puts("XXX"); }
} s_sss;
// Object that announces its construction and destruction on stdout.
struct A {
	int aaa;

	// sets aaa to 123
	A()
	{
		aaa = 123;
		puts("A cstr");
	}

	~A()
	{
		puts("A dstr");
	}

	// print the current value of aaa
	void put() const
	{
		printf("aaa=%d\n", this->aaa);
	}
};
// Class template that holds one static A named sss; the int parameter is not used by the class itself.
template<int dummy = 0>
struct XT {
static A sss;
};
// definition of the static member for every instantiation
template<int dummy>
A XT<dummy>::sss;
// the instantiation used below
typedef XT<0> X;
// Static object whose constructor prints "Init" and then the value held by X::sss.
// NOTE(review): whether X::sss is already constructed at that point appears to be what this file probes - confirm
static struct Init {
Init()
{
puts("Init");
X::sss.put();
}
} s_init;
// f prints "f" and returns 4; it initializes the static r below, so "f" is printed
// when r is initialized.
int f() { puts("f"); return 4; }
static const int r = f();
// Prints "main", the value of r and the value held by X::sss.
int main()
{
puts("main");
printf("r=%d\n", r);
X::sss.put();
}

View File

@ -0,0 +1,9 @@
#include "lib.h"
// Prints "main", then the value held by X::a and the register information from putReg().
int main()
{
puts("main");
X::a.put();
putReg();
}

View File

@ -0,0 +1,13 @@
#include "lib.h"
// Definition of the init() declared in lib.h.
// Prints the state of its run-once flag on every call, but prints X::a and the
// register information only on the first call.
void init()
{
// true until the first call has completed its work (the local name hides the function name)
static bool init = true;
printf("in lib_test %d\n", init);
if (!init) return;
init = false;
X::a.put();
putReg();
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,724 @@
#include <stdio.h>
#include <string.h>
#include <string>
#define XBYAK_NO_OP_NAMES
#include <xbyak/xbyak.h>
#include <cybozu/inttype.hpp>
#include <cybozu/test.hpp>
using namespace Xbyak;
// Test "setSize": with a generator constructed with 4096, move the size to 4095, write one
// byte and expect getSize() == 4096; setSize(4096) must then succeed and one more db() must
// throw Xbyak::Error.
CYBOZU_TEST_AUTO(setSize)
{
struct Code : Xbyak::CodeGenerator {
Code() : Xbyak::CodeGenerator(4096)
{
setSize(4095);
db(1);
size_t size = getSize();
CYBOZU_TEST_EQUAL(size, 4096u);
CYBOZU_TEST_NO_EXCEPTION(setSize(size));
CYBOZU_TEST_EXCEPTION(db(1), Xbyak::Error);
}
} code;
}
// Test "compOperand": operator== / operator!= on registers and address operands.
// Operands compare equal only when they are the same register or the same address expression
// with the same address frame (ptr vs dword differ, as do different displacements).
CYBOZU_TEST_AUTO(compOperand)
{
using namespace Xbyak::util;
CYBOZU_TEST_ASSERT(eax == eax);
CYBOZU_TEST_ASSERT(ecx != xmm0);
CYBOZU_TEST_ASSERT(ptr[eax] == ptr[eax]);
CYBOZU_TEST_ASSERT(dword[eax] != ptr[eax]);
CYBOZU_TEST_ASSERT(ptr[eax] != ptr[eax+3]);
}
// Test "mov_const": mov of an immediate into memory of a given operand size.
// Each table row gives the immediate v, the operand size in bits and whether
// mov(af[eax], v) is expected to throw Xbyak::Error.
CYBOZU_TEST_AUTO(mov_const)
{
struct Code : Xbyak::CodeGenerator {
Code()
{
const struct {
uint64_t v;
int bit;
bool error;
} tbl[] = {
{ uint64_t(-1), 8, false },
{ 0x12, 8, false },
{ 0x80, 8, false },
{ 0xff, 8, false },
{ 0x100, 8, true },
{ 1, 16, false },
{ uint64_t(-1), 16, false },
{ 0x7fff, 16, false },
{ 0xffff, 16, false },
{ 0x10000, 16, true },
{ uint64_t(-1), 32, false },
{ 0x7fffffff, 32, false },
{ uint64_t(-0x7fffffff), 32, false },
{ 0xffffffff, 32, false },
{ 0x100000000ull, 32, true },
// 64-bit operand rows exist only in the 64-bit build
#ifdef XBYAK64
{ uint64_t(-1), 64, false },
{ 0x7fffffff, 64, false },
{ 0xffffffffffffffffull, 64, false },
{ 0x80000000, 64, true },
{ 0xffffffff, 64, true },
#endif
};
for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
const int bit = tbl[i].bit;
const uint64_t v = tbl[i].v;
// address frame matching the operand size; any size other than 8/16/32 selects qword
const Xbyak::AddressFrame& af = bit == 8 ? byte : bit == 16 ? word : bit == 32 ? dword : qword;
if (tbl[i].error) {
CYBOZU_TEST_EXCEPTION(mov(af[eax], v), Xbyak::Error);
} else {
CYBOZU_TEST_NO_EXCEPTION(mov(af[eax], v));
}
}
}
} code;
}
// Test "align": after writing 0..19 padding bytes, align(16) must leave getCurr() on a
// multiple of 16; calling align(16) again on an aligned position must not move it.
CYBOZU_TEST_AUTO(align)
{
struct Code : Xbyak::CodeGenerator {
Code()
{
const size_t alignSize = 16;
for (int padding = 0; padding < 20; padding++) {
for (int i = 0; i < padding; i++) {
db(1);
}
align(alignSize);
CYBOZU_TEST_EQUAL(size_t(getCurr()) % alignSize, 0u);
}
align(alignSize);
const uint8 *p = getCurr();
// do nothing if aligned
align(alignSize);
CYBOZU_TEST_EQUAL(p, getCurr());
}
} c;
}
#ifdef XBYAK64
// Test "vfmaddps": emit v4fmaddps, v4fmaddss, v4fnmaddps, v4fnmaddss, vp4dpwssd and
// vp4dpwssds once each and compare the generated size and bytes with tbl.
// The rows of tbl are laid out one per emitted instruction, in the same order.
CYBOZU_TEST_AUTO(vfmaddps)
{
struct Code : Xbyak::CodeGenerator {
Code()
{
v4fmaddps(zmm1, zmm8, ptr [rdx + 64]);
v4fmaddss(xmm15, xmm8, ptr [rax + 64]);
v4fnmaddps(zmm5 | k5, zmm2, ptr [rcx + 0x80]);
v4fnmaddss(xmm31, xmm2, ptr [rsp + 0x80]);
vp4dpwssd(zmm23 | k7 | T_z, zmm1, ptr [rax + 64]);
vp4dpwssds(zmm10 | k4, zmm3, ptr [rsp + rax * 4 + 64]);
}
} c;
const uint8_t tbl[] = {
0x62, 0xf2, 0x3f, 0x48, 0x9a, 0x4a, 0x04,
0x62, 0x72, 0x3f, 0x08, 0x9b, 0x78, 0x04,
0x62, 0xf2, 0x6f, 0x4d, 0xaa, 0x69, 0x08,
0x62, 0x62, 0x6f, 0x08, 0xab, 0x7c, 0x24, 0x08,
0x62, 0xe2, 0x77, 0xcf, 0x52, 0x78, 0x04,
0x62, 0x72, 0x67, 0x4c, 0x53, 0x54, 0x84, 0x04,
};
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
CYBOZU_TEST_EQUAL(c.getSize(), n);
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
// Test "vaes": emit vaesdec, vaesdeclast, vaesenc and vaesenclast with xmm, ymm and zmm
// operands and compare the generated size and bytes with tbl.
// The rows of tbl are laid out one per emitted instruction, in the same order.
CYBOZU_TEST_AUTO(vaes)
{
struct Code : Xbyak::CodeGenerator {
Code()
{
vaesdec(xmm20, xmm30, ptr [rcx + 64]);
vaesdec(ymm1, ymm2, ptr [rcx + 64]);
vaesdec(zmm1, zmm2, ptr [rcx + 64]);
vaesdeclast(xmm20, xmm30, ptr [rax + 64]);
vaesdeclast(ymm20, ymm30, ptr [rax + 64]);
vaesdeclast(zmm20, zmm30, ptr [rax + 64]);
vaesenc(xmm20, xmm30, ptr [rcx + 64]);
vaesenc(ymm1, ymm2, ptr [rcx + 64]);
vaesenc(zmm1, zmm2, ptr [rcx + 64]);
vaesenclast(xmm20, xmm30, ptr [rax + 64]);
vaesenclast(ymm20, ymm30, ptr [rax + 64]);
vaesenclast(zmm20, zmm30, ptr [rax + 64]);
}
} c;
const uint8_t tbl[] = {
0x62, 0xE2, 0x0D, 0x00, 0xDE, 0x61, 0x04,
0xC4, 0xE2, 0x6D, 0xDE, 0x49, 0x40,
0x62, 0xF2, 0x6D, 0x48, 0xDE, 0x49, 0x01,
0x62, 0xE2, 0x0D, 0x00, 0xDF, 0x60, 0x04,
0x62, 0xE2, 0x0D, 0x20, 0xDF, 0x60, 0x02,
0x62, 0xE2, 0x0D, 0x40, 0xDF, 0x60, 0x01,
0x62, 0xE2, 0x0D, 0x00, 0xDC, 0x61, 0x04,
0xC4, 0xE2, 0x6D, 0xDC, 0x49, 0x40,
0x62, 0xF2, 0x6D, 0x48, 0xDC, 0x49, 0x01,
0x62, 0xE2, 0x0D, 0x00, 0xDD, 0x60, 0x04,
0x62, 0xE2, 0x0D, 0x20, 0xDD, 0x60, 0x02,
0x62, 0xE2, 0x0D, 0x40, 0xDD, 0x60, 0x01,
};
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
CYBOZU_TEST_EQUAL(c.getSize(), n);
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
// Test "vpclmulqdq": emit vpclmulqdq with xmm/ymm/zmm destinations numbered 2 and 20 and
// compare the generated size and bytes with tbl.
// The rows of tbl are laid out one per emitted instruction, in the same order.
CYBOZU_TEST_AUTO(vpclmulqdq)
{
struct Code : Xbyak::CodeGenerator {
Code()
{
vpclmulqdq(xmm2, xmm3, ptr [rax + 64], 3);
vpclmulqdq(ymm2, ymm3, ptr [rax + 64], 3);
vpclmulqdq(zmm2, zmm3, ptr [rax + 64], 3);
vpclmulqdq(xmm20, xmm3, ptr [rax + 64], 3);
vpclmulqdq(ymm20, ymm3, ptr [rax + 64], 3);
vpclmulqdq(zmm20, zmm3, ptr [rax + 64], 3);
}
} c;
const uint8_t tbl[] = {
0xc4, 0xe3, 0x61, 0x44, 0x50, 0x40, 0x03,
0xc4, 0xe3, 0x65, 0x44, 0x50, 0x40, 0x03,
0x62, 0xf3, 0x65, 0x48, 0x44, 0x50, 0x01, 0x03,
0x62, 0xe3, 0x65, 0x08, 0x44, 0x60, 0x04, 0x03,
0x62, 0xe3, 0x65, 0x28, 0x44, 0x60, 0x02, 0x03,
0x62, 0xe3, 0x65, 0x48, 0x44, 0x60, 0x01, 0x03,
};
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
CYBOZU_TEST_EQUAL(c.getSize(), n);
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
// Test "vcompressb_w": emit vcompressb and vcompressw with memory and masked register
// destinations for xmm, ymm and zmm, and compare the generated size and bytes with tbl.
// The rows of tbl are laid out one per emitted instruction, in the same order.
CYBOZU_TEST_AUTO(vcompressb_w)
{
struct Code : Xbyak::CodeGenerator {
Code()
{
vcompressb(ptr[rax + 64], xmm1);
vcompressb(xmm30 | k5, xmm1);
vcompressb(ptr[rax + 64], ymm1);
vcompressb(ymm30 | k3 |T_z, ymm1);
vcompressb(ptr[rax + 64], zmm1);
vcompressb(zmm30 | k2 |T_z, zmm1);
vcompressw(ptr[rax + 64], xmm1);
vcompressw(xmm30 | k5, xmm1);
vcompressw(ptr[rax + 64], ymm1);
vcompressw(ymm30 | k3 |T_z, ymm1);
vcompressw(ptr[rax + 64], zmm1);
vcompressw(zmm30 | k2 |T_z, zmm1);
}
} c;
const uint8_t tbl[] = {
0x62, 0xf2, 0x7d, 0x08, 0x63, 0x48, 0x40,
0x62, 0x92, 0x7d, 0x0d, 0x63, 0xce,
0x62, 0xf2, 0x7d, 0x28, 0x63, 0x48, 0x40,
0x62, 0x92, 0x7d, 0xab, 0x63, 0xce,
0x62, 0xf2, 0x7d, 0x48, 0x63, 0x48, 0x40,
0x62, 0x92, 0x7d, 0xca, 0x63, 0xce,
0x62, 0xf2, 0xfd, 0x08, 0x63, 0x48, 0x20,
0x62, 0x92, 0xfd, 0x0d, 0x63, 0xce,
0x62, 0xf2, 0xfd, 0x28, 0x63, 0x48, 0x20,
0x62, 0x92, 0xfd, 0xab, 0x63, 0xce,
0x62, 0xf2, 0xfd, 0x48, 0x63, 0x48, 0x20,
0x62, 0x92, 0xfd, 0xca, 0x63, 0xce,
};
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
CYBOZU_TEST_EQUAL(c.getSize(), n);
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
// Test "shld": emit vpshldw/d/q (with immediate 5) and vpshldvw/d/q for xmm, ymm and zmm,
// all with mask k3 and T_z, and compare the generated size and bytes with tbl.
// The rows of tbl are laid out one per emitted instruction, in the same order.
CYBOZU_TEST_AUTO(shld)
{
struct Code : Xbyak::CodeGenerator {
Code()
{
vpshldw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
vpshldw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
vpshldw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
vpshldd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
vpshldd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
vpshldd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
vpshldq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
vpshldq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
vpshldq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
vpshldvw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
vpshldvw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
vpshldvw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
vpshldvd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
vpshldvd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
vpshldvd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
vpshldvq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
vpshldvq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
vpshldvq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
}
} c;
const uint8_t tbl[] = {
0x62, 0xf3, 0xed, 0x8b, 0x70, 0x68, 0x04, 0x05,
0x62, 0xf3, 0xed, 0xab, 0x70, 0x68, 0x02, 0x05,
0x62, 0xf3, 0xed, 0xcb, 0x70, 0x68, 0x01, 0x05,
0x62, 0xf3, 0x6d, 0x8b, 0x71, 0x68, 0x04, 0x05,
0x62, 0xf3, 0x6d, 0xab, 0x71, 0x68, 0x02, 0x05,
0x62, 0xf3, 0x6d, 0xcb, 0x71, 0x68, 0x01, 0x05,
0x62, 0xf3, 0xed, 0x8b, 0x71, 0x68, 0x04, 0x05,
0x62, 0xf3, 0xed, 0xab, 0x71, 0x68, 0x02, 0x05,
0x62, 0xf3, 0xed, 0xcb, 0x71, 0x68, 0x01, 0x05,
0x62, 0xf2, 0xed, 0x8b, 0x70, 0x68, 0x04,
0x62, 0xf2, 0xed, 0xab, 0x70, 0x68, 0x02,
0x62, 0xf2, 0xed, 0xcb, 0x70, 0x68, 0x01,
0x62, 0xf2, 0x6d, 0x8b, 0x71, 0x68, 0x04,
0x62, 0xf2, 0x6d, 0xab, 0x71, 0x68, 0x02,
0x62, 0xf2, 0x6d, 0xcb, 0x71, 0x68, 0x01,
0x62, 0xf2, 0xed, 0x8b, 0x71, 0x68, 0x04,
0x62, 0xf2, 0xed, 0xab, 0x71, 0x68, 0x02,
0x62, 0xf2, 0xed, 0xcb, 0x71, 0x68, 0x01,
};
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
CYBOZU_TEST_EQUAL(c.getSize(), n);
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
/*
	Checks the bytes emitted for vpshrd{w,d,q} (immediate shift count 5) and
	vpshrdv{w,d,q} (no immediate) with opmask k3 and T_z, first with a plain
	ptr [rax + 0x40] source and then, for the d/q variants, with a ptr_b
	(broadcast) source.
	tbl holds one row per instruction, in the order they are emitted.
*/
CYBOZU_TEST_AUTO(shrd)
{
struct Code : Xbyak::CodeGenerator {
Code()
{
vpshrdw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
vpshrdw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
vpshrdw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
vpshrdd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
vpshrdd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
vpshrdd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
vpshrdq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
vpshrdq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
vpshrdq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
vpshrdvw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
vpshrdvw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
vpshrdvw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
vpshrdvd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
vpshrdvd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
vpshrdvd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
vpshrdvq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
vpshrdvq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
vpshrdvq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
vpshrdd(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40], 5);
vpshrdd(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40], 5);
vpshrdd(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40], 5);
vpshrdq(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40], 5);
vpshrdq(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40], 5);
vpshrdq(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40], 5);
vpshrdvd(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40]);
vpshrdvd(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40]);
vpshrdvd(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40]);
vpshrdvq(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40]);
vpshrdvq(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40]);
vpshrdvq(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40]);
}
} c;
const uint8_t tbl[] = {
// immediate forms with ptr: the trailing 0x05 is the shift count
0x62, 0xf3, 0xed, 0x8b, 0x72, 0x68, 0x04, 0x05,
0x62, 0xf3, 0xed, 0xab, 0x72, 0x68, 0x02, 0x05,
0x62, 0xf3, 0xed, 0xcb, 0x72, 0x68, 0x01, 0x05,
0x62, 0xf3, 0x6d, 0x8b, 0x73, 0x68, 0x04, 0x05,
0x62, 0xf3, 0x6d, 0xab, 0x73, 0x68, 0x02, 0x05,
0x62, 0xf3, 0x6d, 0xcb, 0x73, 0x68, 0x01, 0x05,
0x62, 0xf3, 0xed, 0x8b, 0x73, 0x68, 0x04, 0x05,
0x62, 0xf3, 0xed, 0xab, 0x73, 0x68, 0x02, 0x05,
0x62, 0xf3, 0xed, 0xcb, 0x73, 0x68, 0x01, 0x05,
// vpshrdv* forms with ptr
0x62, 0xf2, 0xed, 0x8b, 0x72, 0x68, 0x04,
0x62, 0xf2, 0xed, 0xab, 0x72, 0x68, 0x02,
0x62, 0xf2, 0xed, 0xcb, 0x72, 0x68, 0x01,
0x62, 0xf2, 0x6d, 0x8b, 0x73, 0x68, 0x04,
0x62, 0xf2, 0x6d, 0xab, 0x73, 0x68, 0x02,
0x62, 0xf2, 0x6d, 0xcb, 0x73, 0x68, 0x01,
0x62, 0xf2, 0xed, 0x8b, 0x73, 0x68, 0x04,
0x62, 0xf2, 0xed, 0xab, 0x73, 0x68, 0x02,
0x62, 0xf2, 0xed, 0xcb, 0x73, 0x68, 0x01,
// ptr_b forms: the offset byte is 0x10 for the d variants and 0x08 for the q
// variants regardless of vector width - presumably 0x40 scaled by the 4- or
// 8-byte broadcast element size
0x62, 0xf3, 0x6d, 0x9b, 0x73, 0x68, 0x10, 0x05,
0x62, 0xf3, 0x6d, 0xbb, 0x73, 0x68, 0x10, 0x05,
0x62, 0xf3, 0x6d, 0xdb, 0x73, 0x68, 0x10, 0x05,
0x62, 0xf3, 0xed, 0x9b, 0x73, 0x68, 0x08, 0x05,
0x62, 0xf3, 0xed, 0xbb, 0x73, 0x68, 0x08, 0x05,
0x62, 0xf3, 0xed, 0xdb, 0x73, 0x68, 0x08, 0x05,
0x62, 0xf2, 0x6d, 0x9b, 0x73, 0x68, 0x10,
0x62, 0xf2, 0x6d, 0xbb, 0x73, 0x68, 0x10,
0x62, 0xf2, 0x6d, 0xdb, 0x73, 0x68, 0x10,
0x62, 0xf2, 0xed, 0x9b, 0x73, 0x68, 0x08,
0x62, 0xf2, 0xed, 0xbb, 0x73, 0x68, 0x08,
0x62, 0xf2, 0xed, 0xdb, 0x73, 0x68, 0x08,
};
// the generated code must match tbl in both length and content
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
CYBOZU_TEST_EQUAL(c.getSize(), n);
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
/*
	Checks the bytes emitted for vpopcnt{b,w,d,q} on xmm/ymm/zmm destinations
	written with opmask k3 and T_z. The d and q variants are tested with both a
	plain ptr source and a ptr_b (broadcast) source.
	tbl holds one row per instruction, in the order they are emitted.
*/
CYBOZU_TEST_AUTO(vpopcnt)
{
struct Code : Xbyak::CodeGenerator {
Code()
{
vpopcntb(xmm5|k3|T_z, ptr [rax + 0x40]);
vpopcntb(ymm5|k3|T_z, ptr [rax + 0x40]);
vpopcntb(zmm5|k3|T_z, ptr [rax + 0x40]);
vpopcntw(xmm5|k3|T_z, ptr [rax + 0x40]);
vpopcntw(ymm5|k3|T_z, ptr [rax + 0x40]);
vpopcntw(zmm5|k3|T_z, ptr [rax + 0x40]);
vpopcntd(xmm5|k3|T_z, ptr [rax + 0x40]);
vpopcntd(ymm5|k3|T_z, ptr [rax + 0x40]);
vpopcntd(zmm5|k3|T_z, ptr [rax + 0x40]);
vpopcntd(xmm5|k3|T_z, ptr_b [rax + 0x40]);
vpopcntd(ymm5|k3|T_z, ptr_b [rax + 0x40]);
vpopcntd(zmm5|k3|T_z, ptr_b [rax + 0x40]);
vpopcntq(xmm5|k3|T_z, ptr [rax + 0x40]);
vpopcntq(ymm5|k3|T_z, ptr [rax + 0x40]);
vpopcntq(zmm5|k3|T_z, ptr [rax + 0x40]);
vpopcntq(xmm5|k3|T_z, ptr_b [rax + 0x40]);
vpopcntq(ymm5|k3|T_z, ptr_b [rax + 0x40]);
vpopcntq(zmm5|k3|T_z, ptr_b [rax + 0x40]);
}
} c;
const uint8_t tbl[] = {
// rows follow the instruction order above: b, w, d, d (ptr_b), q, q (ptr_b),
// three vector widths each
0x62, 0xf2, 0x7d, 0x8b, 0x54, 0x68, 0x04,
0x62, 0xf2, 0x7d, 0xab, 0x54, 0x68, 0x02,
0x62, 0xf2, 0x7d, 0xcb, 0x54, 0x68, 0x01,
0x62, 0xf2, 0xfd, 0x8b, 0x54, 0x68, 0x04,
0x62, 0xf2, 0xfd, 0xab, 0x54, 0x68, 0x02,
0x62, 0xf2, 0xfd, 0xcb, 0x54, 0x68, 0x01,
0x62, 0xf2, 0x7d, 0x8b, 0x55, 0x68, 0x04,
0x62, 0xf2, 0x7d, 0xab, 0x55, 0x68, 0x02,
0x62, 0xf2, 0x7d, 0xcb, 0x55, 0x68, 0x01,
0x62, 0xf2, 0x7d, 0x9b, 0x55, 0x68, 0x10,
0x62, 0xf2, 0x7d, 0xbb, 0x55, 0x68, 0x10,
0x62, 0xf2, 0x7d, 0xdb, 0x55, 0x68, 0x10,
0x62, 0xf2, 0xfd, 0x8b, 0x55, 0x68, 0x04,
0x62, 0xf2, 0xfd, 0xab, 0x55, 0x68, 0x02,
0x62, 0xf2, 0xfd, 0xcb, 0x55, 0x68, 0x01,
0x62, 0xf2, 0xfd, 0x9b, 0x55, 0x68, 0x08,
0x62, 0xf2, 0xfd, 0xbb, 0x55, 0x68, 0x08,
0x62, 0xf2, 0xfd, 0xdb, 0x55, 0x68, 0x08,
};
// the generated code must match tbl in both length and content
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
CYBOZU_TEST_EQUAL(c.getSize(), n);
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
/*
	Checks the bytes emitted for vpdpbusd, vpdpbusds, vpdpwssd and vpdpwssds.
	Each is generated for xmm/ymm/zmm destinations (opmask k3, T_z) with
	xmm20/ymm20/zmm20 as the second operand, once with a plain ptr source and
	once with a ptr_b (broadcast) source.
	tbl holds one row per instruction, in the order they are emitted.
*/
CYBOZU_TEST_AUTO(vpdpbus)
{
struct Code : Xbyak::CodeGenerator {
Code()
{
vpdpbusd(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
vpdpbusd(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
vpdpbusd(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);
vpdpbusd(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
vpdpbusd(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
vpdpbusd(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);
vpdpbusds(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
vpdpbusds(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
vpdpbusds(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);
vpdpbusds(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
vpdpbusds(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
vpdpbusds(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);
vpdpwssd(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
vpdpwssd(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
vpdpwssd(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);
vpdpwssd(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
vpdpwssd(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
vpdpwssd(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);
vpdpwssds(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
vpdpwssds(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
vpdpwssds(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);
vpdpwssds(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
vpdpwssds(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
vpdpwssds(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);
}
} c;
const uint8_t tbl[] = {
// the fifth byte of each row (0x50..0x53) distinguishes the four instructions;
// within each group the first three rows are the ptr forms and the last three
// the ptr_b forms
0x62, 0xf2, 0x5d, 0x83, 0x50, 0x68, 0x04,
0x62, 0xf2, 0x5d, 0xa3, 0x50, 0x68, 0x02,
0x62, 0xf2, 0x5d, 0xc3, 0x50, 0x68, 0x01,
0x62, 0xf2, 0x5d, 0x93, 0x50, 0x68, 0x10,
0x62, 0xf2, 0x5d, 0xb3, 0x50, 0x68, 0x10,
0x62, 0xf2, 0x5d, 0xd3, 0x50, 0x68, 0x10,
0x62, 0xf2, 0x5d, 0x83, 0x51, 0x68, 0x04,
0x62, 0xf2, 0x5d, 0xa3, 0x51, 0x68, 0x02,
0x62, 0xf2, 0x5d, 0xc3, 0x51, 0x68, 0x01,
0x62, 0xf2, 0x5d, 0x93, 0x51, 0x68, 0x10,
0x62, 0xf2, 0x5d, 0xb3, 0x51, 0x68, 0x10,
0x62, 0xf2, 0x5d, 0xd3, 0x51, 0x68, 0x10,
0x62, 0xf2, 0x5d, 0x83, 0x52, 0x68, 0x04,
0x62, 0xf2, 0x5d, 0xa3, 0x52, 0x68, 0x02,
0x62, 0xf2, 0x5d, 0xc3, 0x52, 0x68, 0x01,
0x62, 0xf2, 0x5d, 0x93, 0x52, 0x68, 0x10,
0x62, 0xf2, 0x5d, 0xb3, 0x52, 0x68, 0x10,
0x62, 0xf2, 0x5d, 0xd3, 0x52, 0x68, 0x10,
0x62, 0xf2, 0x5d, 0x83, 0x53, 0x68, 0x04,
0x62, 0xf2, 0x5d, 0xa3, 0x53, 0x68, 0x02,
0x62, 0xf2, 0x5d, 0xc3, 0x53, 0x68, 0x01,
0x62, 0xf2, 0x5d, 0x93, 0x53, 0x68, 0x10,
0x62, 0xf2, 0x5d, 0xb3, 0x53, 0x68, 0x10,
0x62, 0xf2, 0x5d, 0xd3, 0x53, 0x68, 0x10,
};
// the generated code must match tbl in both length and content
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
CYBOZU_TEST_EQUAL(c.getSize(), n);
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
/*
	Checks the bytes emitted for vpexpandb/vpexpandw (register and memory
	sources, destinations written with opmask k3 and T_z) and for vpshufbitqmb
	(mask-register destination k1 combined with k2).
	tbl holds one row per instruction, in the order they are emitted.
*/
CYBOZU_TEST_AUTO(vexpand_vpshufbitqmb)
{
struct Code : Xbyak::CodeGenerator {
Code()
{
vpexpandb(xmm5|k3|T_z, xmm30);
vpexpandb(ymm5|k3|T_z, ymm30);
vpexpandb(zmm5|k3|T_z, zmm30);
vpexpandb(xmm5|k3|T_z, ptr [rax + 0x40]);
vpexpandb(ymm5|k3|T_z, ptr [rax + 0x40]);
vpexpandb(zmm5|k3|T_z, ptr [rax + 0x40]);
vpexpandw(xmm5|k3|T_z, xmm30);
vpexpandw(ymm5|k3|T_z, ymm30);
vpexpandw(zmm5|k3|T_z, zmm30);
vpexpandw(xmm5|k3|T_z, ptr [rax + 0x40]);
vpexpandw(ymm5|k3|T_z, ptr [rax + 0x40]);
vpexpandw(zmm5|k3|T_z, ptr [rax + 0x40]);
vpshufbitqmb(k1|k2, xmm2, ptr [rax + 0x40]);
vpshufbitqmb(k1|k2, ymm2, ptr [rax + 0x40]);
vpshufbitqmb(k1|k2, zmm2, ptr [rax + 0x40]);
}
} c;
const uint8_t tbl[] = {
// vpexpandb: register forms, then memory forms (offset byte stays 0x40)
0x62, 0x92, 0x7d, 0x8b, 0x62, 0xee,
0x62, 0x92, 0x7d, 0xab, 0x62, 0xee,
0x62, 0x92, 0x7d, 0xcb, 0x62, 0xee,
0x62, 0xf2, 0x7d, 0x8b, 0x62, 0x68, 0x40,
0x62, 0xf2, 0x7d, 0xab, 0x62, 0x68, 0x40,
0x62, 0xf2, 0x7d, 0xcb, 0x62, 0x68, 0x40,
// vpexpandw: register forms, then memory forms (offset byte is 0x20)
0x62, 0x92, 0xfd, 0x8b, 0x62, 0xee,
0x62, 0x92, 0xfd, 0xab, 0x62, 0xee,
0x62, 0x92, 0xfd, 0xcb, 0x62, 0xee,
0x62, 0xf2, 0xfd, 0x8b, 0x62, 0x68, 0x20,
0x62, 0xf2, 0xfd, 0xab, 0x62, 0x68, 0x20,
0x62, 0xf2, 0xfd, 0xcb, 0x62, 0x68, 0x20,
// vpshufbitqmb for xmm/ymm/zmm
0x62, 0xf2, 0x6d, 0x0a, 0x8f, 0x48, 0x04,
0x62, 0xf2, 0x6d, 0x2a, 0x8f, 0x48, 0x02,
0x62, 0xf2, 0x6d, 0x4a, 0x8f, 0x48, 0x01,
};
// the generated code must match tbl in both length and content
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
CYBOZU_TEST_EQUAL(c.getSize(), n);
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
/*
	Checks the bytes emitted for the GF(2^8) instructions gf2p8affineinvqb,
	gf2p8affineqb and gf2p8mulb. Each group (separated by ///) covers the
	two-operand form, the three-operand v-prefixed form on xmm/ymm, and the
	v-prefixed form using registers 30/31, zmm, opmask k1 with T_z and (for the
	affine instructions) ptr_b sources.
	tbl holds one row per instruction, in the order they are emitted.
*/
CYBOZU_TEST_AUTO(gf2)
{
struct Code : Xbyak::CodeGenerator {
Code()
{
///
gf2p8affineinvqb(xmm1, xmm2, 3);
gf2p8affineinvqb(xmm1, ptr [rax + 0x40], 3);
vgf2p8affineinvqb(xmm1, xmm5, xmm2, 3);
vgf2p8affineinvqb(ymm1, ymm5, ymm2, 3);
vgf2p8affineinvqb(xmm1, xmm5, ptr [rax + 0x40], 3);
vgf2p8affineinvqb(ymm1, ymm5, ptr [rax + 0x40], 3);
vgf2p8affineinvqb(xmm30, xmm31, xmm4, 5);
vgf2p8affineinvqb(ymm30, ymm31, ymm4, 5);
vgf2p8affineinvqb(zmm30, zmm31, zmm4, 5);
vgf2p8affineinvqb(xmm30|k1|T_z, xmm5, ptr [rax + 0x40], 5);
vgf2p8affineinvqb(ymm30|k1|T_z, ymm5, ptr [rax + 0x40], 5);
vgf2p8affineinvqb(zmm30|k1|T_z, zmm5, ptr [rax + 0x40], 5);
vgf2p8affineinvqb(xmm30|k1|T_z, xmm5, ptr_b [rax + 0x40], 5);
vgf2p8affineinvqb(ymm30|k1|T_z, ymm5, ptr_b [rax + 0x40], 5);
vgf2p8affineinvqb(zmm30|k1|T_z, zmm5, ptr_b [rax + 0x40], 5);
///
gf2p8affineqb(xmm1, xmm2, 3);
gf2p8affineqb(xmm1, ptr [rax + 0x40], 3);
vgf2p8affineqb(xmm1, xmm5, xmm2, 3);
vgf2p8affineqb(ymm1, ymm5, ymm2, 3);
vgf2p8affineqb(xmm1, xmm5, ptr [rax + 0x40], 3);
vgf2p8affineqb(ymm1, ymm5, ptr [rax + 0x40], 3);
vgf2p8affineqb(xmm30, xmm31, xmm4, 5);
vgf2p8affineqb(ymm30, ymm31, ymm4, 5);
vgf2p8affineqb(zmm30, zmm31, zmm4, 5);
vgf2p8affineqb(xmm30|k1|T_z, xmm5, ptr [rax + 0x40], 5);
vgf2p8affineqb(ymm30|k1|T_z, ymm5, ptr [rax + 0x40], 5);
vgf2p8affineqb(zmm30|k1|T_z, zmm5, ptr [rax + 0x40], 5);
vgf2p8affineqb(xmm30|k1|T_z, xmm5, ptr_b [rax + 0x40], 5);
vgf2p8affineqb(ymm30|k1|T_z, ymm5, ptr_b [rax + 0x40], 5);
vgf2p8affineqb(zmm30|k1|T_z, zmm5, ptr_b [rax + 0x40], 5);
///
gf2p8mulb(xmm1, xmm2);
gf2p8mulb(xmm1, ptr [rax + 0x40]);
vgf2p8mulb(xmm1, xmm5, xmm2);
vgf2p8mulb(ymm1, ymm5, ymm2);
vgf2p8mulb(xmm1, xmm5, ptr [rax + 0x40]);
vgf2p8mulb(ymm1, ymm5, ptr [rax + 0x40]);
vgf2p8mulb(xmm30, xmm31, xmm4);
vgf2p8mulb(ymm30, ymm31, ymm4);
vgf2p8mulb(zmm30, zmm31, zmm4);
vgf2p8mulb(xmm30|k1|T_z, xmm5, ptr [rax + 0x40]);
vgf2p8mulb(ymm30|k1|T_z, ymm5, ptr [rax + 0x40]);
vgf2p8mulb(zmm30|k1|T_z, zmm5, ptr [rax + 0x40]);
}
} c;
const uint8_t tbl[] = {
// gf2p8affineinvqb group: rows start with 0x66 (two-operand form), 0xc4
// (v-prefixed xmm/ymm form) or 0x62 (forms using registers 30/31, zmm or masks)
0x66, 0x0f, 0x3a, 0xcf, 0xca, 0x03,
0x66, 0x0f, 0x3a, 0xcf, 0x48, 0x40, 0x03,
0xc4, 0xe3, 0xd1, 0xcf, 0xca, 0x03,
0xc4, 0xe3, 0xd5, 0xcf, 0xca, 0x03,
0xc4, 0xe3, 0xd1, 0xcf, 0x48, 0x40, 0x03,
0xc4, 0xe3, 0xd5, 0xcf, 0x48, 0x40, 0x03,
0x62, 0x63, 0x85, 0x00, 0xcf, 0xf4, 0x05,
0x62, 0x63, 0x85, 0x20, 0xcf, 0xf4, 0x05,
0x62, 0x63, 0x85, 0x40, 0xcf, 0xf4, 0x05,
0x62, 0x63, 0xd5, 0x89, 0xcf, 0x70, 0x04, 0x05,
0x62, 0x63, 0xd5, 0xa9, 0xcf, 0x70, 0x02, 0x05,
0x62, 0x63, 0xd5, 0xc9, 0xcf, 0x70, 0x01, 0x05,
0x62, 0x63, 0xd5, 0x99, 0xcf, 0x70, 0x08, 0x05,
0x62, 0x63, 0xd5, 0xb9, 0xcf, 0x70, 0x08, 0x05,
0x62, 0x63, 0xd5, 0xd9, 0xcf, 0x70, 0x08, 0x05,
// gf2p8affineqb group
0x66, 0x0f, 0x3a, 0xce, 0xca, 0x03,
0x66, 0x0f, 0x3a, 0xce, 0x48, 0x40, 0x03,
0xc4, 0xe3, 0xd1, 0xce, 0xca, 0x03,
0xc4, 0xe3, 0xd5, 0xce, 0xca, 0x03,
0xc4, 0xe3, 0xd1, 0xce, 0x48, 0x40, 0x03,
0xc4, 0xe3, 0xd5, 0xce, 0x48, 0x40, 0x03,
0x62, 0x63, 0x85, 0x00, 0xce, 0xf4, 0x05,
0x62, 0x63, 0x85, 0x20, 0xce, 0xf4, 0x05,
0x62, 0x63, 0x85, 0x40, 0xce, 0xf4, 0x05,
0x62, 0x63, 0xd5, 0x89, 0xce, 0x70, 0x04, 0x05,
0x62, 0x63, 0xd5, 0xa9, 0xce, 0x70, 0x02, 0x05,
0x62, 0x63, 0xd5, 0xc9, 0xce, 0x70, 0x01, 0x05,
0x62, 0x63, 0xd5, 0x99, 0xce, 0x70, 0x08, 0x05,
0x62, 0x63, 0xd5, 0xb9, 0xce, 0x70, 0x08, 0x05,
0x62, 0x63, 0xd5, 0xd9, 0xce, 0x70, 0x08, 0x05,
// gf2p8mulb group (no immediate byte)
0x66, 0x0f, 0x38, 0xcf, 0xca,
0x66, 0x0f, 0x38, 0xcf, 0x48, 0x40,
0xc4, 0xe2, 0x51, 0xcf, 0xca,
0xc4, 0xe2, 0x55, 0xcf, 0xca,
0xc4, 0xe2, 0x51, 0xcf, 0x48, 0x40,
0xc4, 0xe2, 0x55, 0xcf, 0x48, 0x40,
0x62, 0x62, 0x05, 0x00, 0xcf, 0xf4,
0x62, 0x62, 0x05, 0x20, 0xcf, 0xf4,
0x62, 0x62, 0x05, 0x40, 0xcf, 0xf4,
0x62, 0x62, 0x55, 0x89, 0xcf, 0x70, 0x04,
0x62, 0x62, 0x55, 0xa9, 0xcf, 0x70, 0x02,
0x62, 0x62, 0x55, 0xc9, 0xcf, 0x70, 0x01,
};
// the generated code must match tbl in both length and content
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
CYBOZU_TEST_EQUAL(c.getSize(), n);
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
/*
	Checks the bytes emitted for the bfloat16 instructions vcvtne2ps2bf16,
	vcvtneps2bf16 and vdpbf16ps with a [rax + 64] memory source.
	vcvtneps2bf16 is exercised with explicitly sized sources (xword, yword,
	zword) as well as a size-less ptr; the expected bytes for the
	ymm0 + zword and ymm0 + ptr cases are identical.
	tbl holds one row per instruction, in the order they are emitted.
*/
CYBOZU_TEST_AUTO(bf16)
{
struct Code : Xbyak::CodeGenerator {
Code()
{
vcvtne2ps2bf16(xmm0 | k1, xmm1, ptr [rax + 64]);
vcvtne2ps2bf16(ymm0 | k1 | T_z, ymm0, ptr [rax + 64]);
vcvtne2ps2bf16(zmm0 | k1, zmm1, ptr [rax + 64]);
vcvtneps2bf16(xmm0, xword [rax + 64]);
vcvtneps2bf16(xmm0 | k1, yword [rax + 64]);
vcvtneps2bf16(ymm0 | k1, zword [rax + 64]);
vcvtneps2bf16(ymm0 | k1, ptr [rax + 64]);
vdpbf16ps(xmm0 | k1, xmm1, ptr [rax + 64]);
vdpbf16ps(ymm0 | k1, ymm1, ptr [rax + 64]);
vdpbf16ps(zmm0 | k1, zmm1, ptr [rax + 64]);
}
} c;
const uint8_t tbl[] = {
// vcvtne2ps2bf16
0x62, 0xf2, 0x77, 0x09, 0x72, 0x40, 0x04,
0x62, 0xf2, 0x7f, 0xa9, 0x72, 0x40, 0x02,
0x62, 0xf2, 0x77, 0x49, 0x72, 0x40, 0x01,
// vcvtneps2bf16
0x62, 0xf2, 0x7e, 0x08, 0x72, 0x40, 0x04,
0x62, 0xf2, 0x7e, 0x29, 0x72, 0x40, 0x02,
0x62, 0xf2, 0x7e, 0x49, 0x72, 0x40, 0x01,
0x62, 0xf2, 0x7e, 0x49, 0x72, 0x40, 0x01,
// vdpbf16ps
0x62, 0xf2, 0x76, 0x09, 0x52, 0x40, 0x04,
0x62, 0xf2, 0x76, 0x29, 0x52, 0x40, 0x02,
0x62, 0xf2, 0x76, 0x49, 0x52, 0x40, 0x01,
};
// the generated code must match tbl in both length and content
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
CYBOZU_TEST_EQUAL(c.getSize(), n);
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
#endif

View File

@ -0,0 +1,37 @@
#define XBYAK_NO_OP_NAMES
#include "xbyak/xbyak.h"
#include <string.h>
#include <vector>
struct Code : Xbyak::CodeGenerator {
Code(int x)
{
mov(eax, x);
ret();
}
};
/*
	Allocator stress test: create N code generators that are all alive at the
	same time, call every generated function, and print the sum of the results.
	NOTE(review): N is presumably chosen large enough to stress per-process
	limits of the selected allocator (mmap vs. posix_memalign) - confirm.

	Returns 0 on success and 1 when an exception was caught, so that a failure
	is visible in the process exit status.
*/
int main()
	try
{
#ifdef XBYAK_USE_MMAP_ALLOCATOR
	puts("use Allocator with mmap");
#else
	puts("use Allocator with posix_memalign");
#endif
	const int N = 70000;
	std::vector<Code*> v(N);
	// keep every generator alive until all of them have been executed
	for (int i = 0; i < N; i++) {
		v[i] = new Code(i);
	}
	long long sum = 0;
	for (int i = 0; i < N; i++) {
		// the i-th generated function returns i
		sum += v[i]->getCode<int (*)()>()();
	}
	for (int i = 0; i < N; i++) {
		delete v[i];
	}
	printf("sum=%lld\n", sum);
	return 0;
} catch (const std::exception& e) {
	printf("ERR %s\n", e.what());
	// previously the handler fell off the end, which makes main return 0 and
	// hides the failure from scripts that check the exit status
	return 1;
}

View File

@ -0,0 +1,39 @@
#include <stdio.h>
#define XBYAK_ENABLE_OMITTED_OPERAND
#include "xbyak/xbyak.h"
#define CYBOZU_TEST_DISABLE_AUTO_RUN
#include "cybozu/test.hpp"
using namespace Xbyak;
#ifdef _MSC_VER
#pragma warning(disable : 4245)
#pragma warning(disable : 4312)
#endif
/*
	Code generator whose public members come from the file nm.cpp, which is
	textually included into the class body (main() below calls gen() on it, so
	nm.cpp presumably defines gen()).
*/
class Sample : public CodeGenerator {
// declared private and not defined here, so Sample objects cannot be assigned
void operator=(const Sample&);
public:
#include "nm.cpp"
};
/*
	Code generator used to check error reporting: gen() wraps several
	instruction calls in CYBOZU_TEST_EXCEPTION with std::exception as the
	expected exception type.
*/
class ErrorSample : public CodeGenerator {
// declared private and not defined here, so ErrorSample objects cannot be assigned
void operator=(const ErrorSample&);
public:
void gen()
{
// mov/test/adc get a size-less memory operand (ptr[eax]) together with an
// immediate, and setz gets a 32-bit register; presumably each is rejected
// because the operand size is ambiguous or invalid - confirm against xbyak
CYBOZU_TEST_EXCEPTION(mov(ptr[eax],1), std::exception);
CYBOZU_TEST_EXCEPTION(test(ptr[eax],1), std::exception);
CYBOZU_TEST_EXCEPTION(adc(ptr[eax],1), std::exception);
CYBOZU_TEST_EXCEPTION(setz(eax), std::exception);
}
};
int main()
{
CYBOZU_TEST_EQUAL(sizeof(Xbyak::Operand), 4u);
Sample s;
s.gen();
ErrorSample es;
es.gen();
}

View File

@ -0,0 +1,45 @@
/*
normalize prefix
*/
#include <string>
#include <set>
#include <iostream>
#include <memory.h>
typedef unsigned char uint8;
/*
	Canonicalize the prefix bytes at the start of a hex-encoded instruction.

	The leading run of two-character tokens "66", "67", "F2", "F3" (matched
	case-sensitively) is removed from line; every distinct token found is then
	written once, in sorted order, followed by the rest of the line unchanged.
	e.g. "F366900F" -> "66F3900F"
*/
std::string normalize(const std::string& line)
{
	static const char tbl[][3] = { "66", "67", "F2", "F3" };
	const size_t tblNum = sizeof(tbl) / sizeof(tbl[0]);
	typedef std::set<std::string> StringSet;
	StringSet prefixSet;
	size_t pos = 0;
	while (pos < line.size()) {
		// index of the prefix found at pos; tblNum means "none"
		size_t hit = tblNum;
		for (size_t i = 0; i < tblNum && hit == tblNum; i++) {
			if (line.compare(pos, 2, tbl[i]) == 0) hit = i;
		}
		if (hit == tblNum) break;
		prefixSet.insert(tbl[hit]);
		pos += 2;
	}
	std::string ret;
	for (StringSet::const_iterator it = prefixSet.begin(); it != prefixSet.end(); ++it) {
		ret.append(*it);
	}
	// remainder of the line, starting at the first non-prefix token
	ret.append(line.c_str() + pos);
	return ret;
}
int main()
{
std::string line;
while (std::getline(std::cin, line)) {
std::string normalizedLine = normalize(line);
std::cout << normalizedLine << '\n';//std::endl;
}
}

View File

@ -0,0 +1,6 @@
Test scripts for Windows
These tests require nasm.exe, yasm.exe, cl.exe, awk and diff.
Run test_all to execute all tests.

View File

@ -0,0 +1,88 @@
#define XBYAK_NO_OP_NAMES
#include <xbyak/xbyak.h>
/*
dump of vc
00000000003A0000 F3 0F C2 05 F1 00 00 00 00 cmpeqss xmm0,dword ptr [3A00FAh]
00000000003A0009 F7 05 E7 00 00 00 21 00 00 00 test dword ptr [3A00FAh],21h
00000000003A0013 0F BA 25 DF 00 00 00 03 bt dword ptr [3A00FAh],3
00000000003A001B C4 E3 79 0D 05 D5 00 00 00 03 vblendpd xmm0,xmm0,xmmword ptr [3A00FAh],3
00000000003A0025 C4 E3 79 0F 05 CB 00 00 00 04 vpalignr xmm0,xmm0,xmmword ptr [3A00FAh],4
00000000003A002F C4 E3 7D 19 1D C1 00 00 00 0C vextractf128 xmmword ptr [3A00FAh],ymm3,0Ch
00000000003A0039 C4 E3 75 46 05 B7 00 00 00 0D vperm2i128 ymm0,ymm1,ymmword ptr [3A00FAh],0Dh
00000000003A0043 C4 E3 79 1D 15 AD 00 00 00 2C vcvtps2ph mmword ptr [3A00FAh],xmm2,2Ch
00000000003A004D C7 05 A3 00 00 00 34 12 00 00 mov dword ptr [3A00FAh],1234h
00000000003A0057 C1 25 9C 00 00 00 03 shl dword ptr [3A00FAh],3
00000000003A005E D1 2D 96 00 00 00 shr dword ptr [3A00FAh],1
00000000003A0064 48 0F A4 05 8D 00 00 00 03 shld qword ptr [3A00FAh],rax,3
00000000003A006D 48 6B 05 85 00 00 00 15 imul rax,qword ptr [3A00FAh],15h
00000000003A0075 C4 E3 FB F0 05 7B 00 00 00 15 rorx rax,qword ptr [3A00FAh],15h
00000000003A007F F7 05 71 00 00 00 05 00 00 00 test dword ptr [3A00FAh],5
00000000003A0089 66 48 0F 3A 16 05 66 00 00 00 03 pextrq qword ptr [3A00FAh],xmm0,3
00000000003A0094 66 48 0F 3A 22 15 5B 00 00 00 05 pinsrq xmm2,qword ptr [3A00FAh],5
00000000003A009F 66 0F 3A 15 0D 51 00 00 00 04 pextrw word ptr [3A00FAh],xmm1,4
00000000003A00A9 81 15 47 00 00 00 45 23 01 00 adc dword ptr [3A00FAh],12345h
00000000003A00B3 0F BA 25 3F 00 00 00 34 bt dword ptr [3A00FAh],34h
00000000003A00BB 66 0F BA 3D 36 00 00 00 34 btc word ptr [3A00FAh],34h
00000000003A00C4 0F BA 35 2E 00 00 00 34 btr dword ptr [3A00FAh],34h
00000000003A00CC C1 15 27 00 00 00 04 rcl dword ptr [3A00FAh],4
00000000003A00D3 48 0F A4 05 1E 00 00 00 04 shld qword ptr [3A00FAh],rax,4
00000000003A00DC 0F 3A 0F 05 15 00 00 00 04 palignr mm0,mmword ptr [3A00FAh],4
00000000003A00E5 66 0F 3A DF 1D 0B 00 00 00 04 aeskeygenassist xmm3,xmmword ptr [3A00FAh],4
00000000003A00EF C4 E3 79 60 15 01 00 00 00 07 vpcmpestrm xmm2,xmmword ptr [3A00FAh],7
00000000003A00F9 C3 ret
00000000003A00FA F0 DE BC 9A 78 56 34 12
*/
/*
	Emits a sequence of instructions that each use a RIP-relative memory operand
	([rip + label]), most of them together with an immediate, followed by ret
	and the 8-byte constant that label refers to.
	The "dump of vc" comment above lists a disassembly of this sequence.
*/
struct Code : Xbyak::CodeGenerator {
Code()
{
// label is referenced by every instruction before it is placed with L(label) below
Xbyak::Label label;
cmpss(xmm0, ptr[rip + label], 0);
test(dword[rip + label], 33);
bt(dword[rip + label ], 3);
vblendpd(xmm0, xmm0, dword[rip + label], 3);
vpalignr(xmm0, xmm0, qword[rip + label], 4);
vextractf128(dword[rip + label], ymm3, 12);
vperm2i128(ymm0, ymm1, qword[rip + label], 13);
vcvtps2ph(ptr[rip + label], xmm2, 44);
mov(dword[rip + label], 0x1234);
shl(dword[rip + label], 3);
shr(dword[rip + label], 1);
shld(qword[rip + label], rax, 3);
imul(rax, qword[rip + label], 21);
rorx(rax, qword[rip + label], 21);
test(dword[rip + label], 5);
pextrq(ptr[rip + label], xmm0, 3);
pinsrq(xmm2, ptr[rip + label], 5);
pextrw(ptr[rip + label], xmm1, 4);
adc(dword[rip + label], 0x12345);
bt(byte[rip + label], 0x34);
btc(word[rip + label], 0x34);
btr(dword[rip + label], 0x34);
rcl(dword[rip + label], 4);
shld(qword[rip + label], rax, 4);
palignr(mm0, ptr[rip + label], 4);
aeskeygenassist(xmm3, ptr[rip + label], 4);
vpcmpestrm(xmm2, ptr[rip + label], 7);
ret();
// the data all of the operands above point at
L(label);
dq(0x123456789abcdef0ull);
};
};
/*
	Print the n bytes starting at p as two-digit lowercase hex values, each
	followed by a space, 16 values per line, and finish with a newline.
*/
void dump(const unsigned char *p, size_t n)
{
	// the index is size_t like n: an int index mixes signed and unsigned in the
	// comparison (a compiler warning) and cannot cover n > INT_MAX
	for (size_t i = 0; i < n; i++) {
		printf("%02x ", p[i]);
		if ((i % 16) == 15) putchar('\n');
	}
	putchar('\n');
}
int main()
{
Code code;
void (*f)() = code.getCode<void (*)()>();
dump(code.getCode(), code.getSize());
f();
}

View File

@ -0,0 +1,2 @@
@echo off
rem Sets OPT, the cl.exe options shared by the test scripts that call set_opt.
set OPT=/EHsc -I../xbyak -I./ /W4 -D_CRT_SECURE_NO_WARNINGS /nologo

View File

@ -0,0 +1,416 @@
#define XBYAK_NO_OP_NAMES
#include <xbyak/xbyak_util.h>
#ifdef XBYAK32
#error "this sample is for only 64-bit mode"
#endif
using namespace Xbyak::util;
/*
	Generators exercised by testPartial(): every genN() emits one function that
	is built around a Xbyak::util::StackFrame. testPartial() calls the emitted
	function and compares its return value (rax) with the expected result.
	StackFrame is constructed as (this, pNum, tNum [, stackSizeByte]); sf.p[i]
	and sf.t[i] are presumably the registers holding the i-th parameter and the
	i-th temporary - confirm against xbyak_util.h.
*/
struct Code : public Xbyak::CodeGenerator {
// f(a) = a
void gen1()
{
StackFrame sf(this, 1);
mov(rax, sf.p[0]);
}
// f(a, b) = a + b, computed with a single lea
void gen2()
{
StackFrame sf(this, 2);
lea(rax, ptr [sf.p[0] + sf.p[1]]);
}
// f(a, b, c) = a + b + c
void gen3()
{
StackFrame sf(this, 3);
mov(rax, sf.p[0]);
add(rax, sf.p[1]);
add(rax, sf.p[2]);
}
// f(a, b, c, d) = a + b + c + d
void gen4()
{
StackFrame sf(this, 4);
mov(rax, sf.p[0]);
add(rax, sf.p[1]);
add(rax, sf.p[2]);
add(rax, sf.p[3]);
}
// same sum as gen4, but UseRCX is requested and rcx is overwritten first
void gen5()
{
StackFrame sf(this, 4, UseRCX);
xor_(rcx, rcx);
mov(rax, sf.p[0]);
add(rax, sf.p[1]);
add(rax, sf.p[2]);
add(rax, sf.p[3]);
}
// same sum as gen4, with both rcx and rdx requested and overwritten first
void gen6()
{
StackFrame sf(this, 4, UseRCX | UseRDX);
xor_(rcx, rcx);
xor_(rdx, rdx);
mov(rax, sf.p[0]);
add(rax, sf.p[1]);
add(rax, sf.p[2]);
add(rax, sf.p[3]);
}
// three-parameter sum with rcx and rdx requested and overwritten first
void gen7()
{
StackFrame sf(this, 3, UseRCX | UseRDX);
xor_(rcx, rcx);
xor_(rdx, rdx);
mov(rax, sf.p[0]);
add(rax, sf.p[1]);
add(rax, sf.p[2]);
}
// like gen7, additionally requesting three temporaries and writing to them
void gen8()
{
StackFrame sf(this, 3, 3 | UseRCX | UseRDX);
xor_(rcx, rcx);
xor_(rdx, rdx);
mov(sf.t[0], 1);
mov(sf.t[1], 2);
mov(sf.t[2], 3);
mov(rax, sf.p[0]);
add(rax, sf.p[1]);
add(rax, sf.p[2]);
}
// like gen8, additionally requesting 32 bytes of stack and storing to all
// four 8-byte slots at [rsp]
void gen9()
{
StackFrame sf(this, 3, 3 | UseRCX | UseRDX, 32);
xor_(rcx, rcx);
xor_(rdx, rdx);
mov(sf.t[0], 1);
mov(sf.t[1], 2);
mov(sf.t[2], 3);
mov(rax, sf.p[0]);
add(rax, sf.p[1]);
add(rax, sf.p[2]);
mov(ptr [rsp + 8 * 0], rax);
mov(ptr [rsp + 8 * 1], rax);
mov(ptr [rsp + 8 * 2], rax);
mov(ptr [rsp + 8 * 3], rax);
}
// four parameters, eight temporaries, rcx/rdx and 32 bytes of stack, all written
void gen10()
{
StackFrame sf(this, 4, 8 | UseRCX | UseRDX, 32);
xor_(rcx, rcx);
xor_(rdx, rdx);
for (int i = 0; i < 8; i++) {
mov(sf.t[i], i);
}
mov(rax, sf.p[0]);
add(rax, sf.p[1]);
add(rax, sf.p[2]);
add(rax, sf.p[3]);
mov(ptr [rsp + 8 * 0], rax);
mov(ptr [rsp + 8 * 1], rax);
mov(ptr [rsp + 8 * 2], rax);
mov(ptr [rsp + 8 * 3], rax);
}
// no parameters: overwrites rcx and returns the constant 3
void gen11()
{
StackFrame sf(this, 0, UseRCX);
xor_(rcx, rcx);
mov(rax, 3);
}
// same sum as gen4, with only rdx requested and overwritten first
void gen12()
{
StackFrame sf(this, 4, UseRDX);
xor_(rdx, rdx);
mov(rax, sf.p[0]);
add(rax, sf.p[1]);
add(rax, sf.p[2]);
add(rax, sf.p[3]);
}
/*
int64_t f(const int64_t a[13]) { return sum-of-a[]; }
*/
void gen13()
{
StackFrame sf(this, 1, 13);
// load all 13 elements into temporaries first, then add them up
for (int i = 0; i < 13; i++) {
mov(sf.t[i], ptr[sf.p[0] + i * 8]);
}
mov(rax, sf.t[0]);
for (int i = 1; i < 13; i++) {
add(rax, sf.t[i]);
}
}
/*
same as gen13
*/
void gen14()
{
StackFrame sf(this, 1, 11 | UseRCX | UseRDX);
// 11 temporaries plus rcx and rdx appended to the pack give the 13 registers used below
Pack t = sf.t;
t.append(rcx);
t.append(rdx);
for (int i = 0; i < 13; i++) {
mov(t[i], ptr[sf.p[0] + i * 8]);
}
mov(rax, t[0]);
for (int i = 1; i < 13; i++) {
add(rax, t[i]);
}
}
/*
return (1 << 15) - 1;
*/
void gen15()
{
StackFrame sf(this, 0, 14, 8);
// 14 temporaries plus rax give 15 registers, one per bit of the result
Pack t = sf.t;
t.append(rax);
for (int i = 0; i < 15; i++) {
mov(t[i], 1 << i);
}
// accumulate in the 8-byte stack slot because rax itself is one of the 15 registers
mov(qword[rsp], 0);
for (int i = 0; i < 15; i++) {
add(ptr[rsp], t[i]);
}
mov(rax, ptr[rsp]);
}
};
/*
	Parameterized generators used by testAll(). The buffer is created with
	4096 * 32 bytes because testAll() emits many functions into one Code2.
*/
struct Code2 : Xbyak::CodeGenerator {
Code2()
: Xbyak::CodeGenerator(4096 * 32)
{
}
/*
Emit a function that returns 1 + (sum of its pNum parameters).
tNum is the StackFrame temporary count, optionally or-ed with UseRCX/UseRDX.
Every requested register and every byte of the requested stack area is
written before the sum is computed.
*/
void gen(int pNum, int tNum, int stackSizeByte)
{
StackFrame sf(this, pNum, tNum, stackSizeByte);
if (tNum & UseRCX) xor_(rcx, rcx);
if (tNum & UseRDX) xor_(rdx, rdx);
// strip the UseRCX/UseRDX flags to get the plain number of temporaries
for (int i = 0, n = tNum & ~(UseRCX | UseRDX); i < n; i++) {
mov(sf.t[i], 5);
}
for (int i = 0; i < stackSizeByte; i++) {
mov(byte [rsp + i], 0);
}
mov(rax, 1);
for (int i = 0; i < pNum; i++) {
add(rax, sf.p[i]);
}
}
/*
Emit a function that returns the value of rsp inside the stack frame;
testAll() uses it to check the stack alignment.
*/
void gen2(int pNum, int tNum, int stackSizeByte)
{
StackFrame sf(this, pNum, tNum, stackSizeByte);
mov(rax, rsp);
}
};
// number of mismatches reported by check() so far
static int errNum = 0;
/*
	Compare x with y; on a mismatch print both values and increment errNum.
*/
void check(int x, int y)
{
	if (x == y) return;
	printf("err x=%d, y=%d\n", x, y);
	++errNum;
}
/*
	Call the generated function at f with pNum int arguments (10, 100, 1000,
	10000, as many as needed) and check that it returns 1 plus their sum.
	Prints an error and exits with status 1 when pNum is not in 0..4.
*/
void verify(const Xbyak::uint8 *f, int pNum)
{
	typedef int (*Func0)();
	typedef int (*Func1)(int);
	typedef int (*Func2)(int, int);
	typedef int (*Func3)(int, int, int);
	typedef int (*Func4)(int, int, int, int);
	int expected = 0;
	int actual = 0;
	switch (pNum) {
	case 0:
		expected = 1;
		actual = reinterpret_cast<Func0>(f)();
		break;
	case 1:
		expected = 11;
		actual = reinterpret_cast<Func1>(f)(10);
		break;
	case 2:
		expected = 111;
		actual = reinterpret_cast<Func2>(f)(10, 100);
		break;
	case 3:
		expected = 1111;
		actual = reinterpret_cast<Func3>(f)(10, 100, 1000);
		break;
	case 4:
		expected = 11111;
		actual = reinterpret_cast<Func4>(f)(10, 100, 1000, 10000);
		break;
	default:
		printf("ERR pNum=%d\n", pNum);
		exit(1);
	}
	check(expected, actual);
}
/*
	Sweep StackFrame configurations: for every combination of stack size,
	parameter count, rcx/rdx reservation mode and temporary count, emit a
	function with Code2::gen and check its result with verify(). When a stack
	area is requested, additionally check that rsp inside the frame is 16-byte
	aligned.
	NOTE(review): verify() also handles pNum == 4, but this sweep stops at
	pNum == 3 - confirm whether that is intended.
*/
void testAll()
{
	// per mode: number of tNum values to sweep (0 .. maxNum - 1) and the
	// register flags or-ed into tNum
	const struct {
		int maxNum;
		int opt;
	} modeTbl[] = {
		{ 10, 0 },
		{ 9, UseRCX },
		{ 9, UseRDX },
		{ 8, UseRCX | UseRDX },
	};
	const size_t modeNum = sizeof(modeTbl) / sizeof(modeTbl[0]);
	Code2 code;
	for (int stackSize = 0; stackSize < 32; stackSize += 7) {
		for (int pNum = 0; pNum < 4; pNum++) {
			for (size_t mode = 0; mode < modeNum; mode++) {
				const int maxNum = modeTbl[mode].maxNum;
				const int opt = modeTbl[mode].opt;
				for (int tNum = 0; tNum < maxNum; tNum++) {
					const int tMode = tNum | opt;
					// remember where the next function starts before emitting it
					const Xbyak::uint8 *f = code.getCurr();
					code.gen(pNum, tMode, stackSize);
					verify(f, pNum);
					if (stackSize == 0) continue;
					// rsp must be 16-byte aligned whenever a stack area is requested
					Code2 c2;
					c2.gen2(pNum, tMode, stackSize);
					uint64_t addr = c2.getCode<uint64_t (*)()>()();
					check(addr % 16, 0);
				}
			}
		}
	}
}
/*
	Run every genN() of Code once and compare the result of the emitted
	function with the expected value via check().
	All functions are emitted into the same Code object; each function pointer
	is taken with getCurr() immediately before the matching genN() call, so the
	order of the two statements in every group matters.
*/
void testPartial()
{
Code code;
int (*f1)(int) = code.getCurr<int (*)(int)>();
code.gen1();
check(5, f1(5));
int (*f2)(int, int) = code.getCurr<int (*)(int, int)>();
code.gen2();
check(9, f2(3, 6));
int (*f3)(int, int, int) = code.getCurr<int (*)(int, int, int)>();
code.gen3();
check(14, f3(1, 4, 9));
int (*f4)(int, int, int, int) = code.getCurr<int (*)(int, int, int, int)>();
code.gen4();
check(30, f4(1, 4, 9, 16));
int (*f5)(int, int, int, int) = code.getCurr<int (*)(int, int, int, int)>();
code.gen5();
check(23, f5(2, 5, 7, 9));
int (*f6)(int, int, int, int) = code.getCurr<int (*)(int, int, int, int)>();
code.gen6();
check(18, f6(3, 4, 5, 6));
int (*f7)(int, int, int) = code.getCurr<int (*)(int, int, int)>();
code.gen7();
check(12, f7(3, 4, 5));
int (*f8)(int, int, int) = code.getCurr<int (*)(int, int, int)>();
code.gen8();
check(23, f8(5, 8, 10));
int (*f9)(int, int, int) = code.getCurr<int (*)(int, int, int)>();
code.gen9();
check(60, f9(10, 20, 30));
int (*f10)(int, int, int, int) = code.getCurr<int (*)(int, int, int, int)>();
code.gen10();
check(100, f10(10, 20, 30, 40));
int (*f11)() = code.getCurr<int (*)()>();
code.gen11();
check(3, f11());
int (*f12)(int, int, int, int) = code.getCurr<int (*)(int, int, int, int)>();
code.gen12();
check(24, f12(3, 5, 7, 9));
{
// gen13 and gen14 both sum the 13 elements of tbl (1 + 2 + ... + 13 = 91)
int64_t tbl[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 };
int64_t (*f13)(const int64_t*) = code.getCurr<int64_t (*)(const int64_t*)>();
code.gen13();
check(91, f13(tbl));
int64_t (*f14)(const int64_t*) = code.getCurr<int64_t (*)(const int64_t*)>();
code.gen14();
check(91, f14(tbl));
}
int (*f15)() = code.getCurr<int (*)()>();
code.gen15();
check((1 << 15) - 1, f15());
}
/*
	Print the toString() text of every element of p on one line, each followed
	by a space.
*/
void put(const Xbyak::util::Pack& p)
{
	const size_t n = p.size();
	for (size_t i = 0; i != n; ++i) {
		printf("%s ", p[i].toString());
	}
	printf("\n");
}
/*
	Check that the first tblNum elements of p have the register indices listed
	in tbl; every mismatch is reported through check().
*/
void verifyPack(const Xbyak::util::Pack& p, const int *tbl, size_t tblNum)
{
	size_t i = 0;
	while (i < tblNum) {
		const int actualIdx = p[i].getIdx();
		check(actualIdx, tbl[i]);
		++i;
	}
}
void testPack()
{
const int N = 10;
Xbyak::Reg64 regTbl[N];
for (int i = 0; i < N; i++) {
regTbl[i] = Xbyak::Reg64(i);
}
Xbyak::util::Pack p(regTbl, N);
const struct {
int pos;
int num;
int tbl[10];
} tbl[] = {
{ 0, 10, { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 } },
{ 1, 9, { 1, 2, 3, 4, 5, 6, 7, 8, 9 } },
{ 2, 8, { 2, 3, 4, 5, 6, 7, 8, 9 } },
{ 3, 7, { 3, 4, 5, 6, 7, 8, 9 } },
{ 4, 6, { 4, 5, 6, 7, 8, 9 } },
{ 5, 5, { 5, 6, 7, 8, 9 } },
{ 6, 4, { 6, 7, 8, 9 } },
{ 7, 3, { 7, 8, 9 } },
{ 8, 2, { 8, 9 } },
{ 9, 1, { 9 } },
{ 3, 5, { 3, 4, 5, 6, 7 } },
};
for (size_t i = 0; i < sizeof(tbl) / sizeof(*tbl); i++) {
const int pos = tbl[i].pos;
const int num = tbl[i].num;
verifyPack(p.sub(pos, num), tbl[i].tbl, num);
if (pos + num == N) {
verifyPack(p.sub(pos), tbl[i].tbl, num);
}
}
}
/*
	Run all StackFrame/Pack tests and print the number of failed checks.
	Returns 0 only when every check passed; returns 1 when at least one check
	failed or an exception was thrown.
*/
int main()
	try
{
	testAll();
	testPartial();
	testPack();
	printf("errNum=%d\n", errNum);
	// failed checks used to be printed only; report them through the exit
	// status as well so that scripts running this test can detect them
	return errNum == 0 ? 0 : 1;
} catch (const std::exception& e) {
	printf("err %s\n", e.what());
	return 1;
} catch (...) {
	puts("ERR");
	return 1;
}

Binary file not shown.

View File

@ -0,0 +1,37 @@
@echo off
rem Compares the addressing-mode encodings produced by xbyak with the ones
rem produced by nasm.
rem Usage: test_address [64]
rem   64            build with -DXBYAK64 and assemble as win64, running sub 1 and sub 2
rem   anything else build with -DXBYAK32 and assemble as win32, running sub 1 only
set FILTER=grep -v warning
if /i "%1"=="64" (
set OPT2=-DXBYAK64
set OPT3=win64
) else (
set OPT2=-DXBYAK32
set OPT3=win32
)
call set_opt
bmake -f Makefile.win all
if /i "%1"=="64" (
call :sub 1
call :sub 2
) else (
call :sub 1
)
goto end
rem sub: the first argument is passed on to the address program.
rem Steps: build address.cpp, let it write a.asm, assemble that with nasm into
rem the listing a.lst, extract the code bytes from the listing into ok.lst,
rem let address write nm.cpp in jit mode, build and run nm_frame to get x.lst,
rem then diff x.lst against ok.lst.
rem NOTE(review): the echo line that ends in nm.cpp is redirected into nm.cpp
rem instead of being shown on the console; the following line overwrites the file.
:sub
echo cl address.cpp %OPT% %OPT2%
cl address.cpp %OPT% %OPT2%
address %1% > a.asm
echo nasm -f %OPT3% -l a.lst a.asm
nasm -f %OPT3% -l a.lst a.asm
awk "{if (index($3, ""-"")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = """" }} " < a.lst |%FILTER% > ok.lst
echo address %1% jit > nm.cpp
address %1% jit > nm.cpp
echo cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2%
cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2%
nm_frame > x.lst
diff -w x.lst ok.lst
wc x.lst
:end

View File

@ -0,0 +1,41 @@
#!/bin/sh
# Compares the addressing-mode encodings produced by xbyak with the ones
# produced by nasm.
# Usage: test_address.sh [64]
#   64            build with -DXBYAK64, assemble as win64, run sub 1 and sub 2
#   anything else build with -DXBYAK32, assemble as win32, run sub 1 only

FILTER="grep -v warning"

# sub <n>: <n> is passed on to ./address.
# Steps: build address.cpp, let it write a.asm, assemble that into the listing
# a.lst, extract the code bytes from the listing into ok.lst, let ./address
# write nm.cpp in jit mode, build and run nm_frame to get x.lst, then diff the
# two lists and print "ok" when they are identical.
sub()
{

CFLAGS="-Wall -fno-operator-names -I../ $OPT2"
echo "compile address.cpp"
g++ $CFLAGS address.cpp -o address

./address $1 > a.asm
echo "asm"
$EXE -f$OPT3 a.asm -l a.lst
# join listing lines whose third field contains "-" with the line that follows
awk '{if (index($3, "-")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = "" }} ' < a.lst | $FILTER > ok.lst

echo "xbyak"
./address $1 jit > nm.cpp
echo "compile nm_frame.cpp"
g++ $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame
./nm_frame > x.lst
diff ok.lst x.lst && echo "ok"

}

# select assembler, xbyak mode and object format from the first argument
if [ "$1" = "64" ]; then
echo "nasm(64bit)"
EXE=nasm
OPT2=-DXBYAK64
OPT3=win64
sub 1
sub 2
else
echo "nasm(32bit)"
EXE=nasm
OPT2=-DXBYAK32
OPT3=win32
sub 1
fi

View File

@ -0,0 +1,8 @@
@echo off
rem Runs the whole test suite: test_nm_all, then the addressing tests in
rem 32-bit and 64-bit mode, then the jmp tests.
call test_nm_all
echo *** test addressing ***
call test_address
call test_address 64
echo *** test jmp address ***
call test_jmp
echo *** all test end ***

View File

@ -0,0 +1,42 @@
@echo off
rem Compares the encodings xbyak produces for the instructions listed by
rem make_nm.cpp, built with USE_AVX, with the output of an external assembler.
rem Usage: test_avx [Y or 64 or Y64]
rem   Y    yasm, 32-bit      64   nasm, 64-bit
rem   Y64  yasm, 64-bit      anything else  nasm, 32-bit
rem In the 64-bit modes both byte lists are passed through normalize_prefix.
set FILTER=cat
set Y=0
if /i "%1"=="Y" (
set Y=1
set EXE=yasm.exe
set OPT2=-DUSE_YASM -DXBYAK32
set OPT3=win32
) else if /i "%1"=="64" (
set EXE=nasm.exe
set OPT2=-DXBYAK64
set OPT3=win64
set FILTER=normalize_prefix
) else if /i "%1"=="Y64" (
set Y=1
set EXE=yasm.exe
set OPT2=-DUSE_YASM -DXBYAK64
set OPT3=win64
set FILTER=normalize_prefix
) else (
set EXE=nasm.exe
set OPT2=-DXBYAK32
set OPT3=win32
)
call set_opt
bmake -f Makefile.win all
rem reference side: make_nm writes a.asm, the assembler turns it into the listing a.lst
echo cl -I../ make_nm.cpp %OPT% %OPT2% /EHs /DUSE_AVX
cl -I../ make_nm.cpp %OPT% %OPT2% /EHs /DUSE_AVX
make_nm > a.asm
%EXE% -f %OPT3% -l a.lst a.asm
rem connect "?????-" and "??"
rem the yasm variant skips the first line of the listing
if /i "%Y%"=="1" (
awk "NR > 1 {if (index($3, ""-"")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = """" }} " < a.lst |%FILTER% > ok.lst
) else (
awk "{if (index($3, ""-"")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = """" }} " < a.lst |%FILTER% > ok.lst
)
rem xbyak side: make_nm jit writes nm.cpp, which nm_frame.cpp includes; running
rem nm_frame produces x.lst, which is then compared with ok.lst
make_nm jit > nm.cpp
echo cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2%
cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2%
nm_frame |%FILTER% > x.lst
diff -w x.lst ok.lst
wc x.lst

View File

@ -0,0 +1,43 @@
#!/bin/tcsh
# Compares the encodings xbyak produces for the instructions listed by
# make_nm.cpp, built with USE_AVX, with the output of an external assembler.
# Usage: test_avx.sh [Y|64|Y64]
#   Y    yasm, 32-bit      64   nasm, 64-bit
#   Y64  yasm, 64-bit      anything else  nasm, 32-bit
# In the 64-bit modes both byte lists are passed through ./normalize_prefix
# instead of "grep -v warning".

set FILTER="grep -v warning"

if ($1 == "Y") then
echo "yasm(32bit)"
set EXE=yasm
set OPT2="-DUSE_YASM -DXBYAK32"
set OPT3=win32
else if ($1 == "64") then
echo "nasm(64bit)"
set EXE=nasm
set OPT2=-DXBYAK64
set OPT3=win64
set FILTER=./normalize_prefix
else if ($1 == "Y64") then
echo "yasm(64bit)"
set EXE=yasm
set OPT2="-DUSE_YASM -DXBYAK64"
set OPT3=win64
set FILTER=./normalize_prefix
else
echo "nasm(32bit)"
set EXE=nasm
set OPT2=-DXBYAK32
set OPT3=win32
endif

# reference side: make_nm writes a.asm, the assembler turns it into the listing
# a.lst, and awk joins listing lines whose third field contains "-" with the
# line that follows
set CFLAGS="-Wall -fno-operator-names -I../ $OPT2 -DUSE_AVX"
echo "compile make_nm.cpp"
g++ $CFLAGS make_nm.cpp -o make_nm

./make_nm > a.asm
echo "asm"
$EXE -f$OPT3 a.asm -l a.lst
awk '{if (index($3, "-")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = "" }} ' < a.lst | $FILTER | grep -v "1+1" > ok.lst

# xbyak side: make_nm jit writes nm.cpp, which nm_frame.cpp includes; running
# nm_frame produces x.lst, which is then compared with ok.lst
echo "xbyak"
./make_nm jit > nm.cpp
echo "compile nm_frame.cpp"
g++ $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame
./nm_frame | $FILTER > x.lst
diff -B ok.lst x.lst && echo "ok"

View File

@ -0,0 +1,31 @@
@echo off
rem Compare the AVX-512 encodings generated through xbyak with the output of nasm.
rem Argument 1 selects the run: min = 64-bit with MIN_TEST defined, 64 = 64-bit, anything else = 32-bit.
rem Needs awk, diff, wc, bmake and cl on PATH.
rem FILTER is the command both byte dumps are piped through before they are compared.
set FILTER=cat
rem Y is set here but not read anywhere in this script
set Y=0
if /i "%1"=="min" (
set EXE=nasm.exe
set OPT2=-DXBYAK64 -DMIN_TEST
set OPT3=win64
set FILTER=normalize_prefix
) else if /i "%1"=="64" (
set EXE=nasm.exe
set OPT2=-DXBYAK64
set OPT3=win64
set FILTER=normalize_prefix
) else (
set EXE=nasm.exe
set OPT2=-DXBYAK32
set OPT3=win32
)
rem set_opt is an external script, presumably defining OPT - confirm there
call set_opt
bmake -f Makefile.win all
echo cl -I../ make_512.cpp %OPT% %OPT2% /EHs /DUSE_AVX512
cl -I../ make_512.cpp %OPT% %OPT2% /EHs /DUSE_AVX512
rem without arguments make_512 writes the assembler source for the reference run
make_512 > a.asm
%EXE% -f %OPT3% -l a.lst a.asm
rem connect "?????-" and "??"
rem that is: a listing field ending in a dash is joined with field 3 of the following line
awk "{if (index($3, ""-"")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = """" }} " < a.lst |%FILTER% > ok.lst
rem the jit argument makes make_512 write nm.cpp, presumably included by nm_frame.cpp
make_512 jit > nm.cpp
cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2% /DXBYAK_AVX512
nm_frame |%FILTER% > x.lst
rem x.lst comes from xbyak and ok.lst from the assembler; diff prints nothing when they agree
diff -w x.lst ok.lst
wc x.lst

View File

@ -0,0 +1,32 @@
#!/bin/tcsh
# Cross-check the AVX-512 encodings generated through xbyak against nasm.
# usage: test_avx512.sh [64]
#   64 selects the 64-bit run; anything else (or no argument) runs 32-bit
#   EXE    assembler executable
#   OPT2   macros passed to g++
#   OPT3   assembler output format (given to -f)
#   FILTER command the byte dumps are piped through before comparison
# default filter drops every line containing "warning"
set FILTER="grep -v warning"
if ($1 == "64") then
echo "nasm(64bit)"
set EXE=nasm
set OPT2=-DXBYAK64
set OPT3=win64
# NOTE(review): normalize_prefix is not built by this script - presumably produced by the Makefile
set FILTER=./normalize_prefix
else
echo "nasm(32bit)"
set EXE=nasm
set OPT2=-DXBYAK32
set OPT3=win32
endif
set CFLAGS="-Wall -fno-operator-names -I../ $OPT2 -DUSE_AVX512"
echo "compile make_512.cpp"
g++ $CFLAGS make_512.cpp -o make_512
# without arguments make_512 writes the assembler source for the reference run
./make_512 > a.asm
echo "asm"
# assemble and keep the listing file; the expected bytes are taken from it
$EXE -f$OPT3 a.asm -l a.lst
# Take field 3 of every listing line. A field containing "-" is a continued dump:
# keep it without its last character and join it with field 3 of the next line.
awk '{if (index($3, "-")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = "" }} ' < a.lst | $FILTER > ok.lst
echo "xbyak"
# "make_512 jit" writes nm.cpp; presumably nm_frame.cpp includes it - confirm in nm_frame.cpp
./make_512 jit > nm.cpp
echo "compile nm_frame.cpp"
g++ $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame -DXBYAK_AVX512
./nm_frame | $FILTER > x.lst
# -B ignores blank-line differences; "ok" is printed only when both dumps match
diff -B ok.lst x.lst && echo "ok"

View File

@ -0,0 +1,13 @@
@echo off
rem Run every AVX test variant in sequence:
rem test_avx with nasm and yasm in 32-bit and 64-bit mode, then test_avx512 with nasm in both modes.
echo ** nasm-avx(32bit) ***
call test_avx
echo ** nasm-avx(64bit) ***
call test_avx 64
echo ** yasm-avx(32bit) ***
call test_avx Y
echo ** yasm-avx(64bit) ***
call test_avx Y64
echo ** nasm-avx512(32bit) ***
call test_avx512
echo ** nasm-avx512(64bit) ***
call test_avx512 64

View File

@ -0,0 +1,4 @@
:: Build and run the jmp test.
:: set_opt is an external script, presumably defining OPT - confirm there.
:: jmp.cpp is compiled with optimization disabled and debug information enabled, then executed.
call set_opt
bmake -f Makefile.win all
cl -I../ -I./ -DXBYAK_TEST jmp.cpp %OPT% /Od /Zi
jmp

View File

@ -0,0 +1,4 @@
:: Build and run the misc test.
:: set_opt is an external script, presumably defining OPT - confirm there.
:: misc.cpp is compiled with optimization disabled and debug information enabled, then executed.
call set_opt
bmake -f Makefile.win all
cl -I../ -I./ -DXBYAK_TEST misc.cpp %OPT% /Od /Zi
misc

View File

@ -0,0 +1,78 @@
#if defined(_MSC_VER) && (_MSC_VER <= 1200)
#pragma warning(disable:4514)
#pragma warning(disable:4786)
#endif
#include <stdio.h>
#include <stdlib.h>
#include "../../include.mie/mie_thread.h"
#include "xbyak/xbyak.h"
/*
	Generates a function "void set(int x)" that copies x into MMX register mm0.
	The argument is first brought into ecx according to the calling convention in use:
	- 32-bit: read from the stack just above the return address
	- 64-bit Windows (and Cygwin): already in ecx
	- 64-bit System V (Linux, macOS, ...): passed in edi, so it is copied to ecx;
	  without this the generated code stored an unrelated value into mm0
*/
class WriteMMX : public Xbyak::CodeGenerator {
public:
	WriteMMX()
	{
#ifdef XBYAK32
		mov(ecx, ptr [esp + 4]);
#elif !defined(_WIN32) && !defined(__CYGWIN__)
		// ecx is caller-saved in the System V ABI, so overwriting it is safe
		mov(ecx, edi);
#endif
		movd(mm0, ecx);
		ret();
	}
	// returns the generated code as a callable function pointer
	void (*set() const)(int x) { return (void (*)(int x))getCode(); }
};
class ReadMMX : public Xbyak::CodeGenerator {
public:
ReadMMX()
{
movd(eax, mm0);
ret();
}
int (*get() const)() { return (int (*)())getCode(); }
};
/*
	Thread body for the MMX test.
	Each thread stores its own value n_ into mm0 once, then reads mm0 back in an
	endless loop (with a MIE_Sleep(1000) between reads) and reports when the value
	read differs from n_.
*/
class Test : public MIE::ThreadBase<Test> {
	int n_;
public:
	Test(int n) : n_(n) { }
	void threadEntry()
	{
		printf("n=%d\n", n_);
		WriteMMX writer;
		writer.set()(n_);
		ReadMMX reader;
		// the generated reader never changes, so fetch its entry point once
		int (*const readMm0)() = reader.get();
		while (true) {
			const int current = readMm0();
			printf("b=%d\n", current);
			if (current != n_) printf("mm0 has changed!\n");
			MIE::MIE_Sleep(1000);
		}
	}
	// nothing to do: threadEntry never returns
	void stopThread() { }
};
int main(int argc, char *argv[])
{
#ifdef XBYAK32
puts("32bit");
#else
puts("64bit");
#endif
try {
int n = atoi(argc == 1 ? "1223" : argv[1]);
Test test0(n), test1(n + 1);
test0.beginThread();
test1.beginThread();
test0.joinThread();
test1.joinThread();
} catch (std::exception& e) {
printf("ERR:%s\n", e.what());
} catch (...) {
printf("unknown error\n");
}
}

View File

@ -0,0 +1,42 @@
@echo off
rem Compare the instruction encodings generated through xbyak with the output of nasm or yasm.
rem Argument 1 selects the run: Y = yasm 32-bit, 64 = nasm 64-bit, Y64 = yasm 64-bit, anything else = nasm 32-bit.
rem Needs awk, diff, rm, bmake and cl on PATH.
rem FILTER is the command both byte dumps are piped through before they are compared.
set FILTER=cat
rem Y becomes 1 for the yasm runs; it selects the awk variant further down
set Y=0
if /i "%1"=="Y" (
set Y=1
set EXE=yasm.exe
set OPT2=-DUSE_YASM -DXBYAK32
set OPT3=win32
) else if /i "%1"=="64" (
set EXE=nasm.exe
set OPT2=-DXBYAK64
set OPT3=win64
set FILTER=normalize_prefix
) else if /i "%1"=="Y64" (
set Y=1
set EXE=yasm.exe
set OPT2=-DUSE_YASM -DXBYAK64
set OPT3=win64
set FILTER=normalize_prefix
) else (
set EXE=nasm.exe
set OPT2=-DXBYAK32
set OPT3=win32
)
rem set_opt is an external script, presumably defining OPT - confirm there
call set_opt
bmake -f Makefile.win all
echo cl -I../ make_nm.cpp %OPT% %OPT2% /EHs
cl -I../ make_nm.cpp %OPT% %OPT2% /EHs
rem without arguments make_nm writes the assembler source for the reference run
make_nm > a.asm
rem remove a listing left over from an earlier run
rm -rf a.lst
echo %EXE% -f %OPT3% -l a.lst a.asm
%EXE% -f %OPT3% -l a.lst a.asm
rem connect "?????-" and "??"
rem that is: a listing field ending in a dash is joined with field 3 of the following line
rem the yasm variant additionally skips the first line of the listing
if /i "%Y%"=="1" (
awk "NR > 1 {if (index($3, ""-"")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = """" }} " < a.lst |%FILTER% > ok.lst
) else (
awk "{if (index($3, ""-"")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = """" }} " < a.lst |%FILTER% > ok.lst
)
rem the jit argument makes make_nm write nm.cpp, presumably included by nm_frame.cpp
make_nm jit > nm.cpp
cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2%
nm_frame |%FILTER% > x.lst
rem ok is printed only when both dumps match
diff -wb x.lst ok.lst && echo "ok"

View File

@ -0,0 +1,49 @@
#!/bin/tcsh
# Cross-check the instruction encodings generated through xbyak against an external assembler.
# usage: test_nm.sh [Y|64|Y64|avx512]
#   (no argument or anything else) nasm 32-bit
#   Y       yasm 32-bit
#   64      nasm 64-bit
#   Y64     yasm 64-bit
#   avx512  nasm 64-bit with USE_AVX512 defined
# Variables set by the selection below:
#   EXE    assembler executable
#   OPT2   macros passed to g++ for both generated programs
#   OPT3   assembler output format (given to -f)
#   FILTER command the byte dumps are piped through before comparison
set FILTER=cat
if ($1 == "Y") then
echo "yasm(32bit)"
set EXE=yasm
set OPT2="-DUSE_YASM -DXBYAK32"
set OPT3=win32
else if ($1 == "64") then
echo "nasm(64bit)"
set EXE=nasm
set OPT2=-DXBYAK64
set OPT3=win64
# NOTE(review): normalize_prefix is not built by this script - presumably produced by the Makefile
set FILTER=./normalize_prefix
else if ($1 == "Y64") then
echo "yasm(64bit)"
set EXE=yasm
set OPT2="-DUSE_YASM -DXBYAK64"
set OPT3=win64
set FILTER=./normalize_prefix
else if ($1 == "avx512") then
echo "nasm(64bit) + avx512"
set EXE=nasm
set OPT2="-DXBYAK64 -DUSE_AVX512"
set OPT3=win64
set FILTER=./normalize_prefix
else
echo "nasm(32bit)"
set EXE=nasm
set OPT2=-DXBYAK32
set OPT3=win32
endif
# -fno-operator-names lets and/or/xor/not be used as function names
set CFLAGS="-Wall -fno-operator-names -I../ $OPT2"
echo "compile make_nm.cpp"
g++ $CFLAGS make_nm.cpp -o make_nm
# without arguments make_nm writes the assembler source for the reference run
./make_nm > a.asm
echo "asm"
# assemble and keep the listing file; the expected bytes are taken from it
$EXE -f$OPT3 a.asm -l a.lst
# Take field 3 of every listing line. A field containing "-" is a continued dump:
# keep it without its last character and join it with field 3 of the next line.
# Lines containing "1+1" are dropped from the expected output.
awk '{if (index($3, "-")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = "" }} ' < a.lst | $FILTER | grep -v "1+1" > ok.lst
echo "xbyak"
# "make_nm jit" writes nm.cpp; presumably nm_frame.cpp includes it - confirm in nm_frame.cpp
./make_nm jit > nm.cpp
echo "compile nm_frame.cpp"
g++ $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame
./nm_frame | $FILTER > x.lst
# -B ignores blank-line differences; "ok" is printed only when both dumps match
diff -B ok.lst x.lst && echo "ok"

View File

@ -0,0 +1,11 @@
@echo off
rem Run test_nm for nasm and yasm in 32-bit and 64-bit mode, then the whole AVX suite via test_avx_all.
echo *** nasm(32bit) ***
call test_nm
echo *** yasm(32bit) ***
call test_nm Y
echo *** nasm(64bit) ***
call test_nm 64
echo *** yasm(64bit) ***
call test_nm Y64
call test_avx_all

90
core/deps/xbyak/xbyak.sln Normal file
View File

@ -0,0 +1,90 @@
Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 15
VisualStudioVersion = 15.0.28010.2016
MinimumVisualStudioVersion = 10.0.40219.1
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "bf", "sample\bf.vcxproj", "{654BD79B-59D3-4B10-BBAA-158BAB272828}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "calc", "sample\calc.vcxproj", "{5FDDFAA6-B947-491D-A17E-BBD863846579}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "quantize", "sample\quantize.vcxproj", "{D06753BF-E1F3-4578-9B18-08673327F77C}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test0", "sample\test0.vcxproj", "{1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "toyvm", "sample\toyvm.vcxproj", "{2E41C7AF-39FF-454C-B081-37445378DCB3}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test_util", "sample\test_util.vcxproj", "{CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "calc2", "sample\calc2.vcxproj", "{8EC11C7F-1B5C-4787-8940-B9B3AAF1D204}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Win32 = Debug|Win32
Debug|x64 = Debug|x64
Release|Win32 = Release|Win32
Release|x64 = Release|x64
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{654BD79B-59D3-4B10-BBAA-158BAB272828}.Debug|Win32.ActiveCfg = Debug|Win32
{654BD79B-59D3-4B10-BBAA-158BAB272828}.Debug|Win32.Build.0 = Debug|Win32
{654BD79B-59D3-4B10-BBAA-158BAB272828}.Debug|x64.ActiveCfg = Debug|x64
{654BD79B-59D3-4B10-BBAA-158BAB272828}.Debug|x64.Build.0 = Debug|x64
{654BD79B-59D3-4B10-BBAA-158BAB272828}.Release|Win32.ActiveCfg = Release|Win32
{654BD79B-59D3-4B10-BBAA-158BAB272828}.Release|Win32.Build.0 = Release|Win32
{654BD79B-59D3-4B10-BBAA-158BAB272828}.Release|x64.ActiveCfg = Release|x64
{654BD79B-59D3-4B10-BBAA-158BAB272828}.Release|x64.Build.0 = Release|x64
{5FDDFAA6-B947-491D-A17E-BBD863846579}.Debug|Win32.ActiveCfg = Debug|Win32
{5FDDFAA6-B947-491D-A17E-BBD863846579}.Debug|Win32.Build.0 = Debug|Win32
{5FDDFAA6-B947-491D-A17E-BBD863846579}.Debug|x64.ActiveCfg = Debug|x64
{5FDDFAA6-B947-491D-A17E-BBD863846579}.Debug|x64.Build.0 = Debug|x64
{5FDDFAA6-B947-491D-A17E-BBD863846579}.Release|Win32.ActiveCfg = Release|Win32
{5FDDFAA6-B947-491D-A17E-BBD863846579}.Release|Win32.Build.0 = Release|Win32
{5FDDFAA6-B947-491D-A17E-BBD863846579}.Release|x64.ActiveCfg = Release|x64
{5FDDFAA6-B947-491D-A17E-BBD863846579}.Release|x64.Build.0 = Release|x64
{D06753BF-E1F3-4578-9B18-08673327F77C}.Debug|Win32.ActiveCfg = Debug|Win32
{D06753BF-E1F3-4578-9B18-08673327F77C}.Debug|Win32.Build.0 = Debug|Win32
{D06753BF-E1F3-4578-9B18-08673327F77C}.Debug|x64.ActiveCfg = Debug|x64
{D06753BF-E1F3-4578-9B18-08673327F77C}.Debug|x64.Build.0 = Debug|x64
{D06753BF-E1F3-4578-9B18-08673327F77C}.Release|Win32.ActiveCfg = Release|Win32
{D06753BF-E1F3-4578-9B18-08673327F77C}.Release|Win32.Build.0 = Release|Win32
{D06753BF-E1F3-4578-9B18-08673327F77C}.Release|x64.ActiveCfg = Release|x64
{D06753BF-E1F3-4578-9B18-08673327F77C}.Release|x64.Build.0 = Release|x64
{1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E}.Debug|Win32.ActiveCfg = Debug|Win32
{1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E}.Debug|Win32.Build.0 = Debug|Win32
{1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E}.Debug|x64.ActiveCfg = Debug|x64
{1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E}.Debug|x64.Build.0 = Debug|x64
{1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E}.Release|Win32.ActiveCfg = Release|Win32
{1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E}.Release|Win32.Build.0 = Release|Win32
{1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E}.Release|x64.ActiveCfg = Release|x64
{1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E}.Release|x64.Build.0 = Release|x64
{2E41C7AF-39FF-454C-B081-37445378DCB3}.Debug|Win32.ActiveCfg = Debug|Win32
{2E41C7AF-39FF-454C-B081-37445378DCB3}.Debug|Win32.Build.0 = Debug|Win32
{2E41C7AF-39FF-454C-B081-37445378DCB3}.Debug|x64.ActiveCfg = Debug|x64
{2E41C7AF-39FF-454C-B081-37445378DCB3}.Debug|x64.Build.0 = Debug|x64
{2E41C7AF-39FF-454C-B081-37445378DCB3}.Release|Win32.ActiveCfg = Release|Win32
{2E41C7AF-39FF-454C-B081-37445378DCB3}.Release|Win32.Build.0 = Release|Win32
{2E41C7AF-39FF-454C-B081-37445378DCB3}.Release|x64.ActiveCfg = Release|x64
{2E41C7AF-39FF-454C-B081-37445378DCB3}.Release|x64.Build.0 = Release|x64
{CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD}.Debug|Win32.ActiveCfg = Debug|Win32
{CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD}.Debug|Win32.Build.0 = Debug|Win32
{CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD}.Debug|x64.ActiveCfg = Debug|x64
{CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD}.Debug|x64.Build.0 = Debug|x64
{CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD}.Release|Win32.ActiveCfg = Release|Win32
{CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD}.Release|Win32.Build.0 = Release|Win32
{CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD}.Release|x64.ActiveCfg = Release|x64
{CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD}.Release|x64.Build.0 = Release|x64
{8EC11C7F-1B5C-4787-8940-B9B3AAF1D204}.Debug|Win32.ActiveCfg = Debug|Win32
{8EC11C7F-1B5C-4787-8940-B9B3AAF1D204}.Debug|Win32.Build.0 = Debug|Win32
{8EC11C7F-1B5C-4787-8940-B9B3AAF1D204}.Debug|x64.ActiveCfg = Debug|x64
{8EC11C7F-1B5C-4787-8940-B9B3AAF1D204}.Debug|x64.Build.0 = Debug|x64
{8EC11C7F-1B5C-4787-8940-B9B3AAF1D204}.Release|Win32.ActiveCfg = Release|Win32
{8EC11C7F-1B5C-4787-8940-B9B3AAF1D204}.Release|Win32.Build.0 = Release|Win32
{8EC11C7F-1B5C-4787-8940-B9B3AAF1D204}.Release|x64.ActiveCfg = Release|x64
{8EC11C7F-1B5C-4787-8940-B9B3AAF1D204}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {DAE0012B-DDCC-4614-9110-D52E351B2A80}
EndGlobalSection
EndGlobal

View File

@ -9,10 +9,8 @@
@note modified new BSD license
http://opensource.org/licenses/BSD-3-Clause
*/
#ifndef XBYAK_NO_OP_NAMES
#if not +0 // trick to detect whether 'not' is operator or not
#error "use -fno-operator-names option if you want to use and(), or(), xor(), not() as function names, Or define XBYAK_NO_OP_NAMES and use and_(), or_(), xor_(), not_()."
#endif
#if (not +0) && !defined(XBYAK_NO_OP_NAMES) // trick to detect whether 'not' is operator or not
#define XBYAK_NO_OP_NAMES
#endif
#include <stdio.h> // for debug print
@ -40,6 +38,8 @@
// This covers -std=(gnu|c)++(0x|11|1y), -stdlib=libc++, and modern Microsoft.
#if ((defined(_MSC_VER) && (_MSC_VER >= 1600)) || defined(_LIBCPP_VERSION) ||\
((__cplusplus >= 201103) || defined(__GXX_EXPERIMENTAL_CXX0X__)))
#include <unordered_set>
#define XBYAK_STD_UNORDERED_SET std::unordered_set
#include <unordered_map>
#define XBYAK_STD_UNORDERED_MAP std::unordered_map
#define XBYAK_STD_UNORDERED_MULTIMAP std::unordered_multimap
@ -49,16 +49,22 @@
libstdcxx 20070719 (from GCC 4.2.1, the last GPL 2 version).
*/
#elif XBYAK_GNUC_PREREQ(4, 5) || (XBYAK_GNUC_PREREQ(4, 2) && __GLIBCXX__ >= 20070719) || defined(__INTEL_COMPILER) || defined(__llvm__)
#include <tr1/unordered_set>
#define XBYAK_STD_UNORDERED_SET std::tr1::unordered_set
#include <tr1/unordered_map>
#define XBYAK_STD_UNORDERED_MAP std::tr1::unordered_map
#define XBYAK_STD_UNORDERED_MULTIMAP std::tr1::unordered_multimap
#elif defined(_MSC_VER) && (_MSC_VER >= 1500) && (_MSC_VER < 1600)
#include <unordered_set>
#define XBYAK_STD_UNORDERED_SET std::tr1::unordered_set
#include <unordered_map>
#define XBYAK_STD_UNORDERED_MAP std::tr1::unordered_map
#define XBYAK_STD_UNORDERED_MULTIMAP std::tr1::unordered_multimap
#else
#include <set>
#define XBYAK_STD_UNORDERED_SET std::set
#include <map>
#define XBYAK_STD_UNORDERED_MAP std::map
#define XBYAK_STD_UNORDERED_MULTIMAP std::multimap
@ -72,6 +78,10 @@
#include <sys/mman.h>
#include <stdlib.h>
#endif
#if defined(__APPLE__) && defined(MAP_JIT)
#define XBYAK_USE_MAP_JIT
#include <sys/sysctl.h>
#endif
#if !defined(_MSC_VER) || (_MSC_VER >= 1600)
#include <stdint.h>
#endif
@ -105,7 +115,7 @@ namespace Xbyak {
enum {
DEFAULT_MAX_CODE_SIZE = 4096,
VERSION = 0x5650 /* 0xABCD = A.BC(D) */
VERSION = 0x5891 /* 0xABCD = A.BC(D) */
};
#ifndef MIE_INTEGER_TYPE_DEFINED
@ -178,7 +188,8 @@ enum {
ERR_INVALID_ZERO,
ERR_INVALID_RIP_IN_AUTO_GROW,
ERR_INVALID_MIB_ADDRESS,
ERR_INTERNAL
ERR_X2APIC_IS_NOT_SUPPORTED,
ERR_INTERNAL // Put it at last.
};
class Error : public std::exception {
@ -187,8 +198,7 @@ public:
explicit Error(int err) : err_(err)
{
if (err_ < 0 || err_ > ERR_INTERNAL) {
fprintf(stderr, "bad err=%d in Xbyak::Error\n", err_);
exit(1);
err_ = ERR_INTERNAL;
}
}
operator int() const { return err_; }
@ -239,9 +249,11 @@ public:
"invalid zero",
"invalid rip in AutoGrow",
"invalid mib address",
"internal error",
"x2APIC is not supported",
"internal error"
};
assert((size_t)err_ < sizeof(errTbl) / sizeof(*errTbl));
assert(err_ <= ERR_INTERNAL);
assert(ERR_INTERNAL + 1 == sizeof(errTbl) / sizeof(*errTbl));
return errTbl[err_];
}
};
@ -315,6 +327,29 @@ struct Allocator {
};
#ifdef XBYAK_USE_MMAP_ALLOCATOR
#ifdef XBYAK_USE_MAP_JIT
namespace util {

/*
	Ask sysctl for "kern.osrelease" and return the integer that precedes the first '.'.
	@return the major number, or 0 if sysctlbyname fails or the number is not followed by '.'
*/
inline int getMacOsVersionPure()
{
	char release[64];
	size_t len = sizeof(release);
	if (sysctlbyname("kern.osrelease", release, &len, NULL, 0) != 0) return 0;
	char *rest;
	const int major = strtol(release, &rest, 10);
	return (*rest == '.') ? major : 0;
}

/*
	Same value as getMacOsVersionPure(), but computed once and kept in a
	function-local static, so later calls do not query sysctl again.
*/
inline int getMacOsVersion()
{
	static const int cached = getMacOsVersionPure();
	return cached;
}

} // util
#endif
class MmapAllocator : Allocator {
typedef XBYAK_STD_UNORDERED_MAP<uintptr_t, size_t> SizeList;
SizeList sizeList_;
@ -323,7 +358,11 @@ public:
{
const size_t alignedSizeM1 = inner::ALIGN_PAGE_SIZE - 1;
size = (size + alignedSizeM1) & ~alignedSizeM1;
#ifdef MAP_ANONYMOUS
#if defined(XBYAK_USE_MAP_JIT)
int mode = MAP_PRIVATE | MAP_ANONYMOUS;
const int mojaveVersion = 18;
if (util::getMacOsVersion() >= mojaveVersion) mode |= MAP_JIT;
#elif defined(MAP_ANONYMOUS)
const int mode = MAP_PRIVATE | MAP_ANONYMOUS;
#elif defined(MAP_ANON)
const int mode = MAP_PRIVATE | MAP_ANON;
@ -423,7 +462,8 @@ public:
kind_ = kind;
bit_ = kind == XMM ? 128 : kind == YMM ? 256 : 512;
}
void setBit(int bit) { bit_ = bit; }
// err if MMX/FPU/OPMASK/BNDREG
void setBit(int bit);
void setOpmaskIdx(int idx, bool ignore_idx0 = false)
{
if (!ignore_idx0 && idx == 0) throw Error(ERR_K0_IS_INVALID);
@ -506,6 +546,49 @@ public:
const Reg& getReg() const;
};
/*
	Change the bit size of this operand in place.
	@param bit [in] new size; must be one of 8, 16, 32, 64, 128, 256, 512
	- nothing happens if the operand already has that size
	- MEM and OPMASK operands only get bit_ replaced
	- REG/XMM/YMM/ZMM operands are converted to the register kind that matches
	  the new size (REG for 8..64, XMM/YMM/ZMM for 128/256/512)
	- every other kind, and every impossible conversion, throws Error(ERR_CANT_CONVERT)
*/
inline void Operand::setBit(int bit)
{
// reject sizes that no operand can have
if (bit != 8 && bit != 16 && bit != 32 && bit != 64 && bit != 128 && bit != 256 && bit != 512) goto ERR;
if (isBit(bit)) return;
if (is(MEM | OPMASK)) {
bit_ = bit;
return;
}
if (is(REG | XMM | YMM | ZMM)) {
int idx = getIdx();
// err if converting ah, bh, ch, dh
if (isREG(8) && (4 <= idx && idx < 8) && !isExt8bit()) goto ERR;
Kind kind = REG;
switch (bit) {
case 8:
if (idx >= 16) goto ERR;
#ifdef XBYAK32
// in 32-bit mode only indices 0..3 can be converted to an 8-bit register
if (idx >= 4) goto ERR;
#else
// in 64-bit mode indices 4..7 are converted by tagging the index with EXT8BIT
if (4 <= idx && idx < 8) idx |= EXT8BIT;
#endif
break;
case 16:
case 32:
case 64:
// general purpose registers only exist for indices 0..15
if (idx >= 16) goto ERR;
break;
case 128: kind = XMM; break;
case 256: kind = YMM; break;
case 512: kind = ZMM; break;
}
idx_ = idx;
kind_ = kind;
bit_ = bit;
if (bit >= 128) return; // keep mask_ and rounding_
// a general purpose register carries no opmask or rounding attribute
mask_ = 0;
rounding_ = 0;
return;
}
ERR:
throw Error(ERR_CANT_CONVERT);
}
class Label;
struct Reg8;
@ -518,7 +601,8 @@ class Reg : public Operand {
public:
Reg() { }
Reg(int idx, Kind kind, int bit = 0, bool ext8bit = false) : Operand(idx, kind, bit, ext8bit) { }
Reg changeBit(int bit) const { return Reg(getIdx(), getKind(), bit, isExt8bit()); }
// convert to Reg8/Reg16/Reg32/Reg64/XMM/YMM/ZMM
Reg changeBit(int bit) const { Reg r(*this); r.setBit(bit); return r; }
uint8 getRexW() const { return isREG(64) ? 8 : 0; }
uint8 getRexR() const { return isExtIdx() ? 4 : 0; }
uint8 getRexX() const { return isExtIdx() ? 2 : 0; }
@ -617,6 +701,12 @@ struct RegRip {
const Label* label_;
bool isAddr_;
explicit RegRip(sint64 disp = 0, const Label* label = 0, bool isAddr = false) : disp_(disp), label_(label), isAddr_(isAddr) {}
// int overloads of displacement arithmetic: each returns a new RegRip whose disp_ is
// adjusted by disp while label_ and isAddr_ are copied from r
friend const RegRip operator+(const RegRip& r, int disp) {
return RegRip(r.disp_ + disp, r.label_, r.isAddr_);
}
friend const RegRip operator-(const RegRip& r, int disp) {
return RegRip(r.disp_ - disp, r.label_, r.isAddr_);
}
friend const RegRip operator+(const RegRip& r, sint64 disp) {
return RegRip(r.disp_ + disp, r.label_, r.isAddr_);
}
@ -636,34 +726,23 @@ struct RegRip {
inline Reg8 Reg::cvt8() const
{
const int idx = getIdx();
if (isBit(8)) return Reg8(idx, isExt8bit());
#ifdef XBYAK32
if (idx >= 4) throw Error(ERR_CANT_CONVERT);
#endif
return Reg8(idx, 4 <= idx && idx < 8);
Reg r = changeBit(8); return Reg8(r.getIdx(), r.isExt8bit());
}
inline Reg16 Reg::cvt16() const
{
const int idx = getIdx();
if (isBit(8) && (4 <= idx && idx < 8) && !isExt8bit()) throw Error(ERR_CANT_CONVERT);
return Reg16(idx);
return Reg16(changeBit(16).getIdx());
}
inline Reg32 Reg::cvt32() const
{
const int idx = getIdx();
if (isBit(8) && (4 <= idx && idx < 8) && !isExt8bit()) throw Error(ERR_CANT_CONVERT);
return Reg32(idx);
return Reg32(changeBit(32).getIdx());
}
#ifdef XBYAK64
inline Reg64 Reg::cvt64() const
{
const int idx = getIdx();
if (isBit(8) && (4 <= idx && idx < 8) && !isExt8bit()) throw Error(ERR_CANT_CONVERT);
return Reg64(idx);
return Reg64(changeBit(64).getIdx());
}
#endif
@ -786,6 +865,7 @@ inline RegExp operator-(const RegExp& e, size_t disp)
// 2nd parameter for constructor of CodeArray(maxSize, userPtr, alloc)
void *const AutoGrow = (void*)1; //-V566
void *const DontSetProtectRWE = (void*)2; //-V566
class CodeArray {
enum Type {
@ -825,6 +905,7 @@ protected:
size_t size_;
bool isCalledCalcJmpAddress_;
bool useProtect() const { return alloc_->useProtect(); }
/*
allocate new memory and copy old data to the new area
*/
@ -848,12 +929,16 @@ protected:
uint64 disp = i->getVal(top_);
rewrite(i->codeOffset, disp, i->jmpSize);
}
if (alloc_->useProtect() && !protect(top_, size_, true)) throw Error(ERR_CANT_PROTECT);
isCalledCalcJmpAddress_ = true;
}
public:
enum ProtectMode {
PROTECT_RW = 0, // read/write
PROTECT_RWE = 1, // read/write/exec
PROTECT_RE = 2 // read/exec
};
explicit CodeArray(size_t maxSize, void *userPtr = 0, Allocator *allocator = 0)
: type_(userPtr == AutoGrow ? AUTO_GROW : userPtr ? USER_BUF : ALLOC_BUF)
: type_(userPtr == AutoGrow ? AUTO_GROW : (userPtr == 0 || userPtr == DontSetProtectRWE) ? ALLOC_BUF : USER_BUF)
, alloc_(allocator ? allocator : (Allocator*)&defaultAllocator_)
, maxSize_(maxSize)
, top_(type_ == USER_BUF ? reinterpret_cast<uint8*>(userPtr) : alloc_->alloc((std::max<size_t>)(maxSize, 1)))
@ -861,7 +946,7 @@ public:
, isCalledCalcJmpAddress_(false)
{
if (maxSize_ > 0 && top_ == 0) throw Error(ERR_CANT_ALLOC);
if ((type_ == ALLOC_BUF && alloc_->useProtect()) && !protect(top_, maxSize, true)) {
if ((type_ == ALLOC_BUF && userPtr != DontSetProtectRWE && useProtect()) && !setProtectMode(PROTECT_RWE, false)) {
alloc_->free(top_);
throw Error(ERR_CANT_PROTECT);
}
@ -869,10 +954,19 @@ public:
virtual ~CodeArray()
{
if (isAllocType()) {
if (alloc_->useProtect()) protect(top_, maxSize_, false);
if (useProtect()) setProtectModeRW(false);
alloc_->free(top_);
}
}
/*
	change the protection of the whole code buffer (top_, maxSize_)
	@param mode [in] PROTECT_RW / PROTECT_RWE / PROTECT_RE
	@param throwException [in] what to do when protect() fails:
		true(throw Error(ERR_CANT_PROTECT)), false(return false)
	@return true(success), false(failure and throwException is false)
*/
bool setProtectMode(ProtectMode mode, bool throwException = true)
{
	if (protect(top_, maxSize_, mode)) return true;
	if (!throwException) return false;
	throw Error(ERR_CANT_PROTECT);
}
// shorthands for setProtectMode() with a fixed mode: RE = read/exec, RW = read/write
bool setProtectModeRE(bool throwException = true) { return setProtectMode(PROTECT_RE, throwException); }
bool setProtectModeRW(bool throwException = true) { return setProtectMode(PROTECT_RW, throwException); }
void resetSize()
{
size_ = 0;
@ -904,10 +998,10 @@ public:
void dq(uint64 code) { db(code, 8); }
const uint8 *getCode() const { return top_; }
template<class F>
const F getCode() const { return CastTo<F>(top_); }
const F getCode() const { return reinterpret_cast<F>(top_); }
const uint8 *getCurr() const { return &top_[size_]; }
template<class F>
const F getCurr() const { return CastTo<F>(&top_[size_]); }
const F getCurr() const { return reinterpret_cast<F>(&top_[size_]); }
size_t getSize() const { return size_; }
void setSize(size_t size)
{
@ -960,19 +1054,39 @@ public:
change exec permission of memory
@param addr [in] buffer address
@param size [in] buffer size
@param canExec [in] true(enable to exec), false(disable to exec)
@param protectMode [in] mode(RW/RWE/RE)
@return true(success), false(failure)
*/
static inline bool protect(const void *addr, size_t size, bool canExec)
static inline bool protect(const void *addr, size_t size, int protectMode)
{
#if defined(_WIN32)
const DWORD c_rw = PAGE_READWRITE;
const DWORD c_rwe = PAGE_EXECUTE_READWRITE;
const DWORD c_re = PAGE_EXECUTE_READ;
DWORD mode;
#else
const int c_rw = PROT_READ | PROT_WRITE;
const int c_rwe = PROT_READ | PROT_WRITE | PROT_EXEC;
const int c_re = PROT_READ | PROT_EXEC;
int mode;
#endif
switch (protectMode) {
case PROTECT_RW: mode = c_rw; break;
case PROTECT_RWE: mode = c_rwe; break;
case PROTECT_RE: mode = c_re; break;
default:
return false;
}
#if defined(_WIN32)
DWORD oldProtect;
return VirtualProtect(const_cast<void*>(addr), size, canExec ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE, &oldProtect) != 0;
return VirtualProtect(const_cast<void*>(addr), size, mode, &oldProtect) != 0;
#elif defined(__GNUC__)
size_t pageSize = sysconf(_SC_PAGESIZE);
size_t iaddr = reinterpret_cast<size_t>(addr);
size_t roundAddr = iaddr & ~(pageSize - static_cast<size_t>(1));
int mode = PROT_READ | PROT_WRITE | (canExec ? PROT_EXEC : 0);
#ifndef NDEBUG
// pageSize is a size_t, so it is printed with %zu (%zd expects the signed counterpart)
if (pageSize != 4096) fprintf(stderr, "large page(%zu) is used. not tested enough.\n", pageSize);
#endif
return mprotect(reinterpret_cast<void*>(roundAddr), size + (iaddr - roundAddr), mode) == 0;
#else
return true;
@ -999,46 +1113,43 @@ public:
M_ripAddr
};
Address(uint32 sizeBit, bool broadcast, const RegExp& e)
: Operand(0, MEM, sizeBit), e_(e), label_(0), mode_(M_ModRM), permitVsib_(false), broadcast_(broadcast)
: Operand(0, MEM, sizeBit), e_(e), label_(0), mode_(M_ModRM), broadcast_(broadcast)
{
e_.verify();
}
#ifdef XBYAK64
explicit Address(size_t disp)
: Operand(0, MEM, 64), e_(disp), label_(0), mode_(M_64bitDisp), permitVsib_(false), broadcast_(false){ }
: Operand(0, MEM, 64), e_(disp), label_(0), mode_(M_64bitDisp), broadcast_(false){ }
Address(uint32 sizeBit, bool broadcast, const RegRip& addr)
: Operand(0, MEM, sizeBit), e_(addr.disp_), label_(addr.label_), mode_(addr.isAddr_ ? M_ripAddr : M_rip), permitVsib_(false), broadcast_(broadcast) { }
: Operand(0, MEM, sizeBit), e_(addr.disp_), label_(addr.label_), mode_(addr.isAddr_ ? M_ripAddr : M_rip), broadcast_(broadcast) { }
#endif
void permitVsib() const { permitVsib_ = true; }
RegExp getRegExp(bool optimize = true) const
{
return optimize ? e_.optimize() : e_;
}
Mode getMode() const { return mode_; }
bool is32bit() const { verify(); return e_.getBase().getBit() == 32 || e_.getIndex().getBit() == 32; }
bool isOnlyDisp() const { verify(); return !e_.getBase().getBit() && !e_.getIndex().getBit(); } // for mov eax
size_t getDisp() const { verify(); return e_.getDisp(); }
bool is32bit() const { return e_.getBase().getBit() == 32 || e_.getIndex().getBit() == 32; }
bool isOnlyDisp() const { return !e_.getBase().getBit() && !e_.getIndex().getBit(); } // for mov eax
size_t getDisp() const { return e_.getDisp(); }
uint8 getRex() const
{
verify();
if (mode_ != M_ModRM) return 0;
return getRegExp().getRex();
}
bool is64bitDisp() const { verify(); return mode_ == M_64bitDisp; } // for moffset
bool is64bitDisp() const { return mode_ == M_64bitDisp; } // for moffset
bool isBroadcast() const { return broadcast_; }
const Label* getLabel() const { return label_; }
bool operator==(const Address& rhs) const
{
return getBit() == rhs.getBit() && e_ == rhs.e_ && label_ == rhs.label_ && mode_ == rhs.mode_ && permitVsib_ == rhs.permitVsib_ && broadcast_ == rhs.broadcast_;
return getBit() == rhs.getBit() && e_ == rhs.e_ && label_ == rhs.label_ && mode_ == rhs.mode_ && broadcast_ == rhs.broadcast_;
}
bool operator!=(const Address& rhs) const { return !operator==(rhs); }
bool isVsib() const { return e_.isVsib(); }
private:
RegExp e_;
const Label* label_;
Mode mode_;
mutable bool permitVsib_;
bool broadcast_;
void verify() const { if (e_.isVsib() && !permitVsib_) throw Error(ERR_BAD_VSIB_ADDRESSING); }
};
inline const Address& Operand::getAddress() const
@ -1096,6 +1207,7 @@ public:
Label(const Label& rhs);
Label& operator=(const Label& rhs);
~Label();
void clear() { mgr = 0; id = 0; }
int getId() const { return id; }
const uint8 *getAddress() const;
@ -1134,6 +1246,7 @@ class LabelManager {
};
typedef XBYAK_STD_UNORDERED_MAP<int, ClabelVal> ClabelDefList;
typedef XBYAK_STD_UNORDERED_MULTIMAP<int, const JmpLabel> ClabelUndefList;
typedef XBYAK_STD_UNORDERED_SET<Label*> LabelPtrList;
CodeArray *base_;
// global : stateList_.front(), local : stateList_.back()
@ -1141,6 +1254,7 @@ class LabelManager {
mutable int labelId_;
ClabelDefList clabelDefList_;
ClabelUndefList clabelUndefList_;
LabelPtrList labelPtrList_;
int getId(const Label& label) const
{
@ -1189,9 +1303,14 @@ class LabelManager {
return true;
}
friend class Label;
void incRefCount(int id) { clabelDefList_[id].refCount++; }
void decRefCount(int id)
// increment the reference count stored for label id and register the Label object
// in labelPtrList_, the list resetLabelPtrList() walks to detach labels
void incRefCount(int id, Label *label)
{
clabelDefList_[id].refCount++;
labelPtrList_.insert(label);
}
void decRefCount(int id, Label *label)
{
labelPtrList_.erase(label);
ClabelDefList::iterator i = clabelDefList_.find(id);
if (i == clabelDefList_.end()) return;
if (i->second.refCount == 1) {
@ -1210,11 +1329,23 @@ class LabelManager {
#endif
return !list.empty();
}
// detach all labels linked to LabelManager
void resetLabelPtrList()
{
for (LabelPtrList::iterator i = labelPtrList_.begin(), ie = labelPtrList_.end(); i != ie; ++i) {
(*i)->clear();
}
labelPtrList_.clear();
}
public:
LabelManager()
{
reset();
}
// detach every Label that is still registered, so that none of them keeps
// pointing at this manager after it is gone
~LabelManager()
{
resetLabelPtrList();
}
void reset()
{
base_ = 0;
@ -1224,6 +1355,7 @@ public:
stateList_.push_back(SlabelState());
clabelDefList_.clear();
clabelUndefList_.clear();
resetLabelPtrList();
}
void enterLocal()
{
@ -1256,10 +1388,11 @@ public:
SlabelState& st = *label.c_str() == '.' ? stateList_.back() : stateList_.front();
define_inner(st.defList, st.undefList, label, base_->getSize());
}
void defineClabel(const Label& label)
void defineClabel(Label& label)
{
define_inner(clabelDefList_, clabelUndefList_, getId(label), base_->getSize());
label.mgr = this;
labelPtrList_.insert(&label);
}
void assign(Label& dst, const Label& src)
{
@ -1267,6 +1400,7 @@ public:
if (i == clabelDefList_.end()) throw Error(ERR_LABEL_ISNOT_SET_BY_L);
define_inner(clabelDefList_, clabelUndefList_, dst.id, i->second.offset);
dst.mgr = this;
labelPtrList_.insert(&dst);
}
bool getOffset(size_t *offset, std::string& label) const
{
@ -1314,19 +1448,19 @@ inline Label::Label(const Label& rhs)
{
id = rhs.id;
mgr = rhs.mgr;
if (mgr) mgr->incRefCount(id);
if (mgr) mgr->incRefCount(id, this);
}
inline Label& Label::operator=(const Label& rhs)
{
if (id) throw Error(ERR_LABEL_IS_ALREADY_SET_BY_L);
id = rhs.id;
mgr = rhs.mgr;
if (mgr) mgr->incRefCount(id);
if (mgr) mgr->incRefCount(id, this);
return *this;
}
inline Label::~Label()
{
if (id && mgr) mgr->decRefCount(id);
if (id && mgr) mgr->decRefCount(id, this);
}
inline const uint8* Label::getAddress() const
{
@ -1443,6 +1577,8 @@ private:
T_B32 = 1 << 26, // m32bcst
T_B64 = 1 << 27, // m64bcst
T_M_K = 1 << 28, // mem{k}
T_VSIB = 1 << 29,
T_MEM_EVEX = 1 << 30, // use evex if mem
T_XXX
};
void vex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false)
@ -1480,7 +1616,7 @@ private:
if ((a > 0 && a != v) + (b > 0 && b != v) + (c > 0 && c != v) > 0) return Error(err);
return v;
}
int evex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false, bool b = false, int aaa = 0, uint32 VL = 0)
int evex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false, bool b = false, int aaa = 0, uint32 VL = 0, bool Hi16Vidx = false)
{
if (!(type & (T_EVEX | T_MUST_EVEX))) throw Error(ERR_EVEX_IS_INVALID);
int w = (type & T_EW1) ? 1 : 0;
@ -1523,7 +1659,7 @@ private:
}
}
}
bool Vp = !(v ? v->isExtIdx2() : 0);
bool Vp = !((v ? v->isExtIdx2() : 0) | Hi16Vidx);
bool z = reg.hasZero() || base.hasZero() || (v ? v->hasZero() : false);
if (aaa == 0) aaa = verifyDuplicate(base.getOpmaskIdx(), reg.getOpmaskIdx(), (v ? v->getOpmaskIdx() : 0), ERR_OPMASK_IS_ALREADY_SET);
db(0x62);
@ -1607,6 +1743,14 @@ private:
db(code0 | (reg.isBit(8) ? 0 : 1)); if (code1 != NONE) db(code1); if (code2 != NONE) db(code2);
opAddr(addr, reg.getIdx(), immSize);
}
/*
	emit rex prefix (if needed), code0, optional code1, then the address bytes for (reg, addr)
	@param addr [in] memory operand; a 64-bit displacement is rejected with ERR_CANT_USE_64BIT_DISP
	@param reg [in] register operand; an 8-bit register is rejected with ERR_BAD_SIZE_OF_REGISTER
	@param code0 [in] first opcode byte
	@param code1 [in] second opcode byte, skipped when NONE
*/
void opLoadSeg(const Address& addr, const Reg& reg, int code0, int code1 = NONE)
{
	if (addr.is64bitDisp()) throw Error(ERR_CANT_USE_64BIT_DISP);
	if (reg.isBit(8)) throw Error(ERR_BAD_SIZE_OF_REGISTER);
	rex(addr, reg);
	db(code0);
	if (code1 != NONE) {
		db(code1);
	}
	const int regField = reg.getIdx();
	opAddr(addr, regField);
}
void opMIB(const Address& addr, const Reg& reg, int code0, int code1)
{
if (addr.is64bitDisp()) throw Error(ERR_CANT_USE_64BIT_DISP);
@ -1631,6 +1775,7 @@ private:
db(longCode); dd(disp - longJmpSize);
}
}
// true if a jump of this type is encoded as near: either T_NEAR was requested explicitly,
// or the type is T_AUTO and isDefaultJmpNEAR_ is set
bool isNEAR(LabelType type) const { return type == T_NEAR || (type == T_AUTO && isDefaultJmpNEAR_); }
template<class T>
void opJmp(T& label, LabelType type, uint8 shortCode, uint8 longCode, uint8 longPref)
{
@ -1640,7 +1785,7 @@ private:
makeJmp(inner::VerifyInInt32(offset - size_), type, shortCode, longCode, longPref);
} else {
int jmpSize = 0;
if (type == T_NEAR) {
if (isNEAR(type)) {
jmpSize = 4;
if (longPref) db(longPref);
db(longCode); dd(0);
@ -1655,7 +1800,7 @@ private:
void opJmpAbs(const void *addr, LabelType type, uint8 shortCode, uint8 longCode, uint8 longPref = 0)
{
if (isAutoGrow()) {
if (type != T_NEAR) throw Error(ERR_ONLY_T_NEAR_IS_SUPPORTED_IN_AUTO_GROW);
if (!isNEAR(type)) throw Error(ERR_ONLY_T_NEAR_IS_SUPPORTED_IN_AUTO_GROW);
if (size_ + 16 >= maxSize_) growMemory();
if (longPref) db(longPref);
db(longCode);
@ -1669,8 +1814,9 @@ private:
// reg is reg field of ModRM
// immSize is the size for immediate value
// disp8N = 0(normal), disp8N = 1(force disp32), disp8N = {2, 4, 8} ; compressed displacement
void opAddr(const Address &addr, int reg, int immSize = 0, int disp8N = 0)
void opAddr(const Address &addr, int reg, int immSize = 0, int disp8N = 0, bool permitVisb = false)
{
if (!permitVisb && addr.isVsib()) throw Error(ERR_BAD_VSIB_ADDRESSING);
if (addr.getMode() == Address::M_ModRM) {
setSIB(addr.getRegExp(), reg, disp8N);
} else if (addr.getMode() == Address::M_rip || addr.getMode() == Address::M_ripAddr) {
@ -1914,10 +2060,11 @@ private:
const Address& addr = op2.getAddress();
const RegExp& regExp = addr.getRegExp();
const Reg& base = regExp.getBase();
const Reg& index = regExp.getIndex();
if (BIT == 64 && addr.is32bit()) db(0x67);
int disp8N = 0;
bool x = regExp.getIndex().isExtIdx();
if ((type & T_MUST_EVEX) || r.hasEvex() || (p1 && p1->hasEvex()) || addr.isBroadcast() || addr.getOpmaskIdx()) {
bool x = index.isExtIdx();
if ((type & (T_MUST_EVEX|T_MEM_EVEX)) || r.hasEvex() || (p1 && p1->hasEvex()) || addr.isBroadcast() || addr.getOpmaskIdx()) {
int aaa = addr.getOpmaskIdx();
if (aaa && !(type & T_M_K)) throw Error(ERR_INVALID_OPMASK_WITH_MEMORY);
bool b = false;
@ -1925,12 +2072,12 @@ private:
if (!(type & (T_B32 | T_B64))) throw Error(ERR_INVALID_BROADCAST);
b = true;
}
int VL = regExp.isVsib() ? regExp.getIndex().getBit() : 0;
disp8N = evex(r, base, p1, type, code, x, b, aaa, VL);
int VL = regExp.isVsib() ? index.getBit() : 0;
disp8N = evex(r, base, p1, type, code, x, b, aaa, VL, index.isExtIdx2());
} else {
vex(r, base, p1, type, code, x);
}
opAddr(addr, r.getIdx(), (imm8 != NONE) ? 1 : 0, disp8N);
opAddr(addr, r.getIdx(), (imm8 != NONE) ? 1 : 0, disp8N, (type & T_VSIB) != 0);
} else {
const Reg& base = op2.getReg();
if ((type & T_MUST_EVEX) || r.hasEvex() || (p1 && p1->hasEvex()) || base.hasEvex()) {
@ -2031,8 +2178,7 @@ private:
}
if (!isOK) throw Error(ERR_BAD_VSIB_ADDRESSING);
}
addr.permitVsib();
opAVX_X_X_XM(isAddrYMM ? Ymm(x1.getIdx()) : x1, isAddrYMM ? Ymm(x2.getIdx()) : x2, addr, type | T_YMM, code);
opAVX_X_X_XM(isAddrYMM ? Ymm(x1.getIdx()) : x1, isAddrYMM ? Ymm(x2.getIdx()) : x2, addr, type, code);
}
enum {
xx_yy_zz = 0,
@ -2056,7 +2202,6 @@ private:
{
if (x.hasZero()) throw Error(ERR_INVALID_ZERO);
checkGather2(x, addr.getRegExp().getIndex(), mode);
addr.permitVsib();
opVex(x, 0, addr, type, code);
}
/*
@ -2076,9 +2221,30 @@ private:
{
if (addr.hasZero()) throw Error(ERR_INVALID_ZERO);
if (addr.getRegExp().getIndex().getKind() != kind) throw Error(ERR_BAD_VSIB_ADDRESSING);
addr.permitVsib();
opVex(x, 0, addr, type, code);
}
/*
	Shared encoder for in_(a, d) and out_(d, a).
	Accepted only when a has index Operand::AL and d is 16 bits wide with index Operand::DX.
	The width of a selects the bytes written:
	  8 bit  : code
	  16 bit : 0x66, code + 1
	  32 bit : code + 1
	Every other combination throws ERR_BAD_COMBINATION.
*/
void opInOut(const Reg& a, const Reg& d, uint8 code)
{
	if (a.getIdx() != Operand::AL || d.getIdx() != Operand::DX || d.getBit() != 16) {
		throw Error(ERR_BAD_COMBINATION);
	}
	const int width = a.getBit();
	if (width == 8) {
		db(code);
		return;
	}
	if (width == 16) {
		db(0x66);
		db(code + 1);
		return;
	}
	if (width == 32) {
		db(code + 1);
		return;
	}
	throw Error(ERR_BAD_COMBINATION);
}
/*
	Shared encoder for in_(a, v) and out_(v, a), where v is an 8-bit immediate.
	Accepted only when a has index Operand::AL.
	The width of a selects the bytes written, each followed by v:
	  8 bit  : code
	  16 bit : 0x66, code + 1
	  32 bit : code + 1
	Every other combination throws ERR_BAD_COMBINATION.
*/
void opInOut(const Reg& a, uint8 code, uint8 v)
{
	if (a.getIdx() != Operand::AL) {
		throw Error(ERR_BAD_COMBINATION);
	}
	const int width = a.getBit();
	if (width == 8) {
		db(code);
		db(v);
		return;
	}
	if (width == 16) {
		db(0x66);
		db(code + 1);
		db(v);
		return;
	}
	if (width == 32) {
		db(code + 1);
		db(v);
		return;
	}
	throw Error(ERR_BAD_COMBINATION);
}
public:
unsigned int getVersion() const { return VERSION; }
using CodeArray::db;
@ -2088,7 +2254,7 @@ public:
const Zmm zmm0, zmm1, zmm2, zmm3, zmm4, zmm5, zmm6, zmm7;
const Xmm &xm0, &xm1, &xm2, &xm3, &xm4, &xm5, &xm6, &xm7;
const Ymm &ym0, &ym1, &ym2, &ym3, &ym4, &ym5, &ym6, &ym7;
const Ymm &zm0, &zm1, &zm2, &zm3, &zm4, &zm5, &zm6, &zm7;
const Zmm &zm0, &zm1, &zm2, &zm3, &zm4, &zm5, &zm6, &zm7;
const Reg32 eax, ecx, edx, ebx, esp, ebp, esi, edi;
const Reg16 ax, cx, dx, bx, sp, bp, si, di;
const Reg8 al, cl, dl, bl, ah, ch, dh, bh;
@ -2128,8 +2294,12 @@ public:
#ifndef XBYAK_DISABLE_SEGMENT
const Segment es, cs, ss, ds, fs, gs;
#endif
private:
bool isDefaultJmpNEAR_;
public:
void L(const std::string& label) { labelMgr_.defineSlabel(label); }
void L(const Label& label) { labelMgr_.defineClabel(label); }
void L(Label& label) { labelMgr_.defineClabel(label); }
Label L() { Label label; L(label); return label; }
void inLocalLabel() { labelMgr_.enterLocal(); }
void outLocalLabel() { labelMgr_.leaveLocal(); }
/*
@ -2146,6 +2316,8 @@ public:
void putL(std::string label) { putL_inner(label); }
void putL(const Label& label) { putL_inner(label); }
// Select how T_AUTO is resolved for a `jmp` to an undefined label:
// with isNear == true, isNEAR() treats T_AUTO the same as T_NEAR.
void setDefaultJmpNEAR(bool isNear)
{
	isDefaultJmpNEAR_ = isNear;
}
void jmp(const Operand& op) { opR_ModM(op, BIT, 4, 0xFF, NONE, NONE, true); }
void jmp(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0xEB, 0xE9, 0); }
void jmp(const char *label, LabelType type = T_AUTO) { jmp(std::string(label), type); }
@ -2160,7 +2332,7 @@ public:
// call(function pointer)
#ifdef XBYAK_VARIADIC_TEMPLATE
template<class Ret, class... Params>
void call(Ret(*func)(Params...)) { call(CastTo<const void*>(func)); }
void call(Ret(*func)(Params...)) { call(reinterpret_cast<const void*>(func)); }
#endif
void call(const void *addr) { opJmpAbs(addr, T_NEAR, 0, 0xE8); }
@ -2404,6 +2576,7 @@ public:
#ifndef XBYAK_DISABLE_SEGMENT
, es(Segment::es), cs(Segment::cs), ss(Segment::ss), ds(Segment::ds), fs(Segment::fs), gs(Segment::gs)
#endif
, isDefaultJmpNEAR_(false)
{
labelMgr_.set(this);
}
@ -2418,11 +2591,16 @@ public:
You MUST call ready() to complete code generation when AutoGrow mode is used.
In the other modes the call is optional: there ready() only checks hasUndefinedLabel()
and throws ERR_LABEL_IS_NOT_FOUND when it is true.
*/
void ready()
void ready(ProtectMode mode = PROTECT_RWE)
{
if (hasUndefinedLabel()) throw Error(ERR_LABEL_IS_NOT_FOUND);
if (isAutoGrow()) calcJmpAddress();
if (isAutoGrow()) {
calcJmpAddress();
if (useProtect()) setProtectMode(mode);
}
}
// Convenience form of ready() that passes PROTECT_RE (read/exec) as the protect mode.
void readyRE()
{
	ready(PROTECT_RE);
}
#ifdef XBYAK_TEST
void dump(bool doClear = true)
{

View File

@ -1,4 +1,4 @@
const char *getVersionString() const { return "5.65"; }
const char *getVersionString() const { return "5.891"; }
void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); }
void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }
@ -58,7 +58,9 @@ void cdq() { db(0x99); }
void clc() { db(0xF8); }
void cld() { db(0xFC); }
void clflush(const Address& addr) { opModM(addr, Reg32(7), 0x0F, 0xAE); }
void clflushopt(const Address& addr) { db(0x66); opModM(addr, Reg32(7), 0x0F, 0xAE); }
void cli() { db(0xFA); }
void clzero() { db(0x0F); db(0x01); db(0xFC); }
void cmc() { db(0xF5); }
void cmova(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 7); }//-V524
void cmovae(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 3); }//-V524
@ -122,8 +124,11 @@ void cmpordsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 7); }
void cmpordss(const Xmm& x, const Operand& op) { cmpss(x, op, 7); }
void cmppd(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0x66, isXMM_XMMorMEM, imm8); }
void cmpps(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0x100, isXMM_XMMorMEM, imm8); }
void cmpsb() { db(0xA6); }
void cmpsd() { db(0xA7); }
void cmpsd(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0xF2, isXMM_XMMorMEM, imm8); }
void cmpss(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0xF3, isXMM_XMMorMEM, imm8); }
void cmpsw() { db(0x66); db(0xA7); }
void cmpunordpd(const Xmm& x, const Operand& op) { cmppd(x, op, 3); }
void cmpunordps(const Xmm& x, const Operand& op) { cmpps(x, op, 3); }
void cmpunordsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 3); }
@ -167,6 +172,7 @@ void divss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5E, 0xF3, isXMM
void dppd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x41, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
void dpps(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x40, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
void emms() { db(0x0F); db(0x77); }
void enter(uint16 x, uint8 y) { db(0xC8); dw(x); db(y); }
void extractps(const Operand& op, const Xmm& xmm, uint8 imm) { opExt(op, xmm, 0x17, imm); }
void f2xm1() { db(0xD9); db(0xF0); }
void fabs() { db(0xD9); db(0xE1); }
@ -176,7 +182,10 @@ void fadd(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8C0, 0xDCC
void faddp() { db(0xDE); db(0xC1); }
void faddp(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEC0); }
void faddp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEC0); }
void fbld(const Address& addr) { opModM(addr, Reg32(4), 0xDF, 0x100); }
void fbstp(const Address& addr) { opModM(addr, Reg32(6), 0xDF, 0x100); }
void fchs() { db(0xD9); db(0xE0); }
void fclex() { db(0x9B); db(0xDB); db(0xE2); }
void fcmovb(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDAC0, 0x00C0); }
void fcmovb(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDAC0, 0x00C0); }
void fcmovbe(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDAD0, 0x00D0); }
@ -237,6 +246,7 @@ void fld(const Address& addr) { opFpuMem(addr, 0x00, 0xD9, 0xDD, 0, 0); }
void fld(const Fpu& reg) { opFpu(reg, 0xD9, 0xC0); }
void fld1() { db(0xD9); db(0xE8); }
void fldcw(const Address& addr) { opModM(addr, Reg32(5), 0xD9, 0x100); }
void fldenv(const Address& addr) { opModM(addr, Reg32(4), 0xD9, 0x100); }
void fldl2e() { db(0xD9); db(0xEA); }
void fldl2t() { db(0xD9); db(0xE9); }
void fldlg2() { db(0xD9); db(0xEC); }
@ -249,22 +259,33 @@ void fmul(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8C8, 0xDCC
void fmulp() { db(0xDE); db(0xC9); }
void fmulp(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEC8); }
void fmulp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEC8); }
void fnclex() { db(0xDB); db(0xE2); }
void fninit() { db(0xDB); db(0xE3); }
void fnop() { db(0xD9); db(0xD0); }
void fnsave(const Address& addr) { opModM(addr, Reg32(6), 0xDD, 0x100); }
void fnstcw(const Address& addr) { opModM(addr, Reg32(7), 0xD9, 0x100); }
void fnstenv(const Address& addr) { opModM(addr, Reg32(6), 0xD9, 0x100); }
void fnstsw(const Address& addr) { opModM(addr, Reg32(7), 0xDD, 0x100); }
void fnstsw(const Reg16& r) { if (r.getIdx() != Operand::AX) throw Error(ERR_BAD_PARAMETER); db(0xDF); db(0xE0); }
void fpatan() { db(0xD9); db(0xF3); }
void fprem() { db(0xD9); db(0xF8); }
void fprem1() { db(0xD9); db(0xF5); }
void fptan() { db(0xD9); db(0xF2); }
void frndint() { db(0xD9); db(0xFC); }
void frstor(const Address& addr) { opModM(addr, Reg32(4), 0xDD, 0x100); }
void fsave(const Address& addr) { db(0x9B); opModM(addr, Reg32(6), 0xDD, 0x100); }
void fscale() { db(0xD9); db(0xFD); }
void fsin() { db(0xD9); db(0xFE); }
void fsincos() { db(0xD9); db(0xFB); }
void fsqrt() { db(0xD9); db(0xFA); }
void fst(const Address& addr) { opFpuMem(addr, 0x00, 0xD9, 0xDD, 2, 0); }
void fst(const Fpu& reg) { opFpu(reg, 0xDD, 0xD0); }
void fstcw(const Address& addr) { db(0x9B); opModM(addr, Reg32(7), 0xD9, NONE); }
void fstcw(const Address& addr) { db(0x9B); opModM(addr, Reg32(7), 0xD9, 0x100); }
void fstenv(const Address& addr) { db(0x9B); opModM(addr, Reg32(6), 0xD9, 0x100); }
void fstp(const Address& addr) { opFpuMem(addr, 0x00, 0xD9, 0xDD, 3, 0); }
void fstp(const Fpu& reg) { opFpu(reg, 0xDD, 0xD8); }
void fstsw(const Address& addr) { db(0x9B); opModM(addr, Reg32(7), 0xDD, 0x100); }
void fstsw(const Reg16& r) { if (r.getIdx() != Operand::AX) throw Error(ERR_BAD_PARAMETER); db(0x9B); db(0xDF); db(0xE0); }
void fsub(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 4, 0); }
void fsub(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xD8E0, 0xDCE8); }
void fsub(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8E0, 0xDCE8); }
@ -291,6 +312,7 @@ void fwait() { db(0x9B); }
void fxam() { db(0xD9); db(0xE5); }
void fxch() { db(0xD9); db(0xC9); }
void fxch(const Fpu& reg) { opFpu(reg, 0xD9, 0xC8); }
void fxrstor(const Address& addr) { opModM(addr, Reg32(1), 0x0F, 0xAE); }
void fxtract() { db(0xD9); db(0xF4); }
void fyl2x() { db(0xD9); db(0xF1); }
void fyl2xp1() { db(0xD9); db(0xF9); }
@ -303,8 +325,12 @@ void hsubpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7D, 0x66, isXM
void hsubps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7D, 0xF2, isXMM_XMMorMEM); }
void idiv(const Operand& op) { opR_ModM(op, 0, 7, 0xF6); }
void imul(const Operand& op) { opR_ModM(op, 0, 5, 0xF6); }
// in_(a, d): register-port form, base opcode 0xEC. Operand validation and the width-dependent
// encoding are done by opInOut (a must have index Operand::AL, d must be the 16-bit Operand::DX register).
void in_(const Reg& a, const Reg& d) { opInOut(a, d, 0xEC); }
// in_(a, v): immediate form, base opcode 0xE4, with v written after the opcode by opInOut.
void in_(const Reg& a, uint8 v) { opInOut(a, 0xE4, v); }
void inc(const Operand& op) { opIncDec(op, 0x40, 0); }
void insertps(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x21, 0x66, isXMM_XMMorMEM, imm, 0x3A); }
void int3() { db(0xCC); }
void int_(uint8 x) { db(0xCD); db(x); }
void ja(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x77, 0x87, 0x0F); }//-V524
void ja(const char *label, LabelType type = T_AUTO) { ja(std::string(label), type); }//-V524
void ja(const void *addr) { opJmpAbs(addr, T_NEAR, 0x77, 0x87, 0x0F); }//-V524
@ -429,8 +455,24 @@ void lahf() { db(0x9F); }
void lddqu(const Xmm& xmm, const Address& addr) { db(0xF2); opModM(addr, xmm, 0x0F, 0xF0); }
void ldmxcsr(const Address& addr) { opModM(addr, Reg32(2), 0x0F, 0xAE); }
void lea(const Reg& reg, const Address& addr) { if (!reg.isBit(16 | i32e)) throw Error(ERR_BAD_SIZE_OF_REGISTER); opModM(addr, reg, 0x8D); }
void leave() { db(0xC9); }
void lfence() { db(0x0F); db(0xAE); db(0xE8); }
// lfs reg, [addr]: two opcode bytes 0F B4, encoded through opLoadSeg (which rejects 8-bit reg and 64-bit displacement).
void lfs(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0x0F, 0xB4); }
// lgs reg, [addr]: two opcode bytes 0F B5, encoded through opLoadSeg.
void lgs(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0x0F, 0xB5); }
void lock() { db(0xF0); }
void lodsb() { db(0xAC); }
void lodsd() { db(0xAD); }
void lodsw() { db(0x66); db(0xAD); }
// loop: short code 0xE2. Always requested as T_SHORT; the long code and long prefix are passed as 0.
void loop(const Label& label) { opJmp(label, T_SHORT, 0xE2, 0, 0); }
// C-string overload: forwards to the std::string overload.
void loop(const char *label) { loop(std::string(label)); }
void loop(std::string label) { opJmp(label, T_SHORT, 0xE2, 0, 0); }
// loope: short code 0xE1. Always requested as T_SHORT; the long code and long prefix are passed as 0.
void loope(const Label& label) { opJmp(label, T_SHORT, 0xE1, 0, 0); }
// C-string overload: forwards to the std::string overload.
void loope(const char *label) { loope(std::string(label)); }
void loope(std::string label) { opJmp(label, T_SHORT, 0xE1, 0, 0); }
// loopne: short code 0xE0. Always requested as T_SHORT; the long code and long prefix are passed as 0.
void loopne(const Label& label) { opJmp(label, T_SHORT, 0xE0, 0, 0); }
// C-string overload: forwards to the std::string overload.
void loopne(const char *label) { loopne(std::string(label)); }
void loopne(std::string label) { opJmp(label, T_SHORT, 0xE0, 0, 0); }
// lss reg, [addr]: two opcode bytes 0F B2, encoded through opLoadSeg (which rejects 8-bit reg and 64-bit displacement).
void lss(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0x0F, 0xB2); }
void lzcnt(const Reg&reg, const Operand& op) { opSp1(reg, op, 0xF3, 0x0F, 0xBD); }
void maskmovdqu(const Xmm& reg1, const Xmm& reg2) { db(0x66); opModR(reg1, reg2, 0x0F, 0xF7); }
void maskmovq(const Mmx& reg1, const Mmx& reg2) { if (!reg1.isMMX() || !reg2.isMMX()) throw Error(ERR_BAD_COMBINATION); opModR(reg1, reg2, 0x0F, 0xF7); }
@ -444,6 +486,7 @@ void minps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5D, 0x100, isXM
void minsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5D, 0xF2, isXMM_XMMorMEM); }
void minss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5D, 0xF3, isXMM_XMMorMEM); }
void monitor() { db(0x0F); db(0x01); db(0xC8); }
void monitorx() { db(0x0F); db(0x01); db(0xFA); }
void movapd(const Address& addr, const Xmm& xmm) { db(0x66); opModM(addr, xmm, 0x0F, 0x29); }
void movapd(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x28, 0x66); }
void movaps(const Address& addr, const Xmm& xmm) { opModM(addr, xmm, 0x0F, 0x29); }
@ -500,12 +543,18 @@ void mulsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x59, 0xF2, isXMM
void mulss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x59, 0xF3, isXMM_XMMorMEM); }
void mulx(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opGpr(r1, r2, op, T_F2 | T_0F38, 0xf6, true); }
void mwait() { db(0x0F); db(0x01); db(0xC9); }
void mwaitx() { db(0x0F); db(0x01); db(0xFB); }
void neg(const Operand& op) { opR_ModM(op, 0, 3, 0xF6); }
void not_(const Operand& op) { opR_ModM(op, 0, 2, 0xF6); }
void or_(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x08, 1); }
void or_(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x08); }
void orpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x56, 0x66, isXMM_XMMorMEM); }
void orps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x56, 0x100, isXMM_XMMorMEM); }
// out_(d, a): register-port form, base opcode 0xEE. Note the argument order: the port register d comes first
// here, but opInOut still receives (a, d) and applies the same checks as for in_.
void out_(const Reg& d, const Reg& a) { opInOut(a, d, 0xEE); }
// out_(v, a): immediate form, base opcode 0xE6, with v written after the opcode by opInOut.
void out_(uint8 v, const Reg& a) { opInOut(a, 0xE6, v); }
void outsb() { db(0x6E); }
void outsd() { db(0x6F); }
void outsw() { db(0x66); db(0x6F); }
void pabsb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x1C, 0x66, NONE, 0x38); }
void pabsd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x1E, 0x66, NONE, 0x38); }
void pabsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x1D, 0x66, NONE, 0x38); }
@ -663,6 +712,10 @@ void rdseed(const Reg& r) { if (r.isBit(8)) throw Error(ERR_BAD_SIZE_OF_REGISTER
void rdtsc() { db(0x0F); db(0x31); }
void rdtscp() { db(0x0F); db(0x01); db(0xF9); }
// Prefix emitters: each writes exactly one byte and nothing else.
// rep, repe and repz all write 0xF3; repne and repnz both write 0xF2.
void rep() { db(0xF3); }
void repe() { db(0xF3); }
void repne() { db(0xF2); }
void repnz() { db(0xF2); }
void repz() { db(0xF3); }
// ret: with imm == 0 writes the single byte 0xC3;
// otherwise writes 0xC2 followed by imm emitted through dw().
void ret(int imm = 0)
{
	if (imm == 0) {
		db(0xC3);
		return;
	}
	db(0xC2);
	dw(imm);
}
void rol(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 0); }
void rol(const Operand& op, int imm) { opShift(op, imm, 0); }
@ -683,6 +736,9 @@ void sar(const Operand& op, int imm) { opShift(op, imm, 7); }
void sarx(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, T_F3 | T_0F38, 0xf7, false); }
void sbb(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x18, 3); }
void sbb(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x18); }
void scasb() { db(0xAE); }
void scasd() { db(0xAF); }
void scasw() { db(0x66); db(0xAF); }
void seta(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 7); }//-V524
void setae(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 3); }//-V524
void setb(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 2); }//-V524
@ -742,12 +798,17 @@ void stc() { db(0xF9); }
void std() { db(0xFD); }
void sti() { db(0xFB); }
void stmxcsr(const Address& addr) { opModM(addr, Reg32(3), 0x0F, 0xAE); }
void stosb() { db(0xAA); }
void stosd() { db(0xAB); }
void stosw() { db(0x66); db(0xAB); }
void sub(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x28, 5); }
void sub(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x28); }
void subpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0x66, isXMM_XMMorMEM); }
void subps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0x100, isXMM_XMMorMEM); }
void subsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0xF2, isXMM_XMMorMEM); }
void subss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0xF3, isXMM_XMMorMEM); }
void sysenter() { db(0x0F); db(0x34); }
void sysexit() { db(0x0F); db(0x35); }
void tzcnt(const Reg&reg, const Operand& op) { opSp1(reg, op, 0xF3, 0x0F, 0xBC); }
void ucomisd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x2E, 0x66, isXMM_XMMorMEM); }
void ucomiss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x2E, 0x100, isXMM_XMMorMEM); }
@ -1001,10 +1062,10 @@ void vfnmsub231pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_X
void vfnmsub231ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0xBE); }
void vfnmsub231sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_W1 | T_EW1 | T_EVEX | T_ER_X, 0xBF); }
void vfnmsub231ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_W0 | T_EW0 | T_EVEX | T_ER_X, 0xBF); }
void vgatherdpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W1, 0x92, 0); }
void vgatherdps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W0, 0x92, 1); }
void vgatherqpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W1, 0x93, 1); }
void vgatherqps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W0, 0x93, 2); }
void vgatherdpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W1, 0x92, 0); }
void vgatherdps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x92, 1); }
void vgatherqpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W1, 0x93, 1); }
void vgatherqps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x93, 2); }
void vgf2p8affineinvqb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_SAE_Z | T_B64, 0xCF, imm); }
void vgf2p8affineqb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_SAE_Z | T_B64, 0xCE, imm); }
void vgf2p8mulb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_SAE_Z, 0xCF); }
@ -1014,7 +1075,7 @@ void vhsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand())
void vhsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_F2 | T_0F | T_YMM, 0x7D); }
void vinsertf128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) throw Error(ERR_BAD_COMBINATION); opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x18, imm); }
void vinserti128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) throw Error(ERR_BAD_COMBINATION); opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x38, imm); }
void vinsertps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_EW0 | T_EVEX, 0x21, imm); }
void vinsertps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_W0 | T_EW0 | T_EVEX, 0x21, imm); }
void vlddqu(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, cvtIdx0(x), addr, T_0F | T_F2 | T_W0 | T_YMM, 0xF0); }
void vldmxcsr(const Address& addr) { opAVX_X_X_XM(xm2, xm0, addr, T_0F, 0xAE); }
void vmaskmovdqu(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, T_0F | T_66, 0xF7); }
@ -1135,10 +1196,10 @@ void vpextrb(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(8|16|
void vpextrd(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x16, imm); }
void vpextrq(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(64) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); opVex(x, 0, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x16, imm); }
void vpextrw(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(16|i32e) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); if (op.isREG() && x.getIdx() < 16) { opAVX_X_X_XM(Xmm(op.getIdx()), xm0, x, T_0F | T_66, 0xC5, imm); } else { opVex(x, 0, op, T_0F3A | T_66 | T_EVEX | T_N2, 0x15, imm); } }
void vpgatherdd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W0, 0x90, 1); }
void vpgatherdq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W1, 0x90, 0); }
void vpgatherqd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W0, 0x91, 2); }
void vpgatherqq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W1, 0x91, 1); }
void vpgatherdd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x90, 1); }
void vpgatherdq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W1, 0x90, 0); }
void vpgatherqd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x91, 2); }
void vpgatherqq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W1, 0x91, 1); }
void vphaddd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x02); }
void vphaddsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x03); }
void vphaddw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x01); }
@ -1197,28 +1258,28 @@ void vpshuflw(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm,
void vpsignb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x08); }
void vpsignd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x0A); }
void vpsignw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x09); }
void vpslld(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x72, imm); }
void vpslld(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); }
void vpslld(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW0 | T_YMM | T_EVEX, 0xF2); }
void vpslldq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 7), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x73, imm); }
void vpsllq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0x73, imm); }
void vpslldq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 7), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x73, imm); }
void vpsllq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64 | T_MEM_EVEX, 0x73, imm); }
void vpsllq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0xF3); }
void vpsllvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x47); }
void vpsllvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x47); }
void vpsllw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x71, imm); }
void vpsllw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); }
void vpsllw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_YMM | T_EVEX, 0xF1); }
void vpsrad(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x72, imm); }
void vpsrad(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); }
void vpsrad(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW0 | T_YMM | T_EVEX, 0xE2); }
void vpsravd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x46); }
void vpsraw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x71, imm); }
void vpsraw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); }
void vpsraw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_YMM | T_EVEX, 0xE1); }
void vpsrld(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x72, imm); }
void vpsrld(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); }
void vpsrld(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW0 | T_YMM | T_EVEX, 0xD2); }
void vpsrldq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 3), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x73, imm); }
void vpsrlq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0x73, imm); }
void vpsrldq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 3), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x73, imm); }
void vpsrlq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64 | T_MEM_EVEX, 0x73, imm); }
void vpsrlq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0xD3); }
void vpsrlvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x45); }
void vpsrlvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x45); }
void vpsrlw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x71, imm); }
void vpsrlw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); }
void vpsrlw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_YMM | T_EVEX, 0xD1); }
void vpsubb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xF8); }
void vpsubd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0xFA); }
@ -1544,8 +1605,17 @@ void jrcxz(std::string label) { opJmp(label, T_SHORT, 0xe3, 0, 0); }
void jrcxz(const Label& label) { opJmp(label, T_SHORT, 0xe3, 0, 0); }
void cdqe() { db(0x48); db(0x98); }
void cqo() { db(0x48); db(0x99); }
void cmpsq() { db(0x48); db(0xA7); }
void popfq() { db(0x9D); }
void pushfq() { db(0x9C); }
void lodsq() { db(0x48); db(0xAD); }
void movsq() { db(0x48); db(0xA5); }
void scasq() { db(0x48); db(0xAF); }
void stosq() { db(0x48); db(0xAB); }
void syscall() { db(0x0F); db(0x05); }
void sysret() { db(0x0F); db(0x07); }
void cmpxchg16b(const Address& addr) { opModM(addr, Reg64(1), 0x0F, 0xC7); }
void fxrstor64(const Address& addr) { opModM(addr, Reg64(1), 0x0F, 0xAE); }
void movq(const Reg64& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x7E); }
void movq(const Mmx& mmx, const Reg64& reg) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x6E); }
void movsxd(const Reg64& reg, const Operand& op) { if (!op.isBit(32)) throw Error(ERR_BAD_COMBINATION); opModRM(reg, op, op.isREG(), op.isMEM(), 0x63); }
@ -1568,12 +1638,15 @@ void aam() { db(0xD4); db(0x0A); }
void aas() { db(0x3F); }
void daa() { db(0x27); }
void das() { db(0x2F); }
void into() { db(0xCE); }
void popad() { db(0x61); }
void popfd() { db(0x9D); }
void pusha() { db(0x60); }
void pushad() { db(0x60); }
void pushfd() { db(0x9C); }
void popa() { db(0x61); }
// lds reg, [addr]: opcode 0xC5, encoded through opLoadSeg.
// NOTE(review): 0x100 is passed as code1; presumably this equals NONE so that opLoadSeg emits no second byte — confirm.
void lds(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0xC5, 0x100); }
// les reg, [addr]: opcode 0xC4, encoded through opLoadSeg (same code1 convention as lds).
void les(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0xC4, 0x100); }
#endif
#ifndef XBYAK_NO_OP_NAMES
void and(const Operand& op1, const Operand& op2) { and_(op1, op2); }
@ -1664,14 +1737,16 @@ void vbroadcasti32x4(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_
void vbroadcasti32x8(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N32, 0x5B); }
void vbroadcasti64x2(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N16, 0x5A); }
void vbroadcasti64x4(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N32, 0x5B); }
void vcmppd(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); }
void vcmpps(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_0F | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); }
void vcmppd(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0xC2, imm); }
void vcmpps(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_0F | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0xC2, imm); }
void vcmpsd(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_N8 | T_F2 | T_0F | T_EW1 | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); }
void vcmpss(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_N4 | T_F3 | T_0F | T_EW0 | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); }
void vcompressb(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N1 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x63); }
void vcompresspd(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x8A); }
void vcompressps(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8A); }
void vcompressw(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N2 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x63); }
void vcvtne2ps2bf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); }
void vcvtneps2bf16(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); }
void vcvtpd2qq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x7B); }
void vcvtpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x79); }
void vcvtpd2uqq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x79); }
@ -1697,6 +1772,7 @@ void vcvtuqq2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F2 | T_0F | T
void vcvtusi2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F2 | T_0F | T_MUST_EVEX, T_W1 | T_EW1 | T_ER_X | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); }
void vcvtusi2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F3 | T_0F | T_MUST_EVEX | T_ER_X, T_W1 | T_EW1 | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); }
void vdbpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x42, imm); }
void vdpbf16ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x52); }
void vexp2pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, 0xC8); }
void vexp2ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xC8); }
void vexpandpd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x88); }
@ -1713,22 +1789,22 @@ void vfixupimmpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { o
void vfixupimmps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x54, imm); }
void vfixupimmsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_Z | T_MUST_EVEX, 0x55, imm); }
void vfixupimmss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_Z | T_MUST_EVEX, 0x55, imm); }
void vfpclasspd(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) throw Error(ERR_BAD_MEM_SIZE); Reg x = k; x.setBit(op.getBit()); opVex(x, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); }
void vfpclassps(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) throw Error(ERR_BAD_MEM_SIZE); Reg x = k; x.setBit(op.getBit()); opVex(x, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }
void vfpclasspd(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) throw Error(ERR_BAD_MEM_SIZE); opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); }
void vfpclassps(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) throw Error(ERR_BAD_MEM_SIZE); opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }
void vfpclasssd(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isXMEM()) throw Error(ERR_BAD_MEM_SIZE); opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, 0x67, imm); }
void vfpclassss(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isXMEM()) throw Error(ERR_BAD_MEM_SIZE); opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, 0x67, imm); }
void vgatherdpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x92, 1); }
void vgatherdps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x92, 0); }
void vgatherpf0dpd(const Address& addr) { opGatherFetch(addr, zm1, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC6, Operand::YMM); }
void vgatherpf0dps(const Address& addr) { opGatherFetch(addr, zm1, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC6, Operand::ZMM); }
void vgatherpf0qpd(const Address& addr) { opGatherFetch(addr, zm1, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC7, Operand::ZMM); }
void vgatherpf0qps(const Address& addr) { opGatherFetch(addr, zm1, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC7, Operand::ZMM); }
void vgatherpf1dpd(const Address& addr) { opGatherFetch(addr, zm2, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC6, Operand::YMM); }
void vgatherpf1dps(const Address& addr) { opGatherFetch(addr, zm2, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC6, Operand::ZMM); }
void vgatherpf1qpd(const Address& addr) { opGatherFetch(addr, zm2, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC7, Operand::ZMM); }
void vgatherpf1qps(const Address& addr) { opGatherFetch(addr, zm2, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC7, Operand::ZMM); }
void vgatherqpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x93, 0); }
void vgatherqps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x93, 2); }
void vgatherdpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x92, 1); }
void vgatherdps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_VSIB, 0x92, 0); }
void vgatherpf0dpd(const Address& addr) { opGatherFetch(addr, zm1, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::YMM); }
void vgatherpf0dps(const Address& addr) { opGatherFetch(addr, zm1, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::ZMM); }
void vgatherpf0qpd(const Address& addr) { opGatherFetch(addr, zm1, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
void vgatherpf0qps(const Address& addr) { opGatherFetch(addr, zm1, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
void vgatherpf1dpd(const Address& addr) { opGatherFetch(addr, zm2, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::YMM); }
void vgatherpf1dps(const Address& addr) { opGatherFetch(addr, zm2, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::ZMM); }
void vgatherpf1qpd(const Address& addr) { opGatherFetch(addr, zm2, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
void vgatherpf1qps(const Address& addr) { opGatherFetch(addr, zm2, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
void vgatherqpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x93, 0); }
void vgatherqps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_VSIB, 0x93, 2); }
void vgetexppd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x42); }
void vgetexpps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x42); }
void vgetexpsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x43); }
@ -1757,6 +1833,8 @@ void vmovdqu64(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F3
void vmovdqu64(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
void vmovdqu8(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); }
void vmovdqu8(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
void vp2intersectd(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) throw Error(ERR_OPMASK_IS_ALREADY_SET); opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW0 | T_B32, 0x68); }
void vp2intersectq(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) throw Error(ERR_OPMASK_IS_ALREADY_SET); opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW1 | T_B64, 0x68); }
void vp4dpwssd(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x52); }
void vp4dpwssds(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x53); }
void vpabsq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_B64 | T_YMM, 0x1F); }
@ -1815,10 +1893,10 @@ void vpexpandb(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N1 | T
void vpexpandd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x89); }
void vpexpandq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x89); }
void vpexpandw(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N2 | T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x62); }
void vpgatherdd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x90, 0); }
void vpgatherdq(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x90, 1); }
void vpgatherqd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x91, 2); }
void vpgatherqq(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x91, 0); }
void vpgatherdd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_VSIB, 0x90, 0); }
void vpgatherdq(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x90, 1); }
void vpgatherqd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_VSIB, 0x91, 2); }
void vpgatherqq(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x91, 0); }
void vplzcntd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x44); }
void vplzcntq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x44); }
void vpmadd52huq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0xB5); }
@ -1869,10 +1947,10 @@ void vprord(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.get
void vprorq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 0), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x72, imm); }
void vprorvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x14); }
void vprorvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x14); }
void vpscatterdd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0xA0, 0); }
void vpscatterdq(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K, 0xA0, 1); }
void vpscatterqd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0xA1, 2); }
void vpscatterqq(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K, 0xA1, 0); }
void vpscatterdd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA0, 0); }
void vpscatterdq(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA0, 1); }
void vpscatterqd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA1, 2); }
void vpscatterqq(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA1, 0); }
void vpshldd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x71, imm); }
void vpshldq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x71, imm); }
void vpshldvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x71); }
@ -1936,18 +2014,18 @@ void vscalefpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x
void vscalefps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0x2C); }
void vscalefsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_ER_X | T_MUST_EVEX, 0x2D); }
void vscalefss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_ER_X | T_MUST_EVEX, 0x2D); }
void vscatterdpd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K, 0xA2, 1); }
void vscatterdps(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0xA2, 0); }
void vscatterpf0dpd(const Address& addr) { opGatherFetch(addr, zm5, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC6, Operand::YMM); }
void vscatterpf0dps(const Address& addr) { opGatherFetch(addr, zm5, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC6, Operand::ZMM); }
void vscatterpf0qpd(const Address& addr) { opGatherFetch(addr, zm5, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC7, Operand::ZMM); }
void vscatterpf0qps(const Address& addr) { opGatherFetch(addr, zm5, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC7, Operand::ZMM); }
void vscatterpf1dpd(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC6, Operand::YMM); }
void vscatterpf1dps(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC6, Operand::ZMM); }
void vscatterpf1qpd(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC7, Operand::ZMM); }
void vscatterpf1qps(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC7, Operand::ZMM); }
void vscatterqpd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K, 0xA3, 0); }
void vscatterqps(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0xA3, 2); }
void vscatterdpd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA2, 1); }
void vscatterdps(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA2, 0); }
void vscatterpf0dpd(const Address& addr) { opGatherFetch(addr, zm5, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::YMM); }
void vscatterpf0dps(const Address& addr) { opGatherFetch(addr, zm5, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::ZMM); }
void vscatterpf0qpd(const Address& addr) { opGatherFetch(addr, zm5, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
void vscatterpf0qps(const Address& addr) { opGatherFetch(addr, zm5, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
void vscatterpf1dpd(const Address& addr) { opGatherFetch(addr, zm6, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::YMM); }
void vscatterpf1dps(const Address& addr) { opGatherFetch(addr, zm6, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::ZMM); }
void vscatterpf1qpd(const Address& addr) { opGatherFetch(addr, zm6, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
void vscatterpf1qps(const Address& addr) { opGatherFetch(addr, zm6, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
void vscatterqpd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA3, 0); }
void vscatterqps(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA3, 2); }
void vshuff32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x23, imm); }
void vshuff64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x23, imm); }
void vshufi32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x43, imm); }

View File

@ -1,5 +1,6 @@
#ifndef XBYAK_XBYAK_UTIL_H_
#define XBYAK_XBYAK_UTIL_H_
#include <string.h>
/**
utility class and functions for Xbyak
@ -9,6 +10,11 @@
*/
#include "xbyak.h"
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
#define XBYAK_INTEL_CPU_SPECIFIC
#endif
#ifdef XBYAK_INTEL_CPU_SPECIFIC
#ifdef _MSC_VER
#if (_MSC_VER < 1400) && defined(XBYAK32)
static inline __declspec(naked) void __cpuid(int[4], int)
@ -47,14 +53,44 @@
#endif
#endif
#endif
#endif
#ifdef XBYAK_USE_VTUNE
// -I /opt/intel/vtune_amplifier/include/ -L /opt/intel/vtune_amplifier/lib64 -ljitprofiling -ldl
#include <jitprofiling.h>
#ifdef _MSC_VER
#pragma comment(lib, "libittnotify.lib")
#endif
#ifdef __linux__
#include <dlfcn.h>
#endif
#endif
#ifdef __linux__
#define XBYAK_USE_PERF
#endif
namespace Xbyak { namespace util {
/**
	Topology level identifiers used by Cpu.
	The numeric values are compared against bits [8, 15) of ECX returned by
	CPUID leaf 0xB, and (value - 1) is used as the index into Cpu::numCores_.
*/
typedef enum {
SmtLevel = 1, // SMT level; indexes numCores_[0]
CoreLevel = 2 // core level; indexes numCores_[1]
} IntelCpuTopologyLevel;
/**
CPU detection class
*/
class Cpu {
uint64 type_;
//system topology
bool x2APIC_supported_;
static const size_t maxTopologyLevels = 2;
unsigned int numCores_[maxTopologyLevels];
static const unsigned int maxNumberCacheLevels = 10;
unsigned int dataCacheSize_[maxNumberCacheLevels];
unsigned int coresSharignDataCache_[maxNumberCacheLevels];
unsigned int dataCacheLevels_;
unsigned int get32bitAsBE(const char *x) const
{
return x[0] | (x[1] << 8) | (x[2] << 16) | (x[3] << 24);
@ -65,7 +101,7 @@ class Cpu {
}
void setFamily()
{
unsigned int data[4];
unsigned int data[4] = {};
getCpuid(1, data);
stepping = data[0] & mask(4);
model = (data[0] >> 4) & mask(4);
@ -88,6 +124,44 @@ class Cpu {
{
return (val >> base) & ((1u << (end - base)) - 1);
}
void setNumCores()
{
if ((type_ & tINTEL) == 0) return;
unsigned int data[4] = {};
/* CAUTION: These numbers are configuration as shipped by Intel. */
getCpuidEx(0x0, 0, data);
if (data[0] >= 0xB) {
/*
if leaf 11 exists(x2APIC is supported),
we use it to get the number of smt cores and cores on socket
leaf 0xB can be zeroed-out by a hypervisor
*/
x2APIC_supported_ = true;
for (unsigned int i = 0; i < maxTopologyLevels; i++) {
getCpuidEx(0xB, i, data);
IntelCpuTopologyLevel level = (IntelCpuTopologyLevel)extractBit(data[2], 8, 15);
if (level == SmtLevel || level == CoreLevel) {
numCores_[level - 1] = extractBit(data[1], 0, 15);
}
}
/*
Fallback values in case a hypervisor has 0xB leaf zeroed-out.
*/
numCores_[SmtLevel - 1] = (std::max)(1u, numCores_[SmtLevel - 1]);
numCores_[CoreLevel - 1] = (std::max)(numCores_[SmtLevel - 1], numCores_[CoreLevel - 1]);
} else {
/*
Failed to deremine num of cores without x2APIC support.
TODO: USE initial APIC ID to determine ncores.
*/
numCores_[SmtLevel - 1] = 0;
numCores_[CoreLevel - 1] = 0;
}
}
void setCacheHierarchy()
{
if ((type_ & tINTEL) == 0) return;
@ -96,21 +170,12 @@ class Cpu {
// const unsigned int INSTRUCTION_CACHE = 2;
const unsigned int UNIFIED_CACHE = 3;
unsigned int smt_width = 0;
unsigned int n_cores = 0;
unsigned int data[4];
unsigned int logical_cores = 0;
unsigned int data[4] = {};
/*
if leaf 11 exists, we use it to get the number of smt cores and cores on socket
If x2APIC is supported, these are the only correct numbers.
leaf 0xB can be zeroed-out by a hypervisor
*/
getCpuidEx(0x0, 0, data);
if (data[0] >= 0xB) {
getCpuidEx(0xB, 0, data); // CPUID for SMT Level
smt_width = data[1] & 0x7FFF;
getCpuidEx(0xB, 1, data); // CPUID for CORE Level
n_cores = data[1] & 0x7FFF;
if (x2APIC_supported_) {
smt_width = numCores_[0];
logical_cores = numCores_[1];
}
/*
@ -118,29 +183,29 @@ class Cpu {
the first level of data cache is not shared (which is the
case for every existing architecture) and use this to
determine the SMT width for arch not supporting leaf 11.
when leaf 4 reports a number of core less than n_cores
when leaf 4 reports a number of core less than numCores_
on socket reported by leaf 11, then it is a correct number
of cores not an upperbound.
*/
for (int i = 0; data_cache_levels < maxNumberCacheLevels; i++) {
for (int i = 0; dataCacheLevels_ < maxNumberCacheLevels; i++) {
getCpuidEx(0x4, i, data);
unsigned int cacheType = extractBit(data[0], 0, 4);
if (cacheType == NO_CACHE) break;
if (cacheType == DATA_CACHE || cacheType == UNIFIED_CACHE) {
unsigned int nb_logical_cores = extractBit(data[0], 14, 25) + 1;
if (n_cores != 0) { // true only if leaf 0xB is supported and valid
nb_logical_cores = (std::min)(nb_logical_cores, n_cores);
unsigned int actual_logical_cores = extractBit(data[0], 14, 25) + 1;
if (logical_cores != 0) { // true only if leaf 0xB is supported and valid
actual_logical_cores = (std::min)(actual_logical_cores, logical_cores);
}
assert(nb_logical_cores != 0);
data_cache_size[data_cache_levels] =
assert(actual_logical_cores != 0);
dataCacheSize_[dataCacheLevels_] =
(extractBit(data[1], 22, 31) + 1)
* (extractBit(data[1], 12, 21) + 1)
* (extractBit(data[1], 0, 11) + 1)
* (data[2] + 1);
if (cacheType == DATA_CACHE && smt_width == 0) smt_width = nb_logical_cores;
if (cacheType == DATA_CACHE && smt_width == 0) smt_width = actual_logical_cores;
assert(smt_width != 0);
cores_sharing_data_cache[data_cache_levels] = nb_logical_cores / smt_width;
data_cache_levels++;
coresSharignDataCache_[dataCacheLevels_] = (std::max)(actual_logical_cores / smt_width, 1u);
dataCacheLevels_++;
}
}
}
@ -154,22 +219,25 @@ public:
int displayFamily; // family + extFamily
int displayModel; // model + extModel
// may I move these members into private?
static const unsigned int maxNumberCacheLevels = 10;
unsigned int data_cache_size[maxNumberCacheLevels];
unsigned int cores_sharing_data_cache[maxNumberCacheLevels];
unsigned int data_cache_levels;
/**
	Get the number of cores for the given topology level.
	@param level [in] SmtLevel or CoreLevel
	@return for SmtLevel, the count stored for the SMT level;
	        for CoreLevel, the count stored for the core level divided by the SMT-level count
	@note throws Error(ERR_X2APIC_IS_NOT_SUPPORTED) if x2APIC_supported_ is false
	      or if level is neither SmtLevel nor CoreLevel
	@note declared const (it only reads members) so that it can be called on a
	      const Cpu, like the other getters of this class
*/
unsigned int getNumCores(IntelCpuTopologyLevel level) const {
	if (!x2APIC_supported_) throw Error(ERR_X2APIC_IS_NOT_SUPPORTED);
	switch (level) {
	case SmtLevel: return numCores_[SmtLevel - 1];
	// setNumCores() clamps the SMT-level count to at least 1 whenever it sets
	// x2APIC_supported_, so the divisor is non-zero here.
	case CoreLevel: return numCores_[CoreLevel - 1] / numCores_[SmtLevel - 1];
	default: throw Error(ERR_X2APIC_IS_NOT_SUPPORTED);
	}
}
unsigned int getDataCacheLevels() const { return data_cache_levels; }
unsigned int getDataCacheLevels() const { return dataCacheLevels_; }
unsigned int getCoresSharingDataCache(unsigned int i) const
{
if (i >= data_cache_levels) throw Error(ERR_BAD_PARAMETER);
return cores_sharing_data_cache[i];
if (i >= dataCacheLevels_) throw Error(ERR_BAD_PARAMETER);
return coresSharignDataCache_[i];
}
unsigned int getDataCacheSize(unsigned int i) const
{
if (i >= data_cache_levels) throw Error(ERR_BAD_PARAMETER);
return data_cache_size[i];
if (i >= dataCacheLevels_) throw Error(ERR_BAD_PARAMETER);
return dataCacheSize_[i];
}
/*
@ -177,30 +245,45 @@ public:
*/
static inline void getCpuid(unsigned int eaxIn, unsigned int data[4])
{
#ifdef _MSC_VER
#ifdef XBYAK_INTEL_CPU_SPECIFIC
#ifdef _MSC_VER
__cpuid(reinterpret_cast<int*>(data), eaxIn);
#else
#else
__cpuid(eaxIn, data[0], data[1], data[2], data[3]);
#endif
#else
(void)eaxIn;
(void)data;
#endif
}
static inline void getCpuidEx(unsigned int eaxIn, unsigned int ecxIn, unsigned int data[4])
{
#ifdef _MSC_VER
#ifdef XBYAK_INTEL_CPU_SPECIFIC
#ifdef _MSC_VER
__cpuidex(reinterpret_cast<int*>(data), eaxIn, ecxIn);
#else
#else
__cpuid_count(eaxIn, ecxIn, data[0], data[1], data[2], data[3]);
#endif
#else
(void)eaxIn;
(void)ecxIn;
(void)data;
#endif
}
static inline uint64 getXfeature()
{
#ifdef _MSC_VER
#ifdef XBYAK_INTEL_CPU_SPECIFIC
#ifdef _MSC_VER
return _xgetbv(0);
#else
#else
unsigned int eax, edx;
// xgetbv is not supported on gcc 4.2
// __asm__ volatile("xgetbv" : "=a"(eax), "=d"(edx) : "c"(0));
__asm__ volatile(".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(0));
return ((uint64)edx << 32) | eax;
#endif
#else
return 0;
#endif
}
typedef uint64 Type;
@ -268,12 +351,18 @@ public:
static const Type tAVX512_VNNI = uint64(1) << 54;
static const Type tAVX512_BITALG = uint64(1) << 55;
static const Type tAVX512_VPOPCNTDQ = uint64(1) << 56;
static const Type tAVX512_BF16 = uint64(1) << 57;
static const Type tAVX512_VP2INTERSECT = uint64(1) << 58;
Cpu()
: type_(NONE)
, data_cache_levels(0)
, x2APIC_supported_(false)
, numCores_()
, dataCacheSize_()
, coresSharignDataCache_()
, dataCacheLevels_(0)
{
unsigned int data[4];
unsigned int data[4] = {};
const unsigned int& EAX = data[0];
const unsigned int& EBX = data[1];
const unsigned int& ECX = data[2];
@ -343,6 +432,12 @@ public:
if (ECX & (1U << 14)) type_ |= tAVX512_VPOPCNTDQ;
if (EDX & (1U << 2)) type_ |= tAVX512_4VNNIW;
if (EDX & (1U << 3)) type_ |= tAVX512_4FMAPS;
if (EDX & (1U << 8)) type_ |= tAVX512_VP2INTERSECT;
}
// EAX=07H, ECX=1
getCpuidEx(7, 1, data);
if (type_ & tAVX512F) {
if (EAX & (1U << 5)) type_ |= tAVX512_BF16;
}
}
}
@ -363,6 +458,7 @@ public:
if (ECX & (1U << 0)) type_ |= tPREFETCHWT1;
}
setFamily();
setNumCores();
setCacheHierarchy();
}
void putFamily() const
@ -381,12 +477,17 @@ class Clock {
public:
static inline uint64 getRdtsc()
{
#ifdef _MSC_VER
#ifdef XBYAK_INTEL_CPU_SPECIFIC
#ifdef _MSC_VER
return __rdtsc();
#else
#else
unsigned int eax, edx;
__asm__ volatile("rdtsc" : "=a"(eax), "=d"(edx));
return ((uint64)edx << 32) | eax;
#endif
#else
// TODO: Need another impl of Clock or rdtsc-equivalent for non-x86 cpu
return 0;
#endif
}
Clock()
@ -416,7 +517,7 @@ const int UseRCX = 1 << 6;
const int UseRDX = 1 << 7;
class Pack {
static const size_t maxTblNum = 10;
static const size_t maxTblNum = 15;
const Xbyak::Reg64 *tbl_[maxTblNum];
size_t n_;
public:
@ -476,7 +577,7 @@ public:
const Xbyak::Reg64& operator[](size_t n) const
{
if (n >= n_) {
fprintf(stderr, "ERR Pack bad n=%d\n", (int)n);
fprintf(stderr, "ERR Pack bad n=%d(%d)\n", (int)n, (int)n_);
throw Error(ERR_BAD_PARAMETER);
}
return *tbl_[n];
@ -518,6 +619,7 @@ class StackFrame {
static const int rcxPos = 3;
static const int rdxPos = 2;
#endif
static const int maxRegNum = 14; // maxRegNum = 16 - rsp - rax
Xbyak::CodeGenerator *code_;
int pNum_;
int tNum_;
@ -527,7 +629,7 @@ class StackFrame {
int P_;
bool makeEpilog_;
Xbyak::Reg64 pTbl_[4];
Xbyak::Reg64 tTbl_[10];
Xbyak::Reg64 tTbl_[maxRegNum];
Pack p_;
Pack t_;
StackFrame(const StackFrame&);
@ -539,7 +641,7 @@ public:
make stack frame
@param sf [in] this
@param pNum [in] num of function parameter(0 <= pNum <= 4)
@param tNum [in] num of temporary register(0 <= tNum <= 10, with UseRCX, UseRDX)
@param tNum [in] num of temporary register(0 <= tNum, with UseRCX, UseRDX) #{pNum + tNum [+rcx] + [rdx]} <= 14
@param stackSizeByte [in] local stack size
@param makeEpilog [in] automatically call close() if true
@ -566,27 +668,17 @@ public:
using namespace Xbyak;
if (pNum < 0 || pNum > 4) throw Error(ERR_BAD_PNUM);
const int allRegNum = pNum + tNum_ + (useRcx_ ? 1 : 0) + (useRdx_ ? 1 : 0);
if (allRegNum < pNum || allRegNum > 14) throw Error(ERR_BAD_TNUM);
if (tNum_ < 0 || allRegNum > maxRegNum) throw Error(ERR_BAD_TNUM);
const Reg64& _rsp = code->rsp;
const AddressFrame& _ptr = code->ptr;
saveNum_ = (std::max)(0, allRegNum - noSaveNum);
const int *tbl = getOrderTbl() + noSaveNum;
P_ = saveNum_ + (stackSizeByte + 7) / 8;
if (P_ > 0 && (P_ & 1) == 0) P_++; // here (rsp % 16) == 8, then increment P_ for 16 byte alignment
for (int i = 0; i < saveNum_; i++) {
code->push(Reg64(tbl[i]));
}
P_ = (stackSizeByte + 7) / 8;
if (P_ > 0 && (P_ & 1) == (saveNum_ & 1)) P_++; // (rsp % 16) == 8, then increment P_ for 16 byte alignment
P_ *= 8;
if (P_ > 0) code->sub(_rsp, P_);
#ifdef XBYAK64_WIN
for (int i = 0; i < (std::min)(saveNum_, 4); i++) {
code->mov(_ptr [_rsp + P_ + (i + 1) * 8], Reg64(tbl[i]));
}
for (int i = 4; i < saveNum_; i++) {
code->mov(_ptr [_rsp + P_ - 8 * (saveNum_ - i)], Reg64(tbl[i]));
}
#else
for (int i = 0; i < saveNum_; i++) {
code->mov(_ptr [_rsp + P_ - 8 * (saveNum_ - i)], Reg64(tbl[i]));
}
#endif
int pos = 0;
for (int i = 0; i < pNum; i++) {
pTbl_[i] = Xbyak::Reg64(getRegIdx(pos));
@ -607,36 +699,18 @@ public:
{
using namespace Xbyak;
const Reg64& _rsp = code_->rsp;
const AddressFrame& _ptr = code_->ptr;
const int *tbl = getOrderTbl() + noSaveNum;
#ifdef XBYAK64_WIN
for (int i = 0; i < (std::min)(saveNum_, 4); i++) {
code_->mov(Reg64(tbl[i]), _ptr [_rsp + P_ + (i + 1) * 8]);
}
for (int i = 4; i < saveNum_; i++) {
code_->mov(Reg64(tbl[i]), _ptr [_rsp + P_ - 8 * (saveNum_ - i)]);
}
#else
for (int i = 0; i < saveNum_; i++) {
code_->mov(Reg64(tbl[i]), _ptr [_rsp + P_ - 8 * (saveNum_ - i)]);
}
#endif
if (P_ > 0) code_->add(_rsp, P_);
for (int i = 0; i < saveNum_; i++) {
code_->pop(Reg64(tbl[saveNum_ - 1 - i]));
}
if (callRet) code_->ret();
}
~StackFrame()
{
if (!makeEpilog_) return;
try {
close();
} catch (std::exception& e) {
printf("ERR:StackFrame %s\n", e.what());
exit(1);
} catch (...) {
printf("ERR:StackFrame otherwise\n");
exit(1);
}
close();
}
private:
const int *getOrderTbl() const
@ -654,7 +728,7 @@ private:
}
int getRegIdx(int& pos) const
{
assert(pos < 14);
assert(pos < maxRegNum);
using namespace Xbyak;
const int *tbl = getOrderTbl();
int r = tbl[pos++];
@ -671,5 +745,135 @@ private:
};
#endif
/**
	Reports JIT-generated code ranges to an external profiler.

	The backend is chosen at run time with init() and must also have been
	compiled in:
	- Perf  (needs XBYAK_USE_PERF)  : appends "<addr> <size> <name>" lines
	  (addr and size in hex) to /tmp/perf-<pid>.map
	- VTune (needs XBYAK_USE_VTUNE) : sends a method-load event through the
	  iJIT_* API
	When the requested backend is not compiled in, or its setup fails,
	the mode stays None and every set() call is a no-op.

	NOTE(review): the class is implicitly copyable; with XBYAK_USE_PERF a copy
	shares fp_ with the original and both destructors would fclose() it.
	Do not copy an initialized Profiler.
*/
class Profiler {
	int mode_; // active backend: None, Perf or VTune
	const char *suffix_; // appended to every reported function name; never null
	const void *startAddr_; // start of the next range for the two-argument set()
#ifdef XBYAK_USE_PERF
	FILE *fp_; // perf map file, 0 while closed
#endif
public:
	enum {
		None = 0,
		Perf = 1,
		VTune = 2
	};
	Profiler()
		: mode_(None)
		, suffix_("")
		, startAddr_(0)
#ifdef XBYAK_USE_PERF
		, fp_(0)
#endif
	{
	}
	/*
		append suffix to funcName
		@param suffix [in] string appended to each name passed to set();
		       the pointer is stored, not copied, so it must outlive this object.
		       A null pointer is treated as an empty suffix.
	*/
	void setNameSuffix(const char *suffix)
	{
		// keep suffix_ non-null: set() passes it to strlen() and "%s"
		suffix_ = suffix ? suffix : "";
	}
	/*
		set the start address used by the next set(funcName, endAddr) call
	*/
	void setStartAddr(const void *startAddr)
	{
		startAddr_ = startAddr;
	}
	/*
		select the profiler backend
		@param mode [in] None, Perf or VTune; any other value is treated as None
		@note the mode remains None if the backend is not compiled in,
		      if the perf map file can't be opened, or if VTune sampling is not active
	*/
	void init(int mode)
	{
		mode_ = None;
		// release a perf map file left open by a previous init(), whatever the new mode is
		close();
		switch (mode) {
		default:
		case None:
			return;
		case Perf:
#ifdef XBYAK_USE_PERF
			{
				const int pid = getpid();
				char name[128];
				snprintf(name, sizeof(name), "/tmp/perf-%d.map", pid);
				fp_ = fopen(name, "a+");
				if (fp_ == 0) {
					fprintf(stderr, "can't open %s\n", name);
					return;
				}
			}
			mode_ = Perf;
#endif
			return;
		case VTune:
#ifdef XBYAK_USE_VTUNE
			dlopen("dummy", RTLD_LAZY); // force to load dlopen to enable jit profiling
			if (iJIT_IsProfilingActive() != iJIT_SAMPLING_ON) {
				fprintf(stderr, "VTune profiling is not active\n");
				return;
			}
			mode_ = VTune;
#endif
			return;
		}
	}
	~Profiler()
	{
		close();
	}
	/*
		close the perf map file if it is open; safe to call repeatedly.
		The mode is not changed, but set() writes nothing while the file is closed.
	*/
	void close()
	{
#ifdef XBYAK_USE_PERF
		if (fp_ == 0) return;
		fclose(fp_);
		fp_ = 0;
#endif
	}
	/*
		report one function to the active backend
		@param funcName [in] function name (the suffix is appended to it)
		@param startAddr [in] first byte of the generated code
		@param funcSize [in] size of the generated code in bytes
	*/
	void set(const char *funcName, const void *startAddr, size_t funcSize) const
	{
		if (mode_ == None) return;
#if !defined(XBYAK_USE_PERF) && !defined(XBYAK_USE_VTUNE)
		(void)funcName;
		(void)startAddr;
		(void)funcSize;
#endif
#ifdef XBYAK_USE_PERF
		if (mode_ == Perf) {
			if (fp_ == 0) return;
			// go through size_t so that the address is zero-extended and matches the unsigned %llx
			fprintf(fp_, "%llx %zx %s%s", (unsigned long long)(size_t)startAddr, funcSize, funcName, suffix_);
			/*
				perf does not recognize the function name which is less than 3,
				so append '_' at the end of the name if necessary
			*/
			size_t n = strlen(funcName) + strlen(suffix_);
			for (size_t i = n; i < 3; i++) {
				fprintf(fp_, "_");
			}
			fprintf(fp_, "\n");
			fflush(fp_); // flush per entry so the map is up to date even without close()
		}
#endif
#ifdef XBYAK_USE_VTUNE
		if (mode_ != VTune) return;
		char className[] = "";
		char fileName[] = "";
		iJIT_Method_Load jmethod = {};
		jmethod.method_id = iJIT_GetNewMethodID();
		jmethod.class_file_name = className;
		jmethod.source_file_name = fileName;
		jmethod.method_load_address = const_cast<void*>(startAddr);
		jmethod.method_size = funcSize;
		jmethod.line_number_size = 0;
		char buf[128]; // names longer than the buffer are truncated by snprintf
		snprintf(buf, sizeof(buf), "%s%s", funcName, suffix_);
		jmethod.method_name = buf;
		iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, (void*)&jmethod);
#endif
	}
	/*
		for continuous set
		funcSize = endAddr - <previous set endAddr>
		The range starts at the address given to setStartAddr() (or at the
		endAddr of the previous call) and endAddr becomes the next start.
	*/
	void set(const char *funcName, const void *endAddr)
	{
		set(funcName, startAddr_, (size_t)endAddr - (size_t)startAddr_);
		startAddr_ = endAddr;
	}
};
} } // end of util
#endif

View File

@ -21,8 +21,7 @@
#if HOST_CPU == CPU_X64 && FEAT_DSPREC != DYNAREC_NONE
#include "deps/xbyak/xbyak.h"
#include "deps/xbyak/xbyak_util.h"
#include <xbyak/xbyak.h>
#include "dsp.h"
#include "hw/aica/aica_if.h"
#include "hw/mem/_vmem.h"

View File

@ -7,8 +7,8 @@
//#define PROFILING
//#define CANONICAL_TEST
#include "deps/xbyak/xbyak.h"
#include "deps/xbyak/xbyak_util.h"
#include <xbyak/xbyak.h>
#include <xbyak/xbyak_util.h>
#include "types.h"
#include "hw/sh4/sh4_opcode_list.h"

View File

@ -22,7 +22,7 @@
//#define OLD_REGALLOC
#include "deps/xbyak/xbyak.h"
#include <xbyak/xbyak.h>
#ifdef OLD_REGALLOC
#include "hw/sh4/dyna/regalloc.h"
#else

View File

@ -3036,7 +3036,6 @@
TARGET_NO_NIXPROF,
TARGET_NO_COREIO_HTTP,
TARGET_NO_AREC,
XBYAK_NO_OP_NAMES,
TARGET_NO_OPENMP,
ENABLE_MODEM,
CHD5_LZMA,
@ -3090,7 +3089,6 @@
GCC_PREPROCESSOR_DEFINITIONS = (
TARGET_NO_NIXPROF,
TARGET_NO_COREIO_HTTP,
XBYAK_NO_OP_NAMES,
TARGET_NO_OPENMP,
ENABLE_MODEM,
CHD5_LZMA,
@ -3150,6 +3148,7 @@
../../../core/deps/zlib,
../../../core/deps/glslang,
../../../core/deps/glm,
../../../core/deps/xbyak,
/usr/local/include,
);
INFOPLIST_FILE = "emulator-osx/Info.plist";
@ -3198,6 +3197,7 @@
../../../core/deps/zlib,
../../../core/deps/glslang,
../../../core/deps/glm,
../../../core/deps/xbyak,
/usr/local/include,
);
INFOPLIST_FILE = "emulator-osx/Info.plist";
@ -3283,7 +3283,6 @@
GCC_PREPROCESSOR_DEFINITIONS = (
TARGET_NO_NIXPROF,
TARGET_NO_COREIO_HTTP,
XBYAK_NO_OP_NAMES,
TARGET_NO_OPENMP,
ENABLE_MODEM,
CHD5_LZMA,
@ -3344,6 +3343,7 @@
../../../core/deps/zlib,
../../../core/deps/glslang,
../../../core/deps/glm,
../../../core/deps/xbyak,
/usr/local/include,
);
INFOPLIST_FILE = "emulator-osx/Info.plist";
@ -3410,7 +3410,6 @@
GCC_PREPROCESSOR_DEFINITIONS = (
TARGET_NO_NIXPROF,
TARGET_NO_COREIO_HTTP,
XBYAK_NO_OP_NAMES,
TARGET_NO_OPENMP,
ENABLE_MODEM,
CHD5_LZMA,
@ -3465,6 +3464,7 @@
../../../core/deps/zlib,
../../../core/deps/glslang,
../../../core/deps/glm,
../../../core/deps/xbyak,
/usr/local/include,
);
INFOPLIST_FILE = "emulator-osx/Info.plist";