deps: Update xbyak to version 5.891
This commit is contained in:
parent
484a7cdd10
commit
fef96c125a
|
@ -139,7 +139,7 @@ endif
|
|||
|
||||
RZDCY_CFLAGS += -I$(RZDCY_SRC_DIR) -I$(RZDCY_SRC_DIR)/rend/gles -I$(RZDCY_SRC_DIR)/deps \
|
||||
-I$(RZDCY_SRC_DIR)/deps/vixl -I$(RZDCY_SRC_DIR)/khronos -I$(RZDCY_SRC_DIR)/deps/glslang \
|
||||
-I$(RZDCY_SRC_DIR)/deps/glm
|
||||
-I$(RZDCY_SRC_DIR)/deps/glm -I$(RZDCY_SRC_DIR)/deps/xbyak
|
||||
|
||||
ifdef USE_MODEM
|
||||
RZDCY_CFLAGS += -DENABLE_MODEM -I$(RZDCY_SRC_DIR)/deps/picotcp/include -I$(RZDCY_SRC_DIR)/deps/picotcp/modules
|
||||
|
|
|
@ -0,0 +1,12 @@
|
|||
sudo: true
|
||||
dist: bionic
|
||||
language: cpp
|
||||
compiler:
|
||||
- gcc
|
||||
- clang
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- nasm yasm g++-multilib tcsh
|
||||
script:
|
||||
- make test
|
|
@ -0,0 +1,6 @@
|
|||
cmake_minimum_required(VERSION 2.6)
|
||||
project(xbyak)
|
||||
|
||||
file(GLOB headers xbyak/*.h)
|
||||
install(FILES ${headers} DESTINATION include/xbyak)
|
||||
|
|
@ -0,0 +1,24 @@
|
|||
PREFIX=/usr/local
|
||||
INSTALL_DIR=$(PREFIX)/include/xbyak
|
||||
|
||||
all:
|
||||
$(MAKE) -C sample
|
||||
|
||||
clean:
|
||||
$(MAKE) -C sample clean
|
||||
|
||||
install:
|
||||
mkdir -p $(INSTALL_DIR)
|
||||
cp -pR xbyak/*.h $(INSTALL_DIR)
|
||||
|
||||
uninstall:
|
||||
rm -i $(INSTALL_DIR)/*.h
|
||||
rmdir $(INSTALL_DIR)
|
||||
|
||||
update:
|
||||
$(MAKE) -C gen
|
||||
|
||||
test:
|
||||
$(MAKE) -C test test
|
||||
|
||||
.PHONY: test update
|
|
@ -0,0 +1,577 @@
|
|||
[![Build Status](https://travis-ci.org/herumi/xbyak.png)](https://travis-ci.org/herumi/xbyak)
|
||||
|
||||
# Xbyak 5.891 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
|
||||
|
||||
## Abstract
|
||||
|
||||
Xbyak is a C++ header-only library that lets you dynamically assemble x86 (IA32) and x64 (AMD64, x86-64) mnemonics at run time.
|
||||
|
||||
## Feature
|
||||
* header file only
|
||||
* Intel/MASM like syntax
|
||||
* fully support AVX-512
|
||||
|
||||
**Note**:
|
||||
Use `and_()`, `or_()`, ... instead of `and()`, `or()`.
|
||||
If you want to use them, then specify `-fno-operator-names` option to gcc/clang.
|
||||
|
||||
### Supported OS
|
||||
|
||||
* Windows Xp, Vista, Windows 7, Windows 10(32bit, 64bit)
|
||||
* Linux(32bit, 64bit)
|
||||
* Intel macOS
|
||||
|
||||
### Supported Compilers
|
||||
|
||||
Almost all C++03 or later compilers for x86/x64, such as Visual Studio, g++, clang++, the Intel C++ compiler, and g++ on mingw/cygwin.
|
||||
|
||||
## Install
|
||||
|
||||
The following files are necessary. Please add the path to your compile directory.
|
||||
|
||||
* xbyak.h
|
||||
* xbyak_mnemonic.h
|
||||
* xbyak_util.h
|
||||
|
||||
Linux:
|
||||
```
|
||||
make install
|
||||
```
|
||||
|
||||
These files are copied into `/usr/local/include/xbyak`.
|
||||
|
||||
## How to use it
|
||||
|
||||
Inherit `Xbyak::CodeGenerator` class and make the class method.
|
||||
```
|
||||
#include <xbyak/xbyak.h>
|
||||
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code(int x)
|
||||
{
|
||||
mov(eax, x);
|
||||
ret();
|
||||
}
|
||||
};
|
||||
```
|
||||
Or you can pass the instance of CodeGenerator without inheriting.
|
||||
```
|
||||
void genCode(Xbyak::CodeGenerator& code, int x) {
|
||||
using namespace Xbyak::util;
|
||||
code.mov(eax, x);
|
||||
code.ret();
|
||||
}
|
||||
```
|
||||
|
||||
Make an instance of the class and get the function
|
||||
pointer by calling `getCode()` and call it.
|
||||
```
|
||||
Code c(5);
|
||||
int (*f)() = c.getCode<int (*)()>();
|
||||
printf("ret=%d\n", f()); // ret = 5
|
||||
```
|
||||
|
||||
## Syntax
|
||||
Similar to MASM/NASM syntax with parentheses.
|
||||
|
||||
```
|
||||
NASM Xbyak
|
||||
mov eax, ebx --> mov(eax, ebx);
|
||||
inc ecx --> inc(ecx);
|
||||
ret --> ret();
|
||||
```
|
||||
|
||||
## Addressing
|
||||
Use `qword`, `dword`, `word` and `byte` if it is necessary to specify the size of memory,
|
||||
otherwise use `ptr`.
|
||||
|
||||
```
|
||||
(ptr|qword|dword|word|byte) [base + index * (1|2|4|8) + displacement]
|
||||
[rip + 32bit disp] ; x64 only
|
||||
|
||||
NASM Xbyak
|
||||
mov eax, [ebx+ecx] --> mov(eax, ptr [ebx+ecx]);
|
||||
mov al, [ebx+ecx] --> mov(al, ptr [ebx + ecx]);
|
||||
test byte [esp], 4 --> test(byte [esp], 4);
|
||||
inc qword [rax] --> inc(qword [rax]);
|
||||
```
|
||||
**Note**: `qword`, `dword`, `word` and `byte` are member variables, so do not use a name such as `dword` as a type name (e.g. as a typedef for unsigned int).
|
||||
|
||||
### How to use Selector (Segment Register)
|
||||
```
|
||||
mov eax, [fs:eax] --> putSeg(fs);
|
||||
mov(eax, ptr [eax]);
|
||||
mov ax, cs --> mov(ax, cs);
|
||||
```
|
||||
**Note**: Segment class is not derived from `Operand`.
|
||||
|
||||
## AVX
|
||||
|
||||
```
|
||||
vaddps(xmm1, xmm2, xmm3); // xmm1 <- xmm2 + xmm3
|
||||
vaddps(xmm2, xmm3, ptr [rax]); // use ptr to access memory
|
||||
vgatherdpd(xmm1, ptr [ebp + 256 + xmm2*4], xmm3);
|
||||
```
|
||||
|
||||
**Note**:
|
||||
If `XBYAK_ENABLE_OMITTED_OPERAND` is defined, then you can use two operand version for backward compatibility.
|
||||
But the newer version will not support it.
|
||||
```
|
||||
vaddps(xmm2, xmm3); // xmm2 <- xmm2 + xmm3
|
||||
```
|
||||
|
||||
## AVX-512
|
||||
|
||||
```
|
||||
vaddpd zmm2, zmm5, zmm30 --> vaddpd(zmm2, zmm5, zmm30);
|
||||
vaddpd xmm30, xmm20, [rax] --> vaddpd(xmm30, xmm20, ptr [rax]);
|
||||
vaddps xmm30, xmm20, [rax] --> vaddps(xmm30, xmm20, ptr [rax]);
|
||||
vaddpd zmm2{k5}, zmm4, zmm2 --> vaddpd(zmm2 | k5, zmm4, zmm2);
|
||||
vaddpd zmm2{k5}{z}, zmm4, zmm2 --> vaddpd(zmm2 | k5 | T_z, zmm4, zmm2);
|
||||
vaddpd zmm2{k5}{z}, zmm4, zmm2,{rd-sae} --> vaddpd(zmm2 | k5 | T_z, zmm4, zmm2 | T_rd_sae);
|
||||
vaddpd(zmm2 | k5 | T_z | T_rd_sae, zmm4, zmm2); // the position of `|` is arbitrary.
|
||||
vcmppd k4{k3}, zmm1, zmm2, {sae}, 5 --> vcmppd(k4 | k3, zmm1, zmm2 | T_sae, 5);
|
||||
|
||||
vaddpd xmm1, xmm2, [rax+256] --> vaddpd(xmm1, xmm2, ptr [rax+256]);
|
||||
vaddpd xmm1, xmm2, [rax+256]{1to2} --> vaddpd(xmm1, xmm2, ptr_b [rax+256]);
|
||||
vaddpd ymm1, ymm2, [rax+256]{1to4} --> vaddpd(ymm1, ymm2, ptr_b [rax+256]);
|
||||
vaddpd zmm1, zmm2, [rax+256]{1to8} --> vaddpd(zmm1, zmm2, ptr_b [rax+256]);
|
||||
vaddps zmm1, zmm2, [rax+rcx*8+8]{1to16} --> vaddps(zmm1, zmm2, ptr_b [rax+rcx*8+8]);
|
||||
vmovsd [rax]{k1}, xmm4 --> vmovsd(ptr [rax] | k1, xmm4);
|
||||
|
||||
vcvtpd2dq xmm16, oword [eax+33] --> vcvtpd2dq(xmm16, xword [eax+33]); // use xword for m128 instead of oword
|
||||
vcvtpd2dq(xmm16, ptr [eax+33]); // default xword
|
||||
vcvtpd2dq xmm21, [eax+32]{1to2} --> vcvtpd2dq(xmm21, ptr_b [eax+32]);
|
||||
vcvtpd2dq xmm0, yword [eax+33] --> vcvtpd2dq(xmm0, yword [eax+33]); // use yword for m256
|
||||
vcvtpd2dq xmm19, [eax+32]{1to4} --> vcvtpd2dq(xmm19, yword_b [eax+32]); // use yword_b to broadcast
|
||||
|
||||
vfpclassps k5{k3}, zword [rax+64], 5 --> vfpclassps(k5|k3, zword [rax+64], 5); // specify m512
|
||||
vfpclasspd k5{k3}, [rax+64]{1to2}, 5 --> vfpclasspd(k5|k3, xword_b [rax+64], 5); // broadcast 64-bit to 128-bit
|
||||
vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, xword_b [rax+64], 5); // broadcast 32-bit to 128-bit
|
||||
```
|
||||
### Remark
|
||||
* `k1`, ..., `k7` are opmask registers.
|
||||
* use `| T_z`, `| T_sae`, `| T_rn_sae`, `| T_rd_sae`, `| T_ru_sae`, `| T_rz_sae` instead of `,{z}`, `,{sae}`, `,{rn-sae}`, `,{rd-sae}`, `,{ru-sae}`, `,{rz-sae}` respectively.
|
||||
* `k4 | k3` is different from `k3 | k4`.
|
||||
* use `ptr_b` for broadcast `{1toX}`. X is automatically determined.
|
||||
* specify `xword`/`yword`/`zword(_b)` for m128/m256/m512 if necessary.
|
||||
|
||||
## Label
|
||||
Two kinds of labels are supported: string literals and the `Label` class.
|
||||
|
||||
### String literal
|
||||
```
|
||||
L("L1");
|
||||
jmp("L1");
|
||||
|
||||
jmp("L2");
|
||||
...
|
||||
a few mnemonics (8-bit displacement jmp)
|
||||
...
|
||||
L("L2");
|
||||
|
||||
jmp("L3", T_NEAR);
|
||||
...
|
||||
a lot of mnemonics (32-bit displacement jmp)
|
||||
...
|
||||
L("L3");
|
||||
```
|
||||
|
||||
* Call `hasUndefinedLabel()` to verify your code has no undefined label.
|
||||
* You can use a label as the immediate value of `mov`, e.g. `mov(eax, "L2")`.
|
||||
|
||||
### Support `@@`, `@f`, `@b` like MASM
|
||||
|
||||
```
|
||||
L("@@"); // <A>
|
||||
jmp("@b"); // jmp to <A>
|
||||
jmp("@f"); // jmp to <B>
|
||||
L("@@"); // <B>
|
||||
jmp("@b"); // jmp to <B>
|
||||
mov(eax, "@b");
|
||||
jmp(eax); // jmp to <B>
|
||||
```
|
||||
|
||||
### Local label
|
||||
|
||||
Label symbols beginning with a period between `inLocalLabel()` and `outLocalLabel()`
|
||||
are treated as a local label.
|
||||
`inLocalLabel()` and `outLocalLabel()` can be nested.
|
||||
|
||||
```
|
||||
void func1()
|
||||
{
|
||||
inLocalLabel();
|
||||
L(".lp"); // <A> ; local label
|
||||
...
|
||||
jmp(".lp"); // jmp to <A>
|
||||
L("aaa"); // global label <C>
|
||||
outLocalLabel();
|
||||
|
||||
inLocalLabel();
|
||||
L(".lp"); // <B> ; local label
|
||||
func1();
|
||||
jmp(".lp"); // jmp to <B>
|
||||
outLocalLabel();
|
||||
jmp("aaa"); // jmp to <C>
|
||||
}
|
||||
```
|
||||
|
||||
### short and long jump
|
||||
Xbyak deals with jump mnemonics of an undefined label as short jump if no type is specified.
|
||||
So if the distance between the jmp and the label is larger than 127 bytes, xbyak will raise an error.
|
||||
|
||||
```
|
||||
jmp("short-jmp"); // short jmp
|
||||
// small code
|
||||
L("short-jmp");
|
||||
|
||||
jmp("long-jmp");
|
||||
// long code
|
||||
L("long-jmp"); // throw exception
|
||||
```
|
||||
Then specify T_NEAR for jmp.
|
||||
```
|
||||
jmp("long-jmp", T_NEAR); // long jmp
|
||||
// long code
|
||||
L("long-jmp");
|
||||
```
|
||||
Or call `setDefaultJmpNEAR(true);` once, then the default type is set to T_NEAR.
|
||||
```
|
||||
jmp("long-jmp"); // long jmp
|
||||
// long code
|
||||
L("long-jmp");
|
||||
```
|
||||
|
||||
### Label class
|
||||
|
||||
`L()` and `jxx()` support Label class.
|
||||
|
||||
```
|
||||
Xbyak::Label label1, label2;
|
||||
L(label1);
|
||||
...
|
||||
jmp(label1);
|
||||
...
|
||||
jmp(label2);
|
||||
...
|
||||
L(label2);
|
||||
```
|
||||
|
||||
Use `putL` for jmp table
|
||||
```
|
||||
Label labelTbl, L0, L1, L2;
|
||||
mov(rax, labelTbl);
|
||||
// rdx is an index of jump table
|
||||
jmp(ptr [rax + rdx * sizeof(void*)]);
|
||||
L(labelTbl);
|
||||
putL(L0);
|
||||
putL(L1);
|
||||
putL(L2);
|
||||
L(L0);
|
||||
....
|
||||
L(L1);
|
||||
....
|
||||
```
|
||||
|
||||
`assignL(dstLabel, srcLabel)` binds dstLabel with srcLabel.
|
||||
|
||||
```
|
||||
Label label2;
|
||||
Label label1 = L(); // make label1 ; same to Label label1; L(label1);
|
||||
...
|
||||
jmp(label2); // label2 is not determined here
|
||||
...
|
||||
assignL(label2, label1); // label2 <- label1
|
||||
```
|
||||
The `jmp` in the above code jumps to label1 assigned by `assignL`.
|
||||
|
||||
**Note**:
|
||||
* srcLabel must be used in `L()`.
|
||||
* dstLabel must not be used in `L()`.
|
||||
|
||||
`Label::getAddress()` returns the address specified by the label instance and 0 if not specified.
|
||||
```
|
||||
// not AutoGrow mode
|
||||
Label label;
|
||||
assert(label.getAddress() == 0);
|
||||
L(label);
|
||||
assert(label.getAddress() == getCurr());
|
||||
```
|
||||
|
||||
### RIP-relative addressing
|
||||
```
|
||||
Label label;
|
||||
mov(eax, ptr [rip + label]); // eax = 4
|
||||
...
|
||||
|
||||
L(label);
|
||||
dd(4);
|
||||
```
|
||||
```
|
||||
int x;
|
||||
...
|
||||
mov(eax, ptr[rip + &x]); // throw exception if the difference between &x and current position is larger than 2GiB
|
||||
```
|
||||
|
||||
## Code size
|
||||
The default max code size is 4096 bytes.
|
||||
Specify the size in constructor of `CodeGenerator()` if necessary.
|
||||
|
||||
```
|
||||
class Quantize : public Xbyak::CodeGenerator {
|
||||
public:
|
||||
Quantize()
|
||||
: CodeGenerator(8192)
|
||||
{
|
||||
}
|
||||
...
|
||||
};
|
||||
```
|
||||
|
||||
## User allocated memory
|
||||
|
||||
You can generate JIT code in memory that you have prepared yourself.
|
||||
|
||||
Call `setProtectModeRE` yourself to change the memory protection mode when using your own prepared memory.
|
||||
|
||||
```
|
||||
alignas(4096) uint8_t buf[8192]; // C++11 or later
|
||||
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code() : Xbyak::CodeGenerator(sizeof(buf), buf)
|
||||
{
|
||||
mov(rax, 123);
|
||||
ret();
|
||||
}
|
||||
};
|
||||
|
||||
int main()
|
||||
{
|
||||
Code c;
|
||||
c.setProtectModeRE(); // set memory to Read/Exec
|
||||
printf("%d\n", c.getCode<int(*)()>()());
|
||||
}
|
||||
```
|
||||
|
||||
**Note**: See [sample/test0.cpp](sample/test0.cpp).
|
||||
|
||||
### AutoGrow
|
||||
|
||||
The memory region for jit is automatically extended if necessary when `AutoGrow` is specified in a constructor of `CodeGenerator`.
|
||||
|
||||
Call `ready()` or `readyRE()` before calling `getCode()` to fix jump address.
|
||||
```
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
: Xbyak::CodeGenerator(<default memory size>, Xbyak::AutoGrow)
|
||||
{
|
||||
...
|
||||
}
|
||||
};
|
||||
Code c;
|
||||
// generate code for jit
|
||||
c.ready(); // mode = Read/Write/Exec
|
||||
```
|
||||
|
||||
**Note**:
|
||||
* Don't use the address returned by `getCurr()` before calling `ready()`, because it may be an invalid address.
|
||||
|
||||
### Read/Exec mode
|
||||
By default, Xbyak sets the memory to Read/Write/Exec mode in order to run JIT code.
|
||||
If you want to use Read/Exec mode for security, then specify `DontSetProtectRWE` for `CodeGenerator` and
|
||||
call `setProtectModeRE()` after generating jit code.
|
||||
|
||||
```
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
: Xbyak::CodeGenerator(4096, Xbyak::DontSetProtectRWE)
|
||||
{
|
||||
mov(eax, 123);
|
||||
ret();
|
||||
}
|
||||
};
|
||||
|
||||
Code c;
|
||||
c.setProtectModeRE();
|
||||
...
|
||||
|
||||
```
|
||||
Call `readyRE()` instead of `ready()` when using `AutoGrow` mode.
|
||||
See [protect-re.cpp](sample/protect-re.cpp).
|
||||
|
||||
## Macro
|
||||
|
||||
* **XBYAK32** is defined on 32bit.
|
||||
* **XBYAK64** is defined on 64bit.
|
||||
* **XBYAK64_WIN** is defined on 64bit Windows(VC)
|
||||
* **XBYAK64_GCC** is defined on 64bit gcc, cygwin
|
||||
* define **XBYAK_USE_OP_NAMES** on gcc with `-fno-operator-names` if you want to use `and()`, ....
|
||||
* define **XBYAK_ENABLE_OMITTED_OPERAND** if you use omitted destination such as `vaddps(xmm2, xmm3);`(deprecated in the future)
|
||||
* define **XBYAK_UNDEF_JNL** if Bessel function jnl is defined as macro
|
||||
|
||||
## Sample
|
||||
|
||||
* [test0.cpp](sample/test0.cpp) ; tiny sample (x86, x64)
|
||||
* [quantize.cpp](sample/quantize.cpp) ; JIT optimized quantization by fast division (x86 only)
|
||||
* [calc.cpp](sample/calc.cpp) ; assemble and estimate a given polynomial (x86, x64)
|
||||
* [bf.cpp](sample/bf.cpp) ; JIT brainfuck (x86, x64)
|
||||
|
||||
## License
|
||||
|
||||
modified new BSD License
|
||||
http://opensource.org/licenses/BSD-3-Clause
|
||||
|
||||
## History
|
||||
* 2020/Feb/26 ver 5.891 fix typo of type
|
||||
* 2020/Jan/03 ver 5.89 fix error of vfpclasspd
|
||||
* 2019/Dec/20 ver 5.88 fix compile error on Windows
|
||||
* 2019/Dec/19 ver 5.87 add setDefaultJmpNEAR(), which deals with `jmp` of an undefined label as T_NEAR if no type is specified.
|
||||
* 2019/Dec/13 ver 5.86 [changed] revert to the behavior before v5.84 if -fno-operator-names is defined (and() is available)
|
||||
* 2019/Dec/07 ver 5.85 append MAP_JIT flag to mmap for macOS mojave or later
|
||||
* 2019/Nov/29 ver 5.84 [changed] XBYAK_NO_OP_NAMES is defined unless XBYAK_USE_OP_NAMES is defined
|
||||
* 2019/Oct/12 ver 5.83 exit(1) was removed
|
||||
* 2019/Sep/23 ver 5.82 support monitorx, mwaitx, clzero (thanks to @MagurosanTeam)
|
||||
* 2019/Sep/14 ver 5.81 support some generic mnemonics.
|
||||
* 2019/Aug/01 ver 5.802 fix detection of AVX512_BF16 (thanks to vpirogov)
|
||||
* 2019/May/27 support vp2intersectd, vp2intersectq (not tested)
|
||||
* 2019/May/26 ver 5.80 support vcvtne2ps2bf16, vcvtneps2bf16, vdpbf16ps
|
||||
* 2019/Apr/27 ver 5.79 vcmppd/vcmpps supports ptr_b(thanks to jkopinsky)
|
||||
* 2019/Apr/15 ver 5.78 rewrite Reg::changeBit() (thanks to MerryMage)
|
||||
* 2019/Mar/06 ver 5.77 fix number of cores that share LLC cache by densamoilov
|
||||
* 2019/Jan/17 ver 5.76 add Cpu::getNumCores() by shelleygoel
|
||||
* 2018/Oct/31 ver 5.751 recover Xbyak::CastTo for compatibility
|
||||
* 2018/Oct/29 ver 5.75 unlink LabelManager from Label when msg is destroyed
|
||||
* 2018/Oct/21 ver 5.74 support RegRip +/- int. Xbyak::CastTo is removed
|
||||
* 2018/Oct/15 util::AddressFrame uses push/pop instead of mov
|
||||
* 2018/Sep/19 ver 5.73 fix evex encoding of vpslld, vpslldq, vpsllw, etc for (reg, mem, imm8)
|
||||
* 2018/Sep/19 ver 5.72 fix the encoding of vinsertps for disp8N(Thanks to petercaday)
|
||||
* 2018/Sep/04 ver 5.71 L() returns a new label instance
|
||||
* 2018/Aug/27 ver 5.70 support setProtectMode() and DontUseProtect for read/exec setting
|
||||
* 2018/Aug/24 ver 5.68 fix wrong VSIB encoding with vector index >= 16(thanks to petercaday)
|
||||
* 2018/Aug/14 ver 5.67 remove mutable in Address ; fix setCacheHierarchy for cloud vm
|
||||
* 2018/Jul/26 ver 5.661 support mingw64
|
||||
* 2018/Jul/24 ver 5.66 add CodeArray::PROTECT_RE to mode of protect()
|
||||
* 2018/Jun/26 ver 5.65 fix push(qword [mem])
|
||||
* 2018/Mar/07 ver 5.64 fix zero division in Cpu() on some cpu
|
||||
* 2018/Feb/14 ver 5.63 fix Cpu::setCacheHierarchy() and fix EvexModifierZero for clang<3.9(thanks to mgouicem)
|
||||
* 2018/Feb/13 ver 5.62 Cpu::setCacheHierarchy() by mgouicem and rsdubtso
|
||||
* 2018/Feb/07 ver 5.61 vmov* supports mem{k}{z}(I forgot it)
|
||||
* 2018/Jan/24 ver 5.601 add xword, yword, etc. into Xbyak::util namespace
|
||||
* 2018/Jan/05 ver 5.60 support AVX-512 for Ice lake(319433-030.pdf)
|
||||
* 2017/Aug/22 ver 5.53 fix mpx encoding, add bnd() prefix
|
||||
* 2017/Aug/18 ver 5.52 fix align (thanks to MerryMage)
|
||||
* 2017/Aug/17 ver 5.51 add multi-byte nop and align() uses it(thanks to inolen)
|
||||
* 2017/Aug/08 ver 5.50 add mpx(thanks to magurosan)
|
||||
* 2017/Aug/08 ver 5.45 add sha(thanks to magurosan)
|
||||
* 2017/Aug/08 ver 5.44 add prefetchw(thanks to rsdubtso)
|
||||
* 2017/Jul/12 ver 5.432 reduce warnings of PVS studio
|
||||
* 2017/Jul/09 ver 5.431 fix hasRex() (no effect) (thanks to drillsar)
|
||||
* 2017/May/14 ver 5.43 fix CodeGenerator::resetSize() (thanks to gibbed)
|
||||
* 2017/May/13 ver 5.42 add movs{b,w,d,q}
|
||||
* 2017/Jan/26 ver 5.41 add prefetchwt1 and support for scale == 0(thanks to rsdubtso)
|
||||
* 2016/Dec/14 ver 5.40 add Label::getAddress() method to get the pointer specified by the label
|
||||
* 2016/Dec/09 ver 5.34 fix handling of negative offsets when encoding disp8N(thanks to rsdubtso)
|
||||
* 2016/Dec/08 ver 5.33 fix encoding of vpbroadcast{b,w,d,q}, vpinsr{b,w}, vpextr{b,w} for disp8N
|
||||
* 2016/Dec/01 ver 5.32 rename __xgetbv() to _xgetbv() to support clang for Visual Studio(thanks to freiro)
|
||||
* 2016/Nov/27 ver 5.31 rename AVX512_4VNNI to AVX512_4VNNIW
|
||||
* 2016/Nov/27 ver 5.30 add AVX512_4VNNI, AVX512_4FMAPS instructions(thanks to rsdubtso)
|
||||
* 2016/Nov/26 ver 5.20 add detection of AVX512_4VNNI and AVX512_4FMAPS(thanks to rsdubtso)
|
||||
* 2016/Nov/20 ver 5.11 lost vptest for ymm(thanks to gregory38)
|
||||
* 2016/Nov/20 ver 5.10 add addressing [rip+&var]
|
||||
* 2016/Sep/29 ver 5.03 fix detection ERR_INVALID_OPMASK_WITH_MEMORY(thanks to PVS-Studio)
|
||||
* 2016/Aug/15 ver 5.02 xbyak does not include xbyak_bin2hex.h
|
||||
* 2016/Aug/15 ver 5.011 fix detection of version of gcc 5.4
|
||||
* 2016/Aug/03 ver 5.01 disable omitted operand
|
||||
* 2016/Jun/24 ver 5.00 support avx-512 instruction set
|
||||
* 2016/Jun/13 avx-512 add mask instructions
|
||||
* 2016/May/05 ver 4.91 add detection of AVX-512 to Xbyak::util::Cpu
|
||||
* 2016/Mar/14 ver 4.901 comment to ready() function(thanks to skmp)
|
||||
* 2016/Feb/04 ver 4.90 add jcc(const void *addr);
|
||||
* 2016/Jan/30 ver 4.89 vpblendvb supports ymm reg(thanks to John Funnell)
|
||||
* 2016/Jan/24 ver 4.88 lea, cmov supports 16-bit register(thanks to whyisthisfieldhere)
|
||||
* 2015/Oct/05 ver 4.87 support segment selectors
|
||||
* 2015/Aug/18 ver 4.86 fix [rip + label] addressing with immediate value(thanks to whyisthisfieldhere)
|
||||
* 2015/Aug/10 ver 4.85 Address::operator==() is not correct(thanks to inolen)
|
||||
* 2015/Jun/22 ver 4.84 call() support variadic template if available(thanks to randomstuff)
|
||||
* 2015/Jun/16 ver 4.83 support movbe(thanks to benvanik)
|
||||
* 2015/May/24 ver 4.82 support detection of F16C
|
||||
* 2015/Apr/25 ver 4.81 fix the condition to throw exception for setSize(thanks to whyisthisfieldhere)
|
||||
* 2015/Apr/22 ver 4.80 rip supports label(thanks to whyisthisfieldhere)
|
||||
* 2015/Jan/28 ver 4.71 support adcx, adox, cmpxchg, rdseed, stac
|
||||
* 2014/Oct/14 ver 4.70 support MmapAllocator
|
||||
* 2014/Jun/13 ver 4.62 disable warning of VC2014
|
||||
* 2014/May/30 ver 4.61 support bt, bts, btr, btc
|
||||
* 2014/May/28 ver 4.60 support vcvtph2ps, vcvtps2ph
|
||||
* 2014/Apr/11 ver 4.52 add detection of rdrand
|
||||
* 2014/Mar/25 ver 4.51 remove state information of unreferenced labels
|
||||
* 2014/Mar/16 ver 4.50 support new Label
|
||||
* 2014/Mar/05 ver 4.40 fix wrong detection of BMI/enhanced rep on VirtualBox
|
||||
* 2013/Dec/03 ver 4.30 support Reg::cvt8(), cvt16(), cvt32(), cvt64()
|
||||
* 2013/Oct/16 ver 4.21 label support std::string
|
||||
* 2013/Jul/30 ver 4.20 [break backward compatibility] split Reg32e class into RegExp(base+index*scale+disp) and Reg32e(means Reg32 or Reg64)
|
||||
* 2013/Jul/04 ver 4.10 [break backward compatibility] change the type of Xbyak::Error from enum to a class
|
||||
* 2013/Jun/21 ver 4.02 add putL(LABEL) function to put the address of the label
|
||||
* 2013/Jun/21 ver 4.01 vpsllw, vpslld, vpsllq, vpsraw, vpsrad, vpsrlw, vpsrld, vpsrlq support (ymm, ymm, xmm). support vpbroadcastb, vpbroadcastw, vpbroadcastd, vpbroadcastq(thanks to Gabest).
|
||||
* 2013/May/30 ver 4.00 support AVX2, VEX-encoded GPR-instructions
|
||||
* 2013/Mar/27 ver 3.80 support mov(reg, "label");
|
||||
* 2013/Mar/13 ver 3.76 add cqo(), jcxz(), jecxz(), jrcxz()
|
||||
* 2013/Jan/15 ver 3.75 add setSize() to modify generated code
|
||||
* 2013/Jan/12 ver 3.74 add CodeGenerator::reset() ; add Allocator::useProtect()
|
||||
* 2013/Jan/06 ver 3.73 use unordered_map if possible
|
||||
* 2012/Dec/04 ver 3.72 eax, ebx, ... are member variables of CodeGenerator(revert), Xbyak::util::eax, ... are static const.
|
||||
* 2012/Nov/17 ver 3.71 and_(), or_(), xor_(), not_() are available if XBYAK_NO_OP_NAMES is not defined.
|
||||
* 2012/Nov/17 change eax, ebx, ptr and so on in CodeGenerator as static member and alias of them are defined in Xbyak::util.
|
||||
* 2012/Nov/09 ver 3.70 XBYAK_NO_OP_NAMES macro is added to use and_() instead of and() (thanks to Mattias)
|
||||
* 2012/Nov/01 ver 3.62 add fwait/fnwait/finit/fninit
|
||||
* 2012/Nov/01 ver 3.61 add fldcw/fstcw
|
||||
* 2012/May/03 ver 3.60 change interface of Allocator
|
||||
* 2012/Mar/23 ver 3.51 fix userPtr mode
|
||||
* 2012/Mar/19 ver 3.50 support AutoGrow mode
|
||||
* 2011/Nov/09 ver 3.05 fix bit property of rip addressing / support movsxd
|
||||
* 2011/Aug/15 ver 3.04 fix dealing with imm8 such as add(dword [ebp-8], 0xda); (thanks to lolcat)
|
||||
* 2011/Jun/16 ver 3.03 fix __GNUC_PREREQ macro for Mac gcc(thanks to t_teruya)
|
||||
* 2011/Apr/28 ver 3.02 do not use xgetbv on Mac gcc
|
||||
* 2011/May/24 ver 3.01 fix typo of OSXSAVE
|
||||
* 2011/May/23 ver 3.00 add vcmpeqps and so on
|
||||
* 2011/Feb/16 ver 2.994 beta add vmovq for 32-bit mode(I forgot it)
|
||||
* 2011/Feb/16 ver 2.993 beta remove cvtReg to avoid thread unsafe
|
||||
* 2011/Feb/10 ver 2.992 beta support one argument syntax for fadd like nasm
|
||||
* 2011/Feb/07 ver 2.991 beta fix pextrw reg, xmm, imm(Thanks to Gabest)
|
||||
* 2011/Feb/04 ver 2.99 beta support AVX
|
||||
* 2010/Dec/08 ver 2.31 fix ptr [rip + 32bit offset], support rdtscp
|
||||
* 2010/Oct/19 ver 2.30 support pclmulqdq, aesdec, aesdeclast, aesenc, aesenclast, aesimc, aeskeygenassist
|
||||
* 2010/Jun/07 ver 2.29 fix call(<label>)
|
||||
* 2010/Jun/17 ver 2.28 move some member functions to public
|
||||
* 2010/Jun/01 ver 2.27 support encoding of mov(reg64, imm) like yasm(not nasm)
|
||||
* 2010/May/24 ver 2.26 fix sub(rsp, 1000)
|
||||
* 2010/Apr/26 ver 2.25 add jc/jnc(I forgot to implement them...)
|
||||
* 2010/Apr/16 ver 2.24 change the prototype of rewrite() method
|
||||
* 2010/Apr/15 ver 2.23 fix align() and xbyak_util.h for Mac
|
||||
* 2010/Feb/16 ver 2.22 fix inLocalLabel()/outLocalLabel()
|
||||
* 2009/Dec/09 ver 2.21 support cygwin(gcc 4.3.2)
|
||||
* 2009/Nov/28 support a part of FPU
|
||||
* 2009/Jun/25 fix mov(qword[rax], imm); (thanks to Martin)
|
||||
* 2009/Mar/10 fix redundant REX.W prefix on jmp/call reg64
|
||||
* 2009/Feb/24 add movq reg64, mmx/xmm; movq mmx/xmm, reg64
|
||||
* 2009/Feb/13 movd(xmm7, dword[eax]) drops 0x66 prefix (thanks to Gabest)
|
||||
* 2008/Dec/30 fix call in short relative address(thanks to kato san)
|
||||
* 2008/Sep/18 support @@, @f, @b and localization of label(thanks to nobu-q san)
|
||||
* 2008/Sep/18 support (ptr[rip + 32bit offset]) (thanks to Dango-Chu san)
|
||||
* 2008/Jun/03 fix align(). mov(ptr[eax],1) throws ERR_MEM_SIZE_IS_NOT_SPECIFIED.
|
||||
* 2008/Jun/02 support memory interface allocated by user
|
||||
* 2008/May/26 fix protect() to avoid invalid setting(thanks to shinichiro_h san)
|
||||
* 2008/Apr/30 add cmpxchg16b, cdqe
|
||||
* 2008/Apr/29 support x64
|
||||
* 2008/Apr/14 code refactoring
|
||||
* 2008/Mar/12 add bsr/bsf
|
||||
* 2008/Feb/14 fix output of sub eax, 1234 (thanks to Robert)
|
||||
* 2007/Nov/5 support lock, xadd, xchg
|
||||
* 2007/Nov/2 support SSSE3/SSE4 (thanks to Dango-Chu san)
|
||||
* 2007/Feb/4 fix the bug that exception doesn't occur under the condition which the offset of jmp mnemonic without T_NEAR is over 127.
|
||||
* 2007/Jan/21 fix the bug to create address like [disp] select smaller representation for mov (eax|ax|al, [disp])
|
||||
* 2007/Jan/4 first version
|
||||
|
||||
## Author
|
||||
MITSUNARI Shigeo(herumi@nifty.com)
|
||||
|
|
@ -0,0 +1,534 @@
|
|||
|
||||
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.891
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
◎概要
|
||||
|
||||
これはx86, x64(AMD64, x86-64)のマシン語命令を生成するC++のクラスライブラリです。
|
||||
プログラム実行時に動的にアセンブルすることが可能です。
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
◎特徴
|
||||
|
||||
・ヘッダファイルオンリー
|
||||
xbyak.hをインクルードするだけですぐ利用することができます。
|
||||
C++の枠組み内で閉じているため、外部アセンブラは不要です。
|
||||
32bit/64bit両対応です。
|
||||
対応ニーモニック:特権命令除くx86, MMX/MMX2/SSE/SSE2/SSE3/SSSE3/SSE4/FPU(一部)/AVX/AVX2/FMA/VEX-encoded GPR
|
||||
|
||||
・Windows Xp(32bit, 64bit), Windows 7/Linux(32bit, 64bit)/Intel Mac対応
|
||||
Windows Xp, Windows 7上ではVC2008, VC2010, VC2012
|
||||
Linux (kernel 3.8)上ではgcc 4.7.3, clang 3.3
|
||||
Intel Mac
|
||||
などで動作確認をしています。
|
||||
|
||||
※ and, orなどの代わりにand_, or_を使用してください。
|
||||
and, orなどを使いたい場合は-fno-operator-namesをgcc/clangに指定してください。
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
◎準備
|
||||
xbyak.h
|
||||
xbyak_bin2hex.h
|
||||
xbyak_mnemonic.h
|
||||
これらを同一のパスに入れてインクルードパスに追加してください。
|
||||
|
||||
Linuxではmake installで/usr/local/include/xbyakにコピーされます。
|
||||
-----------------------------------------------------------------------------
|
||||
◎下位互換性の破れ
|
||||
* Xbyak::Errorの型をenumからclassに変更
|
||||
** 従来のenumの値をとるにはintにキャストしてください。
|
||||
* (古い)Reg32eクラスを(新しい)Reg32eとRegExpに分ける。
|
||||
** (新しい)Reg32eはReg32かReg64
|
||||
** (新しい)RegExpは'Reg32e + (Reg32e|Xmm|Ymm) * scale + disp'の型
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
◎新機能
|
||||
|
||||
MmapAllocator追加
|
||||
これはUnix系OSでのみの仕様です。XBYAK_USE_MMAP_ALLOCATORを使うと利用できます。
|
||||
デフォルトのAllocatorはメモリ確保時にposix_memalignを使います。
|
||||
この領域に対するmprotectはmap countを減らします。
|
||||
map countの最大値は/proc/sys/vm/max_map_countに書かれています。
|
||||
デフォルトでは3万個ほどのXbyak::CodeGeneratorインスタンスを生成するとエラーになります。
|
||||
test/mprotect_test.cppで確認できます。
|
||||
これを避けるためにはmmapを使うMmapAllocatorを使ってください。
|
||||
将来この挙動がデフォルトになるかもしれません。
|
||||
|
||||
|
||||
AutoGrowモード追加
|
||||
これはメモリ伸長を動的に行うモードです。
|
||||
今まではXbyak::CodeGenerator()に渡したメモリサイズを超えると例外が発生して
|
||||
いましたが、このモードでは内部でメモリを再確保して伸長します。
|
||||
ただし、getCode()を呼び出す前にジャンプ命令のアドレス解決をするためにready()
|
||||
関数を呼ぶ必要があります。
|
||||
|
||||
次のように使います。
|
||||
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
: Xbyak::CodeGenerator(<default memory size>, Xbyak::AutoGrow)
|
||||
{
|
||||
...
|
||||
}
|
||||
};
|
||||
Code c;
|
||||
c.ready(); // この呼び出しを忘れてはいけない
|
||||
|
||||
注意1. ready()を呼んで確定するまではgetCurr()で得たポインタは無効化されている
|
||||
可能性があります。getSize()でoffsetを保持しておきready()のあとにgetCode()を
|
||||
呼び出してからgetCode() + offsetで新しいポインタを取得してください。
|
||||
|
||||
注意2. AutoGrowモードでは64bitモードの相対アドレッシング[rip]は非サポートです。
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
◎文法
|
||||
|
||||
Xbyak::CodeGeneratorクラスを継承し、そのクラスメソッド内でx86, x64アセンブラを
|
||||
記述します。そのメソッドを呼び出した後、getCode()メソッドを呼び出し、その戻
|
||||
り値を自分が使いたい関数ポインタに変換して利用します。アセンブルエラーは例外
|
||||
により通知されます(cf. main.cpp)。
|
||||
|
||||
・基本的にnasmの命令で括弧をつければよいです。
|
||||
|
||||
mov eax, ebx --> mov(eax, ebx);
|
||||
inc ecx --> inc(ecx);
|
||||
ret --> ret();
|
||||
|
||||
・アドレッシング
|
||||
|
||||
(ptr|dword|word|byte) [base + index * (1|2|4|8) + displacement]
|
||||
[rip + 32bit disp] ; x64 only
|
||||
という形で指定します。サイズを指定する必要がない限りptrを使えばよいです。
|
||||
|
||||
セレクター(セグメントレジスタ)をサポートしました。
|
||||
(注意)セグメントレジスタはOperandを継承していません。
|
||||
|
||||
mov eax, [fs:eax] --> putSeg(fs); mov(eax, ptr [eax]);
|
||||
mov ax, cs --> mov(ax, cs);
|
||||
|
||||
mov eax, [ebx+ecx] --> mov (eax, ptr[ebx+ecx]);
|
||||
test byte [esp], 4 --> test (byte [esp], 4);
|
||||
|
||||
(注意) dword, word, byteはメンバ変数です。従ってたとえばunsigned intの
|
||||
つもりでdwordをtypedefしないでください。
|
||||
|
||||
・AVX
|
||||
|
||||
FMAについては簡略表記を導入するか検討中です(アイデア募集中)。
|
||||
|
||||
vaddps(xmm1, xmm2, xmm3); // xmm1 <- xmm2 + xmm3
|
||||
vaddps(xmm2, xmm3, ptr [rax]); // メモリアクセスはptrで
|
||||
|
||||
vfmadd231pd(xmm1, xmm2, xmm3); // xmm1 <- (xmm2 * xmm3) + xmm1
|
||||
|
||||
*注意*
|
||||
デスティネーションの省略形はサポートされなくなりました。
|
||||
|
||||
vaddps(xmm2, xmm3); // xmm2 <- xmm2 + xmm3
|
||||
|
||||
XBYAK_ENABLE_OMITTED_OPERANDを定義すると使えますが、将来はそれも非サポートになるでしょう。
|
||||
|
||||
・AVX-512
|
||||
|
||||
vaddpd zmm2, zmm5, zmm30 --> vaddpd(zmm2, zmm5, zmm30);
|
||||
vaddpd xmm30, xmm20, [rax] --> vaddpd(xmm30, xmm20, ptr [rax]);
|
||||
vaddps xmm30, xmm20, [rax] --> vaddps(xmm30, xmm20, ptr [rax]);
|
||||
vaddpd zmm2{k5}, zmm4, zmm2 --> vaddpd(zmm2 | k5, zmm4, zmm2);
|
||||
vaddpd zmm2{k5}{z}, zmm4, zmm2 --> vaddpd(zmm2 | k5 | T_z, zmm4, zmm2);
|
||||
vaddpd zmm2{k5}{z}, zmm4, zmm2,{rd-sae} --> vaddpd(zmm2 | k5 | T_z, zmm4, zmm2 | T_rd_sae);
|
||||
vaddpd(zmm2 | k5 | T_z | T_rd_sae, zmm4, zmm2); // the position of `|` is arbitrary.
|
||||
vcmppd k4{k3}, zmm1, zmm2, {sae}, 5 --> vcmppd(k4 | k3, zmm1, zmm2 | T_sae, 5);
|
||||
|
||||
vaddpd xmm1, xmm2, [rax+256]{1to2} --> vaddpd(xmm1, xmm2, ptr_b [rax+256]);
|
||||
vaddpd ymm1, ymm2, [rax+256]{1to4} --> vaddpd(ymm1, ymm2, ptr_b [rax+256]);
|
||||
vaddpd zmm1, zmm2, [rax+256]{1to8} --> vaddpd(zmm1, zmm2, ptr_b [rax+256]);
|
||||
vaddps zmm1, zmm2, [rax+rcx*8+8]{1to16} --> vaddps(zmm1, zmm2, ptr_b [rax+rcx*8+8]);
|
||||
vmovsd [rax]{k1}, xmm4 --> vmovsd(ptr [rax] | k1, xmm4);
|
||||
|
||||
vcvtpd2dq xmm16, oword [eax+33] --> vcvtpd2dq(xmm16, xword [eax+33]); // use xword for m128 instead of oword
|
||||
vcvtpd2dq(xmm16, ptr [eax+33]); // default xword
|
||||
vcvtpd2dq xmm21, [eax+32]{1to2} --> vcvtpd2dq(xmm21, ptr_b [eax+32]);
|
||||
vcvtpd2dq xmm0, yword [eax+33] --> vcvtpd2dq(xmm0, yword [eax+33]); // use yword for m256
|
||||
vcvtpd2dq xmm19, [eax+32]{1to4} --> vcvtpd2dq(xmm19, yword_b [eax+32]); // use yword_b to broadcast
|
||||
|
||||
vfpclassps k5{k3}, zword [rax+64], 5 --> vfpclassps(k5|k3, zword [rax+64], 5); // specify m512
|
||||
vfpclasspd k5{k3}, [rax+64]{1to2}, 5 --> vfpclasspd(k5|k3, xword_b [rax+64], 5); // broadcast 64-bit to 128-bit
|
||||
vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, xword_b [rax+64], 5); // broadcast 32-bit to 128-bit
|
||||
|
||||
|
||||
注意
|
||||
* k1, ..., k7 は新しいopmaskレジスタです。
|
||||
* z, sae, rn-sae, rd-sae, ru-sae, rz-saeの代わりにT_z, T_sae, T_rn_sae, T_rd_sae, T_ru_sae, T_rz_saeを使ってください。
|
||||
* `k4 | k3`と`k3 | k4`は意味が異なります。
|
||||
* {1toX}の代わりにptr_bを使ってください。Xは自動的に決まります。
|
||||
* 一部の命令はメモリサイズを指定するためにxword/yword/zword(_b)を使ってください。
|
||||
|
||||
・ラベル
|
||||
|
||||
L(文字列);
|
||||
で定義します。ジャンプするときはその文字列を指定します。後方参照も可能ですが、
|
||||
相対アドレスが8ビットに収まらない場合はT_NEARをつけないと実行時に例外が発生
|
||||
します。
|
||||
mov(eax, "L2");の様にラベルが表すアドレスをmovの即値として使えます。
|
||||
|
||||
・hasUndefinedLabel()を呼び出して真ならジャンプ先が存在しないことを示します。
|
||||
コードを見直してください。
|
||||
|
||||
L("L1");
|
||||
jmp ("L1");
|
||||
|
||||
jmp ("L2");
|
||||
...
|
||||
少しの命令の場合。
|
||||
...
|
||||
L("L2");
|
||||
|
||||
jmp ("L3", T_NEAR);
|
||||
...
|
||||
沢山の命令がある場合
|
||||
...
|
||||
L("L3");
|
||||
|
||||
<応用編>
|
||||
|
||||
1. MASMライクな@@, @f, @bをサポート
|
||||
|
||||
L("@@"); // <A>
|
||||
jmp("@b"); // jmp to <A>
|
||||
jmp("@f"); // jmp to <B>
|
||||
L("@@"); // <B>
|
||||
jmp("@b"); // jmp to <B>
|
||||
mov(eax, "@b");
|
||||
jmp(eax); // jmp to <B>
|
||||
|
||||
2. ラベルの局所化
|
||||
|
||||
ピリオドで始まるラベルをinLocalLabel(), outLocalLabel()で挟むことで局所化できます。
|
||||
inLocalLabel(), outLocalLabel()は入れ子にすることができます。
|
||||
|
||||
void func1()
|
||||
{
|
||||
inLocalLabel();
|
||||
L(".lp"); // <A> ; ローカルラベル
|
||||
...
|
||||
jmp(".lp"); // jmp to <A>
|
||||
L("aaa"); // グローバルラベル
|
||||
outLocalLabel();
|
||||
}
|
||||
|
||||
void func2()
|
||||
{
|
||||
inLocalLabel();
|
||||
L(".lp"); // <B> ; ローカルラベル
|
||||
func1();
|
||||
jmp(".lp"); // jmp to <B>
|
||||
outLocalLabel();
|
||||
}
|
||||
|
||||
上記サンプルではinLocalLabel(), outLocalLabel()が無いと、
|
||||
".lp"ラベルの二重定義エラーになります。
|
||||
|
||||
3. 新しいLabelクラスによるジャンプ命令
|
||||
|
||||
ジャンプ先を文字列による指定だけでなくラベルクラスを使えるようになりました。
|
||||
|
||||
Label label1, label2;
|
||||
L(label1);
|
||||
...
|
||||
jmp(label1);
|
||||
...
|
||||
jmp(label2);
|
||||
...
|
||||
L(label2);
|
||||
|
||||
更にラベルの割り当てを行うassignL(dstLabel, srcLabel)という命令も追加されました。
|
||||
|
||||
Label label2;
|
||||
Label label1 = L(); // Label label1; L(label1);と同じ意味
|
||||
...
|
||||
jmp(label2);
|
||||
...
|
||||
assignL(label2, label1);
|
||||
|
||||
上記jmp命令はlabel1にジャンプします。
|
||||
|
||||
制限
|
||||
* srcLabelはL()により飛び先が確定していないといけません。
|
||||
* dstLabelはL()により飛び先が確定していてはいけません。
|
||||
|
||||
ラベルは`getAddress()`によりそのアドレスを取得できます。
|
||||
未定義のときは0が返ります。
|
||||
```
|
||||
// not AutoGrow mode
|
||||
Label label;
|
||||
assert(label.getAddress() == 0);
|
||||
L(label);
|
||||
assert(label.getAddress() == getCurr());
|
||||
```
|
||||
|
||||
・Xbyak::CodeGenerator()コンストラクタインタフェース
|
||||
|
||||
@param maxSize [in] コード生成最大サイズ(デフォルト4096byte)
|
||||
@param userPtr [in] ユーザ指定メモリ
|
||||
|
||||
CodeGenerator(size_t maxSize = DEFAULT_MAX_CODE_SIZE, void *userPtr = 0);
|
||||
|
||||
デフォルトコードサイズは4096(=DEFAULT_MAX_CODE_SIZE)バイトです。
|
||||
それより大きなコードを生成する場合はCodeGenerator()のコンストラクタに指定してください。
|
||||
|
||||
class Quantize : public Xbyak::CodeGenerator {
|
||||
public:
|
||||
Quantize()
|
||||
: CodeGenerator(8192)
|
||||
{
|
||||
}
|
||||
...
|
||||
};
|
||||
|
||||
またユーザ指定メモリをコード生成最大サイズと共に指定すると、CodeGeneratorは
|
||||
指定されたメモリ上にバイト列を生成します。
|
||||
|
||||
補助関数として指定されたアドレスの実行属性を変更するCodeArray::protect()と
|
||||
与えられたポインタからアライメントされたポインタを取得するCodeArray::getAlignedAddress()
|
||||
も用意しました。詳細はsample/test0.cppのuse memory allocated by userを参考に
|
||||
してください。
|
||||
|
||||
/**
|
||||
change exec permission of memory
|
||||
@param addr [in] buffer address
|
||||
@param size [in] buffer size
|
||||
@param canExec [in] true(enable to exec), false(disable to exec)
|
||||
@return true(success), false(failure)
|
||||
*/
|
||||
bool CodeArray::protect(const void *addr, size_t size, bool canExec);
|
||||
|
||||
/**
|
||||
get aligned memory pointer
|
||||
*/
|
||||
uint8 *CodeArray::getAlignedAddress(uint8 *addr, size_t alignedSize = ALIGN_SIZE);
|
||||
|
||||
・read/execモード
|
||||
デフォルトのCodeGeneratorはコンストラクト時にJIT用の領域をread/write/execモードに設定して利用します。
|
||||
コード生成時はread/writeでコード実行時にはread/execにしたい場合、次のようにしてください。
|
||||
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
: Xbyak::CodeGenerator(4096, Xbyak::DontUseProtect) // JIT領域をread/writeのままコード生成
|
||||
{
|
||||
mov(eax, 123);
|
||||
ret();
|
||||
}
|
||||
};
|
||||
|
||||
Code c;
|
||||
c.setProtectModeRE(); // read/execモードに変更
|
||||
// JIT領域を実行
|
||||
|
||||
AutoGrowの場合はready()の代わりにreadyRE()を呼んでください。
|
||||
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
: Xbyak::CodeGenerator(4096, Xbyak::AutoGrow) // JIT領域をread/writeのままコード生成
|
||||
{
|
||||
mov(eax, 123);
|
||||
ret();
|
||||
}
|
||||
};
|
||||
|
||||
Code c;
|
||||
c.readyRE(); // read/execモードに変更
|
||||
// JIT領域を実行
|
||||
|
||||
setProtectModeRW()を呼ぶと領域が元のread/writeモードに戻ります。
|
||||
|
||||
|
||||
その他詳細は各種サンプルを参照してください。
|
||||
-----------------------------------------------------------------------------
|
||||
◎マクロ
|
||||
|
||||
32bit環境上でコンパイルするとXBYAK32が、64bit環境上でコンパイルするとXBYAK64が
|
||||
定義されます。さらに64bit環境上ではWindows(VC)ならXBYAK64_WIN、cygwin, gcc上では
|
||||
XBYAK64_GCCが定義されます。
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
◎使用例
|
||||
|
||||
test0.cpp ; 簡単な例(x86, x64)
|
||||
quantize.cpp ; 割り算のJITアセンブルによる量子化の高速化(x86)
|
||||
calc.cpp ; 与えられた多項式をアセンブルして実行(x86, x64)
|
||||
boost(http://www.boost.org/)が必要
|
||||
bf.cpp ; JIT Brainfuck(x86, x64)
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
◎ライセンス
|
||||
|
||||
修正された新しいBSDライセンスに従います。
|
||||
http://opensource.org/licenses/BSD-3-Clause
|
||||
|
||||
sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から
|
||||
いただきました。
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
◎履歴
|
||||
|
||||
2020/02/26 ver 5.891 zm0のtype修正
|
||||
2020/01/03 ver 5.89 vfpclasspdの処理エラー修正
|
||||
2019/12/20 ver 5.88 Windowsでのコンパイルエラー修正
|
||||
2019/12/19 ver 5.87 未定義ラベルへのjmp命令のデフォルト挙動をT_NEARにするsetDefaultJmpNEAR()を追加
|
||||
2019/12/13 ver 5.86 [変更] -fno-operator-namesが指定されたときは5.84以前の挙動に戻す
|
||||
2019/12/07 ver 5.85 mmapにMAP_JITフラグを追加(macOS mojave以上)
|
||||
2019/11/29 ver 5.84 [変更] XBYAK_USE_OP_NAMESが定義されていない限りXBYAK_NO_OP_NAMESが定義されるように変更
|
||||
2019/10/12 ver 5.83 exit(1)の除去
|
||||
2019/09/23 ver 5.82 monitorx, mwaitx, clzero対応 (thanks to MagurosanTeam)
|
||||
2019/09/14 ver 5.81 いくつかの一般命令をサポート
|
||||
2019/08/01 ver 5.802 AVX512_BF16判定修正 (thanks to vpirogov)
|
||||
2019/05/27 support vp2intersectd, vp2intersectq (not tested)
|
||||
2019/05/26 ver 5.80 support vcvtne2ps2bf16, vcvtneps2bf16, vdpbf16ps
|
||||
2019/04/27 ver 5.79 vcmppd/vcmppsのptr_b対応忘れ(thanks to jkopinsky)
|
||||
2019/04/15 ver 5.78 Reg::changeBit()のリファクタリング(thanks to MerryMage)
|
||||
2019/03/06 ver 5.77 LLCキャッシュを共有するCPU数の修正(by densamoilov)
|
||||
2019/01/17 ver 5.76 Cpu::getNumCores()追加(by shelleygoel)
|
||||
2018/10/31 ver 5.751 互換性のためにXbyak::CastToの復元
|
||||
2018/10/29 ver 5.75 LabelManagerのデストラクタでLabelから参照を切り離す
|
||||
2018/10/21 ver 5.74 RegRip +/intの形をサポート Xbyak::CastToを削除
|
||||
2018/10/15 util::StackFrameでmovの代わりにpush/popを使う
|
||||
2018/09/19 ver 5.73 vpslld, vpslldq, vpsllwなどの(reg, mem, imm8)に対するevexエンコーディング修整
|
||||
2018/09/19 ver 5.72 fix the encoding of vinsertps for disp8N(Thanks to petercaday)
|
||||
2018/08/27 ver 5.71 新しいlabelインスタンスを返すL()を追加
|
||||
2018/08/27 ver 5.70 read/exec設定のためのsetProtectMode()とDontUseProtectの追加
|
||||
2018/08/24 ver 5.68 indexが16以上のVSIBエンコーディングのバグ修正(thanks to petercaday)
|
||||
2018/08/14 ver 5.67 Addressクラス内のmutableを削除 ; fix setCacheHierarchy for cloud vm
|
||||
2018/07/26 ver 5.661 mingw64対応
|
||||
2018/07/24 ver 5.66 protect()のmodeにCodeArray::PROTECT_REを追加
|
||||
2018/06/26 ver 5.65 fix push(qword [mem])
|
||||
2018/03/07 ver 5.64 Cpu()の中でzero divisionが出ることがあるのを修正
|
||||
2018/02/14 ver 5.63 Cpu::setCacheHierarchy()の修正とclang<3.9のためのEvexModifierZero修正(thanks to mgouicem)
|
||||
2018/02/13 ver 5.62 Cpu::setCacheHierarchy() by mgouicem and rsdubtso
|
||||
2018/02/07 ver 5.61 vmov*がmem{k}{z}形式対応(忘れてた)
|
||||
2018/01/24 ver 5.601 xword, ywordなどをXbyak::util名前空間に追加
|
||||
2018/01/05 ver 5.60 Ice lake系命令対応(319433-030.pdf)
|
||||
2017/08/22 ver 5.53 mpxエンコーディングバグ修正, bnd()プレフィクス追加
|
||||
2017/08/18 ver 5.52 align修正(thanks to MerryMage)
|
||||
2017/08/17 ver 5.51 multi-byte nop追加 align()はそれを使用する(thanks to inolen)
|
||||
2017/08/08 ver 5.50 mpx追加(thanks to magurosan)
|
||||
2017/08/08 ver 5.45 sha追加(thanks to magurosan)
|
||||
2017/08/08 ver 5.44 prefetchw追加(thanks to rsdubtso)
|
||||
2017/07/12 ver 5.432 PVS-studioの警告を減らす
|
||||
2017/07/09 ver 5.431 hasRex()修正 (影響なし) (thanks to drillsar)
|
||||
2017/05/14 ver 5.43 CodeGenerator::resetSize()修正(thanks to gibbed)
|
||||
2017/05/13 ver 5.42 movs{b,w,d,q}追加
|
||||
2017/01/26 ver 5.41 prefetchwt1追加とscale == 0対応(thanks to rsdubtso)
|
||||
2016/12/14 ver 5.40 Labelが示すアドレスを取得するLabel::getAddress()追加
|
||||
2016/12/07 ver 5.34 disp8N時の負のオフセット処理の修正(thanks to rsdubtso)
|
||||
2016/12/06 ver 5.33 disp8N時のvpbroadcast{b,w,d,q}, vpinsr{b,w}, vpextr{b,w}のバグ修正
|
||||
2016/12/01 ver 5.32 clang for Visual Studioサポートのために__xgetbv()を_xgetbv()に変更(thanks to freiro)
|
||||
2016/11/27 ver 5.31 AVX512_4VNNIをAVX512_4VNNIWに変更
|
||||
2016/11/27 ver 5.30 AVX512_4VNNI, AVX512_4FMAPS命令の追加(thanks to rsdubtso)
|
||||
2016/11/26 ver 5.20 AVX512_4VNNIとAVX512_4FMAPSの判定追加(thanks to rsdubtso)
|
||||
2016/11/20 ver 5.11 何故か消えていたvptest for ymm追加(thanks to gregory38)
|
||||
2016/11/20 ver 5.10 [rip+&var]の形のアドレッシング追加
|
||||
2016/09/29 ver 5.03 ERR_INVALID_OPMASK_WITH_MEMORYの判定ミス修正(thanks to PVS-Studio)
|
||||
2016/08/15 ver 5.02 xbyak_bin2hex.hをincludeしない
|
||||
2016/08/15 ver 5.011 gcc 5.4のバージョン取得ミスの修正
|
||||
2016/08/03 ver 5.01 AVXの省略表記非サポート
|
||||
2016/07/24 ver 5.00 avx-512フルサポート
|
||||
2016/06/13 avx-512 opmask命令サポート
|
||||
2016/05/05 ver 4.91 AVX-512命令の検出サポート
|
||||
2016/03/14 ver 4.901 ready()関数にコメント加筆(thanks to skmp)
|
||||
2016/02/04 ver 4.90 条件分岐命令にjcc(const void *addr);のタイプを追加
|
||||
2016/01/30 ver 4.89 vpblendvbがymmレジスタをサポートしていなかった(thanks to John Funnell)
|
||||
2016/01/24 ver 4.88 lea, cmovの16bitレジスタ対応(thanks to whyisthisfieldhere)
|
||||
2015/08/16 ver 4.87 セグメントセレクタに対応
|
||||
2015/08/16 ver 4.86 [rip + label]アドレッシングで即値を使うと壊れる(thanks to whyisthisfieldhere)
|
||||
2015/08/10 ver 4.85 Address::operator==()が間違っている(thanks to inolen)
|
||||
2015/07/22 ver 4.84 call()がvariadic template対応
|
||||
2015/05/24 ver 4.83 movbeサポート(thanks to benvanik)
|
||||
2015/05/24 ver 4.82 F16Cが使えるかどうかの判定追加
|
||||
2015/04/25 ver 4.81 setSizeが例外を投げる条件を修正(thanks to whyisthisfieldhere)
|
||||
2015/04/22 ver 4.80 rip相対でLabelのサポート(thanks to whyisthisfieldhere)
|
||||
2015/01/28 ver 4.71 adcx, adox, cmpxchg, rdseed, stacのサポート
|
||||
2014/10/14 ver 4.70 MmapAllocatorのサポート
|
||||
2014/06/13 ver 4.62 VC2014で警告抑制
|
||||
2014/05/30 ver 4.61 bt, bts, btr, btcのサポート
|
||||
2014/05/28 ver 4.60 vcvtph2ps, vcvtps2phのサポート
|
||||
2014/04/11 ver 4.52 rdrandの判定追加
|
||||
2014/03/25 ver 4.51 参照されなくなったラベルの状態を削除する
|
||||
2014/03/16 ver 4.50 新しいラベルクラスのサポート
|
||||
2014/03/05 ver 4.40 VirtualBox上でBMI/enhanced repのサポート判定を間違うことがあるのを修正
|
||||
2013/12/03 ver 4.30 Reg::cvt8(), cvt16(), cvt32()のサポート
|
||||
2013/10/16 ver 4.21 ラベルでstd::stringを受け付ける。
|
||||
2013/07/30 ver 4.20 [break backward compatibility] 従来のReg32eクラスをアドレッシング用のRegExpとReg32, Reg64を表すReg32eに分離
|
||||
2013/07/04 ver 4.10 [break backward compatibility] Xbyak::Errorの型をenumからclassに変更
|
||||
2013/06/21 ver 4.02 LABELの指すアドレスを書き込むputL(LABEL)関数の追加。
|
||||
2013/06/21 ver 4.01 vpsllw, vpslld, vpsllq, vpsraw, vpsrad, vpsrlw, vpsrld, vpsrlq support (ymm, ymm, xmm)
|
||||
support vpbroadcastb, vpbroadcastw, vpbroadcastd, vpbroadcastq(thanks to Gabest)
|
||||
2013/05/30 ver 4.00 AVX2, VEX-encoded GPR-instructionをサポート
|
||||
2013/03/27 ver 3.80 mov(reg, "label");をサポート
|
||||
2013/03/13 ver 3.76 cqo, jcxz, jecxz, jrcxz追加
|
||||
2013/01/15 ver 3.75 生成されたコードを修正するためにsetSize()を追加
|
||||
2013/01/12 ver 3.74 CodeGenerator::reset()とAllocator::useProtect()を追加
|
||||
2013/01/06 ver 3.73 可能ならunordered_mapを使う
|
||||
2012/12/04 ver 3.72 eaxなどをCodeGeneratorのメンバ変数に戻す. Xbyak::util::eaxはstatic const変数
|
||||
2012/11/17 ver 3.71 and_(), or_(), xor_(), not_()をXBYAK_NO_OP_NAMESが定義されていないときでも使えるようにした
|
||||
2012/11/17 CodeGeneratorのeax, ecx, ptrなどのメンバ変数をstaticにし、const参照をXbyak::utilにも定義
|
||||
2012/11/09 ver 3.70 and()をand_()にするためのマクロXBYAK_NO_OP_NAMESを追加(thanks to Mattias)
|
||||
2012/11/01 ver 3.62 add fwait/fnwait/finit/fninit
|
||||
2012/11/01 ver 3.61 add fldcw/fstcw
|
||||
2012/05/03 ver 3.60 Allocatorクラスのインタフェースを変更
|
||||
2012/03/23 ver 3.51 userPtrモードがバグったのを修正
|
||||
2012/03/19 ver 3.50 AutoGrowモードサポート
|
||||
2011/11/09 ver 3.05 rip相対の64bitサイズ以外の扱いのバグ修正 / movsxdサポート
|
||||
2011/08/15 ver 3.04 add(dword [ebp-8], 0xda);などにおけるimm8の扱いのバグ修正(thanks to lolcat)
|
||||
2011/06/16 ver 3.03 Macのgcc上での__GNUC_PREREQがミスってたのを修正(thanks to t_teruya)
|
||||
2011/04/28 ver 3.02 Macのgcc上ではxgetbvをdisable
|
||||
2011/03/24 ver 3.01 fix typo of OSXSAVE
|
||||
2011/03/23 ver 3.00 vcmpeqpsなどを追加
|
||||
2011/02/16 ver 2.994 beta add vmovq for 32-bit mode(I forgot it)
|
||||
2011/02/16 ver 2.993 beta remove cvtReg to avoid thread unsafe
|
||||
2011/02/10 ver 2.992 beta support one argument syntax for fadd like nasm
|
||||
2011/02/07 ver 2.991 beta fix pextrw reg, xmm, imm(Thanks to Gabest)
|
||||
2011/02/04 ver 2.99 beta support AVX
|
||||
2010/12/08 ver 2.31 fix ptr [rip + 32bit offset], support rdtscp
|
||||
2010/10/19 ver 2.30 support pclmulqdq, aesdec, aesdeclast, aesenc, aesenclast, aesimc, aeskeygenassist
|
||||
2010/07/07 ver 2.29 fix call(<label>)
|
||||
2010/06/17 ver 2.28 move some member functions to public
|
||||
2010/06/01 ver 2.27 support encoding of mov(reg64, imm) like yasm(not nasm)
|
||||
2010/05/24 ver 2.26 fix sub(rsp, 1000)
|
||||
2010/04/26 ver 2.25 add jc/jnc(I forgot to implement them...)
|
||||
2010/04/16 ver 2.24 change the prototype of rewrite() method
|
||||
2010/04/15 ver 2.23 fix align() and xbyak_util.h for Mac
|
||||
2010/02/16 ver 2.22 fix inLocalLabel()/outLocalLabel()
|
||||
2009/12/09 ver 2.21 support cygwin(gcc 4.3.2)
|
||||
2009/11/28 ver 2.20 FPUの一部命令サポート
|
||||
2009/06/25 ver 2.11 64bitモードでの mov(qword[rax], imm); 修正(thanks to Martinさん)
|
||||
2009/03/10 ver 2.10 jmp/call reg64の冗長なREX.W削除
|
||||
2009/02/24 ver 2.09 movq reg64, mmx/xmm; movq mmx/xmm, reg64追加
|
||||
2009/02/13 ver 2.08 movd(xmm7, dword[eax])が0x66を落とすバグ修正(thanks to Gabestさん)
|
||||
2008/12/30 ver 2.07 call()の相対アドレスが8bit以下のときのバグ修正(thanks to katoさん)
|
||||
2008/09/18 ver 2.06 @@, @f, @bとラベルの局所化機能追加(thanks to nobu-qさん)
|
||||
2008/09/18 ver 2.05 ptr [rip + 32bit offset]サポート(thanks to 団子厨(Dango-Chu)さん)
|
||||
2008/06/03 ver 2.04 align()のポカミス修正。mov(ptr[eax],1);などをエラーに
|
||||
2008/06/02 ver 2.03 ユーザ定義メモリインタフェースサポート
|
||||
2008/05/26 ver 2.02 protect()(on Linux)で不正な設定になることがあるのを修正(thanks to sinichiro_hさん)
|
||||
2008/04/30 ver 2.01 cmpxchg16b, cdqe追加
|
||||
2008/04/29 ver 2.00 x86/x64-64版公開
|
||||
2008/04/25 ver 1.90 x64版β公開
|
||||
2008/04/18 ver 1.12 コード整理
|
||||
2008/04/14 ver 1.11 コード整理
|
||||
2008/03/12 ver 1.10 bsf/bsr追加(忘れていた)
|
||||
2008/02/14 ver 1.09 sub eax, 1234が16bitモードで出力されていたのを修正(thanks to Robertさん)
|
||||
2007/11/05 ver 1.08 lock, xadd, xchg追加
|
||||
2007/11/02 ver 1.07 SSSE3/SSE4対応(thanks to 団子厨(Dango-Chu)さん)
|
||||
2007/09/25 ver 1.06 call((int)関数ポインタ); jmp((int)関数ポインタ);のサポート
|
||||
2007/08/04 ver 1.05 細かい修正
|
||||
2007/02/04 後方へのジャンプでT_NEARをつけないときに8bit相対アドレスに入らない
|
||||
場合に例外が発生しないバグの修正
|
||||
2007/01/21 [disp]の形のアドレス生成のバグ修正
|
||||
mov (eax|ax|al, [disp]); mov([disp], eax|ax|al);の短い表現選択
|
||||
2007/01/17 webページ作成
|
||||
2007/01/04 公開開始
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
◎著作権者
|
||||
|
||||
光成滋生(MITSUNARI Shigeo, herumi@nifty.com)
|
|
@ -0,0 +1,113 @@
|
|||
TARGET = test quantize bf toyvm test_util memfunc static_buf jmp_table
|
||||
XBYAK_INC=../xbyak/xbyak.h
|
||||
|
||||
BOOST_EXIST=$(shell echo "\#include <boost/spirit/core.hpp>" | (gcc -E - 2>/dev/null) | grep "boost/spirit/core.hpp" >/dev/null && echo "1")
|
||||
UNAME_M=$(shell uname -m)
|
||||
|
||||
ifeq ($(shell uname -s),Darwin)
|
||||
ifeq ($(UNAME_M),x86_64)
|
||||
BIT=64
|
||||
endif
|
||||
ifeq ($(UNAME_M),i386)
|
||||
BIT=32
|
||||
endif
|
||||
ifeq ($(shell sw_vers -productVersion | cut -c1-4 | sed 's/\.//'),105)
|
||||
ifeq ($(shell sysctl -n hw.cpu64bit_capable),1)
|
||||
BIT=64
|
||||
endif
|
||||
endif
|
||||
else
|
||||
BIT=32
|
||||
ifeq ($(UNAME_M),x86_64)
|
||||
BIT=64
|
||||
endif
|
||||
ifeq ($(UNAME_M),amd64)
|
||||
BIT=64
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(BIT),64)
|
||||
TARGET += test64 bf64 memfunc64 test_util64 static_buf64 jmp_table64
|
||||
ifeq ($(BOOST_EXIST),1)
|
||||
TARGET += calc64 #calc2_64
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(BOOST_EXIST),1)
|
||||
TARGET += calc #calc2
|
||||
endif
|
||||
|
||||
all: $(TARGET)
|
||||
|
||||
CFLAGS_WARN=-Wall -Wextra -Wformat=2 -Wcast-qual -Wcast-align -Wwrite-strings -Wfloat-equal -Wpointer-arith -pedantic
|
||||
|
||||
CFLAGS=-g -O2 -fomit-frame-pointer -Wall -I../ $(CFLAGS_WARN)
|
||||
|
||||
test:
|
||||
$(CXX) $(CFLAGS) test0.cpp -o $@ -m32
|
||||
|
||||
quantize:
|
||||
$(CXX) $(CFLAGS) quantize.cpp -o $@ -m32
|
||||
|
||||
calc:
|
||||
$(CXX) $(CFLAGS) calc.cpp -o $@ -m32
|
||||
calc64:
|
||||
$(CXX) $(CFLAGS) calc.cpp -o $@ -m64
|
||||
calc2:
|
||||
$(CXX) $(CFLAGS) calc2.cpp -o $@ -m32
|
||||
calc2_64:
|
||||
$(CXX) $(CFLAGS) calc2.cpp -o $@ -m64
|
||||
|
||||
bf:
|
||||
$(CXX) $(CFLAGS) bf.cpp -o $@ -m32
|
||||
bf64:
|
||||
$(CXX) $(CFLAGS) bf.cpp -o $@ -m64
|
||||
|
||||
memfunc:
|
||||
$(CXX) $(CFLAGS) memfunc.cpp -o $@ -m32
|
||||
memfunc64:
|
||||
$(CXX) $(CFLAGS) memfunc.cpp -o $@ -m64
|
||||
|
||||
toyvm:
|
||||
$(CXX) $(CFLAGS) toyvm.cpp -o $@ -m32
|
||||
|
||||
test64:
|
||||
$(CXX) $(CFLAGS) test0.cpp -o $@ -m64
|
||||
test_util:
|
||||
$(CXX) $(CFLAGS) test_util.cpp -o $@ -m32
|
||||
test_util64:
|
||||
$(CXX) $(CFLAGS) test_util.cpp -o $@ -m64
|
||||
static_buf:
|
||||
$(CXX) $(CFLAGS) static_buf.cpp -o $@ -m32
|
||||
static_buf64:
|
||||
$(CXX) $(CFLAGS) static_buf.cpp -o $@ -m64
|
||||
jmp_table:
|
||||
$(CXX) $(CFLAGS) jmp_table.cpp -o $@ -m32
|
||||
jmp_table64:
|
||||
$(CXX) $(CFLAGS) jmp_table.cpp -o $@ -m64
|
||||
profiler: profiler.cpp ../xbyak/xbyak_util.h
|
||||
$(CXX) $(CFLAGS) profiler.cpp -o $@
|
||||
profiler-vtune: profiler.cpp ../xbyak/xbyak_util.h
|
||||
$(CXX) $(CFLAGS) profiler.cpp -o $@ -DXBYAK_USE_VTUNE -I /opt/intel/vtune_amplifier/include/ -L /opt/intel/vtune_amplifier/lib64 -ljitprofiling -ldl
|
||||
|
||||
clean:
|
||||
rm -rf *.o $(TARGET) *.exe profiler profiler-vtune
|
||||
|
||||
test : test0.cpp $(XBYAK_INC)
|
||||
test64: test0.cpp $(XBYAK_INC)
|
||||
quantize : quantize.cpp $(XBYAK_INC)
|
||||
calc : calc.cpp $(XBYAK_INC)
|
||||
calc64 : calc.cpp $(XBYAK_INC)
|
||||
calc2 : calc2.cpp $(XBYAK_INC)
|
||||
calc2_64 : calc2.cpp $(XBYAK_INC)
|
||||
bf : bf.cpp $(XBYAK_INC)
|
||||
bf64 : bf.cpp $(XBYAK_INC)
|
||||
memfunc : memfunc.cpp $(XBYAK_INC)
|
||||
memfunc64 : memfunc.cpp $(XBYAK_INC)
|
||||
toyvm : toyvm.cpp $(XBYAK_INC)
|
||||
static_buf: static_buf.cpp $(XBYAK_INC)
|
||||
static_buf64: static_buf.cpp $(XBYAK_INC)
|
||||
test_util : test_util.cpp $(XBYAK_INC) ../xbyak/xbyak_util.h
|
||||
# test_util64 is compiled from test_util.cpp (see its recipe), so it shares test_util's prerequisites
test_util64 : test_util.cpp $(XBYAK_INC) ../xbyak/xbyak_util.h
|
||||
jmp_table: jmp_table.cpp $(XBYAK_INC)
|
||||
jmp_table64: jmp_table.cpp $(XBYAK_INC)
|
|
@ -0,0 +1,211 @@
|
|||
#define XBYAK_NO_OP_NAMES
|
||||
#include "xbyak/xbyak.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stack>
|
||||
#include <fstream>
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(disable : 4996) // scanf
|
||||
#define snprintf _snprintf_s
|
||||
#endif
|
||||
|
||||
class Brainfuck : public Xbyak::CodeGenerator {
|
||||
public:
|
||||
int getContinuousChar(std::istream& is, char c)
|
||||
{
|
||||
int count = 1;
|
||||
char p;
|
||||
while (is >> p) {
|
||||
if (p != c) break;
|
||||
count++;
|
||||
}
|
||||
is.unget();
|
||||
return count;
|
||||
}
|
||||
Brainfuck(std::istream& is) : CodeGenerator(100000)
|
||||
{
|
||||
// void (*)(void* putchar, void* getchar, int *stack)
|
||||
using namespace Xbyak;
|
||||
#ifdef XBYAK32
|
||||
const Reg32& pPutchar(esi);
|
||||
const Reg32& pGetchar(edi);
|
||||
const Reg32& stack(ebp);
|
||||
const Address cur = dword [stack];
|
||||
push(ebp); // stack
|
||||
push(esi);
|
||||
push(edi);
|
||||
const int P_ = 4 * 3;
|
||||
mov(pPutchar, ptr[esp + P_ + 4]); // putchar
|
||||
mov(pGetchar, ptr[esp + P_ + 8]); // getchar
|
||||
mov(stack, ptr[esp + P_ + 12]); // stack
|
||||
#elif defined(XBYAK64_WIN)
|
||||
const Reg64& pPutchar(rsi);
|
||||
const Reg64& pGetchar(rdi);
|
||||
const Reg64& stack(rbp); // stack
|
||||
const Address cur = dword [stack];
|
||||
push(rsi);
|
||||
push(rdi);
|
||||
push(rbp);
|
||||
mov(pPutchar, rcx); // putchar
|
||||
mov(pGetchar, rdx); // getchar
|
||||
mov(stack, r8); // stack
|
||||
#else
|
||||
const Reg64& pPutchar(rbx);
|
||||
const Reg64& pGetchar(rbp);
|
||||
const Reg64& stack(r12); // stack
|
||||
const Address cur = dword [stack];
|
||||
push(rbx);
|
||||
push(rbp);
|
||||
push(r12);
|
||||
mov(pPutchar, rdi); // putchar
|
||||
mov(pGetchar, rsi); // getchar
|
||||
mov(stack, rdx); // stack
|
||||
#endif
|
||||
std::stack<Label> labelF, labelB;
|
||||
char c;
|
||||
while (is >> c) {
|
||||
switch (c) {
|
||||
case '+':
|
||||
case '-':
|
||||
{
|
||||
int count = getContinuousChar(is, c);
|
||||
if (count == 1) {
|
||||
c == '+' ? inc(cur) : dec(cur);
|
||||
} else {
|
||||
add(cur, (c == '+' ? count : -count));
|
||||
}
|
||||
}
|
||||
break;
|
||||
case '>':
|
||||
case '<':
|
||||
{
|
||||
int count = getContinuousChar(is, c);
|
||||
add(stack, 4 * (c == '>' ? count : -count));
|
||||
}
|
||||
break;
|
||||
case '.':
|
||||
#ifdef XBYAK32
|
||||
push(cur);
|
||||
call(pPutchar);
|
||||
pop(eax);
|
||||
#elif defined(XBYAK64_WIN)
|
||||
mov(ecx, cur);
|
||||
sub(rsp, 32);
|
||||
call(pPutchar);
|
||||
add(rsp, 32);
|
||||
#else
|
||||
mov(edi, cur);
|
||||
call(pPutchar);
|
||||
#endif
|
||||
break;
|
||||
case ',':
|
||||
#if defined(XBYAK32) || defined(XBYAK64_GCC)
|
||||
call(pGetchar);
|
||||
#elif defined(XBYAK64_WIN)
|
||||
sub(rsp, 32);
|
||||
call(pGetchar);
|
||||
add(rsp, 32);
|
||||
#endif
|
||||
mov(cur, eax);
|
||||
break;
|
||||
case '[':
|
||||
{
|
||||
Label B = L();
|
||||
labelB.push(B);
|
||||
mov(eax, cur);
|
||||
test(eax, eax);
|
||||
Label F;
|
||||
jz(F, T_NEAR);
|
||||
labelF.push(F);
|
||||
}
|
||||
break;
|
||||
case ']':
|
||||
{
|
||||
Label B = labelB.top(); labelB.pop();
|
||||
jmp(B);
|
||||
Label F = labelF.top(); labelF.pop();
|
||||
L(F);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
#ifdef XBYAK32
|
||||
pop(edi);
|
||||
pop(esi);
|
||||
pop(ebp);
|
||||
#elif defined(XBYAK64_WIN)
|
||||
pop(rbp);
|
||||
pop(rdi);
|
||||
pop(rsi);
|
||||
#else
|
||||
pop(r12);
|
||||
pop(rbp);
|
||||
pop(rbx);
|
||||
#endif
|
||||
ret();
|
||||
}
|
||||
};
|
||||
|
||||
/*
	Print to stdout the source of a standalone C program that embeds the
	generated machine code as a page-aligned byte array, makes it executable
	and calls it with putchar, getchar and a static stack buffer.
	@param code [in] first byte of the generated code
	@param size [in] number of bytes to emit
*/
void dump(const Xbyak::uint8 *code, size_t size)
{
	puts("#include <stdio.h>\nstatic int stack[128 * 1024];");
	// alignment attribute understood by the compiler that will build the dumped program
#ifdef _MSC_VER
	fputs("static __declspec(align(4096)) ", stdout);
#else
	fputs("static __attribute__((aligned(4096)))", stdout);
#endif
	puts("const unsigned char code[] = {");
	// 16 bytes per output line
	size_t column = 0;
	const Xbyak::uint8 *const last = code + size;
	for (const Xbyak::uint8 *p = code; p != last; ++p) {
		printf("0x%02x,", *p);
		if (++column == 16) {
			putchar('\n');
			column = 0;
		}
	}
	puts("\n};");
#ifdef _MSC_VER
	puts("#include <windows.h>");
	puts("int main()\n{");
	puts("\tDWORD oldProtect;");
	puts("\tVirtualProtect((void*)code, sizeof(code), PAGE_EXECUTE_READWRITE, &oldProtect);");
#else
	puts("#include <unistd.h>");
	puts("#include <sys/mman.h>");
	puts("int main()\n{");
	puts("\tlong pageSize = sysconf(_SC_PAGESIZE) - 1;");
	puts("\tmprotect((void*)code, (sizeof(code) + pageSize) & ~pageSize, PROT_READ | PROT_EXEC);");
#endif
	puts(
		"\t((void (*)(void*, void*, int *))code)((void*)putchar, (void*)getchar, stack);\n"
		"}"
	);
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
#ifdef XBYAK32
|
||||
fprintf(stderr, "32bit mode\n");
|
||||
#else
|
||||
fprintf(stderr, "64bit mode\n");
|
||||
#endif
|
||||
if (argc == 1) {
|
||||
fprintf(stderr, "bf filename.bf [0|1]\n");
|
||||
return 1;
|
||||
}
|
||||
std::ifstream ifs(argv[1]);
|
||||
int mode = argc == 3 ? atoi(argv[2]) : 0;
|
||||
try {
|
||||
Brainfuck bf(ifs);
|
||||
if (mode == 0) {
|
||||
static int stack[128 * 1024];
|
||||
bf.getCode<void (*)(const void*, const void*, int *)>()(reinterpret_cast<const void*>(putchar), reinterpret_cast<const void*>(getchar), stack);
|
||||
} else {
|
||||
dump(bf.getCode(), bf.getSize());
|
||||
}
|
||||
} catch (std::exception& e) {
|
||||
printf("ERR:%s\n", e.what());
|
||||
} catch (...) {
|
||||
printf("unknown error\n");
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,228 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{654BD79B-59D3-4B10-BBAA-158BAB272828}</ProjectGuid>
|
||||
<WindowsTargetPlatformVersion>10.0.17763.0</WindowsTargetPlatformVersion>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup>
|
||||
<_ProjectFileVersion>15.0.27924.0</_ProjectFileVersion>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Platform)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Platform)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Midl>
|
||||
<TypeLibraryName>.\Release/bf.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<StringPooling>true</StringPooling>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<PrecompiledHeaderOutputFile>.\Release/bf.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX86</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Midl>
|
||||
<TypeLibraryName>.\Debug/bf.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<MinimalRebuild>true</MinimalRebuild>
|
||||
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
<PrecompiledHeaderOutputFile>.\Debug/bf.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<ProgramDatabaseFile>.\Debug/bf.pdb</ProgramDatabaseFile>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX86</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<Midl>
|
||||
<TargetEnvironment>X64</TargetEnvironment>
|
||||
<TypeLibraryName>.\Release/bf.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<StringPooling>true</StringPooling>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<PrecompiledHeaderOutputFile>.\Release/bf.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX64</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<Midl>
|
||||
<TargetEnvironment>X64</TargetEnvironment>
|
||||
<TypeLibraryName>.\Debug/bf.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<MinimalRebuild>true</MinimalRebuild>
|
||||
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
<PrecompiledHeaderOutputFile>.\Debug/bf.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<ProgramDatabaseFile>.\Debug/bf.pdb</ProgramDatabaseFile>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX64</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="bf.cpp" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
|
@ -0,0 +1,229 @@
|
|||
/*
|
||||
@author herumi
|
||||
|
||||
tiny calculator
|
||||
This program generates a function to calc the value of
|
||||
polynomial given by user in run-time.
|
||||
use boost::spirit::classic
|
||||
see calc2.cpp for new version of boost::spirit
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <sstream>
|
||||
#include <map>
|
||||
#define XBYAK_NO_OP_NAMES
|
||||
#include "xbyak/xbyak.h"
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(disable : 4127) // for boost(constant condition)
|
||||
#pragma warning(disable : 4512) // for boost
|
||||
#endif
|
||||
#include <boost/spirit/include/classic_file_iterator.hpp>
|
||||
#include <boost/spirit/include/classic_core.hpp>
|
||||
#include <boost/bind.hpp>
|
||||
|
||||
enum Error {
|
||||
UNDEFINED_VARIABLE = 1
|
||||
};
|
||||
|
||||
/*
|
||||
JIT assemble of given polynomial for VC or gcc
|
||||
*/
|
||||
class FuncGen : public Xbyak::CodeGenerator {
|
||||
public:
|
||||
typedef std::map<std::string, int> Map;
|
||||
private:
|
||||
enum {
|
||||
MAX_CONST_NUM = 32
|
||||
};
|
||||
double constTbl_[MAX_CONST_NUM];
|
||||
size_t constTblPos_;
|
||||
int regIdx_;
|
||||
Map varMap_; // map var name to index
|
||||
#ifdef XBYAK32
|
||||
const Xbyak::Reg32& valTbl_;
|
||||
const Xbyak::Reg32& tbl_;
|
||||
#else
|
||||
const Xbyak::Reg64& valTbl_;
|
||||
const Xbyak::Reg64& tbl_;
|
||||
#endif
|
||||
public:
|
||||
/*
|
||||
@param y [out] the value of f(var)
|
||||
@param var [in] table of input variables
|
||||
func(double *y, const double var[]);
|
||||
@note func does not return double to avoid difference of compiler
|
||||
*/
|
||||
FuncGen(const std::vector<std::string>& varTbl)
|
||||
: constTblPos_(0)
|
||||
, regIdx_(-1)
|
||||
#ifdef XBYAK32
|
||||
, valTbl_(eax)
|
||||
, tbl_(edx)
|
||||
#elif defined(XBYAK64_WIN)
|
||||
, valTbl_(rcx)
|
||||
, tbl_(rdx)
|
||||
#else
|
||||
, valTbl_(rdi)
|
||||
, tbl_(rsi)
|
||||
#endif
|
||||
{
|
||||
#ifdef XBYAK32
|
||||
mov(valTbl_, ptr[esp+8]); // eax == varTbl
|
||||
mov(tbl_, (size_t)constTbl_);
|
||||
#else
|
||||
#ifdef XBYAK64_WIN
|
||||
movaps(ptr [rsp + 8], xm6); // save xm6, xm7
|
||||
movaps(ptr [rsp + 8 + 16], xm7);
|
||||
#endif
|
||||
mov(tbl_, (size_t)constTbl_);
|
||||
#endif
|
||||
for (int i = 0, n = static_cast<int>(varTbl.size()); i < n; i++) {
|
||||
varMap_[varTbl[i]] = i;
|
||||
}
|
||||
}
|
||||
// use edx
|
||||
void genPush(double n)
|
||||
{
|
||||
if (constTblPos_ >= MAX_CONST_NUM) throw;
|
||||
constTbl_[constTblPos_] = n;
|
||||
if (regIdx_ == 7) throw;
|
||||
movsd(Xbyak::Xmm(++regIdx_), ptr[tbl_ + (int)(constTblPos_ * sizeof(double))]);
|
||||
constTblPos_++;
|
||||
}
|
||||
// use eax
|
||||
void genVal(const char *begin, const char *end)
|
||||
{
|
||||
std::string var(begin, end);
|
||||
if (varMap_.find(var) == varMap_.end()) throw UNDEFINED_VARIABLE;
|
||||
if (regIdx_ == 7) throw;
|
||||
movsd(Xbyak::Xmm(++regIdx_), ptr[valTbl_ + varMap_[var] * sizeof(double)]);
|
||||
}
|
||||
void genAdd(const char*, const char*)
|
||||
{
|
||||
addsd(Xbyak::Xmm(regIdx_ - 1), Xbyak::Xmm(regIdx_)); regIdx_--;
|
||||
}
|
||||
void genSub(const char*, const char*)
|
||||
{
|
||||
subsd(Xbyak::Xmm(regIdx_ - 1), Xbyak::Xmm(regIdx_)); regIdx_--;
|
||||
}
|
||||
void genMul(const char*, const char*)
|
||||
{
|
||||
mulsd(Xbyak::Xmm(regIdx_ - 1), Xbyak::Xmm(regIdx_)); regIdx_--;
|
||||
}
|
||||
void genDiv(const char*, const char*)
|
||||
{
|
||||
divsd(Xbyak::Xmm(regIdx_ - 1), Xbyak::Xmm(regIdx_)); regIdx_--;
|
||||
}
|
||||
void complete()
|
||||
{
|
||||
#ifdef XBYAK32
|
||||
mov(eax, ptr [esp + 4]); // eax = valTbl
|
||||
movsd(ptr [eax], xm0);
|
||||
#else
|
||||
#ifdef XBYAK64_WIN
|
||||
movaps(xm6, ptr [rsp + 8]);
|
||||
movaps(xm7, ptr [rsp + 8 + 16]);
|
||||
#endif
|
||||
#endif
|
||||
ret();
|
||||
}
|
||||
};
|
||||
|
||||
/*
    boost::spirit::classic grammar for + - * / expressions over real literals,
    alphabetic variable names and parentheses.  Every semantic action forwards
    to the FuncGen given to the constructor, which emits the machine code.
*/
struct Grammar : public boost::spirit::classic::grammar<Grammar> {
    FuncGen& f_;
    Grammar(FuncGen& f) : f_(f) { }
    template<typename ScannerT>
    struct definition {
        typedef boost::spirit::classic::rule<ScannerT> RuleT;
        RuleT poly0, poly1, poly2, var;

        definition(const Grammar& self)
        {
            using namespace boost;
            using namespace boost::spirit::classic;

            FuncGen& gen = self.f_; // code generator shared by all actions

            // poly0 : sum / difference of poly1
            poly0 = poly1 >> *(('+' >> poly1)[boost::bind(&FuncGen::genAdd, boost::ref(gen), _1, _2)]
                             | ('-' >> poly1)[boost::bind(&FuncGen::genSub, boost::ref(gen), _1, _2)]);
            // poly1 : product / quotient of poly2
            poly1 = poly2 >> *(('*' >> poly2)[boost::bind(&FuncGen::genMul, boost::ref(gen), _1, _2)]
                             | ('/' >> poly2)[boost::bind(&FuncGen::genDiv, boost::ref(gen), _1, _2)]);
            // var : one or more alphabetic characters
            var = (+alpha_p)[boost::bind(&FuncGen::genVal, boost::ref(gen), _1, _2)];
            // poly2 : literal, variable or parenthesized poly0
            poly2 = real_p[boost::bind(&FuncGen::genPush, boost::ref(gen), _1)]
                  | var
                  | '(' >> poly0 >> ')';
        }
        const RuleT& start() const { return poly0; }
    };
};
|
||||
|
||||
/*
    Print the elements of x to stdout with "%f", separated by ", ".
    Nothing is printed for an empty vector and no newline is appended.
*/
void put(const std::vector<double>& x)
{
    typedef std::vector<double>::const_iterator Iter;
    const Iter first = x.begin();
    const Iter last = x.end();
    for (Iter it = first; it != last; ++it) {
        if (it != first) printf(", ");
        printf("%f", *it);
    }
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
if (argc <= 2) {
|
||||
fprintf(stderr, "calc \"var1 var2 ...\" \"function of var\"\n");
|
||||
fprintf(stderr, "eg. calc x \"x*x\"\n");
|
||||
fprintf(stderr, "eg. calc \"x y z\" \"x*x + y - z\"\n");
|
||||
return 1;
|
||||
}
|
||||
const char *poly = argv[2];
|
||||
try {
|
||||
std::vector<std::string> varTbl;
|
||||
|
||||
// get varTbl from argv[1]
|
||||
{
|
||||
std::istringstream is(argv[1]);
|
||||
int i = 0;
|
||||
printf("varTbl = { ");
|
||||
while (is) {
|
||||
std::string var;
|
||||
is >> var;
|
||||
if (var.empty()) break;
|
||||
printf("%s:%d, ", var.c_str(), i);
|
||||
varTbl.push_back(var);
|
||||
i++;
|
||||
}
|
||||
printf("}\n");
|
||||
}
|
||||
FuncGen funcGen(varTbl);
|
||||
Grammar calc(funcGen);
|
||||
boost::spirit::classic::parse_info<> r = parse(poly, calc, boost::spirit::classic::space_p);
|
||||
if (!r.full) {
|
||||
printf("err poly=%s\n", poly);
|
||||
return 1;
|
||||
}
|
||||
funcGen.complete();
|
||||
std::vector<double> valTbl;
|
||||
valTbl.resize(varTbl.size());
|
||||
#ifdef XBYAK32
|
||||
puts("32bit mode");
|
||||
void (*func)(double *ret, const double *valTbl) = funcGen.getCode<void (*)(double *, const double*)>();
|
||||
#else
|
||||
puts("64bit mode");
|
||||
double (*func)(const double *valTbl) = funcGen.getCode<double (*)(const double*)>();
|
||||
#endif
|
||||
for (int i = 0; i < 10; i++) {
|
||||
for (size_t j = 0, n = valTbl.size(); j < n; j++) {
|
||||
valTbl[j] = rand() % 7;
|
||||
}
|
||||
double y;
|
||||
#ifdef XBYAK32
|
||||
func(&y, &valTbl[0]);
|
||||
#else
|
||||
y = func(&valTbl[0]);
|
||||
#endif
|
||||
printf("f("); put(valTbl); printf(")=%f\n", y);
|
||||
}
|
||||
} catch (std::exception& e) {
|
||||
printf("ERR:%s\n", e.what());
|
||||
} catch (Error err) {
|
||||
printf("ERR:%d\n", err);
|
||||
} catch (...) {
|
||||
printf("unknown error\n");
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,228 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{5FDDFAA6-B947-491D-A17E-BBD863846579}</ProjectGuid>
|
||||
<WindowsTargetPlatformVersion>10.0.17763.0</WindowsTargetPlatformVersion>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup>
|
||||
<_ProjectFileVersion>15.0.27924.0</_ProjectFileVersion>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Platform)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Platform)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Midl>
|
||||
<TypeLibraryName>.\Release/calc.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<StringPooling>true</StringPooling>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<PrecompiledHeaderOutputFile>.\Release/calc.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX86</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Midl>
|
||||
<TypeLibraryName>.\Debug/calc.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<MinimalRebuild>true</MinimalRebuild>
|
||||
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
<PrecompiledHeaderOutputFile>.\Debug/calc.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<ProgramDatabaseFile>.\Debug/calc.pdb</ProgramDatabaseFile>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX86</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<Midl>
|
||||
<TargetEnvironment>X64</TargetEnvironment>
|
||||
<TypeLibraryName>.\Release/calc.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<StringPooling>true</StringPooling>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<PrecompiledHeaderOutputFile>.\Release/calc.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX64</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<Midl>
|
||||
<TargetEnvironment>X64</TargetEnvironment>
|
||||
<TypeLibraryName>.\Debug/calc.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<MinimalRebuild>true</MinimalRebuild>
|
||||
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
<PrecompiledHeaderOutputFile>.\Debug/calc.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<ProgramDatabaseFile>.\Debug/calc.pdb</ProgramDatabaseFile>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX64</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="calc.cpp" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
|
@ -0,0 +1,302 @@
|
|||
/*
|
||||
@author herumi
|
||||
|
||||
tiny calculator 2
|
||||
This program generates a function to calc the value of
|
||||
polynomial given by user in run-time.
|
||||
use boost::spirit::qi
|
||||
*/
|
||||
#ifdef _WIN32
|
||||
#pragma warning(disable : 4127) // for boost(constant condition)
|
||||
#pragma warning(disable : 4512) // for boost
|
||||
#pragma warning(disable : 4819)
|
||||
#endif
|
||||
#include <boost/config/warning_disable.hpp>
|
||||
#include <boost/spirit/include/qi.hpp>
|
||||
#include <boost/spirit/include/phoenix_core.hpp>
|
||||
#include <boost/spirit/include/phoenix_container.hpp>
|
||||
#include <boost/spirit/include/phoenix_bind.hpp>
|
||||
#include <boost/timer.hpp>
|
||||
|
||||
#include <stdio.h>
#include <assert.h>
#include <stdexcept>
#include <string>
#include <vector>
|
||||
#define XBYAK_NO_OP_NAMES
|
||||
#include "xbyak/xbyak.h"
|
||||
|
||||
/*
    Instruction set of the stack machine Vm.
*/
enum Operand {
    OpAdd,  // pop b, pop a, push a + b
    OpSub,  // pop b, pop a, push a - b
    OpMul,  // pop b, pop a, push a * b
    OpDiv,  // pop b, pop a, push a / b
    OpNeg,  // negate the top of the stack
    OpImm,  // push Code::val_
    OpVarX  // push the argument x
};

/*
    One Vm instruction.  Both constructors are intentionally implicit so
    that an Operand or a double can be appended to a CodeSet directly.
*/
struct Code {
    Operand op_;
    double val_; // operand of OpImm, 0 for every other instruction
    Code(Operand op)
        : op_(op)
        , val_(0)
    {
    }
    Code(double val)
        : op_(OpImm)
        , val_(val)
    {
    }
};

typedef std::vector<Code> CodeSet;

/*
    Interpreter that evaluates the postfix program code_ for a given x.
*/
struct Vm {
    CodeSet code_;
    /*
        Run code_ and return the value left on top of the stack.
        @param x value pushed by every OpVarX
        @throw std::out_of_range if the program needs more than 16 stack
               slots, applies an operator to too few operands, or leaves
               the stack empty (e.g. code_ is empty)
    */
    double operator()(double x) const
    {
        const size_t maxStack = 16;
        double stack[maxStack];
        size_t depth = 0; // number of live values in stack[]

        for (CodeSet::const_iterator pc = code_.begin(), last = code_.end(); pc != last; ++pc) {
            switch (pc->op_) {
            case OpVarX:
            case OpImm:
                // check before writing so that the array is never overrun
                if (depth == maxStack) throw std::out_of_range("Vm: expression is too deep");
                stack[depth++] = (pc->op_ == OpVarX) ? x : pc->val_;
                break;
            case OpNeg:
                if (depth < 1) throw std::out_of_range("Vm: missing operand");
                stack[depth - 1] = -stack[depth - 1];
                break;
            case OpAdd:
            case OpSub:
            case OpMul:
            case OpDiv:
                if (depth < 2) throw std::out_of_range("Vm: missing operand");
                --depth; // stack[depth] is the right operand, stack[depth - 1] the left one
                switch (pc->op_) {
                case OpAdd: stack[depth - 1] += stack[depth]; break;
                case OpSub: stack[depth - 1] -= stack[depth]; break;
                case OpMul: stack[depth - 1] *= stack[depth]; break;
                default:    stack[depth - 1] /= stack[depth]; break;
                }
                break;
            }
        }
        if (depth == 0) throw std::out_of_range("Vm: no result");
        return stack[depth - 1];
    }
};
|
||||
|
||||
class Jit : public Xbyak::CodeGenerator {
|
||||
private:
|
||||
enum {
|
||||
MAX_CONST_NUM = 32
|
||||
};
|
||||
MIE_ALIGN(16) double constTbl_[MAX_CONST_NUM];
|
||||
Xbyak::uint64 negConst_;
|
||||
size_t constTblPos_;
|
||||
#ifdef XBYAK32
|
||||
const Xbyak::Reg32& varTbl_;
|
||||
const Xbyak::Reg32& tbl_;
|
||||
#else
|
||||
const Xbyak::Reg64& tbl_;
|
||||
#endif
|
||||
int regIdx_;
|
||||
public:
|
||||
/*
|
||||
double jit(double x);
|
||||
@note 32bit: x : [esp+4], return fp0
|
||||
64bit: x [rcx](win), xmm0(gcc), return xmm0
|
||||
*/
|
||||
Jit()
|
||||
: negConst_(Xbyak::uint64(1) << 63)
|
||||
, constTblPos_(0)
|
||||
#ifdef XBYAK32
|
||||
, varTbl_(eax)
|
||||
, tbl_(edx)
|
||||
#elif defined(XBYAK64_WIN)
|
||||
, tbl_(rcx)
|
||||
#else
|
||||
, tbl_(rdi)
|
||||
#endif
|
||||
, regIdx_(-1)
|
||||
{
|
||||
#ifdef XBYAK32
|
||||
lea(varTbl_, ptr [esp+4]);
|
||||
#else
|
||||
#ifdef XBYAK64_WIN
|
||||
movaps(ptr [rsp + 8], xm6); // save xm6, xm7
|
||||
movaps(ptr [rsp + 8 + 16], xm7);
|
||||
#endif
|
||||
movaps(xm7, xm0); // save xm0
|
||||
#endif
|
||||
mov(tbl_, (size_t)constTbl_);
|
||||
}
|
||||
void genPush(double n)
|
||||
{
|
||||
if (constTblPos_ >= MAX_CONST_NUM) throw;
|
||||
constTbl_[constTblPos_] = n;
|
||||
if (regIdx_ == 7) throw;
|
||||
movsd(Xbyak::Xmm(++regIdx_), ptr[tbl_ + constTblPos_ * sizeof(double)]);
|
||||
constTblPos_++;
|
||||
}
|
||||
void genVarX()
|
||||
{
|
||||
#ifdef XBYAK32
|
||||
if (regIdx_ == 7) throw;
|
||||
movsd(Xbyak::Xmm(++regIdx_), ptr[varTbl_]);
|
||||
#else
|
||||
if (regIdx_ == 6) throw;
|
||||
movsd(Xbyak::Xmm(++regIdx_), xm7);
|
||||
#endif
|
||||
}
|
||||
void genAdd()
|
||||
{
|
||||
addsd(Xbyak::Xmm(regIdx_ - 1), Xbyak::Xmm(regIdx_)); regIdx_--;
|
||||
}
|
||||
void genSub()
|
||||
{
|
||||
subsd(Xbyak::Xmm(regIdx_ - 1), Xbyak::Xmm(regIdx_)); regIdx_--;
|
||||
}
|
||||
void genMul()
|
||||
{
|
||||
mulsd(Xbyak::Xmm(regIdx_ - 1), Xbyak::Xmm(regIdx_)); regIdx_--;
|
||||
}
|
||||
void genDiv()
|
||||
{
|
||||
divsd(Xbyak::Xmm(regIdx_ - 1), Xbyak::Xmm(regIdx_)); regIdx_--;
|
||||
}
|
||||
void genNeg()
|
||||
{
|
||||
xorpd(Xbyak::Xmm(regIdx_), ptr [tbl_ + MAX_CONST_NUM * sizeof(double)]);
|
||||
}
|
||||
void complete()
|
||||
{
|
||||
#ifdef XBYAK32
|
||||
sub(esp, 8);
|
||||
movsd(ptr [esp], xm0);
|
||||
fld(qword [esp]);
|
||||
add(esp, 8);
|
||||
#else
|
||||
#ifdef XBYAK64_WIN
|
||||
movaps(xm6, ptr [rsp + 8]);
|
||||
movaps(xm7, ptr [rsp + 8 + 16]);
|
||||
#endif
|
||||
#endif
|
||||
ret();
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Iterator>
|
||||
struct Parser : boost::spirit::qi::grammar<Iterator, boost::spirit::ascii::space_type> {
|
||||
boost::spirit::qi::rule<Iterator, boost::spirit::ascii::space_type> expression, term, factor;
|
||||
CodeSet& code_;
|
||||
Parser(CodeSet& code)
|
||||
: Parser::base_type(expression)
|
||||
, code_(code)
|
||||
{
|
||||
namespace qi = boost::spirit::qi;
|
||||
using namespace qi::labels;
|
||||
|
||||
using boost::phoenix::ref;
|
||||
using boost::phoenix::push_back;
|
||||
|
||||
expression = term >> *(('+' > term[push_back(ref(code_), OpAdd)])
|
||||
| ('-' > term[push_back(ref(code_), OpSub)]));
|
||||
|
||||
term = factor >> *(('*' > factor[push_back(ref(code_), OpMul)])
|
||||
| ('/' > factor[push_back(ref(code_), OpDiv)]));
|
||||
|
||||
factor = qi::double_[push_back(ref(code_), _1)]
|
||||
| qi::lit('x')[push_back(ref(code_), OpVarX)]
|
||||
| ('(' > expression > ')')
|
||||
| ('-' > factor[push_back(ref(code_), OpNeg)])
|
||||
| ('+' > factor);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Iterator>
|
||||
struct ParserJit : boost::spirit::qi::grammar<Iterator, boost::spirit::ascii::space_type> {
|
||||
boost::spirit::qi::rule<Iterator, boost::spirit::ascii::space_type> expression, term, factor;
|
||||
Jit code_;
|
||||
ParserJit()
|
||||
: ParserJit::base_type(expression)
|
||||
{
|
||||
namespace qi = boost::spirit::qi;
|
||||
using namespace qi::labels;
|
||||
|
||||
using boost::phoenix::ref;
|
||||
using boost::phoenix::push_back;
|
||||
using boost::phoenix::bind;
|
||||
|
||||
expression = term >> *(('+' > term[bind(&Jit::genAdd, ref(code_))])
|
||||
| ('-' > term[bind(&Jit::genSub, ref(code_))]));
|
||||
|
||||
term = factor >> *(('*' > factor[bind(&Jit::genMul, ref(code_))])
|
||||
| ('/' > factor[bind(&Jit::genDiv, ref(code_))]));
|
||||
|
||||
factor = qi::double_[bind(&Jit::genPush, ref(code_), _1)]
|
||||
| qi::lit('x')[bind(&Jit::genVarX, ref(code_))]
|
||||
| ('(' > expression > ')')
|
||||
| ('-' > factor[bind(&Jit::genNeg, ref(code_))])
|
||||
| ('+' > factor);
|
||||
}
|
||||
};
|
||||
|
||||
template<class Func>
|
||||
void Test(const char *msg, const Func& f)
|
||||
{
|
||||
printf("%s:", msg);
|
||||
boost::timer t;
|
||||
double sum = 0;
|
||||
for (double x = 0; x < 1000; x += 0.0001) {
|
||||
sum += f(x);
|
||||
}
|
||||
printf("sum=%f, %fsec\n", sum, t.elapsed());
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
if (argc < 2) {
|
||||
fprintf(stderr, "input formula\n");
|
||||
return 1;
|
||||
}
|
||||
const std::string str(argv[1]);
|
||||
|
||||
try {
|
||||
Vm vm;
|
||||
Parser<std::string::const_iterator> parser(vm.code_);
|
||||
ParserJit<std::string::const_iterator> parserJit;
|
||||
|
||||
const std::string::const_iterator end = str.end();
|
||||
|
||||
std::string::const_iterator i = str.begin();
|
||||
if (!phrase_parse(i, end, parser, boost::spirit::ascii::space) || i != end) {
|
||||
puts("err 1");
|
||||
return 1;
|
||||
}
|
||||
printf("ret=%f\n", vm(2.3));
|
||||
|
||||
i = str.begin();
|
||||
if (!phrase_parse(i, end, parserJit, boost::spirit::ascii::space) || i != end) {
|
||||
puts("err 2");
|
||||
return 1;
|
||||
}
|
||||
parserJit.code_.complete();
|
||||
double (*jit)(double) = parserJit.code_.getCode<double (*)(double)>();
|
||||
|
||||
Test("VM ", vm);
|
||||
Test("JIT", jit);
|
||||
} catch (...) {
|
||||
fprintf(stderr, "err\n");
|
||||
}
|
||||
}
|
|
@ -0,0 +1,5 @@
|
|||
>>++++++++[->++++++++<]>>>>+++++++++[->++++++++++<]>[<<,[->+<<+<<+>>>]<<<[
|
||||
->>>+<<<]>>>>>[->+>>+<<<]>[<<[->+>>+<<<]>>>[-<<<+>>>]<<[[-]<->]>-]>>[-<<<+
|
||||
>>>]<<<<<<<[-<+<<+>>>]<[>>[-<+<<+>>>]<<<[->>>+<<<]>>[[-]>-<]<-]<<[->>>+<<<
|
||||
]>>>>><[[-]>++++++++++++++++++++++++++++++++>[[-]<------------------------
|
||||
-------->]<<]>>[-]<.>>]
|
|
@ -0,0 +1,19 @@
|
|||
++++++[->++++>>+>+>-<<<<<]>
|
||||
[<++++>>+++>++++>>+++>+++++>+++++>>>>>>++>>++<<<<<<<<<<<<<<-]
|
||||
<++++>+++>-->+++>->>--->++>>>+++++[->++>++<<]<<<<<<<<<<
|
||||
|
||||
[->
|
||||
-[>>>>>>>]>[<+++>.>.>>>>..>>>+<]<<<<<
|
||||
-[>>>>]>[<+++++>.>.>..>>>+<]>>>>
|
||||
|
||||
+<-[<<<]<[
|
||||
[-<<+>>]>>>+>+<<<<<<[->>+>+>-<<<<]<
|
||||
]>>
|
||||
|
||||
[[-]<]>[
|
||||
>>>[>.<<.<<<]<[.<<<<]>
|
||||
]
|
||||
|
||||
>.<<<<<<<<<<<
|
||||
]
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
>+++++++++[<++++++++>-]<.>+++++++[<++++>-]<+.+++++++..+++.[-]>++++++++[<++
|
||||
++>-]<.>+++++++++++[<+++++>-]<.>++++++++[<+++>-]<.+++.------.--------.[-]>
|
||||
++++++++[<++++>-]<+.[-]++++++++++.
|
|
@ -0,0 +1,128 @@
|
|||
/*
|
||||
sample of mov(reg, LABEL), L(LABEL) and putL(LABEL)
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#define XBYAK_NO_OP_NAMES
|
||||
#include <xbyak/xbyak.h>
|
||||
|
||||
const int expectTbl[] = {
|
||||
5, 9, 12
|
||||
};
|
||||
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
explicit Code(int mode, size_t size, void *p)
|
||||
: Xbyak::CodeGenerator(size, p)
|
||||
{
|
||||
inLocalLabel();
|
||||
#ifdef XBYAK64
|
||||
const Xbyak::Reg64& a = rax;
|
||||
const Xbyak::Reg64& c = rcx;
|
||||
#ifdef XBYAK64_WIN
|
||||
mov(rax, rcx);
|
||||
#else
|
||||
mov(rax, rdi);
|
||||
#endif
|
||||
#else
|
||||
const Xbyak::Reg32& a = eax;
|
||||
const Xbyak::Reg32& c = ecx;
|
||||
mov(a, ptr [esp + 4]);
|
||||
#endif
|
||||
|
||||
switch (mode) {
|
||||
case 0:
|
||||
mov(c, ".jmp_table");
|
||||
lea(c, ptr [c + a * 8]);
|
||||
jmp(c);
|
||||
align(8);
|
||||
L(".jmp_table");
|
||||
mov(a, expectTbl[0]);
|
||||
ret();
|
||||
align(8);
|
||||
mov(a, expectTbl[1]);
|
||||
ret();
|
||||
align(8);
|
||||
mov(a, expectTbl[2]);
|
||||
ret();
|
||||
break;
|
||||
|
||||
case 1:
|
||||
/*
|
||||
the label for putL is defined when called
|
||||
*/
|
||||
mov(c, ".jmp_table");
|
||||
jmp(ptr [c + a * (int)sizeof(size_t)]);
|
||||
L(".label1");
|
||||
mov(a, expectTbl[0]);
|
||||
jmp(".end");
|
||||
L(".label2");
|
||||
mov(a, expectTbl[1]);
|
||||
jmp(".end");
|
||||
L(".label3");
|
||||
mov(a, expectTbl[2]);
|
||||
jmp(".end");
|
||||
L(".end");
|
||||
ret();
|
||||
ud2();
|
||||
|
||||
align(8);
|
||||
L(".jmp_table");
|
||||
putL(".label1");
|
||||
putL(".label2");
|
||||
putL(".label3");
|
||||
break;
|
||||
|
||||
case 2:
|
||||
/*
|
||||
the label for putL is not defined when called
|
||||
*/
|
||||
jmp(".in");
|
||||
ud2();
|
||||
align(8);
|
||||
L(".jmp_table");
|
||||
putL(".label1");
|
||||
putL(".label2");
|
||||
putL(".label3");
|
||||
L(".in");
|
||||
mov(c, ".jmp_table");
|
||||
jmp(ptr [c + a * (int)sizeof(size_t)]);
|
||||
L(".label1");
|
||||
mov(a, expectTbl[0]);
|
||||
jmp(".end");
|
||||
L(".label2");
|
||||
mov(a, expectTbl[1]);
|
||||
jmp(".end");
|
||||
L(".label3");
|
||||
mov(a, expectTbl[2]);
|
||||
jmp(".end");
|
||||
L(".end");
|
||||
ret();
|
||||
break;
|
||||
}
|
||||
outLocalLabel();
|
||||
}
|
||||
};
|
||||
|
||||
int main()
|
||||
try
|
||||
{
|
||||
for (int mode = 0; mode < 3; mode++) {
|
||||
printf("mode=%d\n", mode);
|
||||
for (int grow = 0; grow < 2; grow++) {
|
||||
printf("auto grow=%s\n", grow ? "on" : "off");
|
||||
Code c(mode, grow ? 30 : 4096, grow ? Xbyak::AutoGrow : 0);
|
||||
int (*f)(int) = c.getCode<int (*)(int)>();
|
||||
c.ready();
|
||||
for (int i = 0; i < 3; i++) {
|
||||
const int a = expectTbl[i];
|
||||
const int b = f(i);
|
||||
if (a != b) {
|
||||
printf("ERR i=%d, a=%d, b=%d\n", i, a, b);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
puts("ok");
|
||||
} catch (std::exception& e) {
|
||||
printf("ERR %s\n", e.what());
|
||||
}
|
|
@ -0,0 +1,111 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <memory.h>
|
||||
#define XBYAK_NO_OP_NAMES
|
||||
#include <xbyak/xbyak.h>
|
||||
|
||||
/*
    Reference implementation for the generated member function.
    x_ must stay the first member and y_ the second: the generated code
    reads them at offsets 0 and 4 of the object.
*/
struct A {
    int x_;
    int y_;
    A() : x_(3), y_(5) {}
    /*
        @return x_ + y_ + a + b + c + d + e, wrapping around on overflow.
        The sum is formed in unsigned arithmetic because the callers pass
        unrestricted rand() values, for which a signed sum could overflow
        (undefined behaviour), while the generated `add` instructions wrap.
    */
    int func(int a, int b, int c, int d, int e) const
    {
        unsigned int sum = static_cast<unsigned int>(x_);
        sum += static_cast<unsigned int>(y_);
        sum += static_cast<unsigned int>(a);
        sum += static_cast<unsigned int>(b);
        sum += static_cast<unsigned int>(c);
        sum += static_cast<unsigned int>(d);
        sum += static_cast<unsigned int>(e);
        return static_cast<int>(sum);
    }
};
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(disable : 4510 4512 4610)
|
||||
#endif
|
||||
|
||||
/*
    Generates machine code with the calling convention of a member function
        int A::func(int a, int b, int c, int d, int e) const
    that returns self->x_ + self->y_ + a + b + c + d + e, where x_ and y_
    are read at offsets 0 and 4 of the object.
*/
struct Code : public Xbyak::CodeGenerator {
    Code()
    {
        using namespace Xbyak;

        int RET_ADJ = 0; // bytes of arguments removed by `ret`
#ifdef XBYAK32
    #ifdef _WIN32
        const int PARA_ADJ = 0;
        RET_ADJ = 5 * 4;
    #else
        const int PARA_ADJ = 4;
        mov(ecx, ptr [esp + 4]);
    #endif
#endif
        // where the object pointer and the five arguments are found on entry
        const struct {
#ifdef XBYAK32
            const Reg32& self;
#else
            const Reg64& self;
#endif
            const Operand& a;
            const Operand& b;
            const Operand& c;
            const Operand& d;
            const Operand& e;
        } args = {
#if defined(XBYAK64_WIN)
            rcx,
            edx,
            r8d,
            r9d,
            ptr [rsp + 8 * 5],
            ptr [rsp + 8 * 6],
#elif defined(XBYAK64_GCC)
            rdi,
            esi,
            edx,
            ecx,
            r8d,
            r9d,
#else
            ecx,
            ptr [esp + 4 + PARA_ADJ],
            ptr [esp + 8 + PARA_ADJ],
            ptr [esp + 12 + PARA_ADJ],
            ptr [esp + 16 + PARA_ADJ],
            ptr [esp + 20 + PARA_ADJ],
#endif
        };
        // eax = self->x_ + self->y_
        mov(eax, ptr [args.self]);
        add(eax, ptr [args.self + 4]);
        // eax += a + b + c + d + e, in this order
        const Operand *const addends[] = { &args.a, &args.b, &args.c, &args.d, &args.e };
        for (int i = 0; i < 5; i++) {
            add(eax, *addends[i]);
        }
        ret(RET_ADJ);
    }
};
|
||||
|
||||
int main()
|
||||
{
|
||||
#ifdef XBYAK64
|
||||
printf("64bit");
|
||||
#else
|
||||
printf("32bit");
|
||||
#endif
|
||||
#ifdef _WIN32
|
||||
puts(" win");
|
||||
#else
|
||||
puts(" linux");
|
||||
#endif
|
||||
try {
|
||||
Code code;
|
||||
int (A::*p)(int, int, int, int, int) const = 0;
|
||||
const void *addr = code.getCode<void*>();
|
||||
memcpy(&p, &addr, sizeof(void*));
|
||||
for (int i = 0; i < 10; i++) {
|
||||
A a;
|
||||
int t1, t2, t3, t4, t5, x, y;
|
||||
a.x_ = rand(); a.y_ = rand();
|
||||
t1 = rand(); t2 = rand(); t3 = rand();
|
||||
t4 = rand(); t5 = rand();
|
||||
x = a.func(t1, t2, t3, t4, t5);
|
||||
y = (a.*p)(t1, t2, t3, t4, t5);
|
||||
printf("%c %d, %d\n", x == y ? 'o' : 'x', x, y);
|
||||
}
|
||||
} catch (std::exception& e) {
|
||||
printf("err=%s\n", e.what());
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,90 @@
|
|||
/*
|
||||
How to profile JIT-code with perf or VTune
|
||||
sudo perf record ./profiler 1
|
||||
amplxe-cl -collect hotspots -result-dir r001hs -quiet ./profiler-vtune 2
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <xbyak/xbyak_util.h>
|
||||
|
||||
const int N = 3000000;
|
||||
/*
    Integer busy loop used as a profiling target:
    eax = N; repeat { ten times eax -= 1 } while the last subtraction left
    eax > 0; return 1.
*/
struct Code : public Xbyak::CodeGenerator {
    Code()
    {
        mov(eax, N);
        Xbyak::Label loopTop = L();
        // unrolled body: ten decrements per iteration
        int remaining = 10;
        while (remaining > 0) {
            sub(eax, 1);
            --remaining;
        }
        jg(loopTop);
        mov(eax, 1);
        ret();
    }
};
|
||||
|
||||
/*
    SSE busy loop used as a profiling target:
    eax = N; repeat { ten times xorps xm0, xm0; eax -= 1 } while eax > 0;
    return 1.
*/
struct Code2 : public Xbyak::CodeGenerator {
    Code2()
    {
        mov(eax, N);
        Xbyak::Label loopTop = L();
        // unrolled body: ten register-clearing xorps per iteration
        int remaining = 10;
        while (remaining > 0) {
            xorps(xm0, xm0);
            --remaining;
        }
        sub(eax, 1);
        jg(loopTop);
        mov(eax, 1);
        ret();
    }
};
|
||||
|
||||
/*
    Returns 1/1 + 1/2 + ... + 1/n, summed in increasing order of the
    denominator. Returns 0 when n <= 0.
*/
double s1(int n)
{
    double total = 0;
    int i = 0;
    while (i < n) {
        total += 1.0 / (i + 1);
        ++i;
    }
    return total;
}
|
||||
|
||||
/*
    Returns the sum over i = 0 .. n-1 of 1/(i*i + 1) + 2/(i + 3).
    Returns 0 when n <= 0.

    The term is evaluated in double precision: as an int expression, i * i
    overflows (undefined behaviour) once i >= 46341. Every value involved is
    an integer below 2^53, so the results for smaller i are unchanged.
*/
double s2(int n)
{
    double r = 0;
    for (int i = 0; i < n; i++) {
        const double x = i;
        r += 1.0 / (x * x + 1) + 2.0 / (x + 3);
    }
    return r;
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int mode = argc == 1 ? 0 : atoi(argv[1]);
|
||||
Code c;
|
||||
Code2 c2;
|
||||
int (*f)() = (int (*)())c.getCode();
|
||||
int (*g)() = (int (*)())c2.getCode();
|
||||
|
||||
printf("f:%p, %d\n", (const void*)f, (int)c.getSize());
|
||||
printf("g:%p, %d\n", (const void*)g, (int)c2.getSize());
|
||||
Xbyak::util::Profiler prof;
|
||||
printf("mode=%d\n", mode);
|
||||
prof.init(mode);
|
||||
prof.set("f", (const void*)f, c.getSize());
|
||||
prof.set("g", (const void*)g, c2.getSize());
|
||||
|
||||
double sum = 0;
|
||||
for (int i = 0; i < 20000; i++) {
|
||||
sum += s1(i);
|
||||
sum += s2(i);
|
||||
}
|
||||
printf("sum=%f\n", sum);
|
||||
for (int i = 0; i < 2000; i++) {
|
||||
sum += f();
|
||||
}
|
||||
printf("f=%f\n", sum);
|
||||
for (int i = 0; i < 2000; i++) {
|
||||
sum += g();
|
||||
}
|
||||
printf("g=%f\n", sum);
|
||||
puts("end");
|
||||
}
|
|
@ -0,0 +1,70 @@
|
|||
#define XBYAK_NO_OP_NAMES
|
||||
#include <xbyak/xbyak.h>
|
||||
|
||||
struct Code1 : Xbyak::CodeGenerator {
|
||||
Code1()
|
||||
: Xbyak::CodeGenerator(4096, Xbyak::DontSetProtectRWE)
|
||||
{
|
||||
mov(eax, 123);
|
||||
ret();
|
||||
}
|
||||
void update()
|
||||
{
|
||||
db(0);
|
||||
}
|
||||
};
|
||||
|
||||
void test1(bool updateCode)
|
||||
{
|
||||
Code1 c;
|
||||
c.setProtectModeRE();
|
||||
if (updateCode) c.update(); // segmentation fault
|
||||
int (*f)() = c.getCode<int (*)()>();
|
||||
printf("f=%d\n", f());
|
||||
|
||||
c.setProtectModeRW();
|
||||
c.update();
|
||||
puts("ok");
|
||||
}
|
||||
|
||||
struct Code2 : Xbyak::CodeGenerator {
|
||||
Code2()
|
||||
: Xbyak::CodeGenerator(4096, Xbyak::AutoGrow)
|
||||
{
|
||||
mov(eax, 123);
|
||||
ret();
|
||||
}
|
||||
void update()
|
||||
{
|
||||
db(0);
|
||||
}
|
||||
};
|
||||
|
||||
void test2(bool updateCode)
|
||||
{
|
||||
Code2 c;
|
||||
c.readyRE();
|
||||
if (updateCode) c.update(); // segmentation fault
|
||||
int (*f)() = c.getCode<int (*)()>();
|
||||
printf("f=%d\n", f());
|
||||
|
||||
c.setProtectModeRW();
|
||||
c.update();
|
||||
puts("ok");
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
if (argc < 2) {
|
||||
fprintf(stderr, "%s <testNum> [update]\n", argv[0]);
|
||||
return 0;
|
||||
}
|
||||
bool update = argc == 3;
|
||||
int n = atoi(argv[1]);
|
||||
printf("n=%d update=%d\n", n, update);
|
||||
switch (n) {
|
||||
case 1: test1(update); break;
|
||||
case 2: test2(update); break;
|
||||
default: fprintf(stderr, "no test %d\n", n); break;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,229 @@
|
|||
/*
|
||||
@author herumi
|
||||
|
||||
JPEG quantize sample
|
||||
This program generates a quantization routine at run time, using a fast division-by-constant algorithm.
|
||||
|
||||
time(sec)
|
||||
quality 1(high) 10 50 100(low)
|
||||
VC2005 8.0 8.0 8.0 8.0
|
||||
Xbyak 1.6 0.8 0.5 0.5
|
||||
|
||||
|
||||
; generated code at q = 1
|
||||
push esi
|
||||
push edi
|
||||
mov edi,dword ptr [esp+0Ch]
|
||||
mov esi,dword ptr [esp+10h]
|
||||
mov eax,dword ptr [esi]
|
||||
shr eax,4
|
||||
mov dword ptr [edi],eax
|
||||
mov eax,dword ptr [esi+4]
|
||||
mov edx,0BA2E8BA3h
|
||||
mul eax,edx
|
||||
shr edx,3
|
||||
...
|
||||
|
||||
; generated code at q = 100
|
||||
push esi
|
||||
push edi
|
||||
mov edi,dword ptr [esp+0Ch]
|
||||
mov esi,dword ptr [esp+10h]
|
||||
mov eax,dword ptr [esi]
|
||||
mov dword ptr [edi],eax
|
||||
mov eax,dword ptr [esi+4]
|
||||
mov dword ptr [edi+4],eax
|
||||
mov eax,dword ptr [esi+8]
|
||||
mov dword ptr [edi+8],eax
|
||||
mov eax,dword ptr [esi+0Ch]
|
||||
...
|
||||
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
#define XBYAK_NO_OP_NAMES
|
||||
#include "xbyak/xbyak.h"
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(disable : 4996) // scanf
|
||||
#endif
|
||||
|
||||
typedef Xbyak::uint64 uint64;
|
||||
typedef Xbyak::uint32 uint32;
|
||||
|
||||
const int N = 64;
|
||||
|
||||
class Quantize : public Xbyak::CodeGenerator {
|
||||
static int ilog2(int x)
|
||||
{
|
||||
int shift = 0;
|
||||
while ((1 << shift) <= x) shift++;
|
||||
return shift - 1;
|
||||
}
|
||||
public:
|
||||
/*
|
||||
input : esi
|
||||
output : eax = [esi+offset] / dividend
|
||||
destroy : edx
|
||||
*/
|
||||
void udiv(uint32 dividend, int offset)
|
||||
{
|
||||
mov(eax, ptr[esi + offset]);
|
||||
|
||||
/* dividend = odd x 2^exponent */
|
||||
int exponent = 0, odd = dividend;
|
||||
while ((odd & 1) == 0) {
|
||||
odd >>= 1; exponent++;
|
||||
}
|
||||
|
||||
if (odd == 1) { // trivial case
|
||||
if (exponent) {
|
||||
shr(eax, exponent);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
uint64 mLow, mHigh;
|
||||
int len = ilog2(odd) + 1;
|
||||
{
|
||||
uint64 roundUp = uint64(1) << (32 + len);
|
||||
uint64 k = roundUp / (0xFFFFFFFFL - (0xFFFFFFFFL % odd));
|
||||
mLow = roundUp / odd;
|
||||
mHigh = (roundUp + k) / odd;
|
||||
}
|
||||
|
||||
while (((mLow >> 1) < (mHigh >> 1)) && (len > 0)) {
|
||||
mLow >>= 1; mHigh >>= 1; len--;
|
||||
}
|
||||
|
||||
uint64 m; int a;
|
||||
if ((mHigh >> 32) == 0) {
|
||||
m = mHigh; a = 0;
|
||||
} else {
|
||||
len = ilog2(odd);
|
||||
uint64 roundDown = uint64(1) << (32 + len);
|
||||
mLow = roundDown / odd;
|
||||
int r = (int)(roundDown % odd);
|
||||
m = (r <= (odd >> 1)) ? mLow : mLow + 1;
|
||||
a = 1;
|
||||
}
|
||||
while ((m & 1) == 0) {
|
||||
m >>= 1; len--;
|
||||
}
|
||||
len += exponent;
|
||||
|
||||
mov(edx, int(m));
|
||||
mul(edx);
|
||||
if (a) {
|
||||
add(eax, int(m));
|
||||
adc(edx, 0);
|
||||
}
|
||||
if (len) {
|
||||
shr(edx, len);
|
||||
}
|
||||
mov(eax, edx);
|
||||
}
|
||||
/*
|
||||
quantize(uint32 dest[64], const uint32 src[64]);
|
||||
*/
|
||||
Quantize(const uint32 qTbl[64])
|
||||
{
|
||||
push(esi);
|
||||
push(edi);
|
||||
const int P_ = 4 * 2;
|
||||
mov(edi, ptr [esp+P_+4]); // dest
|
||||
mov(esi, ptr [esp+P_+8]); // src
|
||||
for (int i = 0; i < N; i++) {
|
||||
udiv(qTbl[i], i * 4);
|
||||
mov(ptr[edi+i*4], eax);
|
||||
}
|
||||
pop(edi);
|
||||
pop(esi);
|
||||
ret();
|
||||
}
|
||||
};
|
||||
|
||||
/*
    Reference implementation: dest[i] = src[i] / qTbl[i] for the N entries.
    Every qTbl entry must be non-zero.
*/
void quantize(uint32 dest[64], const uint32 src[64], const uint32 qTbl[64])
{
    int i = 0;
    while (i < N) {
        dest[i] = src[i] / qTbl[i];
        ++i;
    }
}
|
||||
|
||||
#ifdef XBYAK64
|
||||
int main()
|
||||
{
|
||||
puts("not implemented for 64bit");
|
||||
return 1;
|
||||
}
|
||||
#else
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int q;
|
||||
if (argc > 1) {
|
||||
q = atoi(argv[1]);
|
||||
} else {
|
||||
printf("input quantize=");
|
||||
if (scanf("%d", &q) != 1) {
|
||||
fprintf(stderr, "bad number\n");
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
printf("q=%d\n", q);
|
||||
uint32 qTbl[] = {
|
||||
16, 11, 10, 16, 24, 40, 51, 61,
|
||||
12, 12, 14, 19, 26, 58, 60, 55,
|
||||
14, 13, 16, 24, 40, 57, 69, 56,
|
||||
14, 17, 22, 29, 51, 87, 80, 62,
|
||||
18, 22, 37, 56, 68, 109, 103, 77,
|
||||
24, 35, 55, 64, 81, 104, 113, 92,
|
||||
49, 64, 78, 87, 103, 121, 120, 101,
|
||||
72, 92, 95, 98, 112, 100, 103, 99
|
||||
};
|
||||
|
||||
for (int i = 0; i < N; i++) {
|
||||
qTbl[i] /= q;
|
||||
if (qTbl[i] == 0) qTbl[i] = 1;
|
||||
}
|
||||
|
||||
try {
|
||||
uint32 src[N];
|
||||
uint32 dest[N];
|
||||
uint32 dest2[N];
|
||||
for (int i = 0; i < N; i++) {
|
||||
src[i] = rand() % 2048;
|
||||
}
|
||||
|
||||
Quantize jit(qTbl);
|
||||
//printf("jit size=%d, ptr=%p\n", jit.getSize(), jit.getCode());
|
||||
void (*quantize2)(uint32*, const uint32*, const uint32 *) = jit.getCode<void (*)(uint32*, const uint32*, const uint32 *)>();
|
||||
|
||||
quantize(dest, src, qTbl);
|
||||
quantize2(dest2, src, qTbl);
|
||||
for (int i = 0; i < N; i++) {
|
||||
if (dest[i] != dest2[i]) {
|
||||
printf("err[%d] %d %d\n", i, dest[i], dest2[i]);
|
||||
}
|
||||
}
|
||||
|
||||
const int count = 10000000;
|
||||
int begin;
|
||||
|
||||
begin = clock();
|
||||
for (int i = 0; i < count; i++) {
|
||||
quantize(dest, src, qTbl);
|
||||
}
|
||||
printf("time=%.1fsec\n", (clock() - begin) / double(CLOCKS_PER_SEC));
|
||||
|
||||
begin = clock();
|
||||
for (int i = 0; i < count; i++) {
|
||||
quantize2(dest, src, qTbl);
|
||||
}
|
||||
printf("time=%.1fsec\n", (clock() - begin) / double(CLOCKS_PER_SEC));
|
||||
} catch (std::exception& e) {
|
||||
printf("ERR:%s\n", e.what());
|
||||
} catch (...) {
|
||||
printf("unknown error\n");
|
||||
}
|
||||
}
|
||||
#endif
|
|
@ -0,0 +1,228 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{D06753BF-E1F3-4578-9B18-08673327F77C}</ProjectGuid>
|
||||
<WindowsTargetPlatformVersion>10.0.17763.0</WindowsTargetPlatformVersion>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup>
|
||||
<_ProjectFileVersion>15.0.27924.0</_ProjectFileVersion>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Platform)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Platform)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Midl>
|
||||
<TypeLibraryName>.\Debug/quantize.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<MinimalRebuild>true</MinimalRebuild>
|
||||
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
<PrecompiledHeaderOutputFile>.\Debug/quantize.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<ProgramDatabaseFile>.\Debug/quantize.pdb</ProgramDatabaseFile>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX86</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Midl>
|
||||
<TypeLibraryName>.\Release/quantize.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<StringPooling>true</StringPooling>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<PrecompiledHeaderOutputFile>.\Release/quantize.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX86</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<Midl>
|
||||
<TargetEnvironment>X64</TargetEnvironment>
|
||||
<TypeLibraryName>.\Debug/quantize.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<MinimalRebuild>true</MinimalRebuild>
|
||||
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
<PrecompiledHeaderOutputFile>.\Debug/quantize.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<ProgramDatabaseFile>.\Debug/quantize.pdb</ProgramDatabaseFile>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX64</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<Midl>
|
||||
<TargetEnvironment>X64</TargetEnvironment>
|
||||
<TypeLibraryName>.\Release/quantize.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<StringPooling>true</StringPooling>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<PrecompiledHeaderOutputFile>.\Release/quantize.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX64</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="quantize.cpp" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
|
@ -0,0 +1,29 @@
|
|||
#define XBYAK_NO_OP_NAMES
|
||||
#include <xbyak/xbyak_util.h>
|
||||
|
||||
#ifdef XBYAK32
|
||||
#error "this sample is for only 64-bit mode"
|
||||
#endif
|
||||
|
||||
/*
    Generates a function that returns the sum of its three arguments,
    using Xbyak::util::StackFrame to name the parameter registers.
*/
struct Code : public Xbyak::CodeGenerator {
    Code()
    {
        // see xbyak/sample/sf_test.cpp for how to use the other parameters
        Xbyak::util::StackFrame sf(this, 3);
        mov(rax, sf.p[0]);
        for (int i = 1; i < 3; i++) {
            add(rax, sf.p[i]);
        }
        // NOTE(review): no explicit ret() here - presumably StackFrame emits
        // the epilogue when `sf` goes out of scope; confirm in xbyak_util.h.
    }
};
|
||||
|
||||
int main()
|
||||
{
|
||||
Code c;
|
||||
int (*f)(int, int, int) = c.getCode<int(*) (int, int, int)>();
|
||||
int ret = f(3, 5, 2);
|
||||
if (ret == 3 + 5 + 2) {
|
||||
puts("ok");
|
||||
} else {
|
||||
puts("ng");
|
||||
}
|
||||
}
|
|
@ -0,0 +1,45 @@
|
|||
/*
|
||||
Sample that generates code into a statically allocated buffer.
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#define XBYAK_NO_OP_NAMES
|
||||
#include "xbyak/xbyak.h"
|
||||
|
||||
MIE_ALIGN(4096) char buf[4096];
|
||||
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
: Xbyak::CodeGenerator(sizeof(buf), buf)
|
||||
{
|
||||
puts("generate");
|
||||
printf("ptr=%p, %p\n", getCode(), buf);
|
||||
#ifdef XBYAK32
|
||||
mov(eax, ptr [esp + 4]);
|
||||
add(eax, ptr [esp + 8]);
|
||||
#elif defined(XBYAK64_WIN)
|
||||
lea(rax, ptr [rcx + rdx]);
|
||||
#else
|
||||
lea(rax, ptr [rdi + rsi]);
|
||||
#endif
|
||||
ret();
|
||||
Xbyak::CodeArray::protect(buf, sizeof(buf), Xbyak::CodeArray::PROTECT_RE);
|
||||
}
|
||||
~Code()
|
||||
{
|
||||
Xbyak::CodeArray::protect(buf, sizeof(buf), Xbyak::CodeArray::PROTECT_RW);
|
||||
}
|
||||
} s_code;
|
||||
|
||||
inline int add(int a, int b)
|
||||
{
|
||||
return reinterpret_cast<int (*)(int, int)>(buf)(a, b);
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
int sum = 0;
|
||||
for (int i = 0; i < 10; i++) {
|
||||
sum += add(i, 5);
|
||||
}
|
||||
printf("sum=%d\n", sum);
|
||||
}
|
|
@ -0,0 +1,190 @@
|
|||
#if defined(_MSC_VER) && (_MSC_VER <= 1200)
|
||||
#pragma warning(disable:4514)
|
||||
#pragma warning(disable:4786)
|
||||
#endif
|
||||
#if defined(_MSC_VER) && (_MSC_VER >= 1900)
|
||||
#pragma warning(disable:4456)
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#define XBYAK_NO_OP_NAMES
|
||||
#include "xbyak/xbyak.h"
|
||||
|
||||
class Sample : public Xbyak::CodeGenerator {
|
||||
void operator=(const Sample&);
|
||||
public:
|
||||
Sample(void *userPtr = 0, size_t size = Xbyak::DEFAULT_MAX_CODE_SIZE) : Xbyak::CodeGenerator(size, userPtr)
|
||||
{
|
||||
inLocalLabel(); // use local label for multiple instance
|
||||
#ifdef XBYAK32
|
||||
mov(ecx, ptr [esp + 4]); // n
|
||||
#elif defined(XBYAK64_GCC)
|
||||
mov(ecx, edi); // n
|
||||
#else
|
||||
// n = ecx
|
||||
#endif
|
||||
xor_(eax, eax); // sum
|
||||
test(ecx, ecx);
|
||||
jz(".exit");
|
||||
xor_(edx, edx); // i
|
||||
L(".lp");
|
||||
add(eax, edx);
|
||||
inc(edx);
|
||||
|
||||
cmp(edx, ecx);
|
||||
jbe(".lp"); // jmp to previous @@
|
||||
L(".exit"); // <B>
|
||||
ret();
|
||||
outLocalLabel(); // end of local label
|
||||
}
|
||||
};
|
||||
|
||||
class AddFunc : public Xbyak::CodeGenerator {
|
||||
void operator=(const AddFunc&);
|
||||
public:
|
||||
AddFunc(int y)
|
||||
{
|
||||
#ifdef XBYAK32
|
||||
mov(eax, ptr [esp + 4]);
|
||||
add(eax, y);
|
||||
#elif defined(XBYAK64_WIN)
|
||||
lea(rax, ptr [rcx + y]);
|
||||
#else
|
||||
lea(eax, ptr [edi + y]);
|
||||
#endif
|
||||
ret();
|
||||
}
|
||||
int (*get() const)(int) { return getCode<int(*)(int)>(); }
|
||||
};
|
||||
|
||||
class CallAtoi : public Xbyak::CodeGenerator {
|
||||
void operator=(const CallAtoi&);
|
||||
public:
|
||||
CallAtoi()
|
||||
{
|
||||
#ifdef XBYAK64
|
||||
#ifdef XBYAK64_WIN
|
||||
sub(rsp, 32); // return-address is destroied if 64bit debug mode
|
||||
#endif
|
||||
mov(rax, (size_t)atoi);
|
||||
call(rax);
|
||||
#ifdef XBYAK64_WIN
|
||||
add(rsp, 32);
|
||||
#endif
|
||||
#else
|
||||
mov(eax, ptr [esp + 4]);
|
||||
push(eax);
|
||||
#ifdef XBYAK_VARIADIC_TEMPLATE
|
||||
call(atoi);
|
||||
#else
|
||||
call(reinterpret_cast<const void*>(atoi));
|
||||
#endif
|
||||
add(esp, 4);
|
||||
#endif
|
||||
ret();
|
||||
}
|
||||
int (*get() const)(const char *) { return getCode<int (*)(const char *)>(); }
|
||||
};
|
||||
|
||||
class JmpAtoi : public Xbyak::CodeGenerator {
|
||||
void operator=(const JmpAtoi&);
|
||||
public:
|
||||
JmpAtoi()
|
||||
{
|
||||
/* already pushed "456" */
|
||||
#ifdef XBYAK64
|
||||
mov(rax, (size_t)atoi);
|
||||
jmp(rax);
|
||||
#else
|
||||
jmp(reinterpret_cast<const void*>(atoi));
|
||||
#endif
|
||||
}
|
||||
int (*get() const)(const char *) { return getCode<int (*)(const char *)>(); }
|
||||
};
|
||||
|
||||
/*
    Generator that is filled by init() rather than by its constructor, so
    the same object can be regenerated after reset().
*/
struct Reset : public Xbyak::CodeGenerator {
    /*
        Emits a function that sets ecx = n and returns 0 when n == 0;
        otherwise it loops, counting ecx down to 0, and in every pass adds
        ecx to eax (10 - n) times (the add is unrolled at generation time).
    */
    void init(int n)
    {
        xor_(eax, eax);
        mov(ecx, n);
        test(ecx, ecx);
        jnz("@f"); // forward to the next "@@"
        ret();
    L("@@");
        for (int left = 10 - n; left > 0; left--) {
            add(eax, ecx);
        }
        sub(ecx, 1);
        jnz("@b"); // back to the previous "@@"
        ret();
    }
};
|
||||
|
||||
void testReset()
|
||||
{
|
||||
puts("testReset");
|
||||
Reset code;
|
||||
int (*f)(int) = code.getCode<int(*)(int)>();
|
||||
for (int i = 0; i < 10; i++) {
|
||||
code.init(i);
|
||||
int v = f(i);
|
||||
printf("%d %d\n", i, v);
|
||||
code.reset();
|
||||
}
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
try {
|
||||
Sample s;
|
||||
printf("Xbyak version=%s\n", s.getVersionString());
|
||||
#ifdef XBYAK64_GCC
|
||||
puts("64bit mode(gcc)");
|
||||
#elif defined(XBYAK64_WIN)
|
||||
puts("64bit mode(win)");
|
||||
#else
|
||||
puts("32bit");
|
||||
#endif
|
||||
int (*func)(int) = s.getCode<int (*)(int)>();
|
||||
for (int i = 0; i <= 10; i++) {
|
||||
printf("0 + ... + %d = %d\n", i, func(i));
|
||||
}
|
||||
for (int i = 0; i < 10; i++) {
|
||||
AddFunc a(i);
|
||||
int (*add)(int) = a.get();
|
||||
int y = add(i);
|
||||
printf("%d + %d = %d\n", i, i, y);
|
||||
}
|
||||
CallAtoi c;
|
||||
printf("call atoi(\"123\") = %d\n", c.get()("123"));
|
||||
JmpAtoi j;
|
||||
printf("jmp atoi(\"456\") = %d\n", j.get()("456"));
|
||||
{
|
||||
// use memory allocated by user
|
||||
using namespace Xbyak;
|
||||
const size_t codeSize = 4096;
|
||||
uint8 buf[codeSize + 16];
|
||||
uint8 *p = CodeArray::getAlignedAddress(buf);
|
||||
Sample s(p, codeSize);
|
||||
if (!CodeArray::protect(p, codeSize, CodeArray::PROTECT_RWE)) {
|
||||
fprintf(stderr, "can't protect\n");
|
||||
return 1;
|
||||
}
|
||||
int (*func)(int) = s.getCode<int (*)(int)>();
|
||||
const uint8 *funcp = reinterpret_cast<const uint8*>(func);
|
||||
if (funcp != p) {
|
||||
fprintf(stderr, "internal error %p %p\n", p, funcp);
|
||||
return 1;
|
||||
}
|
||||
printf("0 + ... + %d = %d\n", 100, func(100));
|
||||
CodeArray::protect(p, codeSize, CodeArray::PROTECT_RW);
|
||||
}
|
||||
puts("OK");
|
||||
testReset();
|
||||
} catch (std::exception& e) {
|
||||
printf("ERR:%s\n", e.what());
|
||||
} catch (...) {
|
||||
printf("unknown error\n");
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,228 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E}</ProjectGuid>
|
||||
<WindowsTargetPlatformVersion>10.0.17763.0</WindowsTargetPlatformVersion>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup>
|
||||
<_ProjectFileVersion>15.0.27924.0</_ProjectFileVersion>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Platform)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Platform)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Midl>
|
||||
<TypeLibraryName>.\Debug/test0.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<MinimalRebuild>true</MinimalRebuild>
|
||||
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
<PrecompiledHeaderOutputFile>.\Debug/test0.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<ProgramDatabaseFile>.\Debug/test0.pdb</ProgramDatabaseFile>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX86</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Midl>
|
||||
<TypeLibraryName>.\Release/test0.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<StringPooling>true</StringPooling>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<PrecompiledHeaderOutputFile>.\Release/test0.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX86</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<Midl>
|
||||
<TargetEnvironment>X64</TargetEnvironment>
|
||||
<TypeLibraryName>.\Debug/test0.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<MinimalRebuild>true</MinimalRebuild>
|
||||
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
<PrecompiledHeaderOutputFile>.\Debug/test0.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<ProgramDatabaseFile>.\Debug/test0.pdb</ProgramDatabaseFile>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX64</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<Midl>
|
||||
<TargetEnvironment>X64</TargetEnvironment>
|
||||
<TypeLibraryName>.\Release/test0.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<StringPooling>true</StringPooling>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<PrecompiledHeaderOutputFile>.\Release/test0.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX64</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="test0.cpp" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
|
@ -0,0 +1,125 @@
|
|||
#include <stdio.h>
|
||||
#define XBYAK_NO_OP_NAMES
|
||||
#include "xbyak/xbyak_util.h"
|
||||
|
||||
#define NUM_OF_ARRAY(x) (sizeof(x) / sizeof(x[0]))
|
||||
|
||||
/*
	Code generator for a tiny function that takes no arguments:
	it loads the constant n into eax, replaces eax with popcnt(eax),
	and returns. The value of n is baked into the generated code when the
	object is constructed.
*/
struct PopCountTest : Xbyak::CodeGenerator {
	PopCountTest(int n)
	{
		mov(eax, n);      // eax = n (immediate)
		popcnt(eax, eax); // eax = number of set bits in eax
		ret();
	}
};
|
||||
|
||||
void putCPUinfo()
|
||||
{
|
||||
using namespace Xbyak::util;
|
||||
Cpu cpu;
|
||||
printf("vendor %s\n", cpu.has(Cpu::tINTEL) ? "intel" : "amd");
|
||||
static const struct {
|
||||
Cpu::Type type;
|
||||
const char *str;
|
||||
} tbl[] = {
|
||||
{ Cpu::tMMX, "mmx" },
|
||||
{ Cpu::tMMX2, "mmx2" },
|
||||
{ Cpu::tCMOV, "cmov" },
|
||||
{ Cpu::tSSE, "sse" },
|
||||
{ Cpu::tSSE2, "sse2" },
|
||||
{ Cpu::tSSE3, "sse3" },
|
||||
{ Cpu::tSSSE3, "ssse3" },
|
||||
{ Cpu::tSSE41, "sse41" },
|
||||
{ Cpu::tSSE42, "sse42" },
|
||||
{ Cpu::tPOPCNT, "popcnt" },
|
||||
{ Cpu::t3DN, "3dn" },
|
||||
{ Cpu::tE3DN, "e3dn" },
|
||||
{ Cpu::tSSE4a, "sse4a" },
|
||||
{ Cpu::tSSE5, "sse5" },
|
||||
{ Cpu::tAESNI, "aesni" },
|
||||
{ Cpu::tRDTSCP, "rdtscp" },
|
||||
{ Cpu::tOSXSAVE, "osxsave(xgetvb)" },
|
||||
{ Cpu::tPCLMULQDQ, "pclmulqdq" },
|
||||
{ Cpu::tAVX, "avx" },
|
||||
{ Cpu::tFMA, "fma" },
|
||||
{ Cpu::tAVX2, "avx2" },
|
||||
{ Cpu::tBMI1, "bmi1" },
|
||||
{ Cpu::tBMI2, "bmi2" },
|
||||
{ Cpu::tLZCNT, "lzcnt" },
|
||||
{ Cpu::tPREFETCHW, "prefetchw" },
|
||||
{ Cpu::tENHANCED_REP, "enh_rep" },
|
||||
{ Cpu::tRDRAND, "rdrand" },
|
||||
{ Cpu::tADX, "adx" },
|
||||
{ Cpu::tRDSEED, "rdseed" },
|
||||
{ Cpu::tSMAP, "smap" },
|
||||
{ Cpu::tHLE, "hle" },
|
||||
{ Cpu::tRTM, "rtm" },
|
||||
{ Cpu::tMPX, "mpx" },
|
||||
{ Cpu::tSHA, "sha" },
|
||||
{ Cpu::tPREFETCHWT1, "prefetchwt1" },
|
||||
{ Cpu::tF16C, "f16c" },
|
||||
{ Cpu::tMOVBE, "movbe" },
|
||||
{ Cpu::tAVX512F, "avx512f" },
|
||||
{ Cpu::tAVX512DQ, "avx512dq" },
|
||||
{ Cpu::tAVX512IFMA, "avx512_ifma" },
|
||||
{ Cpu::tAVX512PF, "avx512pf" },
|
||||
{ Cpu::tAVX512ER, "avx512er" },
|
||||
{ Cpu::tAVX512CD, "avx512cd" },
|
||||
{ Cpu::tAVX512BW, "avx512bw" },
|
||||
{ Cpu::tAVX512VL, "avx512vl" },
|
||||
{ Cpu::tAVX512VBMI, "avx512_vbmi" },
|
||||
{ Cpu::tAVX512_4VNNIW, "avx512_4vnniw" },
|
||||
{ Cpu::tAVX512_4FMAPS, "avx512_4fmaps" },
|
||||
|
||||
{ Cpu::tAVX512_VBMI2, "avx512_vbmi2" },
|
||||
{ Cpu::tGFNI, "gfni" },
|
||||
{ Cpu::tVAES, "vaes" },
|
||||
{ Cpu::tVPCLMULQDQ, "vpclmulqdq" },
|
||||
{ Cpu::tAVX512_VNNI, "avx512_vnni" },
|
||||
{ Cpu::tAVX512_BITALG, "avx512_bitalg" },
|
||||
{ Cpu::tAVX512_VPOPCNTDQ, "avx512_vpopcntdq" },
|
||||
{ Cpu::tAVX512_BF16, "avx512_bf16" },
|
||||
{ Cpu::tAVX512_VP2INTERSECT, "avx512_vp2intersect" },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
if (cpu.has(tbl[i].type)) printf(" %s", tbl[i].str);
|
||||
}
|
||||
printf("\n");
|
||||
if (cpu.has(Cpu::tPOPCNT)) {
|
||||
const int n = 0x12345678; // bitcount = 13
|
||||
const int ok = 13;
|
||||
int r = PopCountTest(n).getCode<int (*)()>()();
|
||||
if (r == ok) {
|
||||
puts("popcnt ok");
|
||||
} else {
|
||||
printf("popcnt ng %d %d\n", r, ok);
|
||||
}
|
||||
}
|
||||
/*
|
||||
displayFamily displayModel
|
||||
Opteron 2376 10 4
|
||||
Core2 Duo T7100 6 F
|
||||
Core i3-2120T 6 2A
|
||||
Core i7-2600 6 2A
|
||||
Xeon X5650 6 2C
|
||||
Core i7-3517 6 3A
|
||||
Core i7-3930K 6 2D
|
||||
*/
|
||||
cpu.putFamily();
|
||||
if (!cpu.has(Cpu::tINTEL)) return;
|
||||
for (unsigned int i = 0; i < cpu.getDataCacheLevels(); i++) {
|
||||
printf("cache level=%u data cache size=%u cores sharing data cache=%u\n", i, cpu.getDataCacheSize(i), cpu.getCoresSharingDataCache(i));
|
||||
}
|
||||
printf("SmtLevel =%u\n", cpu.getNumCores(Xbyak::util::SmtLevel));
|
||||
printf("CoreLevel=%u\n", cpu.getNumCores(Xbyak::util::CoreLevel));
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
#ifdef XBYAK32
|
||||
puts("32bit");
|
||||
#else
|
||||
puts("64bit");
|
||||
#endif
|
||||
putCPUinfo();
|
||||
}
|
|
@ -0,0 +1,228 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD}</ProjectGuid>
|
||||
<WindowsTargetPlatformVersion>10.0.17763.0</WindowsTargetPlatformVersion>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup>
|
||||
<_ProjectFileVersion>15.0.27924.0</_ProjectFileVersion>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Platform)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Platform)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Midl>
|
||||
<TypeLibraryName>.\Debug/test_util.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<MinimalRebuild>true</MinimalRebuild>
|
||||
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
<PrecompiledHeaderOutputFile>.\Debug/test_util.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<ProgramDatabaseFile>.\Debug/test_util.pdb</ProgramDatabaseFile>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX86</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<Midl>
|
||||
<TargetEnvironment>X64</TargetEnvironment>
|
||||
<TypeLibraryName>.\Debug/test_util.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<MinimalRebuild>true</MinimalRebuild>
|
||||
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
<PrecompiledHeaderOutputFile>.\Debug/test_util.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<ProgramDatabaseFile>.\Debug/test_util.pdb</ProgramDatabaseFile>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX64</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Midl>
|
||||
<TypeLibraryName>.\Release/test_util.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<StringPooling>true</StringPooling>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<PrecompiledHeaderOutputFile>.\Release/test_util.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX86</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<Midl>
|
||||
<TargetEnvironment>X64</TargetEnvironment>
|
||||
<TypeLibraryName>.\Release/test_util.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<StringPooling>true</StringPooling>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<PrecompiledHeaderOutputFile>.\Release/test_util.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX64</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="test_util.cpp" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
|
@ -0,0 +1,380 @@
|
|||
/*
|
||||
toy vm
|
||||
register A, B : 32bit
|
||||
PC : program counter
|
||||
|
||||
mem_ 4byte x 65536
|
||||
|
||||
all instructions are fixed at 4 bytes
|
||||
all immediates are 16 bits
|
||||
|
||||
R = A or B
|
||||
vldiR, imm ; R = imm
|
||||
vldR, idx ; R = mem_[idx]
|
||||
vstR, idx ; mem_[idx] = R
|
||||
vaddiR, imm ; R += imm
|
||||
vsubiR, imm ; R -= imm
|
||||
vaddR, idx ; R += mem_[idx]
|
||||
vsubR, idx ; R -= mem_[idx]
|
||||
vputR ; print R
|
||||
vjnzR, offset; if (R != 0) then jmp(PC += offset(signed))
|
||||
*/
|
||||
#if defined(_MSC_VER) && (_MSC_VER <= 1200)
|
||||
#pragma warning(disable:4514)
|
||||
#pragma warning(disable:4786)
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <memory.h>
|
||||
#include <vector>
|
||||
#define XBYAK_NO_OP_NAMES
|
||||
#include "xbyak/xbyak.h"
|
||||
#include "xbyak/xbyak_util.h"
|
||||
#define NUM_OF_ARRAY(x) (sizeof(x) / sizeof(x[0]))
|
||||
|
||||
#ifdef XBYAK64
|
||||
#error "only 32bit"
|
||||
#endif
|
||||
|
||||
using namespace Xbyak;
|
||||
|
||||
class ToyVm : public Xbyak::CodeGenerator {
|
||||
typedef std::vector<uint32> Buffer;
|
||||
public:
|
||||
enum Reg {
|
||||
A, B
|
||||
};
|
||||
enum Code {
|
||||
LD, LDI, ST, ADD, ADDI, SUB, SUBI, PUT, JNZ,
|
||||
END_OF_CODE
|
||||
};
|
||||
ToyVm()
|
||||
: mark_(0)
|
||||
{
|
||||
::memset(mem_, 0, sizeof(mem_));
|
||||
}
|
||||
void vldi(Reg r, uint16 imm) { encode(LDI, r, imm); }
|
||||
void vld(Reg r, uint16 idx) { encode(LD, r, idx); }
|
||||
void vst(Reg r, uint16 idx) { encode(ST, r, idx); }
|
||||
void vadd(Reg r, uint16 idx) { encode(ADD, r, idx); }
|
||||
void vaddi(Reg r, uint16 imm) { encode(ADDI, r, imm); }
|
||||
void vsub(Reg r, uint16 idx) { encode(SUB, r, idx); }
|
||||
void vsubi(Reg r, uint16 imm) { encode(SUBI, r, imm); }
|
||||
void vjnz(Reg r, int offset) { encode(JNZ, r, static_cast<uint16>(offset)); }
|
||||
void vput(Reg r) { encode(PUT, r); }
|
||||
void setMark()
|
||||
{
|
||||
mark_ = (int)code_.size();
|
||||
}
|
||||
int getMarkOffset()
|
||||
{
|
||||
return mark_ - (int)code_.size() - 1;
|
||||
}
|
||||
void run()
|
||||
{
|
||||
bool debug = false;//true;
|
||||
uint32 reg[2] = { 0, 0 };
|
||||
const size_t end = code_.size();
|
||||
uint32 pc = 0;
|
||||
for (;;) {
|
||||
uint32 x = code_[pc];
|
||||
uint32 code, r, imm;
|
||||
decode(code, r, imm, x);
|
||||
if (debug) {
|
||||
printf("---\n");
|
||||
printf("A %08x B %08x\n", reg[0], reg[1]);
|
||||
printf("mem_[] = %08x %08x %08x\n", mem_[0], mem_[1], mem_[2]);
|
||||
printf("pc=%4d, code=%02x, r=%d, imm=%04x\n", pc, code, r, imm);
|
||||
}
|
||||
switch (code) {
|
||||
case LDI:
|
||||
reg[r] = imm;
|
||||
break;
|
||||
case LD:
|
||||
reg[r] = mem_[imm];
|
||||
break;
|
||||
case ST:
|
||||
mem_[imm] = reg[r];
|
||||
break;
|
||||
case ADD:
|
||||
reg[r] += mem_[imm];
|
||||
break;
|
||||
case ADDI:
|
||||
reg[r] += imm;
|
||||
break;
|
||||
case SUB:
|
||||
reg[r] -= mem_[imm];
|
||||
break;
|
||||
case SUBI:
|
||||
reg[r] -= imm;
|
||||
break;
|
||||
case PUT:
|
||||
printf("%c %8d(0x%08x)\n", 'A' + r, reg[r], reg[r]);
|
||||
break;
|
||||
case JNZ:
|
||||
if (reg[r] != 0) pc += static_cast<signed short>(imm);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
pc++;
|
||||
if (pc >= end) break;
|
||||
} // for (;;)
|
||||
}
|
||||
void recompile()
|
||||
{
|
||||
using namespace Xbyak;
|
||||
/*
|
||||
esi : A
|
||||
edi : B
|
||||
ebx : mem_
|
||||
for speed up
|
||||
mem_[0] : eax
|
||||
mem_[1] : ecx
|
||||
mem_[2] : edx
|
||||
*/
|
||||
push(ebx);
|
||||
push(esi);
|
||||
push(edi);
|
||||
|
||||
const Reg32 reg[2] = { esi, edi };
|
||||
const Reg32 mem(ebx);
|
||||
|
||||
const Reg32 memTbl[] = { eax, ecx, edx };
|
||||
const size_t memTblNum = NUM_OF_ARRAY(memTbl);
|
||||
for (size_t i = 0; i < memTblNum; i++) xor_(memTbl[i], memTbl[i]);
|
||||
|
||||
xor_(esi, esi);
|
||||
xor_(edi, edi);
|
||||
mov(mem, (size_t)mem_);
|
||||
const size_t end = code_.size();
|
||||
uint32 pc = 0;
|
||||
uint32 labelNum = 0;
|
||||
for (;;) {
|
||||
uint32 x = code_[pc];
|
||||
uint32 code, r, imm;
|
||||
decode(code, r, imm, x);
|
||||
L(Label::toStr(labelNum++));
|
||||
switch (code) {
|
||||
case LDI:
|
||||
mov(reg[r], imm);
|
||||
break;
|
||||
case LD:
|
||||
if (imm < memTblNum) {
|
||||
mov(reg[r], memTbl[imm]);
|
||||
} else {
|
||||
mov(reg[r], ptr[mem + imm * 4]);
|
||||
}
|
||||
break;
|
||||
case ST:
|
||||
if (imm < memTblNum) {
|
||||
mov(memTbl[imm], reg[r]);
|
||||
} else {
|
||||
mov(ptr [mem + imm * 4], reg[r]);
|
||||
}
|
||||
break;
|
||||
case ADD:
|
||||
if (imm < memTblNum) {
|
||||
add(reg[r], memTbl[imm]);
|
||||
} else {
|
||||
add(reg[r], ptr [mem + imm * 4]);
|
||||
}
|
||||
break;
|
||||
case ADDI:
|
||||
add(reg[r], imm);
|
||||
break;
|
||||
case SUB:
|
||||
if (imm < memTblNum) {
|
||||
sub(reg[r], memTbl[imm]);
|
||||
} else {
|
||||
sub(reg[r], ptr [mem + imm * 4]);
|
||||
}
|
||||
break;
|
||||
case SUBI:
|
||||
sub(reg[r], imm);
|
||||
break;
|
||||
case PUT:
|
||||
{
|
||||
static const char *str = "%c %8d(0x%08x)\n";
|
||||
push(eax);
|
||||
push(edx);
|
||||
push(ecx);
|
||||
push(reg[r]);
|
||||
push(reg[r]);
|
||||
push('A' + r);
|
||||
push((int)str);
|
||||
call(reinterpret_cast<const void*>(printf));
|
||||
add(esp, 4 * 4);
|
||||
pop(ecx);
|
||||
pop(edx);
|
||||
pop(eax);
|
||||
}
|
||||
break;
|
||||
case JNZ:
|
||||
test(reg[r], reg[r]);
|
||||
jnz(Label::toStr(labelNum + static_cast<signed short>(imm)));
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
pc++;
|
||||
if (pc >= end) break;
|
||||
} // for (;;)
|
||||
|
||||
pop(edi);
|
||||
pop(esi);
|
||||
pop(ebx);
|
||||
ret();
|
||||
}
|
||||
private:
|
||||
uint32 mem_[65536];
|
||||
Buffer code_;
|
||||
int mark_;
|
||||
/*
    Split an instruction word x into its fields:
        code : bits 31..24
        r    : bits 23..16
        imm  : bits 15..0
*/
void decode(uint32& code, uint32& r, uint32& imm, uint32 x)
{
    imm = x & 0xffff;
    r = (x >> 16) & 0xff;
    code = x >> 24;
}
|
||||
/*
    Pack (code, r, imm) into one instruction word, the inverse of decode(),
    and append it to code_.
*/
void encode(Code code, Reg r, uint16 imm = 0)
{
    const uint32 word = (code << 24) | (r << 16) | imm;
    code_.push_back(word);
}
|
||||
};
|
||||
|
||||
class Fib : public ToyVm {
|
||||
public:
|
||||
Fib(int n)
|
||||
{
|
||||
if (n >= 65536) {
|
||||
fprintf(stderr, "current version support only imm16\n");
|
||||
return;
|
||||
}
|
||||
/*
|
||||
A : c
|
||||
B : temporary
|
||||
mem_[0] : p
|
||||
mem_[1] : t
|
||||
mem_[2] : n
|
||||
*/
|
||||
vldi(A, 1); // c
|
||||
vst(A, 0); // p(1)
|
||||
vldi(B, static_cast<uint16>(n));
|
||||
vst(B, 2); // n
|
||||
// lp
|
||||
setMark();
|
||||
vst(A, 1); // t = c
|
||||
vadd(A, 0); // c += p
|
||||
vld(B, 1);
|
||||
vst(B, 0); // p = t
|
||||
// vput(A);
|
||||
vld(B, 2);
|
||||
vsubi(B, 1);
|
||||
vst(B, 2); // n--
|
||||
vjnz(B, getMarkOffset());
|
||||
vput(A);
|
||||
}
|
||||
void runByJIT()
|
||||
{
|
||||
getCode<void (*)()>();
|
||||
}
|
||||
};
|
||||
|
||||
/*
    Plain C++ reference for the Fib VM program: starting from p = c = 1, apply
    (p, c) <- (c, c + p) n times and print c.
    The loop body runs before n is tested, exactly like the VM program.
*/
void fibC(uint32 n)
{
    uint32 prev = 1;
    uint32 cur = 1;
    do {
        const uint32 saved = cur;
        cur += prev;
        prev = saved;
    } while (--n != 0);
    printf("c=%d(0x%08x)\n", cur, cur);
}
|
||||
|
||||
int main()
|
||||
{
|
||||
try {
|
||||
const int n = 10000;
|
||||
Fib fib(n);
|
||||
|
||||
fib.recompile();
|
||||
|
||||
{
|
||||
Xbyak::util::Clock clk;
|
||||
clk.begin();
|
||||
fib.run();
|
||||
clk.end();
|
||||
printf("vm %.2fKclk\n", clk.getClock() * 1e-3);
|
||||
}
|
||||
|
||||
{
|
||||
Xbyak::util::Clock clk;
|
||||
clk.begin();
|
||||
fib.runByJIT();
|
||||
clk.end();
|
||||
printf("jit %.2fKclk\n", clk.getClock() * 1e-3);
|
||||
}
|
||||
|
||||
{
|
||||
Xbyak::util::Clock clk;
|
||||
clk.begin();
|
||||
fibC(n);
|
||||
clk.end();
|
||||
printf("native C %.2fKclk\n", clk.getClock() * 1e-3);
|
||||
}
|
||||
} catch (std::exception& e) {
|
||||
printf("ERR:%s\n", e.what());
|
||||
} catch (...) {
|
||||
printf("unknown error\n");
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
the code generated by Xbyak
|
||||
push ebx
|
||||
push esi
|
||||
push edi
|
||||
xor eax,eax
|
||||
xor ecx,ecx
|
||||
xor edx,edx
|
||||
xor esi,esi
|
||||
xor edi,edi
|
||||
mov ebx,0EFF58h
|
||||
mov esi,1
|
||||
mov eax,esi
|
||||
mov edi,2710h
|
||||
mov edx,edi
|
||||
.lp:
|
||||
mov ecx,esi
|
||||
add esi,eax
|
||||
mov edi,ecx
|
||||
mov eax,edi
|
||||
mov edi,edx
|
||||
sub edi,1
|
||||
mov edx,edi
|
||||
test edi,edi
|
||||
jne .lp
|
||||
push eax
|
||||
push edx
|
||||
push ecx
|
||||
push esi
|
||||
push esi
|
||||
push 41h
|
||||
push 42C434h
|
||||
call printf (409342h)
|
||||
add esp,10h
|
||||
pop ecx
|
||||
pop edx
|
||||
pop eax
|
||||
pop edi
|
||||
pop esi
|
||||
pop ebx
|
||||
ret
|
||||
*/
|
|
@ -0,0 +1,228 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{2E41C7AF-39FF-454C-B081-37445378DCB3}</ProjectGuid>
|
||||
<WindowsTargetPlatformVersion>10.0.17763.0</WindowsTargetPlatformVersion>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC60.props" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup>
|
||||
<_ProjectFileVersion>15.0.27924.0</_ProjectFileVersion>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Platform)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
<IntDir>$(ProjectName)\$(Platform)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Midl>
|
||||
<TypeLibraryName>.\Debug/toyvm.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<MinimalRebuild>true</MinimalRebuild>
|
||||
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
<PrecompiledHeaderOutputFile>.\Debug/toyvm.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<ProgramDatabaseFile>.\Debug/toyvm.pdb</ProgramDatabaseFile>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX86</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Midl>
|
||||
<TypeLibraryName>.\Release/toyvm.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<StringPooling>true</StringPooling>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<PrecompiledHeaderOutputFile>.\Release/toyvm.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX86</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<Midl>
|
||||
<TargetEnvironment>X64</TargetEnvironment>
|
||||
<TypeLibraryName>.\Debug/toyvm.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<MinimalRebuild>true</MinimalRebuild>
|
||||
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
<PrecompiledHeaderOutputFile>.\Debug/toyvm.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<ProgramDatabaseFile>.\Debug/toyvm.pdb</ProgramDatabaseFile>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX64</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<Midl>
|
||||
<TargetEnvironment>X64</TargetEnvironment>
|
||||
<TypeLibraryName>.\Release/toyvm.tlb</TypeLibraryName>
|
||||
<HeaderFileName />
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
|
||||
<AdditionalIncludeDirectories>../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<StringPooling>true</StringPooling>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<PrecompiledHeaderOutputFile>.\Release/toyvm.pch</PrecompiledHeaderOutputFile>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0411</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention />
|
||||
<TargetMachine>MachineX64</TargetMachine>
|
||||
</Link>
|
||||
<Bscmake>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="toyvm.cpp" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
|
@ -0,0 +1,85 @@
|
|||
TARGET = make_nm normalize_prefix jmp address bad_address misc cvt_test cvt_test32
|
||||
XBYAK_INC=../xbyak/xbyak.h
|
||||
BIT=32
|
||||
ifeq ($(shell uname -m),x86_64)
|
||||
BIT=64
|
||||
endif
|
||||
|
||||
ifeq ($(BIT),64)
|
||||
TARGET += jmp64 address64
|
||||
endif
|
||||
|
||||
all: $(TARGET)
|
||||
|
||||
CFLAGS_WARN=-Wall -Wextra -Wformat=2 -Wcast-qual -Wcast-align -Wwrite-strings -Wfloat-equal -Wpointer-arith
|
||||
|
||||
CFLAGS=-O2 -fomit-frame-pointer -Wall -fno-operator-names -I../ -I./ $(CFLAGS_WARN) #-std=c++0x
|
||||
make_nm:
|
||||
$(CXX) $(CFLAGS) make_nm.cpp -o $@
|
||||
normalize_prefix: normalize_prefix.cpp ../xbyak/xbyak.h
|
||||
$(CXX) $(CFLAGS) normalize_prefix.cpp -o $@
|
||||
test_mmx: test_mmx.cpp ../xbyak/xbyak.h
|
||||
$(CXX) $(CFLAGS) test_mmx.cpp -o $@ -lpthread
|
||||
jmp: jmp.cpp ../xbyak/xbyak.h
|
||||
$(CXX) $(CFLAGS) jmp.cpp -o $@ -m32
|
||||
jmp64: jmp.cpp ../xbyak/xbyak.h
|
||||
$(CXX) $(CFLAGS) jmp.cpp -o $@ -m64
|
||||
address: address.cpp ../xbyak/xbyak.h
|
||||
$(CXX) $(CFLAGS) address.cpp -o $@ -m32
|
||||
address64: address.cpp ../xbyak/xbyak.h
|
||||
$(CXX) $(CFLAGS) address.cpp -o $@ -m64
|
||||
bad_address: bad_address.cpp ../xbyak/xbyak.h
|
||||
$(CXX) $(CFLAGS) bad_address.cpp -o $@
|
||||
misc: misc.cpp ../xbyak/xbyak.h
|
||||
$(CXX) $(CFLAGS) misc.cpp -o $@
|
||||
cvt_test: cvt_test.cpp ../xbyak/xbyak.h
|
||||
$(CXX) $(CFLAGS) $< -o $@
|
||||
cvt_test32: cvt_test.cpp ../xbyak/xbyak.h
|
||||
$(CXX) $(CFLAGS) $< -o $@ -DXBYAK32
|
||||
|
||||
test_nm: normalize_prefix jmp bad_address $(TARGET)
|
||||
$(MAKE) -C ../gen
|
||||
./test_nm.sh
|
||||
./test_nm.sh Y
|
||||
./test_nm.sh avx512
|
||||
./test_address.sh
|
||||
./jmp
|
||||
./bad_address
|
||||
./misc
|
||||
./cvt_test
|
||||
./cvt_test32
|
||||
ifeq ($(BIT),64)
|
||||
./test_address.sh 64
|
||||
./test_nm.sh 64
|
||||
./test_nm.sh Y64
|
||||
./jmp64
|
||||
endif
|
||||
|
||||
test_avx: normalize_prefix
|
||||
./test_avx.sh
|
||||
./test_avx.sh Y
|
||||
ifeq ($(BIT),64)
|
||||
./test_address.sh 64
|
||||
./test_avx.sh 64
|
||||
./test_avx.sh Y64
|
||||
endif
|
||||
|
||||
test_avx512: normalize_prefix
|
||||
./test_avx512.sh
|
||||
ifeq ($(BIT),64)
|
||||
./test_avx512.sh 64
|
||||
endif
|
||||
|
||||
test:
|
||||
$(MAKE) test_nm
|
||||
$(MAKE) test_avx
|
||||
$(MAKE) test_avx512
|
||||
|
||||
clean:
|
||||
rm -rf *.o $(TARGET) lib_run nm.cpp nm_frame make_512
|
||||
|
||||
lib_run: lib_test.cpp lib_run.cpp lib.h
|
||||
$(CXX) $(CFLAGS) lib_run.cpp lib_test.cpp -o lib_run
|
||||
make_nm: make_nm.cpp $(XBYAK_INC)
|
||||
|
||||
|
|
@ -0,0 +1,14 @@
|
|||
OPT=/EHsc -I../xbyak /W4 -D_CRT_SECURE_NO_WARNINGS
|
||||
../xbyak/xbyak_mnemonic.h: ../gen/gen_code.exe ../gen/gen_avx512.exe
|
||||
../gen/gen_code.exe > $@
|
||||
../gen/gen_avx512.exe >> $@
|
||||
|
||||
../gen/gen_code.exe: ../gen/gen_code.cpp #../xbyak/xbyak.h
|
||||
cl ../gen/gen_code.cpp $(OPT) /Fe:../gen/gen_code.exe
|
||||
|
||||
../gen/gen_avx512.exe: ../gen/gen_avx512.cpp #../xbyak/xbyak.h
|
||||
cl ../gen/gen_avx512.cpp $(OPT) /Fe:../gen/gen_avx512.exe
|
||||
|
||||
SUB_HEADER=../xbyak/xbyak_mnemonic.h
|
||||
|
||||
all: $(SUB_HEADER)
|
|
@ -0,0 +1,9 @@
|
|||
@echo off
|
||||
echo 32bit
|
||||
rm -rf a.lst b.lst
|
||||
echo nasm
|
||||
nasm -l a.lst -f win32 -DWIN32 test.asm
|
||||
cat a.lst
|
||||
echo yasm
|
||||
yasm -l b.lst -f win32 -DWIN32 test.asm
|
||||
cat b.lst
|
|
@ -0,0 +1,155 @@
|
|||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#define NUM_OF_ARRAY(x) (sizeof(x) / sizeof(x[0]))
|
||||
|
||||
/*
    Print one gather instruction per address expression in tbl.
    isJIT  : true  -> Xbyak call syntax followed by "dump();"
             false -> plain assembler syntax
    name   : mnemonic to print
    tbl    : address expressions (the text placed between the brackets)
    tblSize: number of entries in tbl
*/
void genVsibSub(bool isJIT, const char *name, const char *tbl[], size_t tblSize)
{
    const char *const *const last = tbl + tblSize;
    for (const char *const *addr = tbl; addr != last; ++addr) {
        printf(isJIT ? "%s (ymm7, ptr[" : "%s ymm7, [", name);
        printf("%s", *addr);
        fputs(isJIT ? "], ymm4); dump();\n" : "], ymm4\n", stdout);
    }
}
|
||||
void genVsib(bool isJIT)
|
||||
{
|
||||
if (isJIT) puts("void genVsib() {");
|
||||
const char *vm32xTbl[] = {
|
||||
"xmm0",
|
||||
"xmm0 * 1",
|
||||
"xmm0 + 4",
|
||||
"xmm0 + eax",
|
||||
"xmm0 * 4 + ecx",
|
||||
"xmm3 * 8 + edi + 123",
|
||||
"xmm2 * 2 + 5",
|
||||
"eax + xmm0",
|
||||
"esp + xmm4",
|
||||
};
|
||||
const char *vm32yTbl[] = {
|
||||
"ymm0",
|
||||
"ymm0 * 1",
|
||||
"ymm0 + 4",
|
||||
"ymm0 + eax",
|
||||
"ymm0 * 4 + ecx",
|
||||
"ymm3 * 8 + edi + 123",
|
||||
"ymm2 * 2 + 5",
|
||||
"eax + ymm0",
|
||||
"esp + ymm4",
|
||||
};
|
||||
genVsibSub(isJIT, "vgatherdpd", vm32xTbl, NUM_OF_ARRAY(vm32xTbl));
|
||||
genVsibSub(isJIT, "vgatherqpd", vm32yTbl, NUM_OF_ARRAY(vm32yTbl));
|
||||
#ifdef XBYAK64
|
||||
const char *vm32x64Tbl[] = {
|
||||
"xmm0 + r11",
|
||||
"r13 + xmm15",
|
||||
"123 + rsi + xmm2 * 4",
|
||||
};
|
||||
genVsibSub(isJIT, "vgatherdpd", vm32x64Tbl, NUM_OF_ARRAY(vm32x64Tbl));
|
||||
#endif
|
||||
if (isJIT) puts("}");
|
||||
}
|
||||
|
||||
void genAddress(bool isJIT, const char regTbl[][5], size_t regTblNum)
|
||||
{
|
||||
int count = 0;
|
||||
int funcNum = 1;
|
||||
if (isJIT) {
|
||||
puts("void gen0(){");
|
||||
}
|
||||
for (size_t i = 0; i < regTblNum + 1; i++) {
|
||||
const char *base = regTbl[i];
|
||||
for (size_t j = 0; j < regTblNum + 1; j++) {
|
||||
if (j == 4) continue; /* esp is not index register */
|
||||
const char *index = regTbl[j];
|
||||
static const int scaleTbl[] = { 0, 1, 2, 4, 8 };
|
||||
for (size_t k = 0; k < NUM_OF_ARRAY(scaleTbl); k++) {
|
||||
int scale = scaleTbl[k];
|
||||
static const int dispTbl[] = { 0, 1, 1000, -1, -1000 };
|
||||
for (size_t m = 0; m < NUM_OF_ARRAY(dispTbl); m++) {
|
||||
int disp = dispTbl[m];
|
||||
bool isFirst = true;
|
||||
if (isJIT) {
|
||||
printf("mov (ecx, ptr[");
|
||||
} else {
|
||||
printf("mov ecx, [");
|
||||
}
|
||||
if (i < regTblNum) {
|
||||
printf("%s", base);
|
||||
isFirst = false;
|
||||
}
|
||||
if (j < regTblNum) {
|
||||
if (!isFirst) putchar('+');
|
||||
printf("%s", index);
|
||||
if (scale) printf("*%d", scale);
|
||||
isFirst = false;
|
||||
}
|
||||
if (isFirst) {
|
||||
if (isJIT) printf("(void*)");
|
||||
printf("0x%08X", disp);
|
||||
} else {
|
||||
if (disp >= 0) {
|
||||
putchar('+');
|
||||
}
|
||||
printf("%d", disp);
|
||||
isFirst = false;
|
||||
}
|
||||
if (isJIT) {
|
||||
printf("]); dump();\n");
|
||||
} else {
|
||||
printf("]\n");
|
||||
}
|
||||
if (isJIT) {
|
||||
count++;
|
||||
if ((count % 100) == 0) {
|
||||
printf("}\n void gen%d(){\n", funcNum++);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (isJIT) puts("}");
|
||||
genVsib(isJIT);
|
||||
if (isJIT) {
|
||||
printf("void gen(){\n");
|
||||
for (int i = 0; i < funcNum; i++) {
|
||||
printf(" gen%d();\n", i);
|
||||
}
|
||||
puts("genVsib();");
|
||||
printf("}\n");
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
argc--, argv++;
|
||||
bool phase = argc > 0 && strcmp(*argv, "1") == 0;
|
||||
bool isJIT = (argc > 1);
|
||||
fprintf(stderr, "phase:%c %s\n", phase ? '1' : '2', isJIT ? "jit" : "asm");
|
||||
if (phase) {
|
||||
fprintf(stderr, "32bit reg\n");
|
||||
static const char reg32Tbl[][5] = {
|
||||
"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi",
|
||||
#ifdef XBYAK64
|
||||
"r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d",
|
||||
#endif
|
||||
};
|
||||
genAddress(isJIT, reg32Tbl, NUM_OF_ARRAY(reg32Tbl));
|
||||
} else {
|
||||
#ifdef XBYAK64
|
||||
fprintf(stderr, "64bit reg\n");
|
||||
static const char reg64Tbl[][5] = {
|
||||
"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
|
||||
};
|
||||
genAddress(isJIT, reg64Tbl, NUM_OF_ARRAY(reg64Tbl));
|
||||
#endif
|
||||
}
|
||||
}
|
|
@ -0,0 +1,28 @@
|
|||
#include <xbyak/xbyak.h>
|
||||
#include <cybozu/test.hpp>
|
||||
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [esp + esp]), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [ax]), std::exception); // not support
|
||||
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [esp * 4]), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [eax * 16]), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [eax + eax + eax]), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [eax * 2 + ecx * 4]), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [eax * 2 + ecx * 4]), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [xmm0]), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(fld(dword [xmm0]), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(vgatherdpd(xmm0, ptr [eax * 2], ymm3), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(vgatherdpd(xmm0, ptr [xmm0 + xmm1], ymm3), std::exception);
|
||||
#ifdef XBYAK64
|
||||
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [rax + eax]), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(mov(eax, ptr [xmm0 + ymm0]), std::exception);
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
CYBOZU_TEST_AUTO(exception)
|
||||
{
|
||||
Code c;
|
||||
}
|
|
@ -0,0 +1,151 @@
|
|||
#define XBYAK_NO_OP_NAMES
|
||||
#include <xbyak/xbyak.h>
|
||||
#include <cybozu/inttype.hpp>
|
||||
#include <cybozu/test.hpp>
|
||||
|
||||
using namespace Xbyak;
|
||||
using namespace Xbyak::util;
|
||||
|
||||
#ifdef XBYAK64
|
||||
const struct Ptn {
|
||||
const Reg8 *reg8;
|
||||
Reg16 reg16;
|
||||
Reg32 reg32;
|
||||
Reg64 reg64;
|
||||
} tbl[] = {
|
||||
{ &al, ax, eax, rax },
|
||||
{ &bl, bx, ebx, rbx },
|
||||
{ &cl, cx, ecx, rcx },
|
||||
{ &dl, dx, edx, rdx },
|
||||
{ &sil, si, esi, rsi },
|
||||
{ &dil, di, edi, rdi },
|
||||
{ &bpl, bp, ebp, rbp },
|
||||
{ &spl, sp, esp, rsp },
|
||||
{ &r8b, r8w, r8d, r8 },
|
||||
{ &r9b, r9w, r9d, r9 },
|
||||
{ &r10b, r10w, r10d, r10 },
|
||||
{ &r11b, r11w, r11d, r11 },
|
||||
{ &r12b, r12w, r12d, r12 },
|
||||
{ &r13b, r13w, r13d, r13 },
|
||||
{ &r14b, r14w, r14d, r14 },
|
||||
{ &r15b, r15w, r15d, r15 },
|
||||
};
|
||||
#else
|
||||
const struct Ptn {
|
||||
const Reg8 *reg8;
|
||||
Reg16 reg16;
|
||||
Reg32 reg32;
|
||||
} tbl[] = {
|
||||
{ &al, ax, eax },
|
||||
{ &bl, bx, ebx },
|
||||
{ &cl, cx, ecx },
|
||||
{ &dl, dx, edx },
|
||||
{ 0, si, esi },
|
||||
{ 0, di, edi },
|
||||
{ 0, bp, ebp },
|
||||
{ 0, sp, esp },
|
||||
};
|
||||
#endif
|
||||
|
||||
/*
    Check cvt8/cvt16/cvt32 (and cvt64 when XBYAK64 is defined) between the
    registers listed on each row of the file-level tbl, then check the
    conversions that are expected to throw.
    Rows whose reg8 pointer is null skip the 8-bit checks.
*/
CYBOZU_TEST_AUTO(cvt)
{
    const size_t rowNum = sizeof(tbl) / sizeof(tbl[0]);
    for (size_t row = 0; row < rowNum; row++) {
        const Ptn& p = tbl[row];
        if (p.reg8) {
            CYBOZU_TEST_ASSERT(p.reg8->cvt8() == *p.reg8);
            CYBOZU_TEST_ASSERT(p.reg8->cvt16() == p.reg16);
            CYBOZU_TEST_ASSERT(p.reg8->cvt32() == p.reg32);
            CYBOZU_TEST_ASSERT(p.reg16.cvt8() == *p.reg8);
            CYBOZU_TEST_ASSERT(p.reg32.cvt8() == *p.reg8);
        }
        CYBOZU_TEST_ASSERT(p.reg16.cvt16() == p.reg16);
        CYBOZU_TEST_ASSERT(p.reg16.cvt32() == p.reg32);
        CYBOZU_TEST_ASSERT(p.reg32.cvt16() == p.reg16);
        CYBOZU_TEST_ASSERT(p.reg32.cvt32() == p.reg32);
#ifdef XBYAK64
        if (p.reg8) {
            CYBOZU_TEST_ASSERT(p.reg64.cvt8() == *p.reg8);
            CYBOZU_TEST_ASSERT(p.reg8->cvt64() == p.reg64);
        }
        CYBOZU_TEST_ASSERT(p.reg64.cvt16() == p.reg16);
        CYBOZU_TEST_ASSERT(p.reg64.cvt32() == p.reg32);
        CYBOZU_TEST_ASSERT(p.reg64.cvt64() == p.reg64);
        CYBOZU_TEST_ASSERT(p.reg16.cvt64() == p.reg64);
        CYBOZU_TEST_ASSERT(p.reg32.cvt64() == p.reg64);
#endif
    }
    {
        // ah/bh/ch/dh: cvt16() is expected to throw
        const Reg8 high8Tbl[] = {
            ah, bh, ch, dh
        };
        const size_t high8Num = sizeof(high8Tbl) / sizeof(high8Tbl[0]);
        for (size_t i = 0; i < high8Num; i++) {
            CYBOZU_TEST_EXCEPTION(high8Tbl[i].cvt16(), std::exception);
        }
    }
#ifdef XBYAK32
    {
        // si/di/bp/sp: cvt8() is expected to throw in this configuration
        const Reg16 no8Tbl[] = {
            si, di, bp, sp
        };
        const size_t no8Num = sizeof(no8Tbl) / sizeof(no8Tbl[0]);
        for (size_t i = 0; i < no8Num; i++) {
            CYBOZU_TEST_EXCEPTION(no8Tbl[i].cvt8(), std::exception);
        }
    }
#endif
}
|
||||
|
||||
/*
    Check Reg::changeBit().
    Each table row lists the same register number at every width in bitTbl.
    Converting any non-null entry of a row to width bitTbl[k] must yield the
    entry in column k; when that column is null the conversion must throw.
*/
CYBOZU_TEST_AUTO(changeBit)
{
    using namespace Xbyak::util;
#ifdef XBYAK64
    const size_t N = 7;
    const Reg* tbl[][N] = {
        { &al, &ax, &eax, &rax, &xmm0, &ymm0, &zmm0 },
        { &cl, &cx, &ecx, &rcx, &xmm1, &ymm1, &zmm1 },
        { &dl, &dx, &edx, &rdx, &xmm2, &ymm2, &zmm2 },
        { &bl, &bx, &ebx, &rbx, &xmm3, &ymm3, &zmm3 },
        { &spl, &sp, &esp, &rsp, &xmm4, &ymm4, &zmm4 },
        { &bpl, &bp, &ebp, &rbp, &xmm5, &ymm5, &zmm5 },
        { &sil, &si, &esi, &rsi, &xmm6, &ymm6, &zmm6 },
        { &dil, &di, &edi, &rdi, &xmm7, &ymm7, &zmm7 },
        { &r8b, &r8w, &r8d, &r8, &xmm8, &ymm8, &zmm8 },
        { &r15b, &r15w, &r15d, &r15, &xmm15, &ymm15, &zmm15 },
        { 0, 0, 0, 0, &xmm16, &ymm16, &zmm16 },
        { 0, 0, 0, 0, &xmm31, &ymm31, &zmm31 },
    };
    const int bitTbl[N] = { 8, 16, 32, 64, 128, 256, 512 };
#else
    const size_t N = 6;
    const Reg* tbl[][N] = {
        { &al, &ax, &eax, &xmm0, &ymm0, &zmm0 },
        { &cl, &cx, &ecx, &xmm1, &ymm1, &zmm1 },
        { &dl, &dx, &edx, &xmm2, &ymm2, &zmm2 },
        { &bl, &bx, &ebx, &xmm3, &ymm3, &zmm3 },
        { 0, &sp, &esp, &xmm4, &ymm4, &zmm4 },
        { 0, &bp, &ebp, &xmm5, &ymm5, &zmm5 },
        { 0, &si, &esi, &xmm6, &ymm6, &zmm6 },
        { 0, &di, &edi, &xmm7, &ymm7, &zmm7 },
    };
    const int bitTbl[N] = { 8, 16, 32, 128, 256, 512 };
#endif

    for (size_t row = 0; row < CYBOZU_NUM_OF_ARRAY(tbl); row++) {
        for (size_t from = 0; from < N; from++) {
            const Reg *src = tbl[row][from];
            if (src == 0) continue;
            for (size_t to = 0; to < N; to++) {
                const Reg *expected = tbl[row][to];
                const int bit = bitTbl[to];
                if (expected) {
                    CYBOZU_TEST_ASSERT(*expected == src->changeBit(bit));
                } else {
                    CYBOZU_TEST_EXCEPTION(src->changeBit(bit), std::exception);
                }
            }
        }
    }
#ifdef XBYAK64
    // ah/bh/ch/dh: changeBit(16) is expected to throw
    const Reg8 *high8Tbl[] = { &ah, &bh, &ch, &dh };
    for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(high8Tbl); i++) {
        CYBOZU_TEST_EXCEPTION(high8Tbl[i]->changeBit(16), std::exception);
    }
#endif
}
|
|
@ -0,0 +1,163 @@
|
|||
#pragma once
|
||||
/**
|
||||
@file
|
||||
@brief int type definition and macros
|
||||
@author MITSUNARI Shigeo(@herumi)
|
||||
*/
|
||||
|
||||
/*
    Fixed-width integer types.
    Visual C++ up to _MSC_VER 1500 (Visual Studio 2008) has no <stdint.h>, so
    the types are declared by hand there; every other compiler uses the
    standard header. CYBOZU_DEFINED_INTXX guards against declaring them twice.
    The version test must use _MSC_VER: an undefined name evaluates to 0 in
    #if and would select the hand-written typedefs on every MSVC version.
*/
#if defined(_MSC_VER) && (_MSC_VER <= 1500) && !defined(CYBOZU_DEFINED_INTXX)
    #define CYBOZU_DEFINED_INTXX
    typedef __int64 int64_t;
    typedef unsigned __int64 uint64_t;
    typedef unsigned int uint32_t;
    typedef int int32_t;
    typedef unsigned short uint16_t;
    typedef short int16_t;
    typedef unsigned char uint8_t;
    typedef signed char int8_t;
#else
    #include <stdint.h>
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#ifndef CYBOZU_DEFINED_SSIZE_T
|
||||
#define CYBOZU_DEFINED_SSIZE_T
|
||||
#ifdef _WIN64
|
||||
typedef int64_t ssize_t;
|
||||
#else
|
||||
typedef int32_t ssize_t;
|
||||
#endif
|
||||
#endif
|
||||
#else
|
||||
#include <unistd.h> // for ssize_t
|
||||
#endif
|
||||
|
||||
#ifndef CYBOZU_ALIGN
|
||||
#ifdef _MSC_VER
|
||||
#define CYBOZU_ALIGN(x) __declspec(align(x))
|
||||
#else
|
||||
#define CYBOZU_ALIGN(x) __attribute__((aligned(x)))
|
||||
#endif
|
||||
#endif
|
||||
#ifndef CYBOZU_FORCE_INLINE
|
||||
#ifdef _MSC_VER
|
||||
#define CYBOZU_FORCE_INLINE __forceinline
|
||||
#else
|
||||
#define CYBOZU_FORCE_INLINE __attribute__((always_inline))
|
||||
#endif
|
||||
#endif
|
||||
#ifndef CYBOZU_UNUSED
|
||||
#ifdef __GNUC__
|
||||
#define CYBOZU_UNUSED __attribute__((unused))
|
||||
#else
|
||||
#define CYBOZU_UNUSED
|
||||
#endif
|
||||
#endif
|
||||
#ifndef CYBOZU_ALLOCA
|
||||
#ifdef _MSC_VER
|
||||
#include <malloc.h>
|
||||
#define CYBOZU_ALLOCA(x) _malloca(x)
|
||||
#else
|
||||
#define CYBOZU_ALLOCA(x) __builtin_alloca(x)
|
||||
#endif
|
||||
#endif
|
||||
#ifndef CYBOZU_NUM_OF_ARRAY
|
||||
#define CYBOZU_NUM_OF_ARRAY(x) (sizeof(x) / sizeof(*x))
|
||||
#endif
|
||||
#ifndef CYBOZU_SNPRINTF
|
||||
#if defined(_MSC_VER) && (_MSC_VER < 1900)
|
||||
#define CYBOZU_SNPRINTF(x, len, ...) (void)_snprintf_s(x, len, len - 1, __VA_ARGS__)
|
||||
#else
|
||||
#define CYBOZU_SNPRINTF(x, len, ...) (void)snprintf(x, len, __VA_ARGS__)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define CYBOZU_CPP_VERSION_CPP03 0
|
||||
#define CYBOZU_CPP_VERSION_TR1 1
|
||||
#define CYBOZU_CPP_VERSION_CPP11 2
|
||||
#define CYBOZU_CPP_VERSION_CPP14 3
|
||||
#define CYBOZU_CPP_VERSION_CPP17 4
|
||||
|
||||
#ifdef __GNUC__
|
||||
#define CYBOZU_GNUC_PREREQ(major, minor) ((__GNUC__) * 100 + (__GNUC_MINOR__) >= (major) * 100 + (minor))
|
||||
#else
|
||||
#define CYBOZU_GNUC_PREREQ(major, minor) 0
|
||||
#endif
|
||||
|
||||
#if (__cplusplus >= 201703)
|
||||
#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_CPP17
|
||||
#elif (__cplusplus >= 201402)
|
||||
#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_CPP14
|
||||
#elif (__cplusplus >= 201103) || (_MSC_VER >= 1500) || defined(__GXX_EXPERIMENTAL_CXX0X__)
|
||||
#if defined(_MSC_VER) && (_MSC_VER <= 1600)
|
||||
#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_TR1
|
||||
#else
|
||||
#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_CPP11
|
||||
#endif
|
||||
#elif CYBOZU_GNUC_PREREQ(4, 5) || (CYBOZU_GNUC_PREREQ(4, 2) && __GLIBCXX__ >= 20070719) || defined(__INTEL_COMPILER) || (__clang_major__ >= 3)
|
||||
#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_TR1
|
||||
#else
|
||||
#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_CPP03
|
||||
#endif
|
||||
|
||||
#ifdef CYBOZU_USE_BOOST
|
||||
#define CYBOZU_NAMESPACE_STD boost
|
||||
#define CYBOZU_NAMESPACE_TR1_BEGIN
|
||||
#define CYBOZU_NAMESPACE_TR1_END
|
||||
#elif (CYBOZU_CPP_VERSION == CYBOZU_CPP_VERSION_TR1) && !defined(__APPLE__)
|
||||
#define CYBOZU_NAMESPACE_STD std::tr1
|
||||
#define CYBOZU_NAMESPACE_TR1_BEGIN namespace tr1 {
|
||||
#define CYBOZU_NAMESPACE_TR1_END }
|
||||
#else
|
||||
#define CYBOZU_NAMESPACE_STD std
|
||||
#define CYBOZU_NAMESPACE_TR1_BEGIN
|
||||
#define CYBOZU_NAMESPACE_TR1_END
|
||||
#endif
|
||||
|
||||
#ifndef CYBOZU_OS_BIT
|
||||
#if defined(_WIN64) || defined(__x86_64__) || defined(__AARCH64EL__) || defined(__EMSCRIPTEN__)
|
||||
#define CYBOZU_OS_BIT 64
|
||||
#else
|
||||
#define CYBOZU_OS_BIT 32
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef CYBOZU_HOST
|
||||
#define CYBOZU_HOST_UNKNOWN 0
|
||||
#define CYBOZU_HOST_INTEL 1
|
||||
#define CYBOZU_HOST_ARM 2
|
||||
#if defined(_M_IX86) || defined(_M_AMD64) || defined(__x86_64__) || defined(__i386__)
|
||||
#define CYBOZU_HOST CYBOZU_HOST_INTEL
|
||||
#elif defined(__arm__) || defined(__AARCH64EL__)
|
||||
#define CYBOZU_HOST CYBOZU_HOST_ARM
|
||||
#else
|
||||
#define CYBOZU_HOST CYBOZU_HOST_UNKNOWN
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef CYBOZU_ENDIAN
|
||||
#define CYBOZU_ENDIAN_UNKNOWN 0
|
||||
#define CYBOZU_ENDIAN_LITTLE 1
|
||||
#define CYBOZU_ENDIAN_BIG 2
|
||||
#if (CYBOZU_HOST == CYBOZU_HOST_INTEL)
|
||||
#define CYBOZU_ENDIAN CYBOZU_ENDIAN_LITTLE
|
||||
#elif (CYBOZU_HOST == CYBOZU_HOST_ARM) && (defined(__ARM_EABI__) || defined(__AARCH64EL__))
|
||||
#define CYBOZU_ENDIAN CYBOZU_ENDIAN_LITTLE
|
||||
#else
|
||||
#define CYBOZU_ENDIAN CYBOZU_ENDIAN_UNKNOWN
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if CYBOZU_CPP_VERSION >= CYBOZU_CPP_VERSION_CPP11
|
||||
#define CYBOZU_NOEXCEPT noexcept
|
||||
#else
|
||||
#define CYBOZU_NOEXCEPT throw()
|
||||
#endif
|
||||
namespace cybozu {

/**
    no-op that accepts any value by const reference
    (as the name says, used to silence unused-variable warnings)
*/
template<class T>
void disable_warning_unused_variable(const T&)
{
}

/**
    convert a pointer-to-const S into pointer type T via const void*
    @param ptr [in] source pointer
*/
template<class T, class S>
T cast(const S* ptr)
{
    const void *untyped = static_cast<const void*>(ptr);
    return static_cast<T>(untyped);
}

/**
    convert a pointer-to-S into pointer type T via void*
    @param ptr [in] source pointer
*/
template<class T, class S>
T cast(S* ptr)
{
    void *untyped = static_cast<void*>(ptr);
    return static_cast<T>(untyped);
}

} // cybozu
|
|
@ -0,0 +1,373 @@
|
|||
#pragma once
|
||||
/**
|
||||
@file
|
||||
@brief unit test class
|
||||
|
||||
@author MITSUNARI Shigeo(@herumi)
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <string>
|
||||
#include <list>
|
||||
#include <iostream>
|
||||
#include <utility>
|
||||
// MSVC up to _MSC_VER 1500 takes the fixed-width integer types from
// cybozu/inttype.hpp; every other compiler uses <stdint.h>.
// The version macro is _MSC_VER (leading underscore): an undefined MSC_VER
// evaluates to 0 inside #if, which made this branch true for every MSVC version.
#if defined(_MSC_VER) && (_MSC_VER <= 1500)
    #include <cybozu/inttype.hpp>
#else
    #include <stdint.h>
#endif
|
||||
|
||||
namespace cybozu { namespace test {
|
||||
|
||||
/**
    registry and runner of unit test functions
    keeps the list of (module name, function) pairs, optional init/term hooks
    and the counters of ok / ng / exception results
*/
class AutoRun {
    typedef void (*Func)();
    typedef std::list<std::pair<const char*, Func> > UnitTestList;
public:
    AutoRun()
        : init_(0)
        , term_(0)
        , okCount_(0)
        , ngCount_(0)
        , exceptionCount_(0)
    {
    }
    /**
        register hooks called once before and once after all modules
        @param init [in] called before the first module (may be 0)
        @param term [in] called after the last module (may be 0)
    */
    void setup(Func init, Func term)
    {
        init_ = init;
        term_ = term;
    }
    /**
        add a test module to the end of the run list
        @param name [in] module name shown in the log
        @param func [in] test body
    */
    void append(const char *name, Func func)
    {
        list_.push_back(UnitTestList::value_type(name, func));
    }
    /**
        count one check result
        @param isOK [in] true increments the ok counter, false the ng counter
    */
    void set(bool isOK)
    {
        int& counter = isOK ? okCount_ : ngCount_;
        counter++;
    }
    /**
        strip the directory part and everything from the first '.' of the file name
        @param name [in] path (separator is '\\' on _WIN32, '/' otherwise)
    */
    std::string getBaseName(const std::string& name) const
    {
#ifdef _WIN32
        const char sep = '\\';
#else
        const char sep = '/';
#endif
        // find_last_of returns npos when there is no separator; npos + 1 wraps to 0
        const size_t sepPos = name.find_last_of(sep);
        const std::string leaf = name.substr(sepPos + 1);
        return leaf.substr(0, leaf.find('.'));
    }
    /**
        run init hook, every registered module, then term hook, and print the summary
        @param argv [in] argv[0] is used as the test name in the summary line
        @return 0 if no check failed and nothing threw, 1 otherwise
    */
    int run(int, char *argv[])
    {
        std::string fatal;
        try {
            if (init_) {
                init_();
            }
            UnitTestList::const_iterator it = list_.begin();
            const UnitTestList::const_iterator last = list_.end();
            while (it != last) {
                std::cout << "ctest:module=" << it->first << std::endl;
                // an exception stops only the current module
                try {
                    (it->second)();
                } catch (std::exception& e) {
                    exceptionCount_++;
                    std::cout << "ctest: " << it->first << " is stopped by exception " << e.what() << std::endl;
                } catch (...) {
                    exceptionCount_++;
                    std::cout << "ctest: " << it->first << " is stopped by unknown exception" << std::endl;
                }
                ++it;
            }
            if (term_) {
                term_();
            }
        } catch (std::exception& e) {
            // reached only when a hook throws
            fatal = std::string("ctest:err:") + e.what();
        } catch (...) {
            fatal = "ctest:err: catch unknown exception";
        }
        fflush(stdout);
        if (!fatal.empty()) {
            std::cout << fatal << std::endl;
            return 1;
        }
        const int failed = ngCount_ + exceptionCount_;
        const int total = okCount_ + failed;
        std::cout << "ctest:name=" << getBaseName(*argv)
                  << ", module=" << list_.size()
                  << ", total=" << total
                  << ", ok=" << okCount_
                  << ", ng=" << ngCount_
                  << ", exception=" << exceptionCount_ << std::endl;
        return failed > 0 ? 1 : 0;
    }
    /**
        function-local static instance shared by all callers
    */
    static inline AutoRun& getInstance()
    {
        static AutoRun instance;
        return instance;
    }
private:
    Func init_;
    Func term_;
    int okCount_;
    int ngCount_;
    int exceptionCount_;
    UnitTestList list_;
};
|
||||
|
||||
static AutoRun& autoRun = AutoRun::getInstance();
|
||||
|
||||
inline void test(bool ret, const std::string& msg, const std::string& param, const char *file, int line)
|
||||
{
|
||||
autoRun.set(ret);
|
||||
if (!ret) {
|
||||
printf("%s(%d):ctest:%s(%s);\n", file, line, msg.c_str(), param.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
/**
    generic comparison; uses operator== of the operands
*/
template<typename T, typename U>
bool isEqual(const T& lhs, const U& rhs)
{
    return lhs == rhs;
}

// avoid warning of comparison of integers of different signs
inline bool isEqual(size_t lhs, int rhs)
{
    const size_t converted = size_t(rhs);
    return lhs == converted;
}
inline bool isEqual(int lhs, size_t rhs)
{
    const size_t converted = size_t(lhs);
    return converted == rhs;
}

// C strings are compared by content (strcmp), not by address;
// the char* overloads forward to the (const char*, const char*) one
inline bool isEqual(const char *lhs, const char *rhs)
{
    const int order = strcmp(lhs, rhs);
    return order == 0;
}
inline bool isEqual(char *lhs, const char *rhs)
{
    const char *constLhs = lhs;
    return isEqual(constLhs, rhs);
}
inline bool isEqual(const char *lhs, char *rhs)
{
    const char *constRhs = rhs;
    return isEqual(lhs, constRhs);
}
inline bool isEqual(char *lhs, char *rhs)
{
    const char *constLhs = lhs;
    const char *constRhs = rhs;
    return isEqual(constLhs, constRhs);
}
|
||||
/**
    compare two floats by their bit patterns instead of operator==
    (so +0.0f and -0.0f are different, and identical NaN encodings are equal)
    The bits are copied with memcpy: reading the inactive member of a union,
    as done before, is undefined behavior in C++.
*/
inline bool isEqual(float lhs, float rhs)
{
    uint32_t lbits = 0;
    uint32_t rbits = 0;
    // copy only as many bytes as a float occupies
    memcpy(&lbits, &lhs, sizeof(lhs));
    memcpy(&rbits, &rhs, sizeof(rhs));
    return lbits == rbits;
}
|
||||
/**
    compare two doubles by their bit patterns instead of operator==
    (so +0.0 and -0.0 are different, and identical NaN encodings are equal)
    The bits are copied with memcpy: reading the inactive member of a union,
    as done before, is undefined behavior in C++.
*/
inline bool isEqual(double lhs, double rhs)
{
    uint64_t lbits = 0;
    uint64_t rbits = 0;
    // copy only as many bytes as a double occupies
    memcpy(&lbits, &lhs, sizeof(lhs));
    memcpy(&rbits, &rhs, sizeof(rhs));
    return lbits == rbits;
}
|
||||
|
||||
} } // cybozu::test
|
||||
|
||||
#ifndef CYBOZU_TEST_DISABLE_AUTO_RUN
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
return cybozu::test::autoRun.run(argc, argv);
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
alert if !x
|
||||
@param x [in]
|
||||
*/
|
||||
#define CYBOZU_TEST_ASSERT(x) cybozu::test::test(!!(x), "CYBOZU_TEST_ASSERT", #x, __FILE__, __LINE__)
|
||||
|
||||
/**
|
||||
alert if x != y
|
||||
@param x [in]
|
||||
@param y [in]
|
||||
*/
|
||||
#define CYBOZU_TEST_EQUAL(x, y) { \
|
||||
bool _cybozu_eq = cybozu::test::isEqual(x, y); \
|
||||
cybozu::test::test(_cybozu_eq, "CYBOZU_TEST_EQUAL", #x ", " #y, __FILE__, __LINE__); \
|
||||
if (!_cybozu_eq) { \
|
||||
std::cout << "ctest: lhs=" << (x) << std::endl; \
|
||||
std::cout << "ctest: rhs=" << (y) << std::endl; \
|
||||
} \
|
||||
}
|
||||
/**
    alert if fabs(x - y) >= eps
    @param x [in]
    @param y [in]
    @param eps [in] tolerance; the check passes only when fabs(x - y) < eps
    @note x and y are evaluated again when the check fails (to print them)
    @note expands to fabs(); NOTE(review): none of the headers included at the top
          of this file is <math.h>, so the user presumably has to include it
*/
#define CYBOZU_TEST_NEAR(x, y, eps) { \
    /* eps is parenthesized so that an expression such as `c ? a : b` keeps its meaning */ \
    bool _cybozu_isNear = fabs((x) - (y)) < (eps); \
    cybozu::test::test(_cybozu_isNear, "CYBOZU_TEST_NEAR", #x ", " #y, __FILE__, __LINE__); \
    if (!_cybozu_isNear) { \
        std::cout << "ctest: lhs=" << (x) << std::endl; \
        std::cout << "ctest: rhs=" << (y) << std::endl; \
    } \
}
|
||||
|
||||
/**
    alert if pointer x != pointer y; both addresses are printed on failure
    @param x [in]
    @param y [in]
    @note x and y are evaluated again when the check fails (to print them)
*/
#define CYBOZU_TEST_EQUAL_POINTER(x, y) { \
    /* arguments are parenthesized so that operators inside them do not mix with == */ \
    bool _cybozu_eq = (x) == (y); \
    cybozu::test::test(_cybozu_eq, "CYBOZU_TEST_EQUAL_POINTER", #x ", " #y, __FILE__, __LINE__); \
    if (!_cybozu_eq) { \
        std::cout << "ctest: lhs=" << static_cast<const void*>(x) << std::endl; \
        std::cout << "ctest: rhs=" << static_cast<const void*>(y) << std::endl; \
    } \
}
|
||||
/**
|
||||
alert if x[] != y[]
|
||||
@param x [in]
|
||||
@param y [in]
|
||||
@param n [in]
|
||||
*/
|
||||
#define CYBOZU_TEST_EQUAL_ARRAY(x, y, n) { \
|
||||
for (size_t _cybozu_test_i = 0, _cybozu_ie = (size_t)(n); _cybozu_test_i < _cybozu_ie; _cybozu_test_i++) { \
|
||||
bool _cybozu_eq = cybozu::test::isEqual((x)[_cybozu_test_i], (y)[_cybozu_test_i]); \
|
||||
cybozu::test::test(_cybozu_eq, "CYBOZU_TEST_EQUAL_ARRAY", #x ", " #y ", " #n, __FILE__, __LINE__); \
|
||||
if (!_cybozu_eq) { \
|
||||
std::cout << "ctest: i=" << _cybozu_test_i << std::endl; \
|
||||
std::cout << "ctest: lhs=" << (x)[_cybozu_test_i] << std::endl; \
|
||||
std::cout << "ctest: rhs=" << (y)[_cybozu_test_i] << std::endl; \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
/**
|
||||
always alert
|
||||
@param msg [in]
|
||||
*/
|
||||
#define CYBOZU_TEST_FAIL(msg) cybozu::test::test(false, "CYBOZU_TEST_FAIL", msg, __FILE__, __LINE__)
|
||||
|
||||
/**
|
||||
verify message in exception
|
||||
*/
|
||||
#define CYBOZU_TEST_EXCEPTION_MESSAGE(statement, Exception, msg) \
|
||||
{ \
|
||||
int _cybozu_ret = 0; \
|
||||
std::string _cybozu_errMsg; \
|
||||
try { \
|
||||
statement; \
|
||||
_cybozu_ret = 1; \
|
||||
} catch (const Exception& _cybozu_e) { \
|
||||
_cybozu_errMsg = _cybozu_e.what(); \
|
||||
if (_cybozu_errMsg.find(msg) == std::string::npos) { \
|
||||
_cybozu_ret = 2; \
|
||||
} \
|
||||
} catch (...) { \
|
||||
_cybozu_ret = 3; \
|
||||
} \
|
||||
if (_cybozu_ret) { \
|
||||
cybozu::test::test(false, "CYBOZU_TEST_EXCEPTION_MESSAGE", #statement ", " #Exception ", " #msg, __FILE__, __LINE__); \
|
||||
if (_cybozu_ret == 1) { \
|
||||
std::cout << "ctest: no exception" << std::endl; \
|
||||
} else if (_cybozu_ret == 2) { \
|
||||
std::cout << "ctest: bad exception msg:" << _cybozu_errMsg << std::endl; \
|
||||
} else { \
|
||||
std::cout << "ctest: unexpected exception" << std::endl; \
|
||||
} \
|
||||
} else { \
|
||||
cybozu::test::autoRun.set(true); \
|
||||
} \
|
||||
}
|
||||
|
||||
#define CYBOZU_TEST_EXCEPTION(statement, Exception) \
|
||||
{ \
|
||||
int _cybozu_ret = 0; \
|
||||
try { \
|
||||
statement; \
|
||||
_cybozu_ret = 1; \
|
||||
} catch (const Exception&) { \
|
||||
} catch (...) { \
|
||||
_cybozu_ret = 2; \
|
||||
} \
|
||||
if (_cybozu_ret) { \
|
||||
cybozu::test::test(false, "CYBOZU_TEST_EXCEPTION", #statement ", " #Exception, __FILE__, __LINE__); \
|
||||
if (_cybozu_ret == 1) { \
|
||||
std::cout << "ctest: no exception" << std::endl; \
|
||||
} else { \
|
||||
std::cout << "ctest: unexpected exception" << std::endl; \
|
||||
} \
|
||||
} else { \
|
||||
cybozu::test::autoRun.set(true); \
|
||||
} \
|
||||
}
|
||||
|
||||
/**
|
||||
verify statement does not throw
|
||||
*/
|
||||
#define CYBOZU_TEST_NO_EXCEPTION(statement) \
|
||||
try { \
|
||||
statement; \
|
||||
cybozu::test::autoRun.set(true); \
|
||||
} catch (...) { \
|
||||
cybozu::test::test(false, "CYBOZU_TEST_NO_EXCEPTION", #statement, __FILE__, __LINE__); \
|
||||
}
|
||||
|
||||
/**
|
||||
append auto unit test
|
||||
@param name [in] module name
|
||||
*/
|
||||
#define CYBOZU_TEST_AUTO(name) \
|
||||
void cybozu_test_ ## name(); \
|
||||
struct cybozu_test_local_ ## name { \
|
||||
cybozu_test_local_ ## name() \
|
||||
{ \
|
||||
cybozu::test::autoRun.append(#name, cybozu_test_ ## name); \
|
||||
} \
|
||||
} cybozu_test_local_instance_ ## name; \
|
||||
void cybozu_test_ ## name()
|
||||
|
||||
/**
|
||||
append auto unit test with fixture
|
||||
@param name [in] module name
|
||||
*/
|
||||
#define CYBOZU_TEST_AUTO_WITH_FIXTURE(name, Fixture) \
|
||||
void cybozu_test_ ## name(); \
|
||||
void cybozu_test_real_ ## name() \
|
||||
{ \
|
||||
Fixture f; \
|
||||
cybozu_test_ ## name(); \
|
||||
} \
|
||||
struct cybozu_test_local_ ## name { \
|
||||
cybozu_test_local_ ## name() \
|
||||
{ \
|
||||
cybozu::test::autoRun.append(#name, cybozu_test_real_ ## name); \
|
||||
} \
|
||||
} cybozu_test_local_instance_ ## name; \
|
||||
void cybozu_test_ ## name()
|
||||
|
||||
/**
|
||||
setup fixture
|
||||
@param Fixture [in] class name of fixture
|
||||
@note cstr of Fixture is called before test and dstr of Fixture is called after test
|
||||
*/
|
||||
#define CYBOZU_TEST_SETUP_FIXTURE(Fixture) \
|
||||
Fixture *cybozu_test_local_fixture; \
|
||||
void cybozu_test_local_init() \
|
||||
{ \
|
||||
cybozu_test_local_fixture = new Fixture(); \
|
||||
} \
|
||||
void cybozu_test_local_term() \
|
||||
{ \
|
||||
delete cybozu_test_local_fixture; \
|
||||
} \
|
||||
struct cybozu_test_local_fixture_setup_ { \
|
||||
cybozu_test_local_fixture_setup_() \
|
||||
{ \
|
||||
cybozu::test::autoRun.setup(cybozu_test_local_init, cybozu_test_local_term); \
|
||||
} \
|
||||
} cybozu_test_local_fixture_setup_instance_;
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,20 @@
|
|||
þ½Ž¿
|
||||
Microsoft Visual Studio Solution File, Format Version 10.00
|
||||
# Visual C++ Express 2008
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "jmp", "jmp.vcproj", "{AC0B3317-E988-44F8-954A-BCBE4B3BB2BF}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Win32 = Debug|Win32
|
||||
Release|Win32 = Release|Win32
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{AC0B3317-E988-44F8-954A-BCBE4B3BB2BF}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{AC0B3317-E988-44F8-954A-BCBE4B3BB2BF}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{AC0B3317-E988-44F8-954A-BCBE4B3BB2BF}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{AC0B3317-E988-44F8-954A-BCBE4B3BB2BF}.Release|Win32.Build.0 = Release|Win32
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
|
@ -0,0 +1,195 @@
|
|||
<?xml version="1.0" encoding="shift_jis"?>
|
||||
<VisualStudioProject
|
||||
ProjectType="Visual C++"
|
||||
Version="9.00"
|
||||
Name="jmp"
|
||||
ProjectGUID="{AC0B3317-E988-44F8-954A-BCBE4B3BB2BF}"
|
||||
RootNamespace="jmp"
|
||||
Keyword="Win32Proj"
|
||||
TargetFrameworkVersion="196613"
|
||||
>
|
||||
<Platforms>
|
||||
<Platform
|
||||
Name="Win32"
|
||||
/>
|
||||
</Platforms>
|
||||
<ToolFiles>
|
||||
</ToolFiles>
|
||||
<Configurations>
|
||||
<Configuration
|
||||
Name="Debug|Win32"
|
||||
OutputDirectory="$(SolutionDir)$(ConfigurationName)"
|
||||
IntermediateDirectory="$(ConfigurationName)"
|
||||
ConfigurationType="1"
|
||||
CharacterSet="1"
|
||||
>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXMLDataGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCWebServiceProxyGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCMIDLTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
Optimization="0"
|
||||
AdditionalIncludeDirectories="$(SolutionDir)/../"
|
||||
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
|
||||
MinimalRebuild="true"
|
||||
BasicRuntimeChecks="3"
|
||||
RuntimeLibrary="3"
|
||||
UsePrecompiledHeader="0"
|
||||
WarningLevel="3"
|
||||
DebugInformationFormat="4"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManagedResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPreLinkEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCLinkerTool"
|
||||
LinkIncremental="2"
|
||||
GenerateDebugInformation="true"
|
||||
SubSystem="1"
|
||||
TargetMachine="1"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCALinkTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManifestTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXDCMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCBscMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCFxCopTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCAppVerifierTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPostBuildEventTool"
|
||||
/>
|
||||
</Configuration>
|
||||
<Configuration
|
||||
Name="Release|Win32"
|
||||
OutputDirectory="$(SolutionDir)$(ConfigurationName)"
|
||||
IntermediateDirectory="$(ConfigurationName)"
|
||||
ConfigurationType="1"
|
||||
CharacterSet="1"
|
||||
WholeProgramOptimization="1"
|
||||
>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXMLDataGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCWebServiceProxyGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCMIDLTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
Optimization="2"
|
||||
EnableIntrinsicFunctions="true"
|
||||
AdditionalIncludeDirectories="$(SolutionDir)/../"
|
||||
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
|
||||
RuntimeLibrary="2"
|
||||
EnableFunctionLevelLinking="true"
|
||||
UsePrecompiledHeader="0"
|
||||
WarningLevel="3"
|
||||
DebugInformationFormat="3"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManagedResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPreLinkEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCLinkerTool"
|
||||
LinkIncremental="1"
|
||||
GenerateDebugInformation="true"
|
||||
SubSystem="1"
|
||||
OptimizeReferences="2"
|
||||
EnableCOMDATFolding="2"
|
||||
TargetMachine="1"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCALinkTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManifestTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXDCMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCBscMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCFxCopTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCAppVerifierTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPostBuildEventTool"
|
||||
/>
|
||||
</Configuration>
|
||||
</Configurations>
|
||||
<References>
|
||||
</References>
|
||||
<Files>
|
||||
<Filter
|
||||
Name="ソース ファイル"
|
||||
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
|
||||
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
|
||||
>
|
||||
<File
|
||||
RelativePath=".\jmp.cpp"
|
||||
>
|
||||
</File>
|
||||
</Filter>
|
||||
<Filter
|
||||
Name="ヘッダー ファイル"
|
||||
Filter="h;hpp;hxx;hm;inl;inc;xsd"
|
||||
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
|
||||
>
|
||||
</Filter>
|
||||
<Filter
|
||||
Name="リソース ファイル"
|
||||
Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
|
||||
UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
|
||||
>
|
||||
</Filter>
|
||||
</Files>
|
||||
<Globals>
|
||||
</Globals>
|
||||
</VisualStudioProject>
|
|
@ -0,0 +1,63 @@
|
|||
#pragma once
|
||||
#include <stdio.h>
|
||||
|
||||
struct Reg {
|
||||
int r_;
|
||||
Reg(int r) : r_(r) {}
|
||||
};
|
||||
|
||||
inline const Reg& getReg0() { static const Reg r(0); return r; }
|
||||
inline const Reg& getReg1() { static const Reg r(1); return r; }
|
||||
inline const Reg& getReg2() { static const Reg r(2); return r; }
|
||||
|
||||
static const Reg& r0 = getReg0();
|
||||
static const Reg& r1 = getReg1();
|
||||
static const Reg& r2 = getReg2();
|
||||
|
||||
inline void putReg()
|
||||
{
|
||||
puts("putReg");
|
||||
printf("r0=%p, %d\n", &r0, r0.r_);
|
||||
printf("r0=%p, %d\n", &r0, r1.r_);
|
||||
printf("r0=%p, %d\n", &r0, r2.r_);
|
||||
}
|
||||
|
||||
struct A {
|
||||
int a;
|
||||
A()
|
||||
: a(5)
|
||||
{
|
||||
puts("A cstr");
|
||||
}
|
||||
~A()
|
||||
{
|
||||
puts("A dstr");
|
||||
}
|
||||
void put() const
|
||||
{
|
||||
printf("a=%d\n", a);
|
||||
}
|
||||
};
|
||||
|
||||
template<int dummy = 0>
|
||||
struct XT {
|
||||
static A a;
|
||||
};
|
||||
|
||||
template<int dummy>
|
||||
A XT<dummy>::a;
|
||||
|
||||
typedef XT<0> X;
|
||||
|
||||
void init();
|
||||
|
||||
struct Init {
|
||||
Init()
|
||||
{
|
||||
puts("Init");
|
||||
init();
|
||||
putReg();
|
||||
}
|
||||
};
|
||||
static Init s_init;
|
||||
|
|
@ -0,0 +1,51 @@
|
|||
#include <stdio.h>
|
||||
|
||||
static const struct XXX {
|
||||
XXX() { puts("XXX"); }
|
||||
} s_sss;
|
||||
|
||||
struct A {
|
||||
int aaa;
|
||||
A()
|
||||
: aaa(123)
|
||||
{
|
||||
puts("A cstr");
|
||||
}
|
||||
~A()
|
||||
{
|
||||
puts("A dstr");
|
||||
}
|
||||
void put() const
|
||||
{
|
||||
printf("aaa=%d\n", aaa);
|
||||
}
|
||||
};
|
||||
|
||||
template<int dummy = 0>
|
||||
struct XT {
|
||||
static A sss;
|
||||
};
|
||||
|
||||
template<int dummy>
|
||||
A XT<dummy>::sss;
|
||||
|
||||
typedef XT<0> X;
|
||||
|
||||
static struct Init {
|
||||
Init()
|
||||
{
|
||||
puts("Init");
|
||||
X::sss.put();
|
||||
}
|
||||
} s_init;
|
||||
|
||||
int f() { puts("f"); return 4; }
|
||||
|
||||
static const int r = f();
|
||||
|
||||
int main()
|
||||
{
|
||||
puts("main");
|
||||
printf("r=%d\n", r);
|
||||
X::sss.put();
|
||||
}
|
|
@ -0,0 +1,9 @@
|
|||
#include "lib.h"
|
||||
|
||||
int main()
|
||||
{
|
||||
puts("main");
|
||||
X::a.put();
|
||||
putReg();
|
||||
}
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
#include "lib.h"
|
||||
|
||||
void init()
|
||||
{
|
||||
static bool init = true;
|
||||
printf("in lib_test %d\n", init);
|
||||
if (!init) return;
|
||||
init = false;
|
||||
X::a.put();
|
||||
putReg();
|
||||
}
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,724 @@
|
|||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <string>
|
||||
#define XBYAK_NO_OP_NAMES
|
||||
#include <xbyak/xbyak.h>
|
||||
#include <cybozu/inttype.hpp>
|
||||
#include <cybozu/test.hpp>
|
||||
|
||||
using namespace Xbyak;
|
||||
|
||||
CYBOZU_TEST_AUTO(setSize)
|
||||
{
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code() : Xbyak::CodeGenerator(4096)
|
||||
{
|
||||
setSize(4095);
|
||||
db(1);
|
||||
size_t size = getSize();
|
||||
CYBOZU_TEST_EQUAL(size, 4096u);
|
||||
CYBOZU_TEST_NO_EXCEPTION(setSize(size));
|
||||
CYBOZU_TEST_EXCEPTION(db(1), Xbyak::Error);
|
||||
}
|
||||
} code;
|
||||
}
|
||||
|
||||
CYBOZU_TEST_AUTO(compOperand)
|
||||
{
|
||||
using namespace Xbyak::util;
|
||||
CYBOZU_TEST_ASSERT(eax == eax);
|
||||
CYBOZU_TEST_ASSERT(ecx != xmm0);
|
||||
CYBOZU_TEST_ASSERT(ptr[eax] == ptr[eax]);
|
||||
CYBOZU_TEST_ASSERT(dword[eax] != ptr[eax]);
|
||||
CYBOZU_TEST_ASSERT(ptr[eax] != ptr[eax+3]);
|
||||
}
|
||||
|
||||
CYBOZU_TEST_AUTO(mov_const)
|
||||
{
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
const struct {
|
||||
uint64_t v;
|
||||
int bit;
|
||||
bool error;
|
||||
} tbl[] = {
|
||||
{ uint64_t(-1), 8, false },
|
||||
{ 0x12, 8, false },
|
||||
{ 0x80, 8, false },
|
||||
{ 0xff, 8, false },
|
||||
{ 0x100, 8, true },
|
||||
|
||||
{ 1, 16, false },
|
||||
{ uint64_t(-1), 16, false },
|
||||
{ 0x7fff, 16, false },
|
||||
{ 0xffff, 16, false },
|
||||
{ 0x10000, 16, true },
|
||||
|
||||
{ uint64_t(-1), 32, false },
|
||||
{ 0x7fffffff, 32, false },
|
||||
{ uint64_t(-0x7fffffff), 32, false },
|
||||
{ 0xffffffff, 32, false },
|
||||
{ 0x100000000ull, 32, true },
|
||||
|
||||
#ifdef XBYAK64
|
||||
{ uint64_t(-1), 64, false },
|
||||
{ 0x7fffffff, 64, false },
|
||||
{ 0xffffffffffffffffull, 64, false },
|
||||
{ 0x80000000, 64, true },
|
||||
{ 0xffffffff, 64, true },
|
||||
#endif
|
||||
};
|
||||
for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
|
||||
const int bit = tbl[i].bit;
|
||||
const uint64_t v = tbl[i].v;
|
||||
const Xbyak::AddressFrame& af = bit == 8 ? byte : bit == 16 ? word : bit == 32 ? dword : qword;
|
||||
if (tbl[i].error) {
|
||||
CYBOZU_TEST_EXCEPTION(mov(af[eax], v), Xbyak::Error);
|
||||
} else {
|
||||
CYBOZU_TEST_NO_EXCEPTION(mov(af[eax], v));
|
||||
}
|
||||
}
|
||||
}
|
||||
} code;
|
||||
}
|
||||
|
||||
CYBOZU_TEST_AUTO(align)
|
||||
{
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
const size_t alignSize = 16;
|
||||
for (int padding = 0; padding < 20; padding++) {
|
||||
for (int i = 0; i < padding; i++) {
|
||||
db(1);
|
||||
}
|
||||
align(alignSize);
|
||||
CYBOZU_TEST_EQUAL(size_t(getCurr()) % alignSize, 0u);
|
||||
}
|
||||
align(alignSize);
|
||||
const uint8 *p = getCurr();
|
||||
// do nothing if aligned
|
||||
align(alignSize);
|
||||
CYBOZU_TEST_EQUAL(p, getCurr());
|
||||
}
|
||||
} c;
|
||||
}
|
||||
|
||||
#ifdef XBYAK64
|
||||
CYBOZU_TEST_AUTO(vfmaddps)
|
||||
{
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
v4fmaddps(zmm1, zmm8, ptr [rdx + 64]);
|
||||
v4fmaddss(xmm15, xmm8, ptr [rax + 64]);
|
||||
v4fnmaddps(zmm5 | k5, zmm2, ptr [rcx + 0x80]);
|
||||
v4fnmaddss(xmm31, xmm2, ptr [rsp + 0x80]);
|
||||
vp4dpwssd(zmm23 | k7 | T_z, zmm1, ptr [rax + 64]);
|
||||
vp4dpwssds(zmm10 | k4, zmm3, ptr [rsp + rax * 4 + 64]);
|
||||
}
|
||||
} c;
|
||||
const uint8_t tbl[] = {
|
||||
0x62, 0xf2, 0x3f, 0x48, 0x9a, 0x4a, 0x04,
|
||||
0x62, 0x72, 0x3f, 0x08, 0x9b, 0x78, 0x04,
|
||||
0x62, 0xf2, 0x6f, 0x4d, 0xaa, 0x69, 0x08,
|
||||
0x62, 0x62, 0x6f, 0x08, 0xab, 0x7c, 0x24, 0x08,
|
||||
0x62, 0xe2, 0x77, 0xcf, 0x52, 0x78, 0x04,
|
||||
0x62, 0x72, 0x67, 0x4c, 0x53, 0x54, 0x84, 0x04,
|
||||
};
|
||||
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||
}
|
||||
CYBOZU_TEST_AUTO(vaes)
|
||||
{
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
vaesdec(xmm20, xmm30, ptr [rcx + 64]);
|
||||
vaesdec(ymm1, ymm2, ptr [rcx + 64]);
|
||||
vaesdec(zmm1, zmm2, ptr [rcx + 64]);
|
||||
|
||||
vaesdeclast(xmm20, xmm30, ptr [rax + 64]);
|
||||
vaesdeclast(ymm20, ymm30, ptr [rax + 64]);
|
||||
vaesdeclast(zmm20, zmm30, ptr [rax + 64]);
|
||||
|
||||
vaesenc(xmm20, xmm30, ptr [rcx + 64]);
|
||||
vaesenc(ymm1, ymm2, ptr [rcx + 64]);
|
||||
vaesenc(zmm1, zmm2, ptr [rcx + 64]);
|
||||
|
||||
vaesenclast(xmm20, xmm30, ptr [rax + 64]);
|
||||
vaesenclast(ymm20, ymm30, ptr [rax + 64]);
|
||||
vaesenclast(zmm20, zmm30, ptr [rax + 64]);
|
||||
}
|
||||
} c;
|
||||
const uint8_t tbl[] = {
|
||||
0x62, 0xE2, 0x0D, 0x00, 0xDE, 0x61, 0x04,
|
||||
0xC4, 0xE2, 0x6D, 0xDE, 0x49, 0x40,
|
||||
0x62, 0xF2, 0x6D, 0x48, 0xDE, 0x49, 0x01,
|
||||
|
||||
0x62, 0xE2, 0x0D, 0x00, 0xDF, 0x60, 0x04,
|
||||
0x62, 0xE2, 0x0D, 0x20, 0xDF, 0x60, 0x02,
|
||||
0x62, 0xE2, 0x0D, 0x40, 0xDF, 0x60, 0x01,
|
||||
|
||||
0x62, 0xE2, 0x0D, 0x00, 0xDC, 0x61, 0x04,
|
||||
0xC4, 0xE2, 0x6D, 0xDC, 0x49, 0x40,
|
||||
0x62, 0xF2, 0x6D, 0x48, 0xDC, 0x49, 0x01,
|
||||
|
||||
0x62, 0xE2, 0x0D, 0x00, 0xDD, 0x60, 0x04,
|
||||
0x62, 0xE2, 0x0D, 0x20, 0xDD, 0x60, 0x02,
|
||||
0x62, 0xE2, 0x0D, 0x40, 0xDD, 0x60, 0x01,
|
||||
};
|
||||
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||
}
|
||||
CYBOZU_TEST_AUTO(vpclmulqdq)
|
||||
{
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
vpclmulqdq(xmm2, xmm3, ptr [rax + 64], 3);
|
||||
vpclmulqdq(ymm2, ymm3, ptr [rax + 64], 3);
|
||||
vpclmulqdq(zmm2, zmm3, ptr [rax + 64], 3);
|
||||
|
||||
vpclmulqdq(xmm20, xmm3, ptr [rax + 64], 3);
|
||||
vpclmulqdq(ymm20, ymm3, ptr [rax + 64], 3);
|
||||
vpclmulqdq(zmm20, zmm3, ptr [rax + 64], 3);
|
||||
}
|
||||
} c;
|
||||
const uint8_t tbl[] = {
|
||||
0xc4, 0xe3, 0x61, 0x44, 0x50, 0x40, 0x03,
|
||||
0xc4, 0xe3, 0x65, 0x44, 0x50, 0x40, 0x03,
|
||||
0x62, 0xf3, 0x65, 0x48, 0x44, 0x50, 0x01, 0x03,
|
||||
0x62, 0xe3, 0x65, 0x08, 0x44, 0x60, 0x04, 0x03,
|
||||
0x62, 0xe3, 0x65, 0x28, 0x44, 0x60, 0x02, 0x03,
|
||||
0x62, 0xe3, 0x65, 0x48, 0x44, 0x60, 0x01, 0x03,
|
||||
};
|
||||
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||
}
|
||||
CYBOZU_TEST_AUTO(vcompressb_w)
|
||||
{
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
vcompressb(ptr[rax + 64], xmm1);
|
||||
vcompressb(xmm30 | k5, xmm1);
|
||||
vcompressb(ptr[rax + 64], ymm1);
|
||||
vcompressb(ymm30 | k3 |T_z, ymm1);
|
||||
vcompressb(ptr[rax + 64], zmm1);
|
||||
vcompressb(zmm30 | k2 |T_z, zmm1);
|
||||
|
||||
vcompressw(ptr[rax + 64], xmm1);
|
||||
vcompressw(xmm30 | k5, xmm1);
|
||||
vcompressw(ptr[rax + 64], ymm1);
|
||||
vcompressw(ymm30 | k3 |T_z, ymm1);
|
||||
vcompressw(ptr[rax + 64], zmm1);
|
||||
vcompressw(zmm30 | k2 |T_z, zmm1);
|
||||
}
|
||||
} c;
|
||||
const uint8_t tbl[] = {
|
||||
0x62, 0xf2, 0x7d, 0x08, 0x63, 0x48, 0x40,
|
||||
0x62, 0x92, 0x7d, 0x0d, 0x63, 0xce,
|
||||
0x62, 0xf2, 0x7d, 0x28, 0x63, 0x48, 0x40,
|
||||
0x62, 0x92, 0x7d, 0xab, 0x63, 0xce,
|
||||
0x62, 0xf2, 0x7d, 0x48, 0x63, 0x48, 0x40,
|
||||
0x62, 0x92, 0x7d, 0xca, 0x63, 0xce,
|
||||
|
||||
0x62, 0xf2, 0xfd, 0x08, 0x63, 0x48, 0x20,
|
||||
0x62, 0x92, 0xfd, 0x0d, 0x63, 0xce,
|
||||
0x62, 0xf2, 0xfd, 0x28, 0x63, 0x48, 0x20,
|
||||
0x62, 0x92, 0xfd, 0xab, 0x63, 0xce,
|
||||
0x62, 0xf2, 0xfd, 0x48, 0x63, 0x48, 0x20,
|
||||
0x62, 0x92, 0xfd, 0xca, 0x63, 0xce,
|
||||
};
|
||||
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||
}
|
||||
// Encoding test: emits vpshldw/d/q (with immediate 5) and vpshldvw/d/q on
// xmm5/ymm5/zmm5 with k3 zero-masking and a [rax + 0x40] memory operand,
// then compares the generated bytes with tbl.
CYBOZU_TEST_AUTO(shld)
|
||||
{
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
vpshldw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
|
||||
vpshldw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
|
||||
vpshldw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
|
||||
|
||||
vpshldd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
|
||||
vpshldd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
|
||||
vpshldd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
|
||||
|
||||
vpshldq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
|
||||
vpshldq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
|
||||
vpshldq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
|
||||
|
||||
vpshldvw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
|
||||
vpshldvw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
|
||||
vpshldvw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
|
||||
|
||||
vpshldvd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
|
||||
vpshldvd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
|
||||
vpshldvd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
|
||||
|
||||
vpshldvq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
|
||||
vpshldvq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
|
||||
vpshldvq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
|
||||
}
|
||||
} c;
|
||||
// expected bytes: one row per instruction, in the order emitted above
const uint8_t tbl[] = {
|
||||
0x62, 0xf3, 0xed, 0x8b, 0x70, 0x68, 0x04, 0x05,
|
||||
0x62, 0xf3, 0xed, 0xab, 0x70, 0x68, 0x02, 0x05,
|
||||
0x62, 0xf3, 0xed, 0xcb, 0x70, 0x68, 0x01, 0x05,
|
||||
|
||||
0x62, 0xf3, 0x6d, 0x8b, 0x71, 0x68, 0x04, 0x05,
|
||||
0x62, 0xf3, 0x6d, 0xab, 0x71, 0x68, 0x02, 0x05,
|
||||
0x62, 0xf3, 0x6d, 0xcb, 0x71, 0x68, 0x01, 0x05,
|
||||
|
||||
0x62, 0xf3, 0xed, 0x8b, 0x71, 0x68, 0x04, 0x05,
|
||||
0x62, 0xf3, 0xed, 0xab, 0x71, 0x68, 0x02, 0x05,
|
||||
0x62, 0xf3, 0xed, 0xcb, 0x71, 0x68, 0x01, 0x05,
|
||||
|
||||
0x62, 0xf2, 0xed, 0x8b, 0x70, 0x68, 0x04,
|
||||
0x62, 0xf2, 0xed, 0xab, 0x70, 0x68, 0x02,
|
||||
0x62, 0xf2, 0xed, 0xcb, 0x70, 0x68, 0x01,
|
||||
|
||||
0x62, 0xf2, 0x6d, 0x8b, 0x71, 0x68, 0x04,
|
||||
0x62, 0xf2, 0x6d, 0xab, 0x71, 0x68, 0x02,
|
||||
0x62, 0xf2, 0x6d, 0xcb, 0x71, 0x68, 0x01,
|
||||
|
||||
0x62, 0xf2, 0xed, 0x8b, 0x71, 0x68, 0x04,
|
||||
0x62, 0xf2, 0xed, 0xab, 0x71, 0x68, 0x02,
|
||||
0x62, 0xf2, 0xed, 0xcb, 0x71, 0x68, 0x01,
|
||||
};
|
||||
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||
// the generated code must match tbl in both length and content
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||
}
|
||||
// Encoding test: emits vpshrdw/d/q (with immediate 5) and vpshrdvw/d/q on
// xmm5/ymm5/zmm5 with k3 zero-masking and a [rax + 0x40] memory operand,
// then repeats the d/q forms with a ptr_b operand, and compares the
// generated bytes with tbl.
CYBOZU_TEST_AUTO(shrd)
|
||||
{
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
vpshrdw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
|
||||
vpshrdw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
|
||||
vpshrdw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
|
||||
|
||||
vpshrdd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
|
||||
vpshrdd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
|
||||
vpshrdd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
|
||||
|
||||
vpshrdq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
|
||||
vpshrdq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
|
||||
vpshrdq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
|
||||
|
||||
vpshrdvw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
|
||||
vpshrdvw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
|
||||
vpshrdvw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
|
||||
|
||||
vpshrdvd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
|
||||
vpshrdvd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
|
||||
vpshrdvd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
|
||||
|
||||
vpshrdvq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
|
||||
vpshrdvq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
|
||||
vpshrdvq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
|
||||
|
||||
vpshrdd(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40], 5);
|
||||
vpshrdd(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40], 5);
|
||||
vpshrdd(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40], 5);
|
||||
|
||||
vpshrdq(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40], 5);
|
||||
vpshrdq(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40], 5);
|
||||
vpshrdq(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40], 5);
|
||||
|
||||
vpshrdvd(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40]);
|
||||
vpshrdvd(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40]);
|
||||
vpshrdvd(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40]);
|
||||
|
||||
vpshrdvq(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40]);
|
||||
vpshrdvq(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40]);
|
||||
vpshrdvq(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40]);
|
||||
}
|
||||
} c;
|
||||
// expected bytes: one row per instruction, in the order emitted above
const uint8_t tbl[] = {
|
||||
0x62, 0xf3, 0xed, 0x8b, 0x72, 0x68, 0x04, 0x05,
|
||||
0x62, 0xf3, 0xed, 0xab, 0x72, 0x68, 0x02, 0x05,
|
||||
0x62, 0xf3, 0xed, 0xcb, 0x72, 0x68, 0x01, 0x05,
|
||||
|
||||
0x62, 0xf3, 0x6d, 0x8b, 0x73, 0x68, 0x04, 0x05,
|
||||
0x62, 0xf3, 0x6d, 0xab, 0x73, 0x68, 0x02, 0x05,
|
||||
0x62, 0xf3, 0x6d, 0xcb, 0x73, 0x68, 0x01, 0x05,
|
||||
|
||||
0x62, 0xf3, 0xed, 0x8b, 0x73, 0x68, 0x04, 0x05,
|
||||
0x62, 0xf3, 0xed, 0xab, 0x73, 0x68, 0x02, 0x05,
|
||||
0x62, 0xf3, 0xed, 0xcb, 0x73, 0x68, 0x01, 0x05,
|
||||
|
||||
0x62, 0xf2, 0xed, 0x8b, 0x72, 0x68, 0x04,
|
||||
0x62, 0xf2, 0xed, 0xab, 0x72, 0x68, 0x02,
|
||||
0x62, 0xf2, 0xed, 0xcb, 0x72, 0x68, 0x01,
|
||||
|
||||
0x62, 0xf2, 0x6d, 0x8b, 0x73, 0x68, 0x04,
|
||||
0x62, 0xf2, 0x6d, 0xab, 0x73, 0x68, 0x02,
|
||||
0x62, 0xf2, 0x6d, 0xcb, 0x73, 0x68, 0x01,
|
||||
|
||||
0x62, 0xf2, 0xed, 0x8b, 0x73, 0x68, 0x04,
|
||||
0x62, 0xf2, 0xed, 0xab, 0x73, 0x68, 0x02,
|
||||
0x62, 0xf2, 0xed, 0xcb, 0x73, 0x68, 0x01,
|
||||
|
||||
0x62, 0xf3, 0x6d, 0x9b, 0x73, 0x68, 0x10, 0x05,
|
||||
0x62, 0xf3, 0x6d, 0xbb, 0x73, 0x68, 0x10, 0x05,
|
||||
0x62, 0xf3, 0x6d, 0xdb, 0x73, 0x68, 0x10, 0x05,
|
||||
|
||||
0x62, 0xf3, 0xed, 0x9b, 0x73, 0x68, 0x08, 0x05,
|
||||
0x62, 0xf3, 0xed, 0xbb, 0x73, 0x68, 0x08, 0x05,
|
||||
0x62, 0xf3, 0xed, 0xdb, 0x73, 0x68, 0x08, 0x05,
|
||||
|
||||
0x62, 0xf2, 0x6d, 0x9b, 0x73, 0x68, 0x10,
|
||||
0x62, 0xf2, 0x6d, 0xbb, 0x73, 0x68, 0x10,
|
||||
0x62, 0xf2, 0x6d, 0xdb, 0x73, 0x68, 0x10,
|
||||
|
||||
0x62, 0xf2, 0xed, 0x9b, 0x73, 0x68, 0x08,
|
||||
0x62, 0xf2, 0xed, 0xbb, 0x73, 0x68, 0x08,
|
||||
0x62, 0xf2, 0xed, 0xdb, 0x73, 0x68, 0x08,
|
||||
};
|
||||
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||
// the generated code must match tbl in both length and content
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||
}
|
||||
// Encoding test: emits vpopcntb/w/d/q on xmm5/ymm5/zmm5 with k3 zero-masking
// and a [rax + 0x40] memory operand (the d/q forms also with ptr_b), then
// compares the generated bytes with tbl.
CYBOZU_TEST_AUTO(vpopcnt)
|
||||
{
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
vpopcntb(xmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntb(ymm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntb(zmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
|
||||
vpopcntw(xmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntw(ymm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntw(zmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
|
||||
vpopcntd(xmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntd(ymm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntd(zmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
|
||||
vpopcntd(xmm5|k3|T_z, ptr_b [rax + 0x40]);
|
||||
vpopcntd(ymm5|k3|T_z, ptr_b [rax + 0x40]);
|
||||
vpopcntd(zmm5|k3|T_z, ptr_b [rax + 0x40]);
|
||||
|
||||
vpopcntq(xmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntq(ymm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntq(zmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
|
||||
vpopcntq(xmm5|k3|T_z, ptr_b [rax + 0x40]);
|
||||
vpopcntq(ymm5|k3|T_z, ptr_b [rax + 0x40]);
|
||||
vpopcntq(zmm5|k3|T_z, ptr_b [rax + 0x40]);
|
||||
}
|
||||
} c;
|
||||
// expected bytes: one row per instruction, in the order emitted above
const uint8_t tbl[] = {
|
||||
0x62, 0xf2, 0x7d, 0x8b, 0x54, 0x68, 0x04,
|
||||
0x62, 0xf2, 0x7d, 0xab, 0x54, 0x68, 0x02,
|
||||
0x62, 0xf2, 0x7d, 0xcb, 0x54, 0x68, 0x01,
|
||||
|
||||
0x62, 0xf2, 0xfd, 0x8b, 0x54, 0x68, 0x04,
|
||||
0x62, 0xf2, 0xfd, 0xab, 0x54, 0x68, 0x02,
|
||||
0x62, 0xf2, 0xfd, 0xcb, 0x54, 0x68, 0x01,
|
||||
|
||||
0x62, 0xf2, 0x7d, 0x8b, 0x55, 0x68, 0x04,
|
||||
0x62, 0xf2, 0x7d, 0xab, 0x55, 0x68, 0x02,
|
||||
0x62, 0xf2, 0x7d, 0xcb, 0x55, 0x68, 0x01,
|
||||
|
||||
0x62, 0xf2, 0x7d, 0x9b, 0x55, 0x68, 0x10,
|
||||
0x62, 0xf2, 0x7d, 0xbb, 0x55, 0x68, 0x10,
|
||||
0x62, 0xf2, 0x7d, 0xdb, 0x55, 0x68, 0x10,
|
||||
|
||||
0x62, 0xf2, 0xfd, 0x8b, 0x55, 0x68, 0x04,
|
||||
0x62, 0xf2, 0xfd, 0xab, 0x55, 0x68, 0x02,
|
||||
0x62, 0xf2, 0xfd, 0xcb, 0x55, 0x68, 0x01,
|
||||
|
||||
0x62, 0xf2, 0xfd, 0x9b, 0x55, 0x68, 0x08,
|
||||
0x62, 0xf2, 0xfd, 0xbb, 0x55, 0x68, 0x08,
|
||||
0x62, 0xf2, 0xfd, 0xdb, 0x55, 0x68, 0x08,
|
||||
};
|
||||
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||
// the generated code must match tbl in both length and content
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||
}
|
||||
// Encoding test: emits vpdpbusd, vpdpbusds, vpdpwssd and vpdpwssds on
// xmm5/ymm5/zmm5 with k3 zero-masking, second source register 20 and a
// [rax + 0x40] memory operand (each with ptr and ptr_b), then compares the
// generated bytes with tbl.
CYBOZU_TEST_AUTO(vpdpbus)
|
||||
{
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
vpdpbusd(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
|
||||
vpdpbusd(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
|
||||
vpdpbusd(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);
|
||||
|
||||
vpdpbusd(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
|
||||
vpdpbusd(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
|
||||
vpdpbusd(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);
|
||||
|
||||
vpdpbusds(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
|
||||
vpdpbusds(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
|
||||
vpdpbusds(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);
|
||||
|
||||
vpdpbusds(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
|
||||
vpdpbusds(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
|
||||
vpdpbusds(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);
|
||||
|
||||
vpdpwssd(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
|
||||
vpdpwssd(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
|
||||
vpdpwssd(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);
|
||||
|
||||
vpdpwssd(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
|
||||
vpdpwssd(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
|
||||
vpdpwssd(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);
|
||||
|
||||
vpdpwssds(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
|
||||
vpdpwssds(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
|
||||
vpdpwssds(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);
|
||||
|
||||
vpdpwssds(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
|
||||
vpdpwssds(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
|
||||
vpdpwssds(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);
|
||||
}
|
||||
} c;
|
||||
// expected bytes: one row per instruction, in the order emitted above
const uint8_t tbl[] = {
|
||||
0x62, 0xf2, 0x5d, 0x83, 0x50, 0x68, 0x04,
|
||||
0x62, 0xf2, 0x5d, 0xa3, 0x50, 0x68, 0x02,
|
||||
0x62, 0xf2, 0x5d, 0xc3, 0x50, 0x68, 0x01,
|
||||
|
||||
0x62, 0xf2, 0x5d, 0x93, 0x50, 0x68, 0x10,
|
||||
0x62, 0xf2, 0x5d, 0xb3, 0x50, 0x68, 0x10,
|
||||
0x62, 0xf2, 0x5d, 0xd3, 0x50, 0x68, 0x10,
|
||||
|
||||
0x62, 0xf2, 0x5d, 0x83, 0x51, 0x68, 0x04,
|
||||
0x62, 0xf2, 0x5d, 0xa3, 0x51, 0x68, 0x02,
|
||||
0x62, 0xf2, 0x5d, 0xc3, 0x51, 0x68, 0x01,
|
||||
|
||||
0x62, 0xf2, 0x5d, 0x93, 0x51, 0x68, 0x10,
|
||||
0x62, 0xf2, 0x5d, 0xb3, 0x51, 0x68, 0x10,
|
||||
0x62, 0xf2, 0x5d, 0xd3, 0x51, 0x68, 0x10,
|
||||
|
||||
0x62, 0xf2, 0x5d, 0x83, 0x52, 0x68, 0x04,
|
||||
0x62, 0xf2, 0x5d, 0xa3, 0x52, 0x68, 0x02,
|
||||
0x62, 0xf2, 0x5d, 0xc3, 0x52, 0x68, 0x01,
|
||||
|
||||
0x62, 0xf2, 0x5d, 0x93, 0x52, 0x68, 0x10,
|
||||
0x62, 0xf2, 0x5d, 0xb3, 0x52, 0x68, 0x10,
|
||||
0x62, 0xf2, 0x5d, 0xd3, 0x52, 0x68, 0x10,
|
||||
|
||||
0x62, 0xf2, 0x5d, 0x83, 0x53, 0x68, 0x04,
|
||||
0x62, 0xf2, 0x5d, 0xa3, 0x53, 0x68, 0x02,
|
||||
0x62, 0xf2, 0x5d, 0xc3, 0x53, 0x68, 0x01,
|
||||
|
||||
0x62, 0xf2, 0x5d, 0x93, 0x53, 0x68, 0x10,
|
||||
0x62, 0xf2, 0x5d, 0xb3, 0x53, 0x68, 0x10,
|
||||
0x62, 0xf2, 0x5d, 0xd3, 0x53, 0x68, 0x10,
|
||||
};
|
||||
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||
// the generated code must match tbl in both length and content
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||
}
|
||||
// Encoding test: emits vpexpandb/vpexpandw on xmm5/ymm5/zmm5 with k3
// zero-masking (register source 30 and [rax + 0x40] memory source) and
// vpshufbitqmb into k1 under mask k2, then compares the generated bytes
// with tbl.
CYBOZU_TEST_AUTO(vexpand_vpshufbitqmb)
|
||||
{
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
vpexpandb(xmm5|k3|T_z, xmm30);
|
||||
vpexpandb(ymm5|k3|T_z, ymm30);
|
||||
vpexpandb(zmm5|k3|T_z, zmm30);
|
||||
vpexpandb(xmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpexpandb(ymm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpexpandb(zmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
|
||||
vpexpandw(xmm5|k3|T_z, xmm30);
|
||||
vpexpandw(ymm5|k3|T_z, ymm30);
|
||||
vpexpandw(zmm5|k3|T_z, zmm30);
|
||||
vpexpandw(xmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpexpandw(ymm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpexpandw(zmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
|
||||
vpshufbitqmb(k1|k2, xmm2, ptr [rax + 0x40]);
|
||||
vpshufbitqmb(k1|k2, ymm2, ptr [rax + 0x40]);
|
||||
vpshufbitqmb(k1|k2, zmm2, ptr [rax + 0x40]);
|
||||
}
|
||||
} c;
|
||||
// expected bytes: one row per instruction, in the order emitted above
const uint8_t tbl[] = {
|
||||
0x62, 0x92, 0x7d, 0x8b, 0x62, 0xee,
|
||||
0x62, 0x92, 0x7d, 0xab, 0x62, 0xee,
|
||||
0x62, 0x92, 0x7d, 0xcb, 0x62, 0xee,
|
||||
0x62, 0xf2, 0x7d, 0x8b, 0x62, 0x68, 0x40,
|
||||
0x62, 0xf2, 0x7d, 0xab, 0x62, 0x68, 0x40,
|
||||
0x62, 0xf2, 0x7d, 0xcb, 0x62, 0x68, 0x40,
|
||||
|
||||
0x62, 0x92, 0xfd, 0x8b, 0x62, 0xee,
|
||||
0x62, 0x92, 0xfd, 0xab, 0x62, 0xee,
|
||||
0x62, 0x92, 0xfd, 0xcb, 0x62, 0xee,
|
||||
0x62, 0xf2, 0xfd, 0x8b, 0x62, 0x68, 0x20,
|
||||
0x62, 0xf2, 0xfd, 0xab, 0x62, 0x68, 0x20,
|
||||
0x62, 0xf2, 0xfd, 0xcb, 0x62, 0x68, 0x20,
|
||||
|
||||
0x62, 0xf2, 0x6d, 0x0a, 0x8f, 0x48, 0x04,
|
||||
0x62, 0xf2, 0x6d, 0x2a, 0x8f, 0x48, 0x02,
|
||||
0x62, 0xf2, 0x6d, 0x4a, 0x8f, 0x48, 0x01,
|
||||
};
|
||||
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||
// the generated code must match tbl in both length and content
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||
}
|
||||
// Encoding test for the gf2p8affineinvqb, gf2p8affineqb and gf2p8mulb
// families: each group emits the two-operand form, the v-prefixed
// three-operand forms on low registers, on registers 30/31, and with k1
// zero-masking; the generated bytes are then compared with tbl.
CYBOZU_TEST_AUTO(gf2)
|
||||
{
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
/// gf2p8affineinvqb / vgf2p8affineinvqb
|
||||
gf2p8affineinvqb(xmm1, xmm2, 3);
|
||||
gf2p8affineinvqb(xmm1, ptr [rax + 0x40], 3);
|
||||
|
||||
vgf2p8affineinvqb(xmm1, xmm5, xmm2, 3);
|
||||
vgf2p8affineinvqb(ymm1, ymm5, ymm2, 3);
|
||||
vgf2p8affineinvqb(xmm1, xmm5, ptr [rax + 0x40], 3);
|
||||
vgf2p8affineinvqb(ymm1, ymm5, ptr [rax + 0x40], 3);
|
||||
|
||||
vgf2p8affineinvqb(xmm30, xmm31, xmm4, 5);
|
||||
vgf2p8affineinvqb(ymm30, ymm31, ymm4, 5);
|
||||
vgf2p8affineinvqb(zmm30, zmm31, zmm4, 5);
|
||||
|
||||
vgf2p8affineinvqb(xmm30|k1|T_z, xmm5, ptr [rax + 0x40], 5);
|
||||
vgf2p8affineinvqb(ymm30|k1|T_z, ymm5, ptr [rax + 0x40], 5);
|
||||
vgf2p8affineinvqb(zmm30|k1|T_z, zmm5, ptr [rax + 0x40], 5);
|
||||
|
||||
vgf2p8affineinvqb(xmm30|k1|T_z, xmm5, ptr_b [rax + 0x40], 5);
|
||||
vgf2p8affineinvqb(ymm30|k1|T_z, ymm5, ptr_b [rax + 0x40], 5);
|
||||
vgf2p8affineinvqb(zmm30|k1|T_z, zmm5, ptr_b [rax + 0x40], 5);
|
||||
/// gf2p8affineqb / vgf2p8affineqb
|
||||
gf2p8affineqb(xmm1, xmm2, 3);
|
||||
gf2p8affineqb(xmm1, ptr [rax + 0x40], 3);
|
||||
|
||||
vgf2p8affineqb(xmm1, xmm5, xmm2, 3);
|
||||
vgf2p8affineqb(ymm1, ymm5, ymm2, 3);
|
||||
vgf2p8affineqb(xmm1, xmm5, ptr [rax + 0x40], 3);
|
||||
vgf2p8affineqb(ymm1, ymm5, ptr [rax + 0x40], 3);
|
||||
|
||||
vgf2p8affineqb(xmm30, xmm31, xmm4, 5);
|
||||
vgf2p8affineqb(ymm30, ymm31, ymm4, 5);
|
||||
vgf2p8affineqb(zmm30, zmm31, zmm4, 5);
|
||||
|
||||
vgf2p8affineqb(xmm30|k1|T_z, xmm5, ptr [rax + 0x40], 5);
|
||||
vgf2p8affineqb(ymm30|k1|T_z, ymm5, ptr [rax + 0x40], 5);
|
||||
vgf2p8affineqb(zmm30|k1|T_z, zmm5, ptr [rax + 0x40], 5);
|
||||
|
||||
vgf2p8affineqb(xmm30|k1|T_z, xmm5, ptr_b [rax + 0x40], 5);
|
||||
vgf2p8affineqb(ymm30|k1|T_z, ymm5, ptr_b [rax + 0x40], 5);
|
||||
vgf2p8affineqb(zmm30|k1|T_z, zmm5, ptr_b [rax + 0x40], 5);
|
||||
/// gf2p8mulb / vgf2p8mulb
|
||||
gf2p8mulb(xmm1, xmm2);
|
||||
gf2p8mulb(xmm1, ptr [rax + 0x40]);
|
||||
|
||||
vgf2p8mulb(xmm1, xmm5, xmm2);
|
||||
vgf2p8mulb(ymm1, ymm5, ymm2);
|
||||
vgf2p8mulb(xmm1, xmm5, ptr [rax + 0x40]);
|
||||
vgf2p8mulb(ymm1, ymm5, ptr [rax + 0x40]);
|
||||
|
||||
vgf2p8mulb(xmm30, xmm31, xmm4);
|
||||
vgf2p8mulb(ymm30, ymm31, ymm4);
|
||||
vgf2p8mulb(zmm30, zmm31, zmm4);
|
||||
|
||||
vgf2p8mulb(xmm30|k1|T_z, xmm5, ptr [rax + 0x40]);
|
||||
vgf2p8mulb(ymm30|k1|T_z, ymm5, ptr [rax + 0x40]);
|
||||
vgf2p8mulb(zmm30|k1|T_z, zmm5, ptr [rax + 0x40]);
|
||||
}
|
||||
} c;
|
||||
// expected bytes: one row per instruction, in the order emitted above
const uint8_t tbl[] = {
|
||||
0x66, 0x0f, 0x3a, 0xcf, 0xca, 0x03,
|
||||
0x66, 0x0f, 0x3a, 0xcf, 0x48, 0x40, 0x03,
|
||||
0xc4, 0xe3, 0xd1, 0xcf, 0xca, 0x03,
|
||||
0xc4, 0xe3, 0xd5, 0xcf, 0xca, 0x03,
|
||||
0xc4, 0xe3, 0xd1, 0xcf, 0x48, 0x40, 0x03,
|
||||
0xc4, 0xe3, 0xd5, 0xcf, 0x48, 0x40, 0x03,
|
||||
0x62, 0x63, 0x85, 0x00, 0xcf, 0xf4, 0x05,
|
||||
0x62, 0x63, 0x85, 0x20, 0xcf, 0xf4, 0x05,
|
||||
0x62, 0x63, 0x85, 0x40, 0xcf, 0xf4, 0x05,
|
||||
0x62, 0x63, 0xd5, 0x89, 0xcf, 0x70, 0x04, 0x05,
|
||||
0x62, 0x63, 0xd5, 0xa9, 0xcf, 0x70, 0x02, 0x05,
|
||||
0x62, 0x63, 0xd5, 0xc9, 0xcf, 0x70, 0x01, 0x05,
|
||||
0x62, 0x63, 0xd5, 0x99, 0xcf, 0x70, 0x08, 0x05,
|
||||
0x62, 0x63, 0xd5, 0xb9, 0xcf, 0x70, 0x08, 0x05,
|
||||
0x62, 0x63, 0xd5, 0xd9, 0xcf, 0x70, 0x08, 0x05,
|
||||
|
||||
0x66, 0x0f, 0x3a, 0xce, 0xca, 0x03,
|
||||
0x66, 0x0f, 0x3a, 0xce, 0x48, 0x40, 0x03,
|
||||
0xc4, 0xe3, 0xd1, 0xce, 0xca, 0x03,
|
||||
0xc4, 0xe3, 0xd5, 0xce, 0xca, 0x03,
|
||||
0xc4, 0xe3, 0xd1, 0xce, 0x48, 0x40, 0x03,
|
||||
0xc4, 0xe3, 0xd5, 0xce, 0x48, 0x40, 0x03,
|
||||
0x62, 0x63, 0x85, 0x00, 0xce, 0xf4, 0x05,
|
||||
0x62, 0x63, 0x85, 0x20, 0xce, 0xf4, 0x05,
|
||||
0x62, 0x63, 0x85, 0x40, 0xce, 0xf4, 0x05,
|
||||
0x62, 0x63, 0xd5, 0x89, 0xce, 0x70, 0x04, 0x05,
|
||||
0x62, 0x63, 0xd5, 0xa9, 0xce, 0x70, 0x02, 0x05,
|
||||
0x62, 0x63, 0xd5, 0xc9, 0xce, 0x70, 0x01, 0x05,
|
||||
0x62, 0x63, 0xd5, 0x99, 0xce, 0x70, 0x08, 0x05,
|
||||
0x62, 0x63, 0xd5, 0xb9, 0xce, 0x70, 0x08, 0x05,
|
||||
0x62, 0x63, 0xd5, 0xd9, 0xce, 0x70, 0x08, 0x05,
|
||||
|
||||
0x66, 0x0f, 0x38, 0xcf, 0xca,
|
||||
0x66, 0x0f, 0x38, 0xcf, 0x48, 0x40,
|
||||
0xc4, 0xe2, 0x51, 0xcf, 0xca,
|
||||
0xc4, 0xe2, 0x55, 0xcf, 0xca,
|
||||
0xc4, 0xe2, 0x51, 0xcf, 0x48, 0x40,
|
||||
0xc4, 0xe2, 0x55, 0xcf, 0x48, 0x40,
|
||||
0x62, 0x62, 0x05, 0x00, 0xcf, 0xf4,
|
||||
0x62, 0x62, 0x05, 0x20, 0xcf, 0xf4,
|
||||
0x62, 0x62, 0x05, 0x40, 0xcf, 0xf4,
|
||||
0x62, 0x62, 0x55, 0x89, 0xcf, 0x70, 0x04,
|
||||
0x62, 0x62, 0x55, 0xa9, 0xcf, 0x70, 0x02,
|
||||
0x62, 0x62, 0x55, 0xc9, 0xcf, 0x70, 0x01,
|
||||
};
|
||||
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||
// the generated code must match tbl in both length and content
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||
}
|
||||
|
||||
// Encoding test: emits vcvtne2ps2bf16, vcvtneps2bf16 and vdpbf16ps with a
// [rax + 64] memory operand under mask k1, then compares the generated
// bytes with tbl.
CYBOZU_TEST_AUTO(bf16)
|
||||
{
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
vcvtne2ps2bf16(xmm0 | k1, xmm1, ptr [rax + 64]);
|
||||
vcvtne2ps2bf16(ymm0 | k1 | T_z, ymm0, ptr [rax + 64]);
|
||||
vcvtne2ps2bf16(zmm0 | k1, zmm1, ptr [rax + 64]);
|
||||
|
||||
// vcvtneps2bf16: the memory operand is given as xword/yword/zword or plain ptr;
// the last two calls are expected to produce identical bytes (see tbl)
vcvtneps2bf16(xmm0, xword [rax + 64]);
|
||||
vcvtneps2bf16(xmm0 | k1, yword [rax + 64]);
|
||||
vcvtneps2bf16(ymm0 | k1, zword [rax + 64]);
|
||||
vcvtneps2bf16(ymm0 | k1, ptr [rax + 64]);
|
||||
|
||||
vdpbf16ps(xmm0 | k1, xmm1, ptr [rax + 64]);
|
||||
vdpbf16ps(ymm0 | k1, ymm1, ptr [rax + 64]);
|
||||
vdpbf16ps(zmm0 | k1, zmm1, ptr [rax + 64]);
|
||||
}
|
||||
} c;
|
||||
// expected bytes: one row per instruction, in the order emitted above
const uint8_t tbl[] = {
|
||||
0x62, 0xf2, 0x77, 0x09, 0x72, 0x40, 0x04,
|
||||
0x62, 0xf2, 0x7f, 0xa9, 0x72, 0x40, 0x02,
|
||||
0x62, 0xf2, 0x77, 0x49, 0x72, 0x40, 0x01,
|
||||
|
||||
0x62, 0xf2, 0x7e, 0x08, 0x72, 0x40, 0x04,
|
||||
0x62, 0xf2, 0x7e, 0x29, 0x72, 0x40, 0x02,
|
||||
0x62, 0xf2, 0x7e, 0x49, 0x72, 0x40, 0x01,
|
||||
0x62, 0xf2, 0x7e, 0x49, 0x72, 0x40, 0x01,
|
||||
|
||||
0x62, 0xf2, 0x76, 0x09, 0x52, 0x40, 0x04,
|
||||
0x62, 0xf2, 0x76, 0x29, 0x52, 0x40, 0x02,
|
||||
0x62, 0xf2, 0x76, 0x49, 0x52, 0x40, 0x01,
|
||||
};
|
||||
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||
// the generated code must match tbl in both length and content
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||
}
|
||||
#endif
|
|
@ -0,0 +1,37 @@
|
|||
#define XBYAK_NO_OP_NAMES
|
||||
#include "xbyak/xbyak.h"
|
||||
#include <string.h>
|
||||
#include <vector>
|
||||
|
||||
// Code generator whose constructor emits a function that loads x into eax
// and returns; main() below calls it through an int (*)() pointer.
struct Code : Xbyak::CodeGenerator {
|
||||
Code(int x)
|
||||
{
|
||||
mov(eax, x);
|
||||
ret();
|
||||
}
|
||||
};
|
||||
|
||||
int main()
|
||||
try
|
||||
{
|
||||
#ifdef XBYAK_USE_MMAP_ALLOCATOR
|
||||
puts("use Allocator with mmap");
|
||||
#else
|
||||
puts("use Allocator with posix_memalign");
|
||||
#endif
|
||||
const int N = 70000;
|
||||
std::vector<Code*> v(N);
|
||||
for (int i = 0; i < N; i++) {
|
||||
v[i] = new Code(i);
|
||||
}
|
||||
long long sum = 0;
|
||||
for (int i = 0; i < N; i++) {
|
||||
sum += v[i]->getCode<int (*)()>()();
|
||||
}
|
||||
for (int i = 0; i < N; i++) {
|
||||
delete v[i];
|
||||
}
|
||||
printf("sum=%lld\n", sum);
|
||||
} catch (std::exception& e) {
|
||||
printf("ERR %s\n", e.what());
|
||||
}
|
|
@ -0,0 +1,39 @@
|
|||
#include <stdio.h>
|
||||
#define XBYAK_ENABLE_OMITTED_OPERAND
|
||||
#include "xbyak/xbyak.h"
|
||||
#define CYBOZU_TEST_DISABLE_AUTO_RUN
|
||||
#include "cybozu/test.hpp"
|
||||
|
||||
using namespace Xbyak;
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(disable : 4245)
|
||||
#pragma warning(disable : 4312)
|
||||
#endif
|
||||
// Code generator whose public members are pulled in textually from nm.cpp.
// main() calls gen() on it; gen() is presumably defined in nm.cpp -- confirm.
class Sample : public CodeGenerator {
|
||||
// declared in the private section and not defined in this file
void operator=(const Sample&);
|
||||
public:
|
||||
#include "nm.cpp"
|
||||
};
|
||||
|
||||
|
||||
// Code generator used for negative tests: gen() checks that each listed
// call is rejected with a std::exception.
class ErrorSample : public CodeGenerator {
|
||||
// declared in the private section and not defined in this file
void operator=(const ErrorSample&);
|
||||
public:
|
||||
void gen()
|
||||
{
|
||||
// NOTE(review): the first three presumably fail because the memory operand
// has no explicit size, and setz because eax is not an 8-bit operand -- confirm.
CYBOZU_TEST_EXCEPTION(mov(ptr[eax],1), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(test(ptr[eax],1), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(adc(ptr[eax],1), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(setz(eax), std::exception);
|
||||
}
|
||||
};
|
||||
|
||||
// Checks that sizeof(Xbyak::Operand) is 4, then runs Sample::gen() and
// ErrorSample::gen().
// NOTE(review): nothing here turns a failed check into a non-zero exit
// status; confirm how cybozu/test.hpp reports failures when
// CYBOZU_TEST_DISABLE_AUTO_RUN is defined.
int main()
|
||||
{
|
||||
CYBOZU_TEST_EQUAL(sizeof(Xbyak::Operand), 4u);
|
||||
Sample s;
|
||||
s.gen();
|
||||
ErrorSample es;
|
||||
es.gen();
|
||||
}
|
|
@ -0,0 +1,45 @@
|
|||
/*
|
||||
normalize prefix
|
||||
*/
|
||||
#include <string>
|
||||
#include <set>
|
||||
#include <iostream>
|
||||
#include <memory.h>
|
||||
|
||||
typedef unsigned char uint8;
|
||||
|
||||
/*
	Normalize the run of prefix bytes at the head of a hex-dump line.

	The line is scanned two characters at a time while the pair is one of
	"66", "67", "F2", "F3" (upper case only). The pairs found are
	de-duplicated and emitted in sorted order, followed by the rest of the
	line unchanged.

	@param line  hex text of one instruction, e.g. "F366900F"
	@return      normalized text, e.g. "66F3900F"
*/
std::string normalize(const std::string& line)
{
	static const char tbl[][3] = { "66", "67", "F2", "F3" };
	const size_t tblNum = sizeof(tbl) / sizeof(tbl[0]);
	typedef std::set<std::string> StringSet;
	StringSet prefixes; // std::set gives both de-duplication and ordering

	size_t pos = 0;
	for (; pos < line.size(); pos += 2) {
		bool found = false;
		for (size_t i = 0; i < tblNum; i++) {
			// compare() is bounded by line.size(), so a trailing single
			// character can never match and nothing past the end is read
			if (line.compare(pos, 2, tbl[i]) == 0) {
				found = true;
				prefixes.insert(tbl[i]);
				break;
			}
		}
		if (!found) break;
	}
	std::string ret;
	for (StringSet::const_iterator i = prefixes.begin(), e = prefixes.end(); i != e; ++i) {
		ret += *i;
	}
	// pos <= line.size() always holds here; appending by length (not as a
	// C string) keeps everything after an embedded '\0'
	ret.append(line, pos, std::string::npos);
	return ret;
}
|
||||
|
||||
int main()
|
||||
{
|
||||
std::string line;
|
||||
while (std::getline(std::cin, line)) {
|
||||
std::string normalizedLine = normalize(line);
|
||||
std::cout << normalizedLine << '\n';//std::endl;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,6 @@
|
|||
|
||||
test script on Windows
|
||||
|
||||
This test requires nasm.exe, yasm.exe, cl.exe, awk, and diff.
|
||||
|
||||
test_all ; runs all tests
|
|
@ -0,0 +1,88 @@
|
|||
#define XBYAK_NO_OP_NAMES
|
||||
#include <xbyak/xbyak.h>
|
||||
/*
|
||||
dump of vc
|
||||
|
||||
00000000003A0000 F3 0F C2 05 F1 00 00 00 00 cmpeqss xmm0,dword ptr [3A00FAh]
|
||||
00000000003A0009 F7 05 E7 00 00 00 21 00 00 00 test dword ptr [3A00FAh],21h
|
||||
00000000003A0013 0F BA 25 DF 00 00 00 03 bt dword ptr [3A00FAh],3
|
||||
00000000003A001B C4 E3 79 0D 05 D5 00 00 00 03 vblendpd xmm0,xmm0,xmmword ptr [3A00FAh],3
|
||||
00000000003A0025 C4 E3 79 0F 05 CB 00 00 00 04 vpalignr xmm0,xmm0,xmmword ptr [3A00FAh],4
|
||||
00000000003A002F C4 E3 7D 19 1D C1 00 00 00 0C vextractf128 xmmword ptr [3A00FAh],ymm3,0Ch
|
||||
00000000003A0039 C4 E3 75 46 05 B7 00 00 00 0D vperm2i128 ymm0,ymm1,ymmword ptr [3A00FAh],0Dh
|
||||
00000000003A0043 C4 E3 79 1D 15 AD 00 00 00 2C vcvtps2ph mmword ptr [3A00FAh],xmm2,2Ch
|
||||
00000000003A004D C7 05 A3 00 00 00 34 12 00 00 mov dword ptr [3A00FAh],1234h
|
||||
00000000003A0057 C1 25 9C 00 00 00 03 shl dword ptr [3A00FAh],3
|
||||
00000000003A005E D1 2D 96 00 00 00 shr dword ptr [3A00FAh],1
|
||||
00000000003A0064 48 0F A4 05 8D 00 00 00 03 shld qword ptr [3A00FAh],rax,3
|
||||
00000000003A006D 48 6B 05 85 00 00 00 15 imul rax,qword ptr [3A00FAh],15h
|
||||
00000000003A0075 C4 E3 FB F0 05 7B 00 00 00 15 rorx rax,qword ptr [3A00FAh],15h
|
||||
00000000003A007F F7 05 71 00 00 00 05 00 00 00 test dword ptr [3A00FAh],5
|
||||
00000000003A0089 66 48 0F 3A 16 05 66 00 00 00 03 pextrq qword ptr [3A00FAh],xmm0,3
|
||||
00000000003A0094 66 48 0F 3A 22 15 5B 00 00 00 05 pinsrq xmm2,qword ptr [3A00FAh],5
|
||||
00000000003A009F 66 0F 3A 15 0D 51 00 00 00 04 pextrw word ptr [3A00FAh],xmm1,4
|
||||
00000000003A00A9 81 15 47 00 00 00 45 23 01 00 adc dword ptr [3A00FAh],12345h
|
||||
00000000003A00B3 0F BA 25 3F 00 00 00 34 bt dword ptr [3A00FAh],34h
|
||||
00000000003A00BB 66 0F BA 3D 36 00 00 00 34 btc word ptr [3A00FAh],34h
|
||||
00000000003A00C4 0F BA 35 2E 00 00 00 34 btr dword ptr [3A00FAh],34h
|
||||
00000000003A00CC C1 15 27 00 00 00 04 rcl dword ptr [3A00FAh],4
|
||||
00000000003A00D3 48 0F A4 05 1E 00 00 00 04 shld qword ptr [3A00FAh],rax,4
|
||||
00000000003A00DC 0F 3A 0F 05 15 00 00 00 04 palignr mm0,mmword ptr [3A00FAh],4
|
||||
00000000003A00E5 66 0F 3A DF 1D 0B 00 00 00 04 aeskeygenassist xmm3,xmmword ptr [3A00FAh],4
|
||||
00000000003A00EF C4 E3 79 60 15 01 00 00 00 07 vpcmpestrm xmm2,xmmword ptr [3A00FAh],7
|
||||
00000000003A00F9 C3 ret
|
||||
00000000003A00FA F0 DE BC 9A 78 56 34 12
|
||||
*/
|
||||
/*
	Emits a sequence of instructions that all take a RIP-relative memory
	operand referring to `label`, then `ret`, then the 8-byte constant that
	`label` names. Compare the output with the "dump of vc" listing in the
	comment that precedes this struct.

	The order of the calls is the order of the emitted instructions; do not
	reorder them.
*/
struct Code : Xbyak::CodeGenerator {
	Code()
	{
		Xbyak::Label label;
		cmpss(xmm0, ptr[rip + label], 0);
		test(dword[rip + label], 33);
		bt(dword[rip + label ], 3);
		vblendpd(xmm0, xmm0, dword[rip + label], 3);
		vpalignr(xmm0, xmm0, qword[rip + label], 4);
		vextractf128(dword[rip + label], ymm3, 12);
		vperm2i128(ymm0, ymm1, qword[rip + label], 13);
		vcvtps2ph(ptr[rip + label], xmm2, 44);
		mov(dword[rip + label], 0x1234);
		shl(dword[rip + label], 3);
		shr(dword[rip + label], 1);
		shld(qword[rip + label], rax, 3);
		imul(rax, qword[rip + label], 21);
		rorx(rax, qword[rip + label], 21);
		test(dword[rip + label], 5);
		pextrq(ptr[rip + label], xmm0, 3);
		pinsrq(xmm2, ptr[rip + label], 5);
		pextrw(ptr[rip + label], xmm1, 4);
		adc(dword[rip + label], 0x12345);
		bt(byte[rip + label], 0x34);
		btc(word[rip + label], 0x34);
		btr(dword[rip + label], 0x34);
		rcl(dword[rip + label], 4);
		shld(qword[rip + label], rax, 4);
		palignr(mm0, ptr[rip + label], 4);
		aeskeygenassist(xmm3, ptr[rip + label], 4);
		vpcmpestrm(xmm2, ptr[rip + label], 7);
		ret();
		// data referenced by every instruction above, placed right after ret
		L(label);
		dq(0x123456789abcdef0ull);
	}
};
|
||||
|
||||
/*
	Print n bytes starting at p to stdout as two-digit lower-case hex values
	separated by spaces, 16 bytes per row, followed by a final newline.

	@param p  first byte to print
	@param n  number of bytes to print
*/
void dump(const unsigned char *p, size_t n)
{
	// index has the same type as n: no signed/unsigned comparison and no
	// overflow for buffers larger than INT_MAX
	for (size_t i = 0; i < n; i++) {
		printf("%02x ", p[i]);
		if ((i % 16) == 15) putchar('\n');
	}
	putchar('\n');
}
|
||||
|
||||
int main()
|
||||
{
|
||||
Code code;
|
||||
void (*f)() = code.getCode<void (*)()>();
|
||||
dump(code.getCode(), code.getSize());
|
||||
f();
|
||||
}
|
|
@ -0,0 +1,2 @@
|
|||
@echo off
|
||||
rem Compiler options kept in OPT: C++ exceptions (/EHsc), include paths ../xbyak and ./,
rem warning level 4, _CRT_SECURE_NO_WARNINGS defined, no banner.
rem NOTE(review): presumably consumed by the other Windows test scripts - confirm.
set OPT=/EHsc -I../xbyak -I./ /W4 -D_CRT_SECURE_NO_WARNINGS /nologo
|
|
@ -0,0 +1,416 @@
|
|||
#define XBYAK_NO_OP_NAMES
|
||||
#include <xbyak/xbyak_util.h>
|
||||
|
||||
#ifdef XBYAK32
|
||||
#error "this sample is for only 64-bit mode"
|
||||
#endif
|
||||
|
||||
using namespace Xbyak::util;
|
||||
|
||||
struct Code : public Xbyak::CodeGenerator {
|
||||
void gen1()
|
||||
{
|
||||
StackFrame sf(this, 1);
|
||||
mov(rax, sf.p[0]);
|
||||
}
|
||||
void gen2()
|
||||
{
|
||||
StackFrame sf(this, 2);
|
||||
lea(rax, ptr [sf.p[0] + sf.p[1]]);
|
||||
}
|
||||
void gen3()
|
||||
{
|
||||
StackFrame sf(this, 3);
|
||||
mov(rax, sf.p[0]);
|
||||
add(rax, sf.p[1]);
|
||||
add(rax, sf.p[2]);
|
||||
}
|
||||
void gen4()
|
||||
{
|
||||
StackFrame sf(this, 4);
|
||||
mov(rax, sf.p[0]);
|
||||
add(rax, sf.p[1]);
|
||||
add(rax, sf.p[2]);
|
||||
add(rax, sf.p[3]);
|
||||
}
|
||||
|
||||
void gen5()
|
||||
{
|
||||
StackFrame sf(this, 4, UseRCX);
|
||||
xor_(rcx, rcx);
|
||||
mov(rax, sf.p[0]);
|
||||
add(rax, sf.p[1]);
|
||||
add(rax, sf.p[2]);
|
||||
add(rax, sf.p[3]);
|
||||
}
|
||||
|
||||
void gen6()
|
||||
{
|
||||
StackFrame sf(this, 4, UseRCX | UseRDX);
|
||||
xor_(rcx, rcx);
|
||||
xor_(rdx, rdx);
|
||||
mov(rax, sf.p[0]);
|
||||
add(rax, sf.p[1]);
|
||||
add(rax, sf.p[2]);
|
||||
add(rax, sf.p[3]);
|
||||
}
|
||||
|
||||
void gen7()
|
||||
{
|
||||
StackFrame sf(this, 3, UseRCX | UseRDX);
|
||||
xor_(rcx, rcx);
|
||||
xor_(rdx, rdx);
|
||||
mov(rax, sf.p[0]);
|
||||
add(rax, sf.p[1]);
|
||||
add(rax, sf.p[2]);
|
||||
}
|
||||
|
||||
void gen8()
|
||||
{
|
||||
StackFrame sf(this, 3, 3 | UseRCX | UseRDX);
|
||||
xor_(rcx, rcx);
|
||||
xor_(rdx, rdx);
|
||||
mov(sf.t[0], 1);
|
||||
mov(sf.t[1], 2);
|
||||
mov(sf.t[2], 3);
|
||||
mov(rax, sf.p[0]);
|
||||
add(rax, sf.p[1]);
|
||||
add(rax, sf.p[2]);
|
||||
}
|
||||
|
||||
void gen9()
|
||||
{
|
||||
StackFrame sf(this, 3, 3 | UseRCX | UseRDX, 32);
|
||||
xor_(rcx, rcx);
|
||||
xor_(rdx, rdx);
|
||||
mov(sf.t[0], 1);
|
||||
mov(sf.t[1], 2);
|
||||
mov(sf.t[2], 3);
|
||||
mov(rax, sf.p[0]);
|
||||
add(rax, sf.p[1]);
|
||||
add(rax, sf.p[2]);
|
||||
mov(ptr [rsp + 8 * 0], rax);
|
||||
mov(ptr [rsp + 8 * 1], rax);
|
||||
mov(ptr [rsp + 8 * 2], rax);
|
||||
mov(ptr [rsp + 8 * 3], rax);
|
||||
}
|
||||
|
||||
void gen10()
|
||||
{
|
||||
StackFrame sf(this, 4, 8 | UseRCX | UseRDX, 32);
|
||||
xor_(rcx, rcx);
|
||||
xor_(rdx, rdx);
|
||||
for (int i = 0; i < 8; i++) {
|
||||
mov(sf.t[i], i);
|
||||
}
|
||||
mov(rax, sf.p[0]);
|
||||
add(rax, sf.p[1]);
|
||||
add(rax, sf.p[2]);
|
||||
add(rax, sf.p[3]);
|
||||
mov(ptr [rsp + 8 * 0], rax);
|
||||
mov(ptr [rsp + 8 * 1], rax);
|
||||
mov(ptr [rsp + 8 * 2], rax);
|
||||
mov(ptr [rsp + 8 * 3], rax);
|
||||
}
|
||||
|
||||
void gen11()
|
||||
{
|
||||
StackFrame sf(this, 0, UseRCX);
|
||||
xor_(rcx, rcx);
|
||||
mov(rax, 3);
|
||||
}
|
||||
|
||||
void gen12()
|
||||
{
|
||||
StackFrame sf(this, 4, UseRDX);
|
||||
xor_(rdx, rdx);
|
||||
mov(rax, sf.p[0]);
|
||||
add(rax, sf.p[1]);
|
||||
add(rax, sf.p[2]);
|
||||
add(rax, sf.p[3]);
|
||||
}
|
||||
|
||||
/*
|
||||
int64_t f(const int64_t a[13]) { return sum-of-a[]; }
|
||||
*/
|
||||
void gen13()
|
||||
{
|
||||
StackFrame sf(this, 1, 13);
|
||||
for (int i = 0; i < 13; i++) {
|
||||
mov(sf.t[i], ptr[sf.p[0] + i * 8]);
|
||||
}
|
||||
mov(rax, sf.t[0]);
|
||||
for (int i = 1; i < 13; i++) {
|
||||
add(rax, sf.t[i]);
|
||||
}
|
||||
}
|
||||
/*
|
||||
same as gen13
|
||||
*/
|
||||
void gen14()
|
||||
{
|
||||
StackFrame sf(this, 1, 11 | UseRCX | UseRDX);
|
||||
Pack t = sf.t;
|
||||
t.append(rcx);
|
||||
t.append(rdx);
|
||||
for (int i = 0; i < 13; i++) {
|
||||
mov(t[i], ptr[sf.p[0] + i * 8]);
|
||||
}
|
||||
mov(rax, t[0]);
|
||||
for (int i = 1; i < 13; i++) {
|
||||
add(rax, t[i]);
|
||||
}
|
||||
}
|
||||
/*
|
||||
return (1 << 15) - 1;
|
||||
*/
|
||||
void gen15()
|
||||
{
|
||||
StackFrame sf(this, 0, 14, 8);
|
||||
Pack t = sf.t;
|
||||
t.append(rax);
|
||||
for (int i = 0; i < 15; i++) {
|
||||
mov(t[i], 1 << i);
|
||||
}
|
||||
mov(qword[rsp], 0);
|
||||
for (int i = 0; i < 15; i++) {
|
||||
add(ptr[rsp], t[i]);
|
||||
}
|
||||
mov(rax, ptr[rsp]);
|
||||
}
|
||||
};
|
||||
|
||||
struct Code2 : Xbyak::CodeGenerator {
|
||||
Code2()
|
||||
: Xbyak::CodeGenerator(4096 * 32)
|
||||
{
|
||||
}
|
||||
void gen(int pNum, int tNum, int stackSizeByte)
|
||||
{
|
||||
StackFrame sf(this, pNum, tNum, stackSizeByte);
|
||||
if (tNum & UseRCX) xor_(rcx, rcx);
|
||||
if (tNum & UseRDX) xor_(rdx, rdx);
|
||||
for (int i = 0, n = tNum & ~(UseRCX | UseRDX); i < n; i++) {
|
||||
mov(sf.t[i], 5);
|
||||
}
|
||||
for (int i = 0; i < stackSizeByte; i++) {
|
||||
mov(byte [rsp + i], 0);
|
||||
}
|
||||
mov(rax, 1);
|
||||
for (int i = 0; i < pNum; i++) {
|
||||
add(rax, sf.p[i]);
|
||||
}
|
||||
}
|
||||
void gen2(int pNum, int tNum, int stackSizeByte)
|
||||
{
|
||||
StackFrame sf(this, pNum, tNum, stackSizeByte);
|
||||
mov(rax, rsp);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// number of mismatches reported by check() so far
static int errNum = 0;

// Compare x with y; on mismatch print both values and count one error.
void check(int x, int y)
{
	if (x == y) return;
	printf("err x=%d, y=%d\n", x, y);
	errNum++;
}
|
||||
|
||||
/*
	Call the generated function at f with pNum int arguments
	(10, 100, 1000, 10000 in that order) and check() the result against
	1 + their sum. Any pNum outside 0..4 prints an error and exits with 1.
*/
void verify(const Xbyak::uint8 *f, int pNum)
{
	typedef int (*F0)();
	typedef int (*F1)(int);
	typedef int (*F2)(int, int);
	typedef int (*F3)(int, int, int);
	typedef int (*F4)(int, int, int, int);

	if (pNum == 0) {
		check(1, reinterpret_cast<F0>(f)());
	} else if (pNum == 1) {
		check(11, reinterpret_cast<F1>(f)(10));
	} else if (pNum == 2) {
		check(111, reinterpret_cast<F2>(f)(10, 100));
	} else if (pNum == 3) {
		check(1111, reinterpret_cast<F3>(f)(10, 100, 1000));
	} else if (pNum == 4) {
		check(11111, reinterpret_cast<F4>(f)(10, 100, 1000, 10000));
	} else {
		printf("ERR pNum=%d\n", pNum);
		exit(1);
	}
}
|
||||
|
||||
void testAll()
|
||||
{
|
||||
Code2 code;
|
||||
for (int stackSize = 0; stackSize < 32; stackSize += 7) {
|
||||
for (int pNum = 0; pNum < 4; pNum++) {
|
||||
for (int mode = 0; mode < 4; mode++) {
|
||||
int maxNum = 0;
|
||||
int opt = 0;
|
||||
if (mode == 0) {
|
||||
maxNum = 10;
|
||||
} else if (mode == 1) {
|
||||
maxNum = 9;
|
||||
opt = UseRCX;
|
||||
} else if (mode == 2) {
|
||||
maxNum = 9;
|
||||
opt = UseRDX;
|
||||
} else {
|
||||
maxNum = 8;
|
||||
opt = UseRCX | UseRDX;
|
||||
}
|
||||
for (int tNum = 0; tNum < maxNum; tNum++) {
|
||||
// printf("pNum=%d, tNum=%d, stackSize=%d\n", pNum, tNum | opt, stackSize);
|
||||
const Xbyak::uint8 *f = code.getCurr();
|
||||
code.gen(pNum, tNum | opt, stackSize);
|
||||
verify(f, pNum);
|
||||
/*
|
||||
check rsp is 16-byte aligned if stackSize > 0
|
||||
*/
|
||||
if (stackSize > 0) {
|
||||
Code2 c2;
|
||||
c2.gen2(pNum, tNum | opt, stackSize);
|
||||
uint64_t addr = c2.getCode<uint64_t (*)()>()();
|
||||
check(addr % 16, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void testPartial()
|
||||
{
|
||||
Code code;
|
||||
int (*f1)(int) = code.getCurr<int (*)(int)>();
|
||||
code.gen1();
|
||||
check(5, f1(5));
|
||||
|
||||
int (*f2)(int, int) = code.getCurr<int (*)(int, int)>();
|
||||
code.gen2();
|
||||
check(9, f2(3, 6));
|
||||
|
||||
int (*f3)(int, int, int) = code.getCurr<int (*)(int, int, int)>();
|
||||
code.gen3();
|
||||
check(14, f3(1, 4, 9));
|
||||
|
||||
int (*f4)(int, int, int, int) = code.getCurr<int (*)(int, int, int, int)>();
|
||||
code.gen4();
|
||||
check(30, f4(1, 4, 9, 16));
|
||||
|
||||
int (*f5)(int, int, int, int) = code.getCurr<int (*)(int, int, int, int)>();
|
||||
code.gen5();
|
||||
check(23, f5(2, 5, 7, 9));
|
||||
|
||||
int (*f6)(int, int, int, int) = code.getCurr<int (*)(int, int, int, int)>();
|
||||
code.gen6();
|
||||
check(18, f6(3, 4, 5, 6));
|
||||
|
||||
int (*f7)(int, int, int) = code.getCurr<int (*)(int, int, int)>();
|
||||
code.gen7();
|
||||
check(12, f7(3, 4, 5));
|
||||
|
||||
int (*f8)(int, int, int) = code.getCurr<int (*)(int, int, int)>();
|
||||
code.gen8();
|
||||
check(23, f8(5, 8, 10));
|
||||
|
||||
int (*f9)(int, int, int) = code.getCurr<int (*)(int, int, int)>();
|
||||
code.gen9();
|
||||
check(60, f9(10, 20, 30));
|
||||
|
||||
int (*f10)(int, int, int, int) = code.getCurr<int (*)(int, int, int, int)>();
|
||||
code.gen10();
|
||||
check(100, f10(10, 20, 30, 40));
|
||||
|
||||
int (*f11)() = code.getCurr<int (*)()>();
|
||||
code.gen11();
|
||||
check(3, f11());
|
||||
|
||||
int (*f12)(int, int, int, int) = code.getCurr<int (*)(int, int, int, int)>();
|
||||
code.gen12();
|
||||
check(24, f12(3, 5, 7, 9));
|
||||
|
||||
{
|
||||
int64_t tbl[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 };
|
||||
int64_t (*f13)(const int64_t*) = code.getCurr<int64_t (*)(const int64_t*)>();
|
||||
code.gen13();
|
||||
check(91, f13(tbl));
|
||||
|
||||
int64_t (*f14)(const int64_t*) = code.getCurr<int64_t (*)(const int64_t*)>();
|
||||
code.gen14();
|
||||
check(91, f14(tbl));
|
||||
}
|
||||
int (*f15)() = code.getCurr<int (*)()>();
|
||||
code.gen15();
|
||||
check((1 << 15) - 1, f15());
|
||||
}
|
||||
|
||||
/*
	print the name of every register in p on one line, separated by a space
*/
void put(const Xbyak::util::Pack& p)
{
	const size_t regNum = p.size();
	size_t pos = 0;
	while (pos < regNum) {
		printf("%s ", p[pos].toString());
		pos++;
	}
	printf("\n");
}
|
||||
|
||||
/*
	check that the index of the pos-th register of p equals tbl[pos]
	for every pos in [0, tblNum)
	@param p [in] pack under test
	@param tbl [in] expected register indices
	@param tblNum [in] number of entries of tbl to compare
*/
void verifyPack(const Xbyak::util::Pack& p, const int *tbl, size_t tblNum)
{
	for (size_t pos = 0; pos != tblNum; ++pos) {
		const int expectedIdx = tbl[pos];
		check(p[pos].getIdx(), expectedIdx);
	}
}
|
||||
|
||||
void testPack()
|
||||
{
|
||||
const int N = 10;
|
||||
Xbyak::Reg64 regTbl[N];
|
||||
for (int i = 0; i < N; i++) {
|
||||
regTbl[i] = Xbyak::Reg64(i);
|
||||
}
|
||||
Xbyak::util::Pack p(regTbl, N);
|
||||
const struct {
|
||||
int pos;
|
||||
int num;
|
||||
int tbl[10];
|
||||
} tbl[] = {
|
||||
{ 0, 10, { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 } },
|
||||
{ 1, 9, { 1, 2, 3, 4, 5, 6, 7, 8, 9 } },
|
||||
{ 2, 8, { 2, 3, 4, 5, 6, 7, 8, 9 } },
|
||||
{ 3, 7, { 3, 4, 5, 6, 7, 8, 9 } },
|
||||
{ 4, 6, { 4, 5, 6, 7, 8, 9 } },
|
||||
{ 5, 5, { 5, 6, 7, 8, 9 } },
|
||||
{ 6, 4, { 6, 7, 8, 9 } },
|
||||
{ 7, 3, { 7, 8, 9 } },
|
||||
{ 8, 2, { 8, 9 } },
|
||||
{ 9, 1, { 9 } },
|
||||
{ 3, 5, { 3, 4, 5, 6, 7 } },
|
||||
};
|
||||
for (size_t i = 0; i < sizeof(tbl) / sizeof(*tbl); i++) {
|
||||
const int pos = tbl[i].pos;
|
||||
const int num = tbl[i].num;
|
||||
verifyPack(p.sub(pos, num), tbl[i].tbl, num);
|
||||
if (pos + num == N) {
|
||||
verifyPack(p.sub(pos), tbl[i].tbl, num);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int main()
|
||||
try
|
||||
{
|
||||
testAll();
|
||||
|
||||
testPartial();
|
||||
testPack();
|
||||
printf("errNum=%d\n", errNum);
|
||||
} catch (std::exception& e) {
|
||||
printf("err %s\n", e.what());
|
||||
return 1;
|
||||
} catch (...) {
|
||||
puts("ERR");
|
||||
return 1;
|
||||
}
|
||||
|
Binary file not shown.
|
@ -0,0 +1,37 @@
|
|||
@echo off
|
||||
set FILTER=grep -v warning
|
||||
if /i "%1"=="64" (
|
||||
set OPT2=-DXBYAK64
|
||||
set OPT3=win64
|
||||
) else (
|
||||
set OPT2=-DXBYAK32
|
||||
set OPT3=win32
|
||||
)
|
||||
|
||||
call set_opt
|
||||
bmake -f Makefile.win all
|
||||
|
||||
if /i "%1"=="64" (
|
||||
call :sub 1
|
||||
call :sub 2
|
||||
) else (
|
||||
call :sub 1
|
||||
)
|
||||
goto end
|
||||
|
||||
:sub
|
||||
echo cl address.cpp %OPT% %OPT2%
|
||||
cl address.cpp %OPT% %OPT2%
|
||||
address %1% > a.asm
|
||||
echo nasm -f %OPT3% -l a.lst a.asm
|
||||
nasm -f %OPT3% -l a.lst a.asm
|
||||
awk "{if (index($3, ""-"")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = """" }} " < a.lst |%FILTER% > ok.lst
|
||||
echo address %1% jit > nm.cpp
|
||||
address %1% jit > nm.cpp
|
||||
echo cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2%
|
||||
cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2%
|
||||
nm_frame > x.lst
|
||||
diff -w x.lst ok.lst
|
||||
wc x.lst
|
||||
|
||||
:end
|
|
@ -0,0 +1,41 @@
|
|||
#!/bin/sh
# Compare the byte sequences produced by nasm with those produced by the
# code generated from address.cpp.
# usage: test_address.sh [64]
#   64        : 64-bit test (runs sub 1 and sub 2)
#   otherwise : 32-bit test (runs sub 1)

FILTER="grep -v warning"
# both modes use nasm as the reference assembler
EXE=nasm

# sub <n>
#   build ./address, let it write the assembler source (a.asm) and the
#   test source (nm.cpp) for pattern <n>, assemble a.asm, run nm_frame and
#   diff the two listings; prints "ok" when they are identical.
#   Reads OPT2 (extra compiler flags) and OPT3 (object format) set by the caller.
sub()
{
	CFLAGS="-Wall -fno-operator-names -I../ $OPT2"
	echo "compile address.cpp"
	g++ $CFLAGS address.cpp -o address

	./address $1 > a.asm
	echo "asm"
	$EXE -f$OPT3 a.asm -l a.lst
	# take column 3 of the listing; a field containing "-" is kept without
	# its last character and joined with the field of the following line
	awk '{if (index($3, "-")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = "" }} ' < a.lst | $FILTER > ok.lst

	echo "xbyak"
	./address $1 jit > nm.cpp
	echo "compile nm_frame.cpp"
	g++ $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame
	./nm_frame > x.lst
	diff ok.lst x.lst && echo "ok"
}

case "$1" in
64)
	echo "nasm(64bit)"
	OPT2=-DXBYAK64
	OPT3=win64
	sub 1
	sub 2
	;;
*)
	echo "nasm(32bit)"
	OPT2=-DXBYAK32
	OPT3=win32
	sub 1
	;;
esac
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
@echo off
|
||||
call test_nm_all
|
||||
echo *** test addressing ***
|
||||
call test_address
|
||||
call test_address 64
|
||||
echo *** test jmp address ***
|
||||
call test_jmp
|
||||
echo *** all test end ***
|
|
@ -0,0 +1,42 @@
|
|||
@echo off
|
||||
set FILTER=cat
|
||||
set Y=0
|
||||
if /i "%1"=="Y" (
|
||||
set Y=1
|
||||
set EXE=yasm.exe
|
||||
set OPT2=-DUSE_YASM -DXBYAK32
|
||||
set OPT3=win32
|
||||
) else if /i "%1"=="64" (
|
||||
set EXE=nasm.exe
|
||||
set OPT2=-DXBYAK64
|
||||
set OPT3=win64
|
||||
set FILTER=normalize_prefix
|
||||
) else if /i "%1"=="Y64" (
|
||||
set Y=1
|
||||
set EXE=yasm.exe
|
||||
set OPT2=-DUSE_YASM -DXBYAK64
|
||||
set OPT3=win64
|
||||
set FILTER=normalize_prefix
|
||||
) else (
|
||||
set EXE=nasm.exe
|
||||
set OPT2=-DXBYAK32
|
||||
set OPT3=win32
|
||||
)
|
||||
call set_opt
|
||||
bmake -f Makefile.win all
|
||||
echo cl -I../ make_nm.cpp %OPT% %OPT2% /EHs /DUSE_AVX
|
||||
cl -I../ make_nm.cpp %OPT% %OPT2% /EHs /DUSE_AVX
|
||||
make_nm > a.asm
|
||||
%EXE% -f %OPT3% -l a.lst a.asm
|
||||
rem connect "?????-" and "??"
|
||||
if /i "%Y%"=="1" (
|
||||
awk "NR > 1 {if (index($3, ""-"")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = """" }} " < a.lst |%FILTER% > ok.lst
|
||||
) else (
|
||||
awk "{if (index($3, ""-"")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = """" }} " < a.lst |%FILTER% > ok.lst
|
||||
)
|
||||
make_nm jit > nm.cpp
|
||||
echo cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2%
|
||||
cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2%
|
||||
nm_frame |%FILTER% > x.lst
|
||||
diff -w x.lst ok.lst
|
||||
wc x.lst
|
|
@ -0,0 +1,43 @@
|
|||
#!/bin/tcsh
|
||||
|
||||
set FILTER="grep -v warning"
|
||||
|
||||
if ($1 == "Y") then
|
||||
echo "yasm(32bit)"
|
||||
set EXE=yasm
|
||||
set OPT2="-DUSE_YASM -DXBYAK32"
|
||||
set OPT3=win32
|
||||
else if ($1 == "64") then
|
||||
echo "nasm(64bit)"
|
||||
set EXE=nasm
|
||||
set OPT2=-DXBYAK64
|
||||
set OPT3=win64
|
||||
set FILTER=./normalize_prefix
|
||||
else if ($1 == "Y64") then
|
||||
echo "yasm(64bit)"
|
||||
set EXE=yasm
|
||||
set OPT2="-DUSE_YASM -DXBYAK64"
|
||||
set OPT3=win64
|
||||
set FILTER=./normalize_prefix
|
||||
else
|
||||
echo "nasm(32bit)"
|
||||
set EXE=nasm
|
||||
set OPT2=-DXBYAK32
|
||||
set OPT3=win32
|
||||
endif
|
||||
|
||||
set CFLAGS="-Wall -fno-operator-names -I../ $OPT2 -DUSE_AVX"
|
||||
echo "compile make_nm.cpp"
|
||||
g++ $CFLAGS make_nm.cpp -o make_nm
|
||||
|
||||
./make_nm > a.asm
|
||||
echo "asm"
|
||||
$EXE -f$OPT3 a.asm -l a.lst
|
||||
awk '{if (index($3, "-")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = "" }} ' < a.lst | $FILTER | grep -v "1+1" > ok.lst
|
||||
|
||||
echo "xbyak"
|
||||
./make_nm jit > nm.cpp
|
||||
echo "compile nm_frame.cpp"
|
||||
g++ $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame
|
||||
./nm_frame | $FILTER > x.lst
|
||||
diff -B ok.lst x.lst && echo "ok"
|
|
@ -0,0 +1,31 @@
|
|||
@echo off
|
||||
set FILTER=cat
|
||||
set Y=0
|
||||
if /i "%1"=="min" (
|
||||
set EXE=nasm.exe
|
||||
set OPT2=-DXBYAK64 -DMIN_TEST
|
||||
set OPT3=win64
|
||||
set FILTER=normalize_prefix
|
||||
) else if /i "%1"=="64" (
|
||||
set EXE=nasm.exe
|
||||
set OPT2=-DXBYAK64
|
||||
set OPT3=win64
|
||||
set FILTER=normalize_prefix
|
||||
) else (
|
||||
set EXE=nasm.exe
|
||||
set OPT2=-DXBYAK32
|
||||
set OPT3=win32
|
||||
)
|
||||
call set_opt
|
||||
bmake -f Makefile.win all
|
||||
echo cl -I../ make_512.cpp %OPT% %OPT2% /EHs /DUSE_AVX512
|
||||
cl -I../ make_512.cpp %OPT% %OPT2% /EHs /DUSE_AVX512
|
||||
make_512 > a.asm
|
||||
%EXE% -f %OPT3% -l a.lst a.asm
|
||||
rem connect "?????-" and "??"
|
||||
awk "{if (index($3, ""-"")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = """" }} " < a.lst |%FILTER% > ok.lst
|
||||
make_512 jit > nm.cpp
|
||||
cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2% /DXBYAK_AVX512
|
||||
nm_frame |%FILTER% > x.lst
|
||||
diff -w x.lst ok.lst
|
||||
wc x.lst
|
|
@ -0,0 +1,32 @@
|
|||
#!/bin/tcsh
|
||||
|
||||
set FILTER="grep -v warning"
|
||||
|
||||
if ($1 == "64") then
|
||||
echo "nasm(64bit)"
|
||||
set EXE=nasm
|
||||
set OPT2=-DXBYAK64
|
||||
set OPT3=win64
|
||||
set FILTER=./normalize_prefix
|
||||
else
|
||||
echo "nasm(32bit)"
|
||||
set EXE=nasm
|
||||
set OPT2=-DXBYAK32
|
||||
set OPT3=win32
|
||||
endif
|
||||
|
||||
set CFLAGS="-Wall -fno-operator-names -I../ $OPT2 -DUSE_AVX512"
|
||||
echo "compile make_512.cpp"
|
||||
g++ $CFLAGS make_512.cpp -o make_512
|
||||
|
||||
./make_512 > a.asm
|
||||
echo "asm"
|
||||
$EXE -f$OPT3 a.asm -l a.lst
|
||||
awk '{if (index($3, "-")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = "" }} ' < a.lst | $FILTER > ok.lst
|
||||
|
||||
echo "xbyak"
|
||||
./make_512 jit > nm.cpp
|
||||
echo "compile nm_frame.cpp"
|
||||
g++ $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame -DXBYAK_AVX512
|
||||
./nm_frame | $FILTER > x.lst
|
||||
diff -B ok.lst x.lst && echo "ok"
|
|
@ -0,0 +1,13 @@
|
|||
@echo off
|
||||
echo ** nasm-avx(32bit) ***
|
||||
call test_avx
|
||||
echo ** nasm-avx(64bit) ***
|
||||
call test_avx 64
|
||||
echo ** yasm-avx(32bit) ***
|
||||
call test_avx Y
|
||||
echo ** yasm-avx(64bit) ***
|
||||
call test_avx Y64
|
||||
echo ** nasm-avx512(32bit) ***
|
||||
call test_avx512
|
||||
echo ** nasm-avx512(64bit) ***
|
||||
call test_avx512 64
|
|
@ -0,0 +1,4 @@
|
|||
call set_opt
|
||||
bmake -f Makefile.win all
|
||||
cl -I../ -I./ -DXBYAK_TEST jmp.cpp %OPT% /Od /Zi
|
||||
jmp
|
|
@ -0,0 +1,4 @@
|
|||
call set_opt
|
||||
bmake -f Makefile.win all
|
||||
cl -I../ -I./ -DXBYAK_TEST misc.cpp %OPT% /Od /Zi
|
||||
misc
|
|
@ -0,0 +1,78 @@
|
|||
#if defined(_MSC_VER) && (_MSC_VER <= 1200)
|
||||
#pragma warning(disable:4514)
|
||||
#pragma warning(disable:4786)
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "../../include.mie/mie_thread.h"
|
||||
#include "xbyak/xbyak.h"
|
||||
|
||||
class WriteMMX : public Xbyak::CodeGenerator {
|
||||
public:
|
||||
WriteMMX()
|
||||
{
|
||||
#ifdef XBYAK32
|
||||
mov(ecx, ptr [esp + 4]);
|
||||
#endif
|
||||
movd(mm0, ecx);
|
||||
ret();
|
||||
}
|
||||
void (*set() const)(int x) { return (void (*)(int x))getCode(); }
|
||||
};
|
||||
|
||||
class ReadMMX : public Xbyak::CodeGenerator {
|
||||
public:
|
||||
ReadMMX()
|
||||
{
|
||||
movd(eax, mm0);
|
||||
ret();
|
||||
}
|
||||
int (*get() const)() { return (int (*)())getCode(); }
|
||||
};
|
||||
|
||||
/*
	thread that writes n to mm0 once and then reads mm0 back forever,
	reporting every read whose value differs from n
*/
class Test : public MIE::ThreadBase<Test> {
	int n_; // value stored in mm0 by this thread
public:
	Test(int n) : n_(n) { }
	void threadEntry()
	{
		printf("n=%d\n", n_);
		WriteMMX w;
		w.set()(n_);
		ReadMMX r;
		int (*const readMM0)() = r.get();
		// never returns; the loop has no exit condition
		while (true) {
			const int b = readMM0();
			printf("b=%d\n", b);
			if (b != n_) printf("mm0 has changed!\n");
			MIE::MIE_Sleep(1000);
		}
	}
	void stopThread() { }
};
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
#ifdef XBYAK32
|
||||
puts("32bit");
|
||||
#else
|
||||
puts("64bit");
|
||||
#endif
|
||||
try {
|
||||
int n = atoi(argc == 1 ? "1223" : argv[1]);
|
||||
Test test0(n), test1(n + 1);
|
||||
test0.beginThread();
|
||||
test1.beginThread();
|
||||
|
||||
test0.joinThread();
|
||||
test1.joinThread();
|
||||
} catch (std::exception& e) {
|
||||
printf("ERR:%s\n", e.what());
|
||||
} catch (...) {
|
||||
printf("unknown error\n");
|
||||
}
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
@echo off
|
||||
set FILTER=cat
|
||||
set Y=0
|
||||
if /i "%1"=="Y" (
|
||||
set Y=1
|
||||
set EXE=yasm.exe
|
||||
set OPT2=-DUSE_YASM -DXBYAK32
|
||||
set OPT3=win32
|
||||
) else if /i "%1"=="64" (
|
||||
set EXE=nasm.exe
|
||||
set OPT2=-DXBYAK64
|
||||
set OPT3=win64
|
||||
set FILTER=normalize_prefix
|
||||
) else if /i "%1"=="Y64" (
|
||||
set Y=1
|
||||
set EXE=yasm.exe
|
||||
set OPT2=-DUSE_YASM -DXBYAK64
|
||||
set OPT3=win64
|
||||
set FILTER=normalize_prefix
|
||||
) else (
|
||||
set EXE=nasm.exe
|
||||
set OPT2=-DXBYAK32
|
||||
set OPT3=win32
|
||||
)
|
||||
call set_opt
|
||||
bmake -f Makefile.win all
|
||||
echo cl -I../ make_nm.cpp %OPT% %OPT2% /EHs
|
||||
cl -I../ make_nm.cpp %OPT% %OPT2% /EHs
|
||||
make_nm > a.asm
|
||||
rm -rf a.lst
|
||||
echo %EXE% -f %OPT3% -l a.lst a.asm
|
||||
%EXE% -f %OPT3% -l a.lst a.asm
|
||||
rem connect "?????-" and "??"
|
||||
if /i "%Y%"=="1" (
|
||||
awk "NR > 1 {if (index($3, ""-"")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = """" }} " < a.lst |%FILTER% > ok.lst
|
||||
) else (
|
||||
awk "{if (index($3, ""-"")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = """" }} " < a.lst |%FILTER% > ok.lst
|
||||
)
|
||||
make_nm jit > nm.cpp
|
||||
cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2%
|
||||
nm_frame |%FILTER% > x.lst
|
||||
diff -wb x.lst ok.lst && echo "ok"
|
|
@ -0,0 +1,49 @@
|
|||
#!/bin/tcsh
|
||||
|
||||
set FILTER=cat
|
||||
|
||||
if ($1 == "Y") then
|
||||
echo "yasm(32bit)"
|
||||
set EXE=yasm
|
||||
set OPT2="-DUSE_YASM -DXBYAK32"
|
||||
set OPT3=win32
|
||||
else if ($1 == "64") then
|
||||
echo "nasm(64bit)"
|
||||
set EXE=nasm
|
||||
set OPT2=-DXBYAK64
|
||||
set OPT3=win64
|
||||
set FILTER=./normalize_prefix
|
||||
else if ($1 == "Y64") then
|
||||
echo "yasm(64bit)"
|
||||
set EXE=yasm
|
||||
set OPT2="-DUSE_YASM -DXBYAK64"
|
||||
set OPT3=win64
|
||||
set FILTER=./normalize_prefix
|
||||
else if ($1 == "avx512") then
|
||||
echo "nasm(64bit) + avx512"
|
||||
set EXE=nasm
|
||||
set OPT2="-DXBYAK64 -DUSE_AVX512"
|
||||
set OPT3=win64
|
||||
set FILTER=./normalize_prefix
|
||||
else
|
||||
echo "nasm(32bit)"
|
||||
set EXE=nasm
|
||||
set OPT2=-DXBYAK32
|
||||
set OPT3=win32
|
||||
endif
|
||||
|
||||
set CFLAGS="-Wall -fno-operator-names -I../ $OPT2"
|
||||
echo "compile make_nm.cpp"
|
||||
g++ $CFLAGS make_nm.cpp -o make_nm
|
||||
|
||||
./make_nm > a.asm
|
||||
echo "asm"
|
||||
$EXE -f$OPT3 a.asm -l a.lst
|
||||
awk '{if (index($3, "-")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = "" }} ' < a.lst | $FILTER | grep -v "1+1" > ok.lst
|
||||
|
||||
echo "xbyak"
|
||||
./make_nm jit > nm.cpp
|
||||
echo "compile nm_frame.cpp"
|
||||
g++ $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame
|
||||
./nm_frame | $FILTER > x.lst
|
||||
diff -B ok.lst x.lst && echo "ok"
|
|
@ -0,0 +1,11 @@
|
|||
@echo off
|
||||
echo *** nasm(32bit) ***
|
||||
call test_nm
|
||||
echo *** yasm(32bit) ***
|
||||
call test_nm Y
|
||||
echo *** nasm(64bit) ***
|
||||
call test_nm 64
|
||||
echo *** yasm(64bit) ***
|
||||
call test_nm Y64
|
||||
|
||||
call test_avx_all
|
|
@ -0,0 +1,90 @@
|
|||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio 15
|
||||
VisualStudioVersion = 15.0.28010.2016
|
||||
MinimumVisualStudioVersion = 10.0.40219.1
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "bf", "sample\bf.vcxproj", "{654BD79B-59D3-4B10-BBAA-158BAB272828}"
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "calc", "sample\calc.vcxproj", "{5FDDFAA6-B947-491D-A17E-BBD863846579}"
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "quantize", "sample\quantize.vcxproj", "{D06753BF-E1F3-4578-9B18-08673327F77C}"
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test0", "sample\test0.vcxproj", "{1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E}"
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "toyvm", "sample\toyvm.vcxproj", "{2E41C7AF-39FF-454C-B081-37445378DCB3}"
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test_util", "sample\test_util.vcxproj", "{CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD}"
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "calc2", "sample\calc2.vcxproj", "{8EC11C7F-1B5C-4787-8940-B9B3AAF1D204}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Win32 = Debug|Win32
|
||||
Debug|x64 = Debug|x64
|
||||
Release|Win32 = Release|Win32
|
||||
Release|x64 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{654BD79B-59D3-4B10-BBAA-158BAB272828}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{654BD79B-59D3-4B10-BBAA-158BAB272828}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{654BD79B-59D3-4B10-BBAA-158BAB272828}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{654BD79B-59D3-4B10-BBAA-158BAB272828}.Debug|x64.Build.0 = Debug|x64
|
||||
{654BD79B-59D3-4B10-BBAA-158BAB272828}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{654BD79B-59D3-4B10-BBAA-158BAB272828}.Release|Win32.Build.0 = Release|Win32
|
||||
{654BD79B-59D3-4B10-BBAA-158BAB272828}.Release|x64.ActiveCfg = Release|x64
|
||||
{654BD79B-59D3-4B10-BBAA-158BAB272828}.Release|x64.Build.0 = Release|x64
|
||||
{5FDDFAA6-B947-491D-A17E-BBD863846579}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{5FDDFAA6-B947-491D-A17E-BBD863846579}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{5FDDFAA6-B947-491D-A17E-BBD863846579}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{5FDDFAA6-B947-491D-A17E-BBD863846579}.Debug|x64.Build.0 = Debug|x64
|
||||
{5FDDFAA6-B947-491D-A17E-BBD863846579}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{5FDDFAA6-B947-491D-A17E-BBD863846579}.Release|Win32.Build.0 = Release|Win32
|
||||
{5FDDFAA6-B947-491D-A17E-BBD863846579}.Release|x64.ActiveCfg = Release|x64
|
||||
{5FDDFAA6-B947-491D-A17E-BBD863846579}.Release|x64.Build.0 = Release|x64
|
||||
{D06753BF-E1F3-4578-9B18-08673327F77C}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{D06753BF-E1F3-4578-9B18-08673327F77C}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{D06753BF-E1F3-4578-9B18-08673327F77C}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{D06753BF-E1F3-4578-9B18-08673327F77C}.Debug|x64.Build.0 = Debug|x64
|
||||
{D06753BF-E1F3-4578-9B18-08673327F77C}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{D06753BF-E1F3-4578-9B18-08673327F77C}.Release|Win32.Build.0 = Release|Win32
|
||||
{D06753BF-E1F3-4578-9B18-08673327F77C}.Release|x64.ActiveCfg = Release|x64
|
||||
{D06753BF-E1F3-4578-9B18-08673327F77C}.Release|x64.Build.0 = Release|x64
|
||||
{1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E}.Debug|x64.Build.0 = Debug|x64
|
||||
{1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E}.Release|Win32.Build.0 = Release|Win32
|
||||
{1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E}.Release|x64.ActiveCfg = Release|x64
|
||||
{1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E}.Release|x64.Build.0 = Release|x64
|
||||
{2E41C7AF-39FF-454C-B081-37445378DCB3}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{2E41C7AF-39FF-454C-B081-37445378DCB3}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{2E41C7AF-39FF-454C-B081-37445378DCB3}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{2E41C7AF-39FF-454C-B081-37445378DCB3}.Debug|x64.Build.0 = Debug|x64
|
||||
{2E41C7AF-39FF-454C-B081-37445378DCB3}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{2E41C7AF-39FF-454C-B081-37445378DCB3}.Release|Win32.Build.0 = Release|Win32
|
||||
{2E41C7AF-39FF-454C-B081-37445378DCB3}.Release|x64.ActiveCfg = Release|x64
|
||||
{2E41C7AF-39FF-454C-B081-37445378DCB3}.Release|x64.Build.0 = Release|x64
|
||||
{CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD}.Debug|x64.Build.0 = Debug|x64
|
||||
{CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD}.Release|Win32.Build.0 = Release|Win32
|
||||
{CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD}.Release|x64.ActiveCfg = Release|x64
|
||||
{CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD}.Release|x64.Build.0 = Release|x64
|
||||
{8EC11C7F-1B5C-4787-8940-B9B3AAF1D204}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{8EC11C7F-1B5C-4787-8940-B9B3AAF1D204}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{8EC11C7F-1B5C-4787-8940-B9B3AAF1D204}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{8EC11C7F-1B5C-4787-8940-B9B3AAF1D204}.Debug|x64.Build.0 = Debug|x64
|
||||
{8EC11C7F-1B5C-4787-8940-B9B3AAF1D204}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{8EC11C7F-1B5C-4787-8940-B9B3AAF1D204}.Release|Win32.Build.0 = Release|Win32
|
||||
{8EC11C7F-1B5C-4787-8940-B9B3AAF1D204}.Release|x64.ActiveCfg = Release|x64
|
||||
{8EC11C7F-1B5C-4787-8940-B9B3AAF1D204}.Release|x64.Build.0 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
GlobalSection(ExtensibilityGlobals) = postSolution
|
||||
SolutionGuid = {DAE0012B-DDCC-4614-9110-D52E351B2A80}
|
||||
EndGlobalSection
|
||||
EndGlobal
|
|
@ -9,10 +9,8 @@
|
|||
@note modified new BSD license
|
||||
http://opensource.org/licenses/BSD-3-Clause
|
||||
*/
|
||||
#ifndef XBYAK_NO_OP_NAMES
|
||||
#if not +0 // trick to detect whether 'not' is operator or not
|
||||
#error "use -fno-operator-names option if you want to use and(), or(), xor(), not() as function names, Or define XBYAK_NO_OP_NAMES and use and_(), or_(), xor_(), not_()."
|
||||
#endif
|
||||
#if (not +0) && !defined(XBYAK_NO_OP_NAMES) // trick to detect whether 'not' is operator or not
|
||||
#define XBYAK_NO_OP_NAMES
|
||||
#endif
|
||||
|
||||
#include <stdio.h> // for debug print
|
||||
|
@ -40,6 +38,8 @@
|
|||
// This covers -std=(gnu|c)++(0x|11|1y), -stdlib=libc++, and modern Microsoft.
|
||||
#if ((defined(_MSC_VER) && (_MSC_VER >= 1600)) || defined(_LIBCPP_VERSION) ||\
|
||||
((__cplusplus >= 201103) || defined(__GXX_EXPERIMENTAL_CXX0X__)))
|
||||
#include <unordered_set>
|
||||
#define XBYAK_STD_UNORDERED_SET std::unordered_set
|
||||
#include <unordered_map>
|
||||
#define XBYAK_STD_UNORDERED_MAP std::unordered_map
|
||||
#define XBYAK_STD_UNORDERED_MULTIMAP std::unordered_multimap
|
||||
|
@ -49,16 +49,22 @@
|
|||
libstdcxx 20070719 (from GCC 4.2.1, the last GPL 2 version).
|
||||
*/
|
||||
#elif XBYAK_GNUC_PREREQ(4, 5) || (XBYAK_GNUC_PREREQ(4, 2) && __GLIBCXX__ >= 20070719) || defined(__INTEL_COMPILER) || defined(__llvm__)
|
||||
#include <tr1/unordered_set>
|
||||
#define XBYAK_STD_UNORDERED_SET std::tr1::unordered_set
|
||||
#include <tr1/unordered_map>
|
||||
#define XBYAK_STD_UNORDERED_MAP std::tr1::unordered_map
|
||||
#define XBYAK_STD_UNORDERED_MULTIMAP std::tr1::unordered_multimap
|
||||
|
||||
#elif defined(_MSC_VER) && (_MSC_VER >= 1500) && (_MSC_VER < 1600)
|
||||
#include <unordered_set>
|
||||
#define XBYAK_STD_UNORDERED_SET std::tr1::unordered_set
|
||||
#include <unordered_map>
|
||||
#define XBYAK_STD_UNORDERED_MAP std::tr1::unordered_map
|
||||
#define XBYAK_STD_UNORDERED_MULTIMAP std::tr1::unordered_multimap
|
||||
|
||||
#else
|
||||
#include <set>
|
||||
#define XBYAK_STD_UNORDERED_SET std::set
|
||||
#include <map>
|
||||
#define XBYAK_STD_UNORDERED_MAP std::map
|
||||
#define XBYAK_STD_UNORDERED_MULTIMAP std::multimap
|
||||
|
@ -72,6 +78,10 @@
|
|||
#include <sys/mman.h>
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
#if defined(__APPLE__) && defined(MAP_JIT)
|
||||
#define XBYAK_USE_MAP_JIT
|
||||
#include <sys/sysctl.h>
|
||||
#endif
|
||||
#if !defined(_MSC_VER) || (_MSC_VER >= 1600)
|
||||
#include <stdint.h>
|
||||
#endif
|
||||
|
@ -105,7 +115,7 @@ namespace Xbyak {
|
|||
|
||||
enum {
|
||||
DEFAULT_MAX_CODE_SIZE = 4096,
|
||||
VERSION = 0x5650 /* 0xABCD = A.BC(D) */
|
||||
VERSION = 0x5891 /* 0xABCD = A.BC(D) */
|
||||
};
|
||||
|
||||
#ifndef MIE_INTEGER_TYPE_DEFINED
|
||||
|
@ -178,7 +188,8 @@ enum {
|
|||
ERR_INVALID_ZERO,
|
||||
ERR_INVALID_RIP_IN_AUTO_GROW,
|
||||
ERR_INVALID_MIB_ADDRESS,
|
||||
ERR_INTERNAL
|
||||
ERR_X2APIC_IS_NOT_SUPPORTED,
|
||||
ERR_INTERNAL // Put it at last.
|
||||
};
|
||||
|
||||
class Error : public std::exception {
|
||||
|
@ -187,8 +198,7 @@ public:
|
|||
explicit Error(int err) : err_(err)
|
||||
{
|
||||
if (err_ < 0 || err_ > ERR_INTERNAL) {
|
||||
fprintf(stderr, "bad err=%d in Xbyak::Error\n", err_);
|
||||
exit(1);
|
||||
err_ = ERR_INTERNAL;
|
||||
}
|
||||
}
|
||||
operator int() const { return err_; }
|
||||
|
@ -239,9 +249,11 @@ public:
|
|||
"invalid zero",
|
||||
"invalid rip in AutoGrow",
|
||||
"invalid mib address",
|
||||
"internal error",
|
||||
"x2APIC is not supported",
|
||||
"internal error"
|
||||
};
|
||||
assert((size_t)err_ < sizeof(errTbl) / sizeof(*errTbl));
|
||||
assert(err_ <= ERR_INTERNAL);
|
||||
assert(ERR_INTERNAL + 1 == sizeof(errTbl) / sizeof(*errTbl));
|
||||
return errTbl[err_];
|
||||
}
|
||||
};
|
||||
|
@ -315,6 +327,29 @@ struct Allocator {
|
|||
};
|
||||
|
||||
#ifdef XBYAK_USE_MMAP_ALLOCATOR
|
||||
#ifdef XBYAK_USE_MAP_JIT
|
||||
namespace util {

/*
	query "kern.osrelease" by sysctlbyname() and return its leading number
	@return the number before the first '.', or 0 if the query fails or
	        the number is not followed by '.'
	@note MmapAllocator compares this value with 18 (named mojaveVersion there)
*/
inline int getMacOsVersionPure()
{
	char release[64];
	size_t releaseSize = sizeof(release);
	if (sysctlbyname("kern.osrelease", release, &releaseSize, NULL, 0) != 0) return 0;
	char *endp;
	const int major = strtol(release, &endp, 10);
	return (*endp == '.') ? major : 0;
}

/*
	same as getMacOsVersionPure() but the value is computed only once
	and kept in a function-local static
*/
inline int getMacOsVersion()
{
	static const int version = getMacOsVersionPure();
	return version;
}

} // util
|
||||
#endif
|
||||
class MmapAllocator : Allocator {
|
||||
typedef XBYAK_STD_UNORDERED_MAP<uintptr_t, size_t> SizeList;
|
||||
SizeList sizeList_;
|
||||
|
@ -323,7 +358,11 @@ public:
|
|||
{
|
||||
const size_t alignedSizeM1 = inner::ALIGN_PAGE_SIZE - 1;
|
||||
size = (size + alignedSizeM1) & ~alignedSizeM1;
|
||||
#ifdef MAP_ANONYMOUS
|
||||
#if defined(XBYAK_USE_MAP_JIT)
|
||||
int mode = MAP_PRIVATE | MAP_ANONYMOUS;
|
||||
const int mojaveVersion = 18;
|
||||
if (util::getMacOsVersion() >= mojaveVersion) mode |= MAP_JIT;
|
||||
#elif defined(MAP_ANONYMOUS)
|
||||
const int mode = MAP_PRIVATE | MAP_ANONYMOUS;
|
||||
#elif defined(MAP_ANON)
|
||||
const int mode = MAP_PRIVATE | MAP_ANON;
|
||||
|
@ -423,7 +462,8 @@ public:
|
|||
kind_ = kind;
|
||||
bit_ = kind == XMM ? 128 : kind == YMM ? 256 : 512;
|
||||
}
|
||||
void setBit(int bit) { bit_ = bit; }
|
||||
// err if MMX/FPU/OPMASK/BNDREG
|
||||
void setBit(int bit);
|
||||
void setOpmaskIdx(int idx, bool ignore_idx0 = false)
|
||||
{
|
||||
if (!ignore_idx0 && idx == 0) throw Error(ERR_K0_IS_INVALID);
|
||||
|
@ -506,6 +546,49 @@ public:
|
|||
const Reg& getReg() const;
|
||||
};
|
||||
|
||||
inline void Operand::setBit(int bit)
|
||||
{
|
||||
if (bit != 8 && bit != 16 && bit != 32 && bit != 64 && bit != 128 && bit != 256 && bit != 512) goto ERR;
|
||||
if (isBit(bit)) return;
|
||||
if (is(MEM | OPMASK)) {
|
||||
bit_ = bit;
|
||||
return;
|
||||
}
|
||||
if (is(REG | XMM | YMM | ZMM)) {
|
||||
int idx = getIdx();
|
||||
// err if converting ah, bh, ch, dh
|
||||
if (isREG(8) && (4 <= idx && idx < 8) && !isExt8bit()) goto ERR;
|
||||
Kind kind = REG;
|
||||
switch (bit) {
|
||||
case 8:
|
||||
if (idx >= 16) goto ERR;
|
||||
#ifdef XBYAK32
|
||||
if (idx >= 4) goto ERR;
|
||||
#else
|
||||
if (4 <= idx && idx < 8) idx |= EXT8BIT;
|
||||
#endif
|
||||
break;
|
||||
case 16:
|
||||
case 32:
|
||||
case 64:
|
||||
if (idx >= 16) goto ERR;
|
||||
break;
|
||||
case 128: kind = XMM; break;
|
||||
case 256: kind = YMM; break;
|
||||
case 512: kind = ZMM; break;
|
||||
}
|
||||
idx_ = idx;
|
||||
kind_ = kind;
|
||||
bit_ = bit;
|
||||
if (bit >= 128) return; // keep mask_ and rounding_
|
||||
mask_ = 0;
|
||||
rounding_ = 0;
|
||||
return;
|
||||
}
|
||||
ERR:
|
||||
throw Error(ERR_CANT_CONVERT);
|
||||
}
|
||||
|
||||
class Label;
|
||||
|
||||
struct Reg8;
|
||||
|
@ -518,7 +601,8 @@ class Reg : public Operand {
|
|||
public:
|
||||
Reg() { }
|
||||
Reg(int idx, Kind kind, int bit = 0, bool ext8bit = false) : Operand(idx, kind, bit, ext8bit) { }
|
||||
Reg changeBit(int bit) const { return Reg(getIdx(), getKind(), bit, isExt8bit()); }
|
||||
// convert to Reg8/Reg16/Reg32/Reg64/XMM/YMM/ZMM
|
||||
Reg changeBit(int bit) const { Reg r(*this); r.setBit(bit); return r; }
|
||||
uint8 getRexW() const { return isREG(64) ? 8 : 0; }
|
||||
uint8 getRexR() const { return isExtIdx() ? 4 : 0; }
|
||||
uint8 getRexX() const { return isExtIdx() ? 2 : 0; }
|
||||
|
@ -617,6 +701,12 @@ struct RegRip {
|
|||
const Label* label_;
|
||||
bool isAddr_;
|
||||
explicit RegRip(sint64 disp = 0, const Label* label = 0, bool isAddr = false) : disp_(disp), label_(label), isAddr_(isAddr) {}
|
||||
friend const RegRip operator+(const RegRip& r, int disp) {
|
||||
return RegRip(r.disp_ + disp, r.label_, r.isAddr_);
|
||||
}
|
||||
friend const RegRip operator-(const RegRip& r, int disp) {
|
||||
return RegRip(r.disp_ - disp, r.label_, r.isAddr_);
|
||||
}
|
||||
friend const RegRip operator+(const RegRip& r, sint64 disp) {
|
||||
return RegRip(r.disp_ + disp, r.label_, r.isAddr_);
|
||||
}
|
||||
|
@ -636,34 +726,23 @@ struct RegRip {
|
|||
|
||||
inline Reg8 Reg::cvt8() const
|
||||
{
|
||||
const int idx = getIdx();
|
||||
if (isBit(8)) return Reg8(idx, isExt8bit());
|
||||
#ifdef XBYAK32
|
||||
if (idx >= 4) throw Error(ERR_CANT_CONVERT);
|
||||
#endif
|
||||
return Reg8(idx, 4 <= idx && idx < 8);
|
||||
Reg r = changeBit(8); return Reg8(r.getIdx(), r.isExt8bit());
|
||||
}
|
||||
|
||||
inline Reg16 Reg::cvt16() const
|
||||
{
|
||||
const int idx = getIdx();
|
||||
if (isBit(8) && (4 <= idx && idx < 8) && !isExt8bit()) throw Error(ERR_CANT_CONVERT);
|
||||
return Reg16(idx);
|
||||
return Reg16(changeBit(16).getIdx());
|
||||
}
|
||||
|
||||
inline Reg32 Reg::cvt32() const
|
||||
{
|
||||
const int idx = getIdx();
|
||||
if (isBit(8) && (4 <= idx && idx < 8) && !isExt8bit()) throw Error(ERR_CANT_CONVERT);
|
||||
return Reg32(idx);
|
||||
return Reg32(changeBit(32).getIdx());
|
||||
}
|
||||
|
||||
#ifdef XBYAK64
|
||||
inline Reg64 Reg::cvt64() const
|
||||
{
|
||||
const int idx = getIdx();
|
||||
if (isBit(8) && (4 <= idx && idx < 8) && !isExt8bit()) throw Error(ERR_CANT_CONVERT);
|
||||
return Reg64(idx);
|
||||
return Reg64(changeBit(64).getIdx());
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -786,6 +865,7 @@ inline RegExp operator-(const RegExp& e, size_t disp)
|
|||
|
||||
// 2nd parameter for constructor of CodeArray(maxSize, userPtr, alloc)
|
||||
void *const AutoGrow = (void*)1; //-V566
|
||||
void *const DontSetProtectRWE = (void*)2; //-V566
|
||||
|
||||
class CodeArray {
|
||||
enum Type {
|
||||
|
@ -825,6 +905,7 @@ protected:
|
|||
size_t size_;
|
||||
bool isCalledCalcJmpAddress_;
|
||||
|
||||
bool useProtect() const { return alloc_->useProtect(); }
|
||||
/*
|
||||
allocate new memory and copy old data to the new area
|
||||
*/
|
||||
|
@ -848,12 +929,16 @@ protected:
|
|||
uint64 disp = i->getVal(top_);
|
||||
rewrite(i->codeOffset, disp, i->jmpSize);
|
||||
}
|
||||
if (alloc_->useProtect() && !protect(top_, size_, true)) throw Error(ERR_CANT_PROTECT);
|
||||
isCalledCalcJmpAddress_ = true;
|
||||
}
|
||||
public:
|
||||
enum ProtectMode {
|
||||
PROTECT_RW = 0, // read/write
|
||||
PROTECT_RWE = 1, // read/write/exec
|
||||
PROTECT_RE = 2 // read/exec
|
||||
};
|
||||
explicit CodeArray(size_t maxSize, void *userPtr = 0, Allocator *allocator = 0)
|
||||
: type_(userPtr == AutoGrow ? AUTO_GROW : userPtr ? USER_BUF : ALLOC_BUF)
|
||||
: type_(userPtr == AutoGrow ? AUTO_GROW : (userPtr == 0 || userPtr == DontSetProtectRWE) ? ALLOC_BUF : USER_BUF)
|
||||
, alloc_(allocator ? allocator : (Allocator*)&defaultAllocator_)
|
||||
, maxSize_(maxSize)
|
||||
, top_(type_ == USER_BUF ? reinterpret_cast<uint8*>(userPtr) : alloc_->alloc((std::max<size_t>)(maxSize, 1)))
|
||||
|
@ -861,7 +946,7 @@ public:
|
|||
, isCalledCalcJmpAddress_(false)
|
||||
{
|
||||
if (maxSize_ > 0 && top_ == 0) throw Error(ERR_CANT_ALLOC);
|
||||
if ((type_ == ALLOC_BUF && alloc_->useProtect()) && !protect(top_, maxSize, true)) {
|
||||
if ((type_ == ALLOC_BUF && userPtr != DontSetProtectRWE && useProtect()) && !setProtectMode(PROTECT_RWE, false)) {
|
||||
alloc_->free(top_);
|
||||
throw Error(ERR_CANT_PROTECT);
|
||||
}
|
||||
|
@ -869,10 +954,19 @@ public:
|
|||
virtual ~CodeArray()
|
||||
{
|
||||
if (isAllocType()) {
|
||||
if (alloc_->useProtect()) protect(top_, maxSize_, false);
|
||||
if (useProtect()) setProtectModeRW(false);
|
||||
alloc_->free(top_);
|
||||
}
|
||||
}
|
||||
bool setProtectMode(ProtectMode mode, bool throwException = true)
|
||||
{
|
||||
bool isOK = protect(top_, maxSize_, mode);
|
||||
if (isOK) return true;
|
||||
if (throwException) throw Error(ERR_CANT_PROTECT);
|
||||
return false;
|
||||
}
|
||||
bool setProtectModeRE(bool throwException = true) { return setProtectMode(PROTECT_RE, throwException); }
|
||||
bool setProtectModeRW(bool throwException = true) { return setProtectMode(PROTECT_RW, throwException); }
|
||||
void resetSize()
|
||||
{
|
||||
size_ = 0;
|
||||
|
@ -904,10 +998,10 @@ public:
|
|||
void dq(uint64 code) { db(code, 8); }
|
||||
const uint8 *getCode() const { return top_; }
|
||||
template<class F>
|
||||
const F getCode() const { return CastTo<F>(top_); }
|
||||
const F getCode() const { return reinterpret_cast<F>(top_); }
|
||||
const uint8 *getCurr() const { return &top_[size_]; }
|
||||
template<class F>
|
||||
const F getCurr() const { return CastTo<F>(&top_[size_]); }
|
||||
const F getCurr() const { return reinterpret_cast<F>(&top_[size_]); }
|
||||
size_t getSize() const { return size_; }
|
||||
void setSize(size_t size)
|
||||
{
|
||||
|
@ -960,19 +1054,39 @@ public:
|
|||
change exec permission of memory
|
||||
@param addr [in] buffer address
|
||||
@param size [in] buffer size
|
||||
@param canExec [in] true(enable to exec), false(disable to exec)
|
||||
@param protectMode [in] mode(RW/RWE/RE)
|
||||
@return true(success), false(failure)
|
||||
*/
|
||||
static inline bool protect(const void *addr, size_t size, bool canExec)
|
||||
static inline bool protect(const void *addr, size_t size, int protectMode)
|
||||
{
|
||||
#if defined(_WIN32)
|
||||
const DWORD c_rw = PAGE_READWRITE;
|
||||
const DWORD c_rwe = PAGE_EXECUTE_READWRITE;
|
||||
const DWORD c_re = PAGE_EXECUTE_READ;
|
||||
DWORD mode;
|
||||
#else
|
||||
const int c_rw = PROT_READ | PROT_WRITE;
|
||||
const int c_rwe = PROT_READ | PROT_WRITE | PROT_EXEC;
|
||||
const int c_re = PROT_READ | PROT_EXEC;
|
||||
int mode;
|
||||
#endif
|
||||
switch (protectMode) {
|
||||
case PROTECT_RW: mode = c_rw; break;
|
||||
case PROTECT_RWE: mode = c_rwe; break;
|
||||
case PROTECT_RE: mode = c_re; break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
#if defined(_WIN32)
|
||||
DWORD oldProtect;
|
||||
return VirtualProtect(const_cast<void*>(addr), size, canExec ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE, &oldProtect) != 0;
|
||||
return VirtualProtect(const_cast<void*>(addr), size, mode, &oldProtect) != 0;
|
||||
#elif defined(__GNUC__)
|
||||
size_t pageSize = sysconf(_SC_PAGESIZE);
|
||||
size_t iaddr = reinterpret_cast<size_t>(addr);
|
||||
size_t roundAddr = iaddr & ~(pageSize - static_cast<size_t>(1));
|
||||
int mode = PROT_READ | PROT_WRITE | (canExec ? PROT_EXEC : 0);
|
||||
#ifndef NDEBUG
|
||||
if (pageSize != 4096) fprintf(stderr, "large page(%zd) is used. not tested enough.\n", pageSize);
|
||||
#endif
|
||||
return mprotect(reinterpret_cast<void*>(roundAddr), size + (iaddr - roundAddr), mode) == 0;
|
||||
#else
|
||||
return true;
|
||||
|
@ -999,46 +1113,43 @@ public:
|
|||
M_ripAddr
|
||||
};
|
||||
Address(uint32 sizeBit, bool broadcast, const RegExp& e)
|
||||
: Operand(0, MEM, sizeBit), e_(e), label_(0), mode_(M_ModRM), permitVsib_(false), broadcast_(broadcast)
|
||||
: Operand(0, MEM, sizeBit), e_(e), label_(0), mode_(M_ModRM), broadcast_(broadcast)
|
||||
{
|
||||
e_.verify();
|
||||
}
|
||||
#ifdef XBYAK64
|
||||
explicit Address(size_t disp)
|
||||
: Operand(0, MEM, 64), e_(disp), label_(0), mode_(M_64bitDisp), permitVsib_(false), broadcast_(false){ }
|
||||
: Operand(0, MEM, 64), e_(disp), label_(0), mode_(M_64bitDisp), broadcast_(false){ }
|
||||
Address(uint32 sizeBit, bool broadcast, const RegRip& addr)
|
||||
: Operand(0, MEM, sizeBit), e_(addr.disp_), label_(addr.label_), mode_(addr.isAddr_ ? M_ripAddr : M_rip), permitVsib_(false), broadcast_(broadcast) { }
|
||||
: Operand(0, MEM, sizeBit), e_(addr.disp_), label_(addr.label_), mode_(addr.isAddr_ ? M_ripAddr : M_rip), broadcast_(broadcast) { }
|
||||
#endif
|
||||
void permitVsib() const { permitVsib_ = true; }
|
||||
RegExp getRegExp(bool optimize = true) const
|
||||
{
|
||||
return optimize ? e_.optimize() : e_;
|
||||
}
|
||||
Mode getMode() const { return mode_; }
|
||||
bool is32bit() const { verify(); return e_.getBase().getBit() == 32 || e_.getIndex().getBit() == 32; }
|
||||
bool isOnlyDisp() const { verify(); return !e_.getBase().getBit() && !e_.getIndex().getBit(); } // for mov eax
|
||||
size_t getDisp() const { verify(); return e_.getDisp(); }
|
||||
bool is32bit() const { return e_.getBase().getBit() == 32 || e_.getIndex().getBit() == 32; }
|
||||
bool isOnlyDisp() const { return !e_.getBase().getBit() && !e_.getIndex().getBit(); } // for mov eax
|
||||
size_t getDisp() const { return e_.getDisp(); }
|
||||
uint8 getRex() const
|
||||
{
|
||||
verify();
|
||||
if (mode_ != M_ModRM) return 0;
|
||||
return getRegExp().getRex();
|
||||
}
|
||||
bool is64bitDisp() const { verify(); return mode_ == M_64bitDisp; } // for moffset
|
||||
bool is64bitDisp() const { return mode_ == M_64bitDisp; } // for moffset
|
||||
bool isBroadcast() const { return broadcast_; }
|
||||
const Label* getLabel() const { return label_; }
|
||||
bool operator==(const Address& rhs) const
|
||||
{
|
||||
return getBit() == rhs.getBit() && e_ == rhs.e_ && label_ == rhs.label_ && mode_ == rhs.mode_ && permitVsib_ == rhs.permitVsib_ && broadcast_ == rhs.broadcast_;
|
||||
return getBit() == rhs.getBit() && e_ == rhs.e_ && label_ == rhs.label_ && mode_ == rhs.mode_ && broadcast_ == rhs.broadcast_;
|
||||
}
|
||||
bool operator!=(const Address& rhs) const { return !operator==(rhs); }
|
||||
bool isVsib() const { return e_.isVsib(); }
|
||||
private:
|
||||
RegExp e_;
|
||||
const Label* label_;
|
||||
Mode mode_;
|
||||
mutable bool permitVsib_;
|
||||
bool broadcast_;
|
||||
void verify() const { if (e_.isVsib() && !permitVsib_) throw Error(ERR_BAD_VSIB_ADDRESSING); }
|
||||
};
|
||||
|
||||
inline const Address& Operand::getAddress() const
|
||||
|
@ -1096,6 +1207,7 @@ public:
|
|||
Label(const Label& rhs);
|
||||
Label& operator=(const Label& rhs);
|
||||
~Label();
|
||||
void clear() { mgr = 0; id = 0; }
|
||||
int getId() const { return id; }
|
||||
const uint8 *getAddress() const;
|
||||
|
||||
|
@ -1134,6 +1246,7 @@ class LabelManager {
|
|||
};
|
||||
typedef XBYAK_STD_UNORDERED_MAP<int, ClabelVal> ClabelDefList;
|
||||
typedef XBYAK_STD_UNORDERED_MULTIMAP<int, const JmpLabel> ClabelUndefList;
|
||||
typedef XBYAK_STD_UNORDERED_SET<Label*> LabelPtrList;
|
||||
|
||||
CodeArray *base_;
|
||||
// global : stateList_.front(), local : stateList_.back()
|
||||
|
@ -1141,6 +1254,7 @@ class LabelManager {
|
|||
mutable int labelId_;
|
||||
ClabelDefList clabelDefList_;
|
||||
ClabelUndefList clabelUndefList_;
|
||||
LabelPtrList labelPtrList_;
|
||||
|
||||
int getId(const Label& label) const
|
||||
{
|
||||
|
@ -1189,9 +1303,14 @@ class LabelManager {
|
|||
return true;
|
||||
}
|
||||
friend class Label;
|
||||
void incRefCount(int id) { clabelDefList_[id].refCount++; }
|
||||
void decRefCount(int id)
|
||||
void incRefCount(int id, Label *label)
|
||||
{
|
||||
clabelDefList_[id].refCount++;
|
||||
labelPtrList_.insert(label);
|
||||
}
|
||||
void decRefCount(int id, Label *label)
|
||||
{
|
||||
labelPtrList_.erase(label);
|
||||
ClabelDefList::iterator i = clabelDefList_.find(id);
|
||||
if (i == clabelDefList_.end()) return;
|
||||
if (i->second.refCount == 1) {
|
||||
|
@ -1210,11 +1329,23 @@ class LabelManager {
|
|||
#endif
|
||||
return !list.empty();
|
||||
}
|
||||
// detach all labels linked to LabelManager
|
||||
void resetLabelPtrList()
|
||||
{
|
||||
for (LabelPtrList::iterator i = labelPtrList_.begin(), ie = labelPtrList_.end(); i != ie; ++i) {
|
||||
(*i)->clear();
|
||||
}
|
||||
labelPtrList_.clear();
|
||||
}
|
||||
public:
|
||||
LabelManager()
|
||||
{
|
||||
reset();
|
||||
}
|
||||
~LabelManager()
|
||||
{
|
||||
resetLabelPtrList();
|
||||
}
|
||||
void reset()
|
||||
{
|
||||
base_ = 0;
|
||||
|
@ -1224,6 +1355,7 @@ public:
|
|||
stateList_.push_back(SlabelState());
|
||||
clabelDefList_.clear();
|
||||
clabelUndefList_.clear();
|
||||
resetLabelPtrList();
|
||||
}
|
||||
void enterLocal()
|
||||
{
|
||||
|
@ -1256,10 +1388,11 @@ public:
|
|||
SlabelState& st = *label.c_str() == '.' ? stateList_.back() : stateList_.front();
|
||||
define_inner(st.defList, st.undefList, label, base_->getSize());
|
||||
}
|
||||
void defineClabel(const Label& label)
|
||||
void defineClabel(Label& label)
|
||||
{
|
||||
define_inner(clabelDefList_, clabelUndefList_, getId(label), base_->getSize());
|
||||
label.mgr = this;
|
||||
labelPtrList_.insert(&label);
|
||||
}
|
||||
void assign(Label& dst, const Label& src)
|
||||
{
|
||||
|
@ -1267,6 +1400,7 @@ public:
|
|||
if (i == clabelDefList_.end()) throw Error(ERR_LABEL_ISNOT_SET_BY_L);
|
||||
define_inner(clabelDefList_, clabelUndefList_, dst.id, i->second.offset);
|
||||
dst.mgr = this;
|
||||
labelPtrList_.insert(&dst);
|
||||
}
|
||||
bool getOffset(size_t *offset, std::string& label) const
|
||||
{
|
||||
|
@ -1314,19 +1448,19 @@ inline Label::Label(const Label& rhs)
|
|||
{
|
||||
id = rhs.id;
|
||||
mgr = rhs.mgr;
|
||||
if (mgr) mgr->incRefCount(id);
|
||||
if (mgr) mgr->incRefCount(id, this);
|
||||
}
|
||||
inline Label& Label::operator=(const Label& rhs)
|
||||
{
|
||||
if (id) throw Error(ERR_LABEL_IS_ALREADY_SET_BY_L);
|
||||
id = rhs.id;
|
||||
mgr = rhs.mgr;
|
||||
if (mgr) mgr->incRefCount(id);
|
||||
if (mgr) mgr->incRefCount(id, this);
|
||||
return *this;
|
||||
}
|
||||
inline Label::~Label()
|
||||
{
|
||||
if (id && mgr) mgr->decRefCount(id);
|
||||
if (id && mgr) mgr->decRefCount(id, this);
|
||||
}
|
||||
inline const uint8* Label::getAddress() const
|
||||
{
|
||||
|
@ -1443,6 +1577,8 @@ private:
|
|||
T_B32 = 1 << 26, // m32bcst
|
||||
T_B64 = 1 << 27, // m64bcst
|
||||
T_M_K = 1 << 28, // mem{k}
|
||||
T_VSIB = 1 << 29,
|
||||
T_MEM_EVEX = 1 << 30, // use evex if mem
|
||||
T_XXX
|
||||
};
|
||||
void vex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false)
|
||||
|
@ -1480,7 +1616,7 @@ private:
|
|||
if ((a > 0 && a != v) + (b > 0 && b != v) + (c > 0 && c != v) > 0) return Error(err);
|
||||
return v;
|
||||
}
|
||||
int evex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false, bool b = false, int aaa = 0, uint32 VL = 0)
|
||||
int evex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false, bool b = false, int aaa = 0, uint32 VL = 0, bool Hi16Vidx = false)
|
||||
{
|
||||
if (!(type & (T_EVEX | T_MUST_EVEX))) throw Error(ERR_EVEX_IS_INVALID);
|
||||
int w = (type & T_EW1) ? 1 : 0;
|
||||
|
@ -1523,7 +1659,7 @@ private:
|
|||
}
|
||||
}
|
||||
}
|
||||
bool Vp = !(v ? v->isExtIdx2() : 0);
|
||||
bool Vp = !((v ? v->isExtIdx2() : 0) | Hi16Vidx);
|
||||
bool z = reg.hasZero() || base.hasZero() || (v ? v->hasZero() : false);
|
||||
if (aaa == 0) aaa = verifyDuplicate(base.getOpmaskIdx(), reg.getOpmaskIdx(), (v ? v->getOpmaskIdx() : 0), ERR_OPMASK_IS_ALREADY_SET);
|
||||
db(0x62);
|
||||
|
@ -1607,6 +1743,14 @@ private:
|
|||
db(code0 | (reg.isBit(8) ? 0 : 1)); if (code1 != NONE) db(code1); if (code2 != NONE) db(code2);
|
||||
opAddr(addr, reg.getIdx(), immSize);
|
||||
}
|
||||
void opLoadSeg(const Address& addr, const Reg& reg, int code0, int code1 = NONE)
|
||||
{
|
||||
if (addr.is64bitDisp()) throw Error(ERR_CANT_USE_64BIT_DISP);
|
||||
if (reg.isBit(8)) throw Error(ERR_BAD_SIZE_OF_REGISTER);
|
||||
rex(addr, reg);
|
||||
db(code0); if (code1 != NONE) db(code1);
|
||||
opAddr(addr, reg.getIdx());
|
||||
}
|
||||
void opMIB(const Address& addr, const Reg& reg, int code0, int code1)
|
||||
{
|
||||
if (addr.is64bitDisp()) throw Error(ERR_CANT_USE_64BIT_DISP);
|
||||
|
@ -1631,6 +1775,7 @@ private:
|
|||
db(longCode); dd(disp - longJmpSize);
|
||||
}
|
||||
}
|
||||
bool isNEAR(LabelType type) const { return type == T_NEAR || (type == T_AUTO && isDefaultJmpNEAR_); }
|
||||
template<class T>
|
||||
void opJmp(T& label, LabelType type, uint8 shortCode, uint8 longCode, uint8 longPref)
|
||||
{
|
||||
|
@ -1640,7 +1785,7 @@ private:
|
|||
makeJmp(inner::VerifyInInt32(offset - size_), type, shortCode, longCode, longPref);
|
||||
} else {
|
||||
int jmpSize = 0;
|
||||
if (type == T_NEAR) {
|
||||
if (isNEAR(type)) {
|
||||
jmpSize = 4;
|
||||
if (longPref) db(longPref);
|
||||
db(longCode); dd(0);
|
||||
|
@ -1655,7 +1800,7 @@ private:
|
|||
void opJmpAbs(const void *addr, LabelType type, uint8 shortCode, uint8 longCode, uint8 longPref = 0)
|
||||
{
|
||||
if (isAutoGrow()) {
|
||||
if (type != T_NEAR) throw Error(ERR_ONLY_T_NEAR_IS_SUPPORTED_IN_AUTO_GROW);
|
||||
if (!isNEAR(type)) throw Error(ERR_ONLY_T_NEAR_IS_SUPPORTED_IN_AUTO_GROW);
|
||||
if (size_ + 16 >= maxSize_) growMemory();
|
||||
if (longPref) db(longPref);
|
||||
db(longCode);
|
||||
|
@ -1669,8 +1814,9 @@ private:
|
|||
// reg is reg field of ModRM
|
||||
// immSize is the size for immediate value
|
||||
// disp8N = 0(normal), disp8N = 1(force disp32), disp8N = {2, 4, 8} ; compressed displacement
|
||||
void opAddr(const Address &addr, int reg, int immSize = 0, int disp8N = 0)
|
||||
void opAddr(const Address &addr, int reg, int immSize = 0, int disp8N = 0, bool permitVisb = false)
|
||||
{
|
||||
if (!permitVisb && addr.isVsib()) throw Error(ERR_BAD_VSIB_ADDRESSING);
|
||||
if (addr.getMode() == Address::M_ModRM) {
|
||||
setSIB(addr.getRegExp(), reg, disp8N);
|
||||
} else if (addr.getMode() == Address::M_rip || addr.getMode() == Address::M_ripAddr) {
|
||||
|
@ -1914,10 +2060,11 @@ private:
|
|||
const Address& addr = op2.getAddress();
|
||||
const RegExp& regExp = addr.getRegExp();
|
||||
const Reg& base = regExp.getBase();
|
||||
const Reg& index = regExp.getIndex();
|
||||
if (BIT == 64 && addr.is32bit()) db(0x67);
|
||||
int disp8N = 0;
|
||||
bool x = regExp.getIndex().isExtIdx();
|
||||
if ((type & T_MUST_EVEX) || r.hasEvex() || (p1 && p1->hasEvex()) || addr.isBroadcast() || addr.getOpmaskIdx()) {
|
||||
bool x = index.isExtIdx();
|
||||
if ((type & (T_MUST_EVEX|T_MEM_EVEX)) || r.hasEvex() || (p1 && p1->hasEvex()) || addr.isBroadcast() || addr.getOpmaskIdx()) {
|
||||
int aaa = addr.getOpmaskIdx();
|
||||
if (aaa && !(type & T_M_K)) throw Error(ERR_INVALID_OPMASK_WITH_MEMORY);
|
||||
bool b = false;
|
||||
|
@ -1925,12 +2072,12 @@ private:
|
|||
if (!(type & (T_B32 | T_B64))) throw Error(ERR_INVALID_BROADCAST);
|
||||
b = true;
|
||||
}
|
||||
int VL = regExp.isVsib() ? regExp.getIndex().getBit() : 0;
|
||||
disp8N = evex(r, base, p1, type, code, x, b, aaa, VL);
|
||||
int VL = regExp.isVsib() ? index.getBit() : 0;
|
||||
disp8N = evex(r, base, p1, type, code, x, b, aaa, VL, index.isExtIdx2());
|
||||
} else {
|
||||
vex(r, base, p1, type, code, x);
|
||||
}
|
||||
opAddr(addr, r.getIdx(), (imm8 != NONE) ? 1 : 0, disp8N);
|
||||
opAddr(addr, r.getIdx(), (imm8 != NONE) ? 1 : 0, disp8N, (type & T_VSIB) != 0);
|
||||
} else {
|
||||
const Reg& base = op2.getReg();
|
||||
if ((type & T_MUST_EVEX) || r.hasEvex() || (p1 && p1->hasEvex()) || base.hasEvex()) {
|
||||
|
@ -2031,8 +2178,7 @@ private:
|
|||
}
|
||||
if (!isOK) throw Error(ERR_BAD_VSIB_ADDRESSING);
|
||||
}
|
||||
addr.permitVsib();
|
||||
opAVX_X_X_XM(isAddrYMM ? Ymm(x1.getIdx()) : x1, isAddrYMM ? Ymm(x2.getIdx()) : x2, addr, type | T_YMM, code);
|
||||
opAVX_X_X_XM(isAddrYMM ? Ymm(x1.getIdx()) : x1, isAddrYMM ? Ymm(x2.getIdx()) : x2, addr, type, code);
|
||||
}
|
||||
enum {
|
||||
xx_yy_zz = 0,
|
||||
|
@ -2056,7 +2202,6 @@ private:
|
|||
{
|
||||
if (x.hasZero()) throw Error(ERR_INVALID_ZERO);
|
||||
checkGather2(x, addr.getRegExp().getIndex(), mode);
|
||||
addr.permitVsib();
|
||||
opVex(x, 0, addr, type, code);
|
||||
}
|
||||
/*
|
||||
|
@ -2076,9 +2221,30 @@ private:
|
|||
{
|
||||
if (addr.hasZero()) throw Error(ERR_INVALID_ZERO);
|
||||
if (addr.getRegExp().getIndex().getKind() != kind) throw Error(ERR_BAD_VSIB_ADDRESSING);
|
||||
addr.permitVsib();
|
||||
opVex(x, 0, addr, type, code);
|
||||
}
|
||||
void opInOut(const Reg& a, const Reg& d, uint8 code)
|
||||
{
|
||||
if (a.getIdx() == Operand::AL && d.getIdx() == Operand::DX && d.getBit() == 16) {
|
||||
switch (a.getBit()) {
|
||||
case 8: db(code); return;
|
||||
case 16: db(0x66); db(code + 1); return;
|
||||
case 32: db(code + 1); return;
|
||||
}
|
||||
}
|
||||
throw Error(ERR_BAD_COMBINATION);
|
||||
}
|
||||
void opInOut(const Reg& a, uint8 code, uint8 v)
|
||||
{
|
||||
if (a.getIdx() == Operand::AL) {
|
||||
switch (a.getBit()) {
|
||||
case 8: db(code); db(v); return;
|
||||
case 16: db(0x66); db(code + 1); db(v); return;
|
||||
case 32: db(code + 1); db(v); return;
|
||||
}
|
||||
}
|
||||
throw Error(ERR_BAD_COMBINATION);
|
||||
}
|
||||
public:
|
||||
unsigned int getVersion() const { return VERSION; }
|
||||
using CodeArray::db;
|
||||
|
@ -2088,7 +2254,7 @@ public:
|
|||
const Zmm zmm0, zmm1, zmm2, zmm3, zmm4, zmm5, zmm6, zmm7;
|
||||
const Xmm &xm0, &xm1, &xm2, &xm3, &xm4, &xm5, &xm6, &xm7;
|
||||
const Ymm &ym0, &ym1, &ym2, &ym3, &ym4, &ym5, &ym6, &ym7;
|
||||
const Ymm &zm0, &zm1, &zm2, &zm3, &zm4, &zm5, &zm6, &zm7;
|
||||
const Zmm &zm0, &zm1, &zm2, &zm3, &zm4, &zm5, &zm6, &zm7;
|
||||
const Reg32 eax, ecx, edx, ebx, esp, ebp, esi, edi;
|
||||
const Reg16 ax, cx, dx, bx, sp, bp, si, di;
|
||||
const Reg8 al, cl, dl, bl, ah, ch, dh, bh;
|
||||
|
@ -2128,8 +2294,12 @@ public:
|
|||
#ifndef XBYAK_DISABLE_SEGMENT
|
||||
const Segment es, cs, ss, ds, fs, gs;
|
||||
#endif
|
||||
private:
|
||||
bool isDefaultJmpNEAR_;
|
||||
public:
|
||||
void L(const std::string& label) { labelMgr_.defineSlabel(label); }
|
||||
void L(const Label& label) { labelMgr_.defineClabel(label); }
|
||||
void L(Label& label) { labelMgr_.defineClabel(label); }
|
||||
Label L() { Label label; L(label); return label; }
|
||||
void inLocalLabel() { labelMgr_.enterLocal(); }
|
||||
void outLocalLabel() { labelMgr_.leaveLocal(); }
|
||||
/*
|
||||
|
@ -2146,6 +2316,8 @@ public:
|
|||
void putL(std::string label) { putL_inner(label); }
|
||||
void putL(const Label& label) { putL_inner(label); }
|
||||
|
||||
// set default type of `jmp` of undefined label to T_NEAR
|
||||
void setDefaultJmpNEAR(bool isNear) { isDefaultJmpNEAR_ = isNear; }
|
||||
void jmp(const Operand& op) { opR_ModM(op, BIT, 4, 0xFF, NONE, NONE, true); }
|
||||
void jmp(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0xEB, 0xE9, 0); }
|
||||
void jmp(const char *label, LabelType type = T_AUTO) { jmp(std::string(label), type); }
|
||||
|
@ -2160,7 +2332,7 @@ public:
|
|||
// call(function pointer)
|
||||
#ifdef XBYAK_VARIADIC_TEMPLATE
|
||||
template<class Ret, class... Params>
|
||||
void call(Ret(*func)(Params...)) { call(CastTo<const void*>(func)); }
|
||||
void call(Ret(*func)(Params...)) { call(reinterpret_cast<const void*>(func)); }
|
||||
#endif
|
||||
void call(const void *addr) { opJmpAbs(addr, T_NEAR, 0, 0xE8); }
|
||||
|
||||
|
@ -2404,6 +2576,7 @@ public:
|
|||
#ifndef XBYAK_DISABLE_SEGMENT
|
||||
, es(Segment::es), cs(Segment::cs), ss(Segment::ss), ds(Segment::ds), fs(Segment::fs), gs(Segment::gs)
|
||||
#endif
|
||||
, isDefaultJmpNEAR_(false)
|
||||
{
|
||||
labelMgr_.set(this);
|
||||
}
|
||||
|
@ -2418,11 +2591,16 @@ public:
|
|||
MUST call ready() to complete generating code if you use AutoGrow mode.
|
||||
It is not necessary for the other mode if hasUndefinedLabel() is true.
|
||||
*/
|
||||
void ready()
|
||||
void ready(ProtectMode mode = PROTECT_RWE)
|
||||
{
|
||||
if (hasUndefinedLabel()) throw Error(ERR_LABEL_IS_NOT_FOUND);
|
||||
if (isAutoGrow()) calcJmpAddress();
|
||||
if (isAutoGrow()) {
|
||||
calcJmpAddress();
|
||||
if (useProtect()) setProtectMode(mode);
|
||||
}
|
||||
}
|
||||
// set read/exec
|
||||
void readyRE() { return ready(PROTECT_RE); }
|
||||
#ifdef XBYAK_TEST
|
||||
void dump(bool doClear = true)
|
||||
{
|
|
@ -1,4 +1,4 @@
|
|||
const char *getVersionString() const { return "5.65"; }
|
||||
const char *getVersionString() const { return "5.891"; }
|
||||
void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); }
|
||||
void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
|
||||
void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }
|
||||
|
@ -58,7 +58,9 @@ void cdq() { db(0x99); }
|
|||
void clc() { db(0xF8); }
|
||||
void cld() { db(0xFC); }
|
||||
void clflush(const Address& addr) { opModM(addr, Reg32(7), 0x0F, 0xAE); }
|
||||
void clflushopt(const Address& addr) { db(0x66); opModM(addr, Reg32(7), 0x0F, 0xAE); }
|
||||
void cli() { db(0xFA); }
|
||||
void clzero() { db(0x0F); db(0x01); db(0xFC); }
|
||||
void cmc() { db(0xF5); }
|
||||
void cmova(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 7); }//-V524
|
||||
void cmovae(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 3); }//-V524
|
||||
|
@ -122,8 +124,11 @@ void cmpordsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 7); }
|
|||
void cmpordss(const Xmm& x, const Operand& op) { cmpss(x, op, 7); }
|
||||
void cmppd(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0x66, isXMM_XMMorMEM, imm8); }
|
||||
void cmpps(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0x100, isXMM_XMMorMEM, imm8); }
|
||||
void cmpsb() { db(0xA6); }
|
||||
void cmpsd() { db(0xA7); }
|
||||
void cmpsd(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0xF2, isXMM_XMMorMEM, imm8); }
|
||||
void cmpss(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0xF3, isXMM_XMMorMEM, imm8); }
|
||||
void cmpsw() { db(0x66); db(0xA7); }
|
||||
void cmpunordpd(const Xmm& x, const Operand& op) { cmppd(x, op, 3); }
|
||||
void cmpunordps(const Xmm& x, const Operand& op) { cmpps(x, op, 3); }
|
||||
void cmpunordsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 3); }
|
||||
|
@ -167,6 +172,7 @@ void divss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5E, 0xF3, isXMM
|
|||
void dppd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x41, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
|
||||
void dpps(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x40, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
|
||||
void emms() { db(0x0F); db(0x77); }
|
||||
void enter(uint16 x, uint8 y) { db(0xC8); dw(x); db(y); }
|
||||
void extractps(const Operand& op, const Xmm& xmm, uint8 imm) { opExt(op, xmm, 0x17, imm); }
|
||||
void f2xm1() { db(0xD9); db(0xF0); }
|
||||
void fabs() { db(0xD9); db(0xE1); }
|
||||
|
@ -176,7 +182,10 @@ void fadd(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8C0, 0xDCC
|
|||
void faddp() { db(0xDE); db(0xC1); }
|
||||
void faddp(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEC0); }
|
||||
void faddp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEC0); }
|
||||
void fbld(const Address& addr) { opModM(addr, Reg32(4), 0xDF, 0x100); }
|
||||
void fbstp(const Address& addr) { opModM(addr, Reg32(6), 0xDF, 0x100); }
|
||||
void fchs() { db(0xD9); db(0xE0); }
|
||||
void fclex() { db(0x9B); db(0xDB); db(0xE2); }
|
||||
void fcmovb(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDAC0, 0x00C0); }
|
||||
void fcmovb(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDAC0, 0x00C0); }
|
||||
void fcmovbe(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDAD0, 0x00D0); }
|
||||
|
@ -237,6 +246,7 @@ void fld(const Address& addr) { opFpuMem(addr, 0x00, 0xD9, 0xDD, 0, 0); }
|
|||
void fld(const Fpu& reg) { opFpu(reg, 0xD9, 0xC0); }
|
||||
void fld1() { db(0xD9); db(0xE8); }
|
||||
void fldcw(const Address& addr) { opModM(addr, Reg32(5), 0xD9, 0x100); }
|
||||
void fldenv(const Address& addr) { opModM(addr, Reg32(4), 0xD9, 0x100); }
|
||||
void fldl2e() { db(0xD9); db(0xEA); }
|
||||
void fldl2t() { db(0xD9); db(0xE9); }
|
||||
void fldlg2() { db(0xD9); db(0xEC); }
|
||||
|
@ -249,22 +259,33 @@ void fmul(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8C8, 0xDCC
|
|||
void fmulp() { db(0xDE); db(0xC9); }
|
||||
void fmulp(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEC8); }
|
||||
void fmulp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEC8); }
|
||||
void fnclex() { db(0xDB); db(0xE2); }
|
||||
void fninit() { db(0xDB); db(0xE3); }
|
||||
void fnop() { db(0xD9); db(0xD0); }
|
||||
void fnsave(const Address& addr) { opModM(addr, Reg32(6), 0xDD, 0x100); }
|
||||
void fnstcw(const Address& addr) { opModM(addr, Reg32(7), 0xD9, 0x100); }
|
||||
void fnstenv(const Address& addr) { opModM(addr, Reg32(6), 0xD9, 0x100); }
|
||||
void fnstsw(const Address& addr) { opModM(addr, Reg32(7), 0xDD, 0x100); }
|
||||
void fnstsw(const Reg16& r) { if (r.getIdx() != Operand::AX) throw Error(ERR_BAD_PARAMETER); db(0xDF); db(0xE0); }
|
||||
void fpatan() { db(0xD9); db(0xF3); }
|
||||
void fprem() { db(0xD9); db(0xF8); }
|
||||
void fprem1() { db(0xD9); db(0xF5); }
|
||||
void fptan() { db(0xD9); db(0xF2); }
|
||||
void frndint() { db(0xD9); db(0xFC); }
|
||||
void frstor(const Address& addr) { opModM(addr, Reg32(4), 0xDD, 0x100); }
|
||||
void fsave(const Address& addr) { db(0x9B); opModM(addr, Reg32(6), 0xDD, 0x100); }
|
||||
void fscale() { db(0xD9); db(0xFD); }
|
||||
void fsin() { db(0xD9); db(0xFE); }
|
||||
void fsincos() { db(0xD9); db(0xFB); }
|
||||
void fsqrt() { db(0xD9); db(0xFA); }
|
||||
void fst(const Address& addr) { opFpuMem(addr, 0x00, 0xD9, 0xDD, 2, 0); }
|
||||
void fst(const Fpu& reg) { opFpu(reg, 0xDD, 0xD0); }
|
||||
void fstcw(const Address& addr) { db(0x9B); opModM(addr, Reg32(7), 0xD9, NONE); }
|
||||
void fstcw(const Address& addr) { db(0x9B); opModM(addr, Reg32(7), 0xD9, 0x100); }
|
||||
void fstenv(const Address& addr) { db(0x9B); opModM(addr, Reg32(6), 0xD9, 0x100); }
|
||||
void fstp(const Address& addr) { opFpuMem(addr, 0x00, 0xD9, 0xDD, 3, 0); }
|
||||
void fstp(const Fpu& reg) { opFpu(reg, 0xDD, 0xD8); }
|
||||
void fstsw(const Address& addr) { db(0x9B); opModM(addr, Reg32(7), 0xDD, 0x100); }
|
||||
void fstsw(const Reg16& r) { if (r.getIdx() != Operand::AX) throw Error(ERR_BAD_PARAMETER); db(0x9B); db(0xDF); db(0xE0); }
|
||||
void fsub(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 4, 0); }
|
||||
void fsub(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xD8E0, 0xDCE8); }
|
||||
void fsub(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8E0, 0xDCE8); }
|
||||
|
@ -291,6 +312,7 @@ void fwait() { db(0x9B); }
|
|||
void fxam() { db(0xD9); db(0xE5); }
|
||||
void fxch() { db(0xD9); db(0xC9); }
|
||||
void fxch(const Fpu& reg) { opFpu(reg, 0xD9, 0xC8); }
|
||||
void fxrstor(const Address& addr) { opModM(addr, Reg32(1), 0x0F, 0xAE); }
|
||||
void fxtract() { db(0xD9); db(0xF4); }
|
||||
void fyl2x() { db(0xD9); db(0xF1); }
|
||||
void fyl2xp1() { db(0xD9); db(0xF9); }
|
||||
|
@ -303,8 +325,12 @@ void hsubpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7D, 0x66, isXM
|
|||
void hsubps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7D, 0xF2, isXMM_XMMorMEM); }
|
||||
void idiv(const Operand& op) { opR_ModM(op, 0, 7, 0xF6); }
|
||||
void imul(const Operand& op) { opR_ModM(op, 0, 5, 0xF6); }
|
||||
void in_(const Reg& a, const Reg& d) { opInOut(a, d, 0xEC); }
|
||||
void in_(const Reg& a, uint8 v) { opInOut(a, 0xE4, v); }
|
||||
void inc(const Operand& op) { opIncDec(op, 0x40, 0); }
|
||||
void insertps(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x21, 0x66, isXMM_XMMorMEM, imm, 0x3A); }
|
||||
void int3() { db(0xCC); }
|
||||
void int_(uint8 x) { db(0xCD); db(x); }
|
||||
void ja(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x77, 0x87, 0x0F); }//-V524
|
||||
void ja(const char *label, LabelType type = T_AUTO) { ja(std::string(label), type); }//-V524
|
||||
void ja(const void *addr) { opJmpAbs(addr, T_NEAR, 0x77, 0x87, 0x0F); }//-V524
|
||||
|
@ -429,8 +455,24 @@ void lahf() { db(0x9F); }
|
|||
void lddqu(const Xmm& xmm, const Address& addr) { db(0xF2); opModM(addr, xmm, 0x0F, 0xF0); }
|
||||
void ldmxcsr(const Address& addr) { opModM(addr, Reg32(2), 0x0F, 0xAE); }
|
||||
void lea(const Reg& reg, const Address& addr) { if (!reg.isBit(16 | i32e)) throw Error(ERR_BAD_SIZE_OF_REGISTER); opModM(addr, reg, 0x8D); }
|
||||
void leave() { db(0xC9); }
|
||||
void lfence() { db(0x0F); db(0xAE); db(0xE8); }
|
||||
void lfs(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0x0F, 0xB4); }
|
||||
void lgs(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0x0F, 0xB5); }
|
||||
void lock() { db(0xF0); }
|
||||
void lodsb() { db(0xAC); }
|
||||
void lodsd() { db(0xAD); }
|
||||
void lodsw() { db(0x66); db(0xAD); }
|
||||
void loop(const Label& label) { opJmp(label, T_SHORT, 0xE2, 0, 0); }
|
||||
void loop(const char *label) { loop(std::string(label)); }
|
||||
void loop(std::string label) { opJmp(label, T_SHORT, 0xE2, 0, 0); }
|
||||
void loope(const Label& label) { opJmp(label, T_SHORT, 0xE1, 0, 0); }
|
||||
void loope(const char *label) { loope(std::string(label)); }
|
||||
void loope(std::string label) { opJmp(label, T_SHORT, 0xE1, 0, 0); }
|
||||
void loopne(const Label& label) { opJmp(label, T_SHORT, 0xE0, 0, 0); }
|
||||
void loopne(const char *label) { loopne(std::string(label)); }
|
||||
void loopne(std::string label) { opJmp(label, T_SHORT, 0xE0, 0, 0); }
|
||||
void lss(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0x0F, 0xB2); }
|
||||
void lzcnt(const Reg&reg, const Operand& op) { opSp1(reg, op, 0xF3, 0x0F, 0xBD); }
|
||||
void maskmovdqu(const Xmm& reg1, const Xmm& reg2) { db(0x66); opModR(reg1, reg2, 0x0F, 0xF7); }
|
||||
void maskmovq(const Mmx& reg1, const Mmx& reg2) { if (!reg1.isMMX() || !reg2.isMMX()) throw Error(ERR_BAD_COMBINATION); opModR(reg1, reg2, 0x0F, 0xF7); }
|
||||
|
@ -444,6 +486,7 @@ void minps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5D, 0x100, isXM
|
|||
void minsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5D, 0xF2, isXMM_XMMorMEM); }
|
||||
void minss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5D, 0xF3, isXMM_XMMorMEM); }
|
||||
void monitor() { db(0x0F); db(0x01); db(0xC8); }
|
||||
void monitorx() { db(0x0F); db(0x01); db(0xFA); }
|
||||
void movapd(const Address& addr, const Xmm& xmm) { db(0x66); opModM(addr, xmm, 0x0F, 0x29); }
|
||||
void movapd(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x28, 0x66); }
|
||||
void movaps(const Address& addr, const Xmm& xmm) { opModM(addr, xmm, 0x0F, 0x29); }
|
||||
|
@ -500,12 +543,18 @@ void mulsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x59, 0xF2, isXMM
|
|||
void mulss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x59, 0xF3, isXMM_XMMorMEM); }
|
||||
void mulx(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opGpr(r1, r2, op, T_F2 | T_0F38, 0xf6, true); }
|
||||
void mwait() { db(0x0F); db(0x01); db(0xC9); }
|
||||
void mwaitx() { db(0x0F); db(0x01); db(0xFB); }
|
||||
void neg(const Operand& op) { opR_ModM(op, 0, 3, 0xF6); }
|
||||
void not_(const Operand& op) { opR_ModM(op, 0, 2, 0xF6); }
|
||||
void or_(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x08, 1); }
|
||||
void or_(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x08); }
|
||||
void orpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x56, 0x66, isXMM_XMMorMEM); }
|
||||
void orps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x56, 0x100, isXMM_XMMorMEM); }
|
||||
void out_(const Reg& d, const Reg& a) { opInOut(a, d, 0xEE); }
|
||||
void out_(uint8 v, const Reg& a) { opInOut(a, 0xE6, v); }
|
||||
void outsb() { db(0x6E); }
|
||||
void outsd() { db(0x6F); }
|
||||
void outsw() { db(0x66); db(0x6F); }
|
||||
void pabsb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x1C, 0x66, NONE, 0x38); }
|
||||
void pabsd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x1E, 0x66, NONE, 0x38); }
|
||||
void pabsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x1D, 0x66, NONE, 0x38); }
|
||||
|
@ -663,6 +712,10 @@ void rdseed(const Reg& r) { if (r.isBit(8)) throw Error(ERR_BAD_SIZE_OF_REGISTER
|
|||
void rdtsc() { db(0x0F); db(0x31); }
|
||||
void rdtscp() { db(0x0F); db(0x01); db(0xF9); }
|
||||
void rep() { db(0xF3); }
|
||||
void repe() { db(0xF3); }
|
||||
void repne() { db(0xF2); }
|
||||
void repnz() { db(0xF2); }
|
||||
void repz() { db(0xF3); }
|
||||
void ret(int imm = 0) { if (imm) { db(0xC2); dw(imm); } else { db(0xC3); } }
|
||||
void rol(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 0); }
|
||||
void rol(const Operand& op, int imm) { opShift(op, imm, 0); }
|
||||
|
@ -683,6 +736,9 @@ void sar(const Operand& op, int imm) { opShift(op, imm, 7); }
|
|||
void sarx(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, T_F3 | T_0F38, 0xf7, false); }
|
||||
void sbb(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x18, 3); }
|
||||
void sbb(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x18); }
|
||||
void scasb() { db(0xAE); }
|
||||
void scasd() { db(0xAF); }
|
||||
void scasw() { db(0x66); db(0xAF); }
|
||||
void seta(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 7); }//-V524
|
||||
void setae(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 3); }//-V524
|
||||
void setb(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 2); }//-V524
|
||||
|
@ -742,12 +798,17 @@ void stc() { db(0xF9); }
|
|||
void std() { db(0xFD); }
|
||||
void sti() { db(0xFB); }
|
||||
void stmxcsr(const Address& addr) { opModM(addr, Reg32(3), 0x0F, 0xAE); }
|
||||
void stosb() { db(0xAA); }
|
||||
void stosd() { db(0xAB); }
|
||||
void stosw() { db(0x66); db(0xAB); }
|
||||
void sub(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x28, 5); }
|
||||
void sub(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x28); }
|
||||
void subpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0x66, isXMM_XMMorMEM); }
|
||||
void subps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0x100, isXMM_XMMorMEM); }
|
||||
void subsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0xF2, isXMM_XMMorMEM); }
|
||||
void subss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0xF3, isXMM_XMMorMEM); }
|
||||
void sysenter() { db(0x0F); db(0x34); }
|
||||
void sysexit() { db(0x0F); db(0x35); }
|
||||
void tzcnt(const Reg&reg, const Operand& op) { opSp1(reg, op, 0xF3, 0x0F, 0xBC); }
|
||||
void ucomisd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x2E, 0x66, isXMM_XMMorMEM); }
|
||||
void ucomiss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x2E, 0x100, isXMM_XMMorMEM); }
|
||||
|
@ -1001,10 +1062,10 @@ void vfnmsub231pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_X
|
|||
void vfnmsub231ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0xBE); }
|
||||
void vfnmsub231sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_W1 | T_EW1 | T_EVEX | T_ER_X, 0xBF); }
|
||||
void vfnmsub231ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_W0 | T_EW0 | T_EVEX | T_ER_X, 0xBF); }
|
||||
void vgatherdpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W1, 0x92, 0); }
|
||||
void vgatherdps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W0, 0x92, 1); }
|
||||
void vgatherqpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W1, 0x93, 1); }
|
||||
void vgatherqps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W0, 0x93, 2); }
|
||||
void vgatherdpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W1, 0x92, 0); }
|
||||
void vgatherdps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x92, 1); }
|
||||
void vgatherqpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W1, 0x93, 1); }
|
||||
void vgatherqps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x93, 2); }
|
||||
void vgf2p8affineinvqb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_SAE_Z | T_B64, 0xCF, imm); }
|
||||
void vgf2p8affineqb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_SAE_Z | T_B64, 0xCE, imm); }
|
||||
void vgf2p8mulb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_SAE_Z, 0xCF); }
|
||||
|
@ -1014,7 +1075,7 @@ void vhsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand())
|
|||
void vhsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_F2 | T_0F | T_YMM, 0x7D); }
|
||||
void vinsertf128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) throw Error(ERR_BAD_COMBINATION); opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x18, imm); }
|
||||
void vinserti128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) throw Error(ERR_BAD_COMBINATION); opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x38, imm); }
|
||||
void vinsertps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_EW0 | T_EVEX, 0x21, imm); }
|
||||
void vinsertps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_W0 | T_EW0 | T_EVEX, 0x21, imm); }
|
||||
void vlddqu(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, cvtIdx0(x), addr, T_0F | T_F2 | T_W0 | T_YMM, 0xF0); }
|
||||
void vldmxcsr(const Address& addr) { opAVX_X_X_XM(xm2, xm0, addr, T_0F, 0xAE); }
|
||||
void vmaskmovdqu(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, T_0F | T_66, 0xF7); }
|
||||
|
@ -1135,10 +1196,10 @@ void vpextrb(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(8|16|
|
|||
void vpextrd(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x16, imm); }
|
||||
void vpextrq(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(64) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); opVex(x, 0, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x16, imm); }
|
||||
void vpextrw(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(16|i32e) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); if (op.isREG() && x.getIdx() < 16) { opAVX_X_X_XM(Xmm(op.getIdx()), xm0, x, T_0F | T_66, 0xC5, imm); } else { opVex(x, 0, op, T_0F3A | T_66 | T_EVEX | T_N2, 0x15, imm); } }
|
||||
void vpgatherdd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W0, 0x90, 1); }
|
||||
void vpgatherdq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W1, 0x90, 0); }
|
||||
void vpgatherqd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W0, 0x91, 2); }
|
||||
void vpgatherqq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W1, 0x91, 1); }
|
||||
void vpgatherdd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x90, 1); }
|
||||
void vpgatherdq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W1, 0x90, 0); }
|
||||
void vpgatherqd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x91, 2); }
|
||||
void vpgatherqq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W1, 0x91, 1); }
|
||||
void vphaddd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x02); }
|
||||
void vphaddsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x03); }
|
||||
void vphaddw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x01); }
|
||||
|
@ -1197,28 +1258,28 @@ void vpshuflw(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm,
|
|||
void vpsignb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x08); }
|
||||
void vpsignd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x0A); }
|
||||
void vpsignw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x09); }
|
||||
void vpslld(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x72, imm); }
|
||||
void vpslld(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); }
|
||||
void vpslld(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW0 | T_YMM | T_EVEX, 0xF2); }
|
||||
void vpslldq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 7), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x73, imm); }
|
||||
void vpsllq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0x73, imm); }
|
||||
void vpslldq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 7), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x73, imm); }
|
||||
void vpsllq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64 | T_MEM_EVEX, 0x73, imm); }
|
||||
void vpsllq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0xF3); }
|
||||
void vpsllvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x47); }
|
||||
void vpsllvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x47); }
|
||||
void vpsllw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x71, imm); }
|
||||
void vpsllw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); }
|
||||
void vpsllw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_YMM | T_EVEX, 0xF1); }
|
||||
void vpsrad(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x72, imm); }
|
||||
void vpsrad(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); }
|
||||
void vpsrad(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW0 | T_YMM | T_EVEX, 0xE2); }
|
||||
void vpsravd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x46); }
|
||||
void vpsraw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x71, imm); }
|
||||
void vpsraw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); }
|
||||
void vpsraw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_YMM | T_EVEX, 0xE1); }
|
||||
void vpsrld(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x72, imm); }
|
||||
void vpsrld(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); }
|
||||
void vpsrld(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW0 | T_YMM | T_EVEX, 0xD2); }
|
||||
void vpsrldq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 3), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x73, imm); }
|
||||
void vpsrlq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0x73, imm); }
|
||||
void vpsrldq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 3), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x73, imm); }
|
||||
void vpsrlq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64 | T_MEM_EVEX, 0x73, imm); }
|
||||
void vpsrlq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0xD3); }
|
||||
void vpsrlvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x45); }
|
||||
void vpsrlvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x45); }
|
||||
void vpsrlw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x71, imm); }
|
||||
void vpsrlw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); }
|
||||
void vpsrlw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_YMM | T_EVEX, 0xD1); }
|
||||
void vpsubb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xF8); }
|
||||
void vpsubd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0xFA); }
|
||||
|
@ -1544,8 +1605,17 @@ void jrcxz(std::string label) { opJmp(label, T_SHORT, 0xe3, 0, 0); }
|
|||
void jrcxz(const Label& label) { opJmp(label, T_SHORT, 0xe3, 0, 0); }
|
||||
void cdqe() { db(0x48); db(0x98); }
|
||||
void cqo() { db(0x48); db(0x99); }
|
||||
void cmpsq() { db(0x48); db(0xA7); }
|
||||
void popfq() { db(0x9D); }
|
||||
void pushfq() { db(0x9C); }
|
||||
void lodsq() { db(0x48); db(0xAD); }
|
||||
void movsq() { db(0x48); db(0xA5); }
|
||||
void scasq() { db(0x48); db(0xAF); }
|
||||
void stosq() { db(0x48); db(0xAB); }
|
||||
void syscall() { db(0x0F); db(0x05); }
|
||||
void sysret() { db(0x0F); db(0x07); }
|
||||
void cmpxchg16b(const Address& addr) { opModM(addr, Reg64(1), 0x0F, 0xC7); }
|
||||
void fxrstor64(const Address& addr) { opModM(addr, Reg64(1), 0x0F, 0xAE); }
|
||||
void movq(const Reg64& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x7E); }
|
||||
void movq(const Mmx& mmx, const Reg64& reg) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x6E); }
|
||||
void movsxd(const Reg64& reg, const Operand& op) { if (!op.isBit(32)) throw Error(ERR_BAD_COMBINATION); opModRM(reg, op, op.isREG(), op.isMEM(), 0x63); }
|
||||
|
@ -1568,12 +1638,15 @@ void aam() { db(0xD4); db(0x0A); }
|
|||
void aas() { db(0x3F); }
|
||||
void daa() { db(0x27); }
|
||||
void das() { db(0x2F); }
|
||||
void into() { db(0xCE); }
|
||||
void popad() { db(0x61); }
|
||||
void popfd() { db(0x9D); }
|
||||
void pusha() { db(0x60); }
|
||||
void pushad() { db(0x60); }
|
||||
void pushfd() { db(0x9C); }
|
||||
void popa() { db(0x61); }
|
||||
void lds(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0xC5, 0x100); }
|
||||
void les(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0xC4, 0x100); }
|
||||
#endif
|
||||
#ifndef XBYAK_NO_OP_NAMES
|
||||
void and(const Operand& op1, const Operand& op2) { and_(op1, op2); }
|
||||
|
@ -1664,14 +1737,16 @@ void vbroadcasti32x4(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_
|
|||
void vbroadcasti32x8(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N32, 0x5B); }
|
||||
void vbroadcasti64x2(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N16, 0x5A); }
|
||||
void vbroadcasti64x4(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N32, 0x5B); }
|
||||
void vcmppd(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); }
|
||||
void vcmpps(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_0F | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); }
|
||||
void vcmppd(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0xC2, imm); }
|
||||
void vcmpps(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_0F | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0xC2, imm); }
|
||||
void vcmpsd(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_N8 | T_F2 | T_0F | T_EW1 | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); }
|
||||
void vcmpss(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_N4 | T_F3 | T_0F | T_EW0 | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); }
|
||||
void vcompressb(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N1 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x63); }
|
||||
void vcompresspd(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x8A); }
|
||||
void vcompressps(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8A); }
|
||||
void vcompressw(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N2 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x63); }
|
||||
void vcvtne2ps2bf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); }
|
||||
void vcvtneps2bf16(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); }
|
||||
void vcvtpd2qq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x7B); }
|
||||
void vcvtpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x79); }
|
||||
void vcvtpd2uqq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x79); }
|
||||
|
@ -1697,6 +1772,7 @@ void vcvtuqq2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F2 | T_0F | T
|
|||
void vcvtusi2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F2 | T_0F | T_MUST_EVEX, T_W1 | T_EW1 | T_ER_X | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); }
|
||||
void vcvtusi2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F3 | T_0F | T_MUST_EVEX | T_ER_X, T_W1 | T_EW1 | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); }
|
||||
void vdbpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x42, imm); }
|
||||
void vdpbf16ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x52); }
|
||||
void vexp2pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, 0xC8); }
|
||||
void vexp2ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xC8); }
|
||||
void vexpandpd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x88); }
|
||||
|
@ -1713,22 +1789,22 @@ void vfixupimmpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { o
|
|||
void vfixupimmps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x54, imm); }
|
||||
void vfixupimmsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_Z | T_MUST_EVEX, 0x55, imm); }
|
||||
void vfixupimmss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_Z | T_MUST_EVEX, 0x55, imm); }
|
||||
void vfpclasspd(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) throw Error(ERR_BAD_MEM_SIZE); Reg x = k; x.setBit(op.getBit()); opVex(x, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); }
|
||||
void vfpclassps(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) throw Error(ERR_BAD_MEM_SIZE); Reg x = k; x.setBit(op.getBit()); opVex(x, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }
|
||||
void vfpclasspd(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) throw Error(ERR_BAD_MEM_SIZE); opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); }
|
||||
void vfpclassps(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) throw Error(ERR_BAD_MEM_SIZE); opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }
|
||||
void vfpclasssd(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isXMEM()) throw Error(ERR_BAD_MEM_SIZE); opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, 0x67, imm); }
|
||||
void vfpclassss(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isXMEM()) throw Error(ERR_BAD_MEM_SIZE); opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, 0x67, imm); }
|
||||
void vgatherdpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x92, 1); }
|
||||
void vgatherdps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x92, 0); }
|
||||
void vgatherpf0dpd(const Address& addr) { opGatherFetch(addr, zm1, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC6, Operand::YMM); }
|
||||
void vgatherpf0dps(const Address& addr) { opGatherFetch(addr, zm1, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC6, Operand::ZMM); }
|
||||
void vgatherpf0qpd(const Address& addr) { opGatherFetch(addr, zm1, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC7, Operand::ZMM); }
|
||||
void vgatherpf0qps(const Address& addr) { opGatherFetch(addr, zm1, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC7, Operand::ZMM); }
|
||||
void vgatherpf1dpd(const Address& addr) { opGatherFetch(addr, zm2, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC6, Operand::YMM); }
|
||||
void vgatherpf1dps(const Address& addr) { opGatherFetch(addr, zm2, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC6, Operand::ZMM); }
|
||||
void vgatherpf1qpd(const Address& addr) { opGatherFetch(addr, zm2, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC7, Operand::ZMM); }
|
||||
void vgatherpf1qps(const Address& addr) { opGatherFetch(addr, zm2, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC7, Operand::ZMM); }
|
||||
void vgatherqpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x93, 0); }
|
||||
void vgatherqps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x93, 2); }
|
||||
void vgatherdpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x92, 1); }
|
||||
void vgatherdps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_VSIB, 0x92, 0); }
|
||||
void vgatherpf0dpd(const Address& addr) { opGatherFetch(addr, zm1, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::YMM); }
|
||||
void vgatherpf0dps(const Address& addr) { opGatherFetch(addr, zm1, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::ZMM); }
|
||||
void vgatherpf0qpd(const Address& addr) { opGatherFetch(addr, zm1, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
|
||||
void vgatherpf0qps(const Address& addr) { opGatherFetch(addr, zm1, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
|
||||
void vgatherpf1dpd(const Address& addr) { opGatherFetch(addr, zm2, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::YMM); }
|
||||
void vgatherpf1dps(const Address& addr) { opGatherFetch(addr, zm2, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::ZMM); }
|
||||
void vgatherpf1qpd(const Address& addr) { opGatherFetch(addr, zm2, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
|
||||
void vgatherpf1qps(const Address& addr) { opGatherFetch(addr, zm2, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
|
||||
void vgatherqpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x93, 0); }
|
||||
void vgatherqps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_VSIB, 0x93, 2); }
|
||||
void vgetexppd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x42); }
|
||||
void vgetexpps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x42); }
|
||||
void vgetexpsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x43); }
|
||||
|
@ -1757,6 +1833,8 @@ void vmovdqu64(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F3
|
|||
void vmovdqu64(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
|
||||
void vmovdqu8(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); }
|
||||
void vmovdqu8(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
|
||||
void vp2intersectd(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) throw Error(ERR_OPMASK_IS_ALREADY_SET); opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW0 | T_B32, 0x68); }
|
||||
void vp2intersectq(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) throw Error(ERR_OPMASK_IS_ALREADY_SET); opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW1 | T_B64, 0x68); }
|
||||
void vp4dpwssd(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x52); }
|
||||
void vp4dpwssds(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x53); }
|
||||
void vpabsq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_B64 | T_YMM, 0x1F); }
|
||||
|
@ -1815,10 +1893,10 @@ void vpexpandb(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N1 | T
|
|||
void vpexpandd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x89); }
|
||||
void vpexpandq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x89); }
|
||||
void vpexpandw(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N2 | T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x62); }
|
||||
void vpgatherdd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x90, 0); }
|
||||
void vpgatherdq(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x90, 1); }
|
||||
void vpgatherqd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x91, 2); }
|
||||
void vpgatherqq(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x91, 0); }
|
||||
void vpgatherdd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_VSIB, 0x90, 0); }
|
||||
void vpgatherdq(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x90, 1); }
|
||||
void vpgatherqd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_VSIB, 0x91, 2); }
|
||||
void vpgatherqq(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x91, 0); }
|
||||
void vplzcntd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x44); }
|
||||
void vplzcntq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x44); }
|
||||
void vpmadd52huq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0xB5); }
|
||||
|
@ -1869,10 +1947,10 @@ void vprord(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.get
|
|||
void vprorq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 0), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x72, imm); }
|
||||
void vprorvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x14); }
|
||||
void vprorvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x14); }
|
||||
void vpscatterdd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0xA0, 0); }
|
||||
void vpscatterdq(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K, 0xA0, 1); }
|
||||
void vpscatterqd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0xA1, 2); }
|
||||
void vpscatterqq(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K, 0xA1, 0); }
|
||||
void vpscatterdd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA0, 0); }
|
||||
void vpscatterdq(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA0, 1); }
|
||||
void vpscatterqd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA1, 2); }
|
||||
void vpscatterqq(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA1, 0); }
|
||||
void vpshldd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x71, imm); }
|
||||
void vpshldq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x71, imm); }
|
||||
void vpshldvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x71); }
|
||||
|
@ -1936,18 +2014,18 @@ void vscalefpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x
|
|||
void vscalefps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0x2C); }
|
||||
void vscalefsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_ER_X | T_MUST_EVEX, 0x2D); }
|
||||
void vscalefss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_ER_X | T_MUST_EVEX, 0x2D); }
|
||||
void vscatterdpd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K, 0xA2, 1); }
|
||||
void vscatterdps(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0xA2, 0); }
|
||||
void vscatterpf0dpd(const Address& addr) { opGatherFetch(addr, zm5, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC6, Operand::YMM); }
|
||||
void vscatterpf0dps(const Address& addr) { opGatherFetch(addr, zm5, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC6, Operand::ZMM); }
|
||||
void vscatterpf0qpd(const Address& addr) { opGatherFetch(addr, zm5, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC7, Operand::ZMM); }
|
||||
void vscatterpf0qps(const Address& addr) { opGatherFetch(addr, zm5, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC7, Operand::ZMM); }
|
||||
void vscatterpf1dpd(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC6, Operand::YMM); }
|
||||
void vscatterpf1dps(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC6, Operand::ZMM); }
|
||||
void vscatterpf1qpd(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC7, Operand::ZMM); }
|
||||
void vscatterpf1qps(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC7, Operand::ZMM); }
|
||||
void vscatterqpd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K, 0xA3, 0); }
|
||||
void vscatterqps(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0xA3, 2); }
|
||||
void vscatterdpd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA2, 1); }
|
||||
void vscatterdps(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA2, 0); }
|
||||
void vscatterpf0dpd(const Address& addr) { opGatherFetch(addr, zm5, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::YMM); }
|
||||
void vscatterpf0dps(const Address& addr) { opGatherFetch(addr, zm5, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::ZMM); }
|
||||
void vscatterpf0qpd(const Address& addr) { opGatherFetch(addr, zm5, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
|
||||
void vscatterpf0qps(const Address& addr) { opGatherFetch(addr, zm5, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
|
||||
void vscatterpf1dpd(const Address& addr) { opGatherFetch(addr, zm6, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::YMM); }
|
||||
void vscatterpf1dps(const Address& addr) { opGatherFetch(addr, zm6, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::ZMM); }
|
||||
void vscatterpf1qpd(const Address& addr) { opGatherFetch(addr, zm6, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
|
||||
void vscatterpf1qps(const Address& addr) { opGatherFetch(addr, zm6, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
|
||||
void vscatterqpd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA3, 0); }
|
||||
void vscatterqps(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA3, 2); }
|
||||
void vshuff32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x23, imm); }
|
||||
void vshuff64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x23, imm); }
|
||||
void vshufi32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x43, imm); }
|
|
@ -1,5 +1,6 @@
|
|||
#ifndef XBYAK_XBYAK_UTIL_H_
|
||||
#define XBYAK_XBYAK_UTIL_H_
|
||||
#include <string.h>
|
||||
|
||||
/**
|
||||
utility class and functions for Xbyak
|
||||
|
@ -9,6 +10,11 @@
|
|||
*/
|
||||
#include "xbyak.h"
|
||||
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
|
||||
#define XBYAK_INTEL_CPU_SPECIFIC
|
||||
#endif
|
||||
|
||||
#ifdef XBYAK_INTEL_CPU_SPECIFIC
|
||||
#ifdef _MSC_VER
|
||||
#if (_MSC_VER < 1400) && defined(XBYAK32)
|
||||
static inline __declspec(naked) void __cpuid(int[4], int)
|
||||
|
@ -47,14 +53,44 @@
|
|||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef XBYAK_USE_VTUNE
|
||||
// -I /opt/intel/vtune_amplifier/include/ -L /opt/intel/vtune_amplifier/lib64 -ljitprofiling -ldl
|
||||
#include <jitprofiling.h>
|
||||
#ifdef _MSC_VER
|
||||
#pragma comment(lib, "libittnotify.lib")
|
||||
#endif
|
||||
#ifdef __linux__
|
||||
#include <dlfcn.h>
|
||||
#endif
|
||||
#endif
|
||||
#ifdef __linux__
|
||||
#define XBYAK_USE_PERF
|
||||
#endif
|
||||
|
||||
namespace Xbyak { namespace util {
|
||||
|
||||
typedef enum {
|
||||
SmtLevel = 1,
|
||||
CoreLevel = 2
|
||||
} IntelCpuTopologyLevel;
|
||||
|
||||
/**
|
||||
CPU detection class
|
||||
*/
|
||||
class Cpu {
|
||||
uint64 type_;
|
||||
//system topology
|
||||
bool x2APIC_supported_;
|
||||
static const size_t maxTopologyLevels = 2;
|
||||
unsigned int numCores_[maxTopologyLevels];
|
||||
|
||||
static const unsigned int maxNumberCacheLevels = 10;
|
||||
unsigned int dataCacheSize_[maxNumberCacheLevels];
|
||||
unsigned int coresSharignDataCache_[maxNumberCacheLevels];
|
||||
unsigned int dataCacheLevels_;
|
||||
|
||||
unsigned int get32bitAsBE(const char *x) const
|
||||
{
|
||||
return x[0] | (x[1] << 8) | (x[2] << 16) | (x[3] << 24);
|
||||
|
@ -65,7 +101,7 @@ class Cpu {
|
|||
}
|
||||
void setFamily()
|
||||
{
|
||||
unsigned int data[4];
|
||||
unsigned int data[4] = {};
|
||||
getCpuid(1, data);
|
||||
stepping = data[0] & mask(4);
|
||||
model = (data[0] >> 4) & mask(4);
|
||||
|
@ -88,6 +124,44 @@ class Cpu {
|
|||
{
|
||||
return (val >> base) & ((1u << (end - base)) - 1);
|
||||
}
|
||||
void setNumCores()
|
||||
{
|
||||
if ((type_ & tINTEL) == 0) return;
|
||||
|
||||
unsigned int data[4] = {};
|
||||
|
||||
/* CAUTION: These numbers are configuration as shipped by Intel. */
|
||||
getCpuidEx(0x0, 0, data);
|
||||
if (data[0] >= 0xB) {
|
||||
/*
|
||||
if leaf 11 exists(x2APIC is supported),
|
||||
we use it to get the number of smt cores and cores on socket
|
||||
|
||||
leaf 0xB can be zeroed-out by a hypervisor
|
||||
*/
|
||||
x2APIC_supported_ = true;
|
||||
for (unsigned int i = 0; i < maxTopologyLevels; i++) {
|
||||
getCpuidEx(0xB, i, data);
|
||||
IntelCpuTopologyLevel level = (IntelCpuTopologyLevel)extractBit(data[2], 8, 15);
|
||||
if (level == SmtLevel || level == CoreLevel) {
|
||||
numCores_[level - 1] = extractBit(data[1], 0, 15);
|
||||
}
|
||||
}
|
||||
/*
|
||||
Fallback values in case a hypervisor has 0xB leaf zeroed-out.
|
||||
*/
|
||||
numCores_[SmtLevel - 1] = (std::max)(1u, numCores_[SmtLevel - 1]);
|
||||
numCores_[CoreLevel - 1] = (std::max)(numCores_[SmtLevel - 1], numCores_[CoreLevel - 1]);
|
||||
} else {
|
||||
/*
|
||||
Failed to determine num of cores without x2APIC support.
|
||||
TODO: USE initial APIC ID to determine ncores.
|
||||
*/
|
||||
numCores_[SmtLevel - 1] = 0;
|
||||
numCores_[CoreLevel - 1] = 0;
|
||||
}
|
||||
|
||||
}
|
||||
void setCacheHierarchy()
|
||||
{
|
||||
if ((type_ & tINTEL) == 0) return;
|
||||
|
@ -96,21 +170,12 @@ class Cpu {
|
|||
// const unsigned int INSTRUCTION_CACHE = 2;
|
||||
const unsigned int UNIFIED_CACHE = 3;
|
||||
unsigned int smt_width = 0;
|
||||
unsigned int n_cores = 0;
|
||||
unsigned int data[4];
|
||||
unsigned int logical_cores = 0;
|
||||
unsigned int data[4] = {};
|
||||
|
||||
/*
|
||||
if leaf 11 exists, we use it to get the number of smt cores and cores on socket
|
||||
If x2APIC is supported, these are the only correct numbers.
|
||||
|
||||
leaf 0xB can be zeroed-out by a hypervisor
|
||||
*/
|
||||
getCpuidEx(0x0, 0, data);
|
||||
if (data[0] >= 0xB) {
|
||||
getCpuidEx(0xB, 0, data); // CPUID for SMT Level
|
||||
smt_width = data[1] & 0x7FFF;
|
||||
getCpuidEx(0xB, 1, data); // CPUID for CORE Level
|
||||
n_cores = data[1] & 0x7FFF;
|
||||
if (x2APIC_supported_) {
|
||||
smt_width = numCores_[0];
|
||||
logical_cores = numCores_[1];
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -118,29 +183,29 @@ class Cpu {
|
|||
the first level of data cache is not shared (which is the
|
||||
case for every existing architecture) and use this to
|
||||
determine the SMT width for arch not supporting leaf 11.
|
||||
when leaf 4 reports a number of core less than n_cores
|
||||
when leaf 4 reports a number of core less than numCores_
|
||||
on socket reported by leaf 11, then it is a correct number
|
||||
of cores not an upperbound.
|
||||
*/
|
||||
for (int i = 0; data_cache_levels < maxNumberCacheLevels; i++) {
|
||||
for (int i = 0; dataCacheLevels_ < maxNumberCacheLevels; i++) {
|
||||
getCpuidEx(0x4, i, data);
|
||||
unsigned int cacheType = extractBit(data[0], 0, 4);
|
||||
if (cacheType == NO_CACHE) break;
|
||||
if (cacheType == DATA_CACHE || cacheType == UNIFIED_CACHE) {
|
||||
unsigned int nb_logical_cores = extractBit(data[0], 14, 25) + 1;
|
||||
if (n_cores != 0) { // true only if leaf 0xB is supported and valid
|
||||
nb_logical_cores = (std::min)(nb_logical_cores, n_cores);
|
||||
unsigned int actual_logical_cores = extractBit(data[0], 14, 25) + 1;
|
||||
if (logical_cores != 0) { // true only if leaf 0xB is supported and valid
|
||||
actual_logical_cores = (std::min)(actual_logical_cores, logical_cores);
|
||||
}
|
||||
assert(nb_logical_cores != 0);
|
||||
data_cache_size[data_cache_levels] =
|
||||
assert(actual_logical_cores != 0);
|
||||
dataCacheSize_[dataCacheLevels_] =
|
||||
(extractBit(data[1], 22, 31) + 1)
|
||||
* (extractBit(data[1], 12, 21) + 1)
|
||||
* (extractBit(data[1], 0, 11) + 1)
|
||||
* (data[2] + 1);
|
||||
if (cacheType == DATA_CACHE && smt_width == 0) smt_width = nb_logical_cores;
|
||||
if (cacheType == DATA_CACHE && smt_width == 0) smt_width = actual_logical_cores;
|
||||
assert(smt_width != 0);
|
||||
cores_sharing_data_cache[data_cache_levels] = nb_logical_cores / smt_width;
|
||||
data_cache_levels++;
|
||||
coresSharignDataCache_[dataCacheLevels_] = (std::max)(actual_logical_cores / smt_width, 1u);
|
||||
dataCacheLevels_++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -154,22 +219,25 @@ public:
|
|||
int displayFamily; // family + extFamily
|
||||
int displayModel; // model + extModel
|
||||
|
||||
// may I move these members into private?
|
||||
static const unsigned int maxNumberCacheLevels = 10;
|
||||
unsigned int data_cache_size[maxNumberCacheLevels];
|
||||
unsigned int cores_sharing_data_cache[maxNumberCacheLevels];
|
||||
unsigned int data_cache_levels;
|
||||
unsigned int getNumCores(IntelCpuTopologyLevel level) {
|
||||
if (!x2APIC_supported_) throw Error(ERR_X2APIC_IS_NOT_SUPPORTED);
|
||||
switch (level) {
|
||||
case SmtLevel: return numCores_[level - 1];
|
||||
case CoreLevel: return numCores_[level - 1] / numCores_[SmtLevel - 1];
|
||||
default: throw Error(ERR_X2APIC_IS_NOT_SUPPORTED);
|
||||
}
|
||||
}
|
||||
|
||||
unsigned int getDataCacheLevels() const { return data_cache_levels; }
|
||||
unsigned int getDataCacheLevels() const { return dataCacheLevels_; }
|
||||
unsigned int getCoresSharingDataCache(unsigned int i) const
|
||||
{
|
||||
if (i >= data_cache_levels) throw Error(ERR_BAD_PARAMETER);
|
||||
return cores_sharing_data_cache[i];
|
||||
if (i >= dataCacheLevels_) throw Error(ERR_BAD_PARAMETER);
|
||||
return coresSharignDataCache_[i];
|
||||
}
|
||||
unsigned int getDataCacheSize(unsigned int i) const
|
||||
{
|
||||
if (i >= data_cache_levels) throw Error(ERR_BAD_PARAMETER);
|
||||
return data_cache_size[i];
|
||||
if (i >= dataCacheLevels_) throw Error(ERR_BAD_PARAMETER);
|
||||
return dataCacheSize_[i];
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -177,30 +245,45 @@ public:
|
|||
*/
|
||||
static inline void getCpuid(unsigned int eaxIn, unsigned int data[4])
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
#ifdef XBYAK_INTEL_CPU_SPECIFIC
|
||||
#ifdef _MSC_VER
|
||||
__cpuid(reinterpret_cast<int*>(data), eaxIn);
|
||||
#else
|
||||
#else
|
||||
__cpuid(eaxIn, data[0], data[1], data[2], data[3]);
|
||||
#endif
|
||||
#else
|
||||
(void)eaxIn;
|
||||
(void)data;
|
||||
#endif
|
||||
}
|
||||
static inline void getCpuidEx(unsigned int eaxIn, unsigned int ecxIn, unsigned int data[4])
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
#ifdef XBYAK_INTEL_CPU_SPECIFIC
|
||||
#ifdef _MSC_VER
|
||||
__cpuidex(reinterpret_cast<int*>(data), eaxIn, ecxIn);
|
||||
#else
|
||||
#else
|
||||
__cpuid_count(eaxIn, ecxIn, data[0], data[1], data[2], data[3]);
|
||||
#endif
|
||||
#else
|
||||
(void)eaxIn;
|
||||
(void)ecxIn;
|
||||
(void)data;
|
||||
#endif
|
||||
}
|
||||
static inline uint64 getXfeature()
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
#ifdef XBYAK_INTEL_CPU_SPECIFIC
|
||||
#ifdef _MSC_VER
|
||||
return _xgetbv(0);
|
||||
#else
|
||||
#else
|
||||
unsigned int eax, edx;
|
||||
// xgetbv is not supported on gcc 4.2
|
||||
// __asm__ volatile("xgetbv" : "=a"(eax), "=d"(edx) : "c"(0));
|
||||
__asm__ volatile(".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(0));
|
||||
return ((uint64)edx << 32) | eax;
|
||||
#endif
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
typedef uint64 Type;
|
||||
|
@ -268,12 +351,18 @@ public:
|
|||
static const Type tAVX512_VNNI = uint64(1) << 54;
|
||||
static const Type tAVX512_BITALG = uint64(1) << 55;
|
||||
static const Type tAVX512_VPOPCNTDQ = uint64(1) << 56;
|
||||
static const Type tAVX512_BF16 = uint64(1) << 57;
|
||||
static const Type tAVX512_VP2INTERSECT = uint64(1) << 58;
|
||||
|
||||
Cpu()
|
||||
: type_(NONE)
|
||||
, data_cache_levels(0)
|
||||
, x2APIC_supported_(false)
|
||||
, numCores_()
|
||||
, dataCacheSize_()
|
||||
, coresSharignDataCache_()
|
||||
, dataCacheLevels_(0)
|
||||
{
|
||||
unsigned int data[4];
|
||||
unsigned int data[4] = {};
|
||||
const unsigned int& EAX = data[0];
|
||||
const unsigned int& EBX = data[1];
|
||||
const unsigned int& ECX = data[2];
|
||||
|
@ -343,6 +432,12 @@ public:
|
|||
if (ECX & (1U << 14)) type_ |= tAVX512_VPOPCNTDQ;
|
||||
if (EDX & (1U << 2)) type_ |= tAVX512_4VNNIW;
|
||||
if (EDX & (1U << 3)) type_ |= tAVX512_4FMAPS;
|
||||
if (EDX & (1U << 8)) type_ |= tAVX512_VP2INTERSECT;
|
||||
}
|
||||
// EAX=07H, ECX=1
|
||||
getCpuidEx(7, 1, data);
|
||||
if (type_ & tAVX512F) {
|
||||
if (EAX & (1U << 5)) type_ |= tAVX512_BF16;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -363,6 +458,7 @@ public:
|
|||
if (ECX & (1U << 0)) type_ |= tPREFETCHWT1;
|
||||
}
|
||||
setFamily();
|
||||
setNumCores();
|
||||
setCacheHierarchy();
|
||||
}
|
||||
void putFamily() const
|
||||
|
@ -381,12 +477,17 @@ class Clock {
|
|||
public:
|
||||
static inline uint64 getRdtsc()
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
#ifdef XBYAK_INTEL_CPU_SPECIFIC
|
||||
#ifdef _MSC_VER
|
||||
return __rdtsc();
|
||||
#else
|
||||
#else
|
||||
unsigned int eax, edx;
|
||||
__asm__ volatile("rdtsc" : "=a"(eax), "=d"(edx));
|
||||
return ((uint64)edx << 32) | eax;
|
||||
#endif
|
||||
#else
|
||||
// TODO: Need another impl of Clock or rdtsc-equivalent for non-x86 cpu
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
Clock()
|
||||
|
@ -416,7 +517,7 @@ const int UseRCX = 1 << 6;
|
|||
const int UseRDX = 1 << 7;
|
||||
|
||||
class Pack {
|
||||
static const size_t maxTblNum = 10;
|
||||
static const size_t maxTblNum = 15;
|
||||
const Xbyak::Reg64 *tbl_[maxTblNum];
|
||||
size_t n_;
|
||||
public:
|
||||
|
@ -476,7 +577,7 @@ public:
|
|||
const Xbyak::Reg64& operator[](size_t n) const
|
||||
{
|
||||
if (n >= n_) {
|
||||
fprintf(stderr, "ERR Pack bad n=%d\n", (int)n);
|
||||
fprintf(stderr, "ERR Pack bad n=%d(%d)\n", (int)n, (int)n_);
|
||||
throw Error(ERR_BAD_PARAMETER);
|
||||
}
|
||||
return *tbl_[n];
|
||||
|
@ -518,6 +619,7 @@ class StackFrame {
|
|||
static const int rcxPos = 3;
|
||||
static const int rdxPos = 2;
|
||||
#endif
|
||||
static const int maxRegNum = 14; // maxRegNum = 16 - rsp - rax
|
||||
Xbyak::CodeGenerator *code_;
|
||||
int pNum_;
|
||||
int tNum_;
|
||||
|
@ -527,7 +629,7 @@ class StackFrame {
|
|||
int P_;
|
||||
bool makeEpilog_;
|
||||
Xbyak::Reg64 pTbl_[4];
|
||||
Xbyak::Reg64 tTbl_[10];
|
||||
Xbyak::Reg64 tTbl_[maxRegNum];
|
||||
Pack p_;
|
||||
Pack t_;
|
||||
StackFrame(const StackFrame&);
|
||||
|
@ -539,7 +641,7 @@ public:
|
|||
make stack frame
|
||||
@param sf [in] this
|
||||
@param pNum [in] num of function parameter(0 <= pNum <= 4)
|
||||
@param tNum [in] num of temporary register(0 <= tNum <= 10, with UseRCX, UseRDX)
|
||||
@param tNum [in] num of temporary register(0 <= tNum, with UseRCX, UseRDX) #{pNum + tNum [+rcx] + [rdx]} <= 14
|
||||
@param stackSizeByte [in] local stack size
|
||||
@param makeEpilog [in] automatically call close() if true
|
||||
|
||||
|
@ -566,27 +668,17 @@ public:
|
|||
using namespace Xbyak;
|
||||
if (pNum < 0 || pNum > 4) throw Error(ERR_BAD_PNUM);
|
||||
const int allRegNum = pNum + tNum_ + (useRcx_ ? 1 : 0) + (useRdx_ ? 1 : 0);
|
||||
if (allRegNum < pNum || allRegNum > 14) throw Error(ERR_BAD_TNUM);
|
||||
if (tNum_ < 0 || allRegNum > maxRegNum) throw Error(ERR_BAD_TNUM);
|
||||
const Reg64& _rsp = code->rsp;
|
||||
const AddressFrame& _ptr = code->ptr;
|
||||
saveNum_ = (std::max)(0, allRegNum - noSaveNum);
|
||||
const int *tbl = getOrderTbl() + noSaveNum;
|
||||
P_ = saveNum_ + (stackSizeByte + 7) / 8;
|
||||
if (P_ > 0 && (P_ & 1) == 0) P_++; // here (rsp % 16) == 8, then increment P_ for 16 byte alignment
|
||||
for (int i = 0; i < saveNum_; i++) {
|
||||
code->push(Reg64(tbl[i]));
|
||||
}
|
||||
P_ = (stackSizeByte + 7) / 8;
|
||||
if (P_ > 0 && (P_ & 1) == (saveNum_ & 1)) P_++; // (rsp % 16) == 8, then increment P_ for 16 byte alignment
|
||||
P_ *= 8;
|
||||
if (P_ > 0) code->sub(_rsp, P_);
|
||||
#ifdef XBYAK64_WIN
|
||||
for (int i = 0; i < (std::min)(saveNum_, 4); i++) {
|
||||
code->mov(_ptr [_rsp + P_ + (i + 1) * 8], Reg64(tbl[i]));
|
||||
}
|
||||
for (int i = 4; i < saveNum_; i++) {
|
||||
code->mov(_ptr [_rsp + P_ - 8 * (saveNum_ - i)], Reg64(tbl[i]));
|
||||
}
|
||||
#else
|
||||
for (int i = 0; i < saveNum_; i++) {
|
||||
code->mov(_ptr [_rsp + P_ - 8 * (saveNum_ - i)], Reg64(tbl[i]));
|
||||
}
|
||||
#endif
|
||||
int pos = 0;
|
||||
for (int i = 0; i < pNum; i++) {
|
||||
pTbl_[i] = Xbyak::Reg64(getRegIdx(pos));
|
||||
|
@ -607,36 +699,18 @@ public:
|
|||
{
|
||||
using namespace Xbyak;
|
||||
const Reg64& _rsp = code_->rsp;
|
||||
const AddressFrame& _ptr = code_->ptr;
|
||||
const int *tbl = getOrderTbl() + noSaveNum;
|
||||
#ifdef XBYAK64_WIN
|
||||
for (int i = 0; i < (std::min)(saveNum_, 4); i++) {
|
||||
code_->mov(Reg64(tbl[i]), _ptr [_rsp + P_ + (i + 1) * 8]);
|
||||
}
|
||||
for (int i = 4; i < saveNum_; i++) {
|
||||
code_->mov(Reg64(tbl[i]), _ptr [_rsp + P_ - 8 * (saveNum_ - i)]);
|
||||
}
|
||||
#else
|
||||
for (int i = 0; i < saveNum_; i++) {
|
||||
code_->mov(Reg64(tbl[i]), _ptr [_rsp + P_ - 8 * (saveNum_ - i)]);
|
||||
}
|
||||
#endif
|
||||
if (P_ > 0) code_->add(_rsp, P_);
|
||||
for (int i = 0; i < saveNum_; i++) {
|
||||
code_->pop(Reg64(tbl[saveNum_ - 1 - i]));
|
||||
}
|
||||
|
||||
if (callRet) code_->ret();
|
||||
}
|
||||
~StackFrame()
|
||||
{
|
||||
if (!makeEpilog_) return;
|
||||
try {
|
||||
close();
|
||||
} catch (std::exception& e) {
|
||||
printf("ERR:StackFrame %s\n", e.what());
|
||||
exit(1);
|
||||
} catch (...) {
|
||||
printf("ERR:StackFrame otherwise\n");
|
||||
exit(1);
|
||||
}
|
||||
close();
|
||||
}
|
||||
private:
|
||||
const int *getOrderTbl() const
|
||||
|
@ -654,7 +728,7 @@ private:
|
|||
}
|
||||
int getRegIdx(int& pos) const
|
||||
{
|
||||
assert(pos < 14);
|
||||
assert(pos < maxRegNum);
|
||||
using namespace Xbyak;
|
||||
const int *tbl = getOrderTbl();
|
||||
int r = tbl[pos++];
|
||||
|
@ -671,5 +745,135 @@ private:
|
|||
};
|
||||
#endif
|
||||
|
||||
class Profiler {
|
||||
int mode_;
|
||||
const char *suffix_;
|
||||
const void *startAddr_;
|
||||
#ifdef XBYAK_USE_PERF
|
||||
FILE *fp_;
|
||||
#endif
|
||||
public:
|
||||
enum {
|
||||
None = 0,
|
||||
Perf = 1,
|
||||
VTune = 2
|
||||
};
|
||||
Profiler()
|
||||
: mode_(None)
|
||||
, suffix_("")
|
||||
, startAddr_(0)
|
||||
#ifdef XBYAK_USE_PERF
|
||||
, fp_(0)
|
||||
#endif
|
||||
{
|
||||
}
|
||||
// append suffix to funcName
|
||||
void setNameSuffix(const char *suffix)
|
||||
{
|
||||
suffix_ = suffix;
|
||||
}
|
||||
void setStartAddr(const void *startAddr)
|
||||
{
|
||||
startAddr_ = startAddr;
|
||||
}
|
||||
void init(int mode)
|
||||
{
|
||||
mode_ = None;
|
||||
switch (mode) {
|
||||
default:
|
||||
case None:
|
||||
return;
|
||||
case Perf:
|
||||
#ifdef XBYAK_USE_PERF
|
||||
close();
|
||||
{
|
||||
const int pid = getpid();
|
||||
char name[128];
|
||||
snprintf(name, sizeof(name), "/tmp/perf-%d.map", pid);
|
||||
fp_ = fopen(name, "a+");
|
||||
if (fp_ == 0) {
|
||||
fprintf(stderr, "can't open %s\n", name);
|
||||
return;
|
||||
}
|
||||
}
|
||||
mode_ = Perf;
|
||||
#endif
|
||||
return;
|
||||
case VTune:
|
||||
#ifdef XBYAK_USE_VTUNE
|
||||
dlopen("dummy", RTLD_LAZY); // force to load dlopen to enable jit profiling
|
||||
if (iJIT_IsProfilingActive() != iJIT_SAMPLING_ON) {
|
||||
fprintf(stderr, "VTune profiling is not active\n");
|
||||
return;
|
||||
}
|
||||
mode_ = VTune;
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
}
|
||||
~Profiler()
|
||||
{
|
||||
close();
|
||||
}
|
||||
void close()
|
||||
{
|
||||
#ifdef XBYAK_USE_PERF
|
||||
if (fp_ == 0) return;
|
||||
fclose(fp_);
|
||||
fp_ = 0;
|
||||
#endif
|
||||
}
|
||||
void set(const char *funcName, const void *startAddr, size_t funcSize) const
|
||||
{
|
||||
if (mode_ == None) return;
|
||||
#if !defined(XBYAK_USE_PERF) && !defined(XBYAK_USE_VTUNE)
|
||||
(void)funcName;
|
||||
(void)startAddr;
|
||||
(void)funcSize;
|
||||
#endif
|
||||
#ifdef XBYAK_USE_PERF
|
||||
if (mode_ == Perf) {
|
||||
if (fp_ == 0) return;
|
||||
fprintf(fp_, "%llx %zx %s%s", (long long)startAddr, funcSize, funcName, suffix_);
|
||||
/*
|
||||
perf does not recognize a function name shorter than 3 characters,
|
||||
so append '_' at the end of the name if necessary
|
||||
*/
|
||||
size_t n = strlen(funcName) + strlen(suffix_);
|
||||
for (size_t i = n; i < 3; i++) {
|
||||
fprintf(fp_, "_");
|
||||
}
|
||||
fprintf(fp_, "\n");
|
||||
fflush(fp_);
|
||||
}
|
||||
#endif
|
||||
#ifdef XBYAK_USE_VTUNE
|
||||
if (mode_ != VTune) return;
|
||||
char className[] = "";
|
||||
char fileName[] = "";
|
||||
iJIT_Method_Load jmethod = {};
|
||||
jmethod.method_id = iJIT_GetNewMethodID();
|
||||
jmethod.class_file_name = className;
|
||||
jmethod.source_file_name = fileName;
|
||||
jmethod.method_load_address = const_cast<void*>(startAddr);
|
||||
jmethod.method_size = funcSize;
|
||||
jmethod.line_number_size = 0;
|
||||
char buf[128];
|
||||
snprintf(buf, sizeof(buf), "%s%s", funcName, suffix_);
|
||||
jmethod.method_name = buf;
|
||||
iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, (void*)&jmethod);
|
||||
#endif
|
||||
}
|
||||
/*
|
||||
for continuous set
|
||||
funcSize = endAddr - <previous set endAddr>
|
||||
*/
|
||||
void set(const char *funcName, const void *endAddr)
|
||||
{
|
||||
set(funcName, startAddr_, (size_t)endAddr - (size_t)startAddr_);
|
||||
startAddr_ = endAddr;
|
||||
}
|
||||
};
|
||||
|
||||
} } // end of util
|
||||
#endif
|
|
@ -21,8 +21,7 @@
|
|||
|
||||
#if HOST_CPU == CPU_X64 && FEAT_DSPREC != DYNAREC_NONE
|
||||
|
||||
#include "deps/xbyak/xbyak.h"
|
||||
#include "deps/xbyak/xbyak_util.h"
|
||||
#include <xbyak/xbyak.h>
|
||||
#include "dsp.h"
|
||||
#include "hw/aica/aica_if.h"
|
||||
#include "hw/mem/_vmem.h"
|
||||
|
|
|
@ -7,8 +7,8 @@
|
|||
//#define PROFILING
|
||||
//#define CANONICAL_TEST
|
||||
|
||||
#include "deps/xbyak/xbyak.h"
|
||||
#include "deps/xbyak/xbyak_util.h"
|
||||
#include <xbyak/xbyak.h>
|
||||
#include <xbyak/xbyak_util.h>
|
||||
|
||||
#include "types.h"
|
||||
#include "hw/sh4/sh4_opcode_list.h"
|
||||
|
|
|
@ -22,7 +22,7 @@
|
|||
|
||||
//#define OLD_REGALLOC
|
||||
|
||||
#include "deps/xbyak/xbyak.h"
|
||||
#include <xbyak/xbyak.h>
|
||||
#ifdef OLD_REGALLOC
|
||||
#include "hw/sh4/dyna/regalloc.h"
|
||||
#else
|
||||
|
|
|
@ -3036,7 +3036,6 @@
|
|||
TARGET_NO_NIXPROF,
|
||||
TARGET_NO_COREIO_HTTP,
|
||||
TARGET_NO_AREC,
|
||||
XBYAK_NO_OP_NAMES,
|
||||
TARGET_NO_OPENMP,
|
||||
ENABLE_MODEM,
|
||||
CHD5_LZMA,
|
||||
|
@ -3090,7 +3089,6 @@
|
|||
GCC_PREPROCESSOR_DEFINITIONS = (
|
||||
TARGET_NO_NIXPROF,
|
||||
TARGET_NO_COREIO_HTTP,
|
||||
XBYAK_NO_OP_NAMES,
|
||||
TARGET_NO_OPENMP,
|
||||
ENABLE_MODEM,
|
||||
CHD5_LZMA,
|
||||
|
@ -3150,6 +3148,7 @@
|
|||
../../../core/deps/zlib,
|
||||
../../../core/deps/glslang,
|
||||
../../../core/deps/glm,
|
||||
../../../core/deps/xbyak,
|
||||
/usr/local/include,
|
||||
);
|
||||
INFOPLIST_FILE = "emulator-osx/Info.plist";
|
||||
|
@ -3198,6 +3197,7 @@
|
|||
../../../core/deps/zlib,
|
||||
../../../core/deps/glslang,
|
||||
../../../core/deps/glm,
|
||||
../../../core/deps/xbyak,
|
||||
/usr/local/include,
|
||||
);
|
||||
INFOPLIST_FILE = "emulator-osx/Info.plist";
|
||||
|
@ -3283,7 +3283,6 @@
|
|||
GCC_PREPROCESSOR_DEFINITIONS = (
|
||||
TARGET_NO_NIXPROF,
|
||||
TARGET_NO_COREIO_HTTP,
|
||||
XBYAK_NO_OP_NAMES,
|
||||
TARGET_NO_OPENMP,
|
||||
ENABLE_MODEM,
|
||||
CHD5_LZMA,
|
||||
|
@ -3344,6 +3343,7 @@
|
|||
../../../core/deps/zlib,
|
||||
../../../core/deps/glslang,
|
||||
../../../core/deps/glm,
|
||||
../../../core/deps/xbyak,
|
||||
/usr/local/include,
|
||||
);
|
||||
INFOPLIST_FILE = "emulator-osx/Info.plist";
|
||||
|
@ -3410,7 +3410,6 @@
|
|||
GCC_PREPROCESSOR_DEFINITIONS = (
|
||||
TARGET_NO_NIXPROF,
|
||||
TARGET_NO_COREIO_HTTP,
|
||||
XBYAK_NO_OP_NAMES,
|
||||
TARGET_NO_OPENMP,
|
||||
ENABLE_MODEM,
|
||||
CHD5_LZMA,
|
||||
|
@ -3465,6 +3464,7 @@
|
|||
../../../core/deps/zlib,
|
||||
../../../core/deps/glslang,
|
||||
../../../core/deps/glm,
|
||||
../../../core/deps/xbyak,
|
||||
/usr/local/include,
|
||||
);
|
||||
INFOPLIST_FILE = "emulator-osx/Info.plist";
|
||||
|
|
Loading…
Reference in New Issue