A lot of progress on AltiVec instructions.
Some results are still wrong, and some instructions are not decoding correctly.
This commit is contained in:
parent
3662118246
commit
26ec76ef35
|
@ -111,6 +111,49 @@ void _cdecl XeTraceBranch(
|
||||||
(uint32_t)cia, (uint32_t)target_ia);
|
(uint32_t)cia, (uint32_t)target_ia);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Trace callback that logs the contents of up to five vector registers.
//
// The message starts with state->cia printed as 8 hex digits, followed by one
// line per register showing its four 32-bit lanes as hex (ix..iw) and as
// floats (x..w). The finished text is passed to xe_log_line together with the
// thread id read from state->thread_state.
//
// state:    PPC state whose v[] entries are dumped.
// vr0..vr4: indices into state->v. UINT_MAX marks an unused slot and is
//           skipped; any other index that does not fit state->v is skipped as
//           well rather than read out of bounds.
void _cdecl XeTraceVR(
    xe_ppc_state_t* state, uint64_t vr0, uint64_t vr1, uint64_t vr2,
    uint64_t vr3, uint64_t vr4) {
  char buffer[2048];
  buffer[0] = 0;
  int offset = 0;

  // Cast so the argument matches %X regardless of how wide cia is declared.
  offset += xesnprintfa(buffer + offset, XECOUNT(buffer) - offset,
      "%.8X:", static_cast<uint32_t>(state->cia));

  const uint64_t vrs[] = { vr0, vr1, vr2, vr3, vr4 };
  for (size_t n = 0; n < XECOUNT(vrs); n++) {
    const uint64_t vr = vrs[n];
    // NOTE(review): XECOUNT(state->v) assumes v is an inline array member
    // (the emitter addresses it as offsetof(xe_ppc_state_t, v) + 16 * n).
    if (vr == UINT_MAX || vr >= XECOUNT(state->v)) {
      continue;
    }
    // %d consumes an int, so the 64-bit index is narrowed explicitly; it is
    // known to be a small in-range value at this point.
    offset += xesnprintfa(buffer + offset, XECOUNT(buffer) - offset,
        "\nvr%.3d=[%.8X, %.8X, %.8X, %.8X] [%g, %g, %g, %g]",
        static_cast<int>(vr),
        state->v[vr].ix, state->v[vr].iy, state->v[vr].iz, state->v[vr].iw,
        state->v[vr].x, state->v[vr].y, state->v[vr].z, state->v[vr].w);
  }

  uint32_t thread_id = state->thread_state->thread_id();
  xe_log_line("", thread_id, "XeTraceVR", 't', buffer);
}
|
||||||
|
|
||||||
void _cdecl XeTraceInstruction(
|
void _cdecl XeTraceInstruction(
|
||||||
xe_ppc_state_t* state, uint64_t cia, uint64_t data) {
|
xe_ppc_state_t* state, uint64_t cia, uint64_t data) {
|
||||||
char buffer[2048];
|
char buffer[2048];
|
||||||
|
@ -184,5 +227,6 @@ void xe::cpu::GetGlobalExports(GlobalExports* global_exports) {
|
||||||
global_exports->XeTraceKernelCall = XeTraceKernelCall;
|
global_exports->XeTraceKernelCall = XeTraceKernelCall;
|
||||||
global_exports->XeTraceUserCall = XeTraceUserCall;
|
global_exports->XeTraceUserCall = XeTraceUserCall;
|
||||||
global_exports->XeTraceBranch = XeTraceBranch;
|
global_exports->XeTraceBranch = XeTraceBranch;
|
||||||
|
global_exports->XeTraceVR = XeTraceVR;
|
||||||
global_exports->XeTraceInstruction = XeTraceInstruction;
|
global_exports->XeTraceInstruction = XeTraceInstruction;
|
||||||
}
|
}
|
||||||
|
|
|
@ -39,6 +39,9 @@ typedef struct {
|
||||||
sdb::FunctionSymbol* fn);
|
sdb::FunctionSymbol* fn);
|
||||||
void (_cdecl *XeTraceBranch)(
|
void (_cdecl *XeTraceBranch)(
|
||||||
xe_ppc_state_t* state, uint64_t cia, uint64_t target_ia);
|
xe_ppc_state_t* state, uint64_t cia, uint64_t target_ia);
|
||||||
|
void (_cdecl *XeTraceVR)(
|
||||||
|
xe_ppc_state_t* state, uint64_t vr0, uint64_t vr1, uint64_t vr2,
|
||||||
|
uint64_t vr3, uint64_t vr4);
|
||||||
void (_cdecl *XeTraceInstruction)(
|
void (_cdecl *XeTraceInstruction)(
|
||||||
xe_ppc_state_t* state, uint64_t cia, uint64_t data);
|
xe_ppc_state_t* state, uint64_t cia, uint64_t data);
|
||||||
} GlobalExports;
|
} GlobalExports;
|
||||||
|
|
|
@ -48,7 +48,14 @@ typedef struct XECACHEALIGN xe_float4 {
|
||||||
float z;
|
float z;
|
||||||
float w;
|
float w;
|
||||||
};
|
};
|
||||||
|
struct {
|
||||||
|
uint32_t ix;
|
||||||
|
uint32_t iy;
|
||||||
|
uint32_t iz;
|
||||||
|
uint32_t iw;
|
||||||
|
};
|
||||||
float f4[4];
|
float f4[4];
|
||||||
|
uint32_t i4[4];
|
||||||
struct {
|
struct {
|
||||||
uint64_t low;
|
uint64_t low;
|
||||||
uint64_t high;
|
uint64_t high;
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -74,6 +74,11 @@ X64Emitter::X64Emitter(xe_memory_ref memory) :
|
||||||
assembler_.setLogger(logger_);
|
assembler_.setLogger(logger_);
|
||||||
compiler_.setLogger(logger_);
|
compiler_.setLogger(logger_);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Grab CPU feature mask so we can quickly check it in emitter code.
|
||||||
|
const CpuInfo* cpu = CpuInfo::getGlobal();
|
||||||
|
const X86CpuInfo* x86Cpu = static_cast<const X86CpuInfo*>(cpu);
|
||||||
|
cpu_feature_mask_ = cpu->getFeatures();
|
||||||
}
|
}
|
||||||
|
|
||||||
X64Emitter::~X64Emitter() {
|
X64Emitter::~X64Emitter() {
|
||||||
|
@ -906,6 +911,47 @@ void X64Emitter::TraceBranch(uint32_t cia) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void X64Emitter::TraceVR(uint32_t vr0, uint32_t vr1, uint32_t vr2,
|
||||||
|
uint32_t vr3, uint32_t vr4) {
|
||||||
|
X86Compiler& c = compiler_;
|
||||||
|
|
||||||
|
for (int n = 0; n < 5; n++) {
|
||||||
|
c.nop();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (FLAGS_annotate_disassembly) {
|
||||||
|
c.comment("XeTraceVR (+spill)");
|
||||||
|
}
|
||||||
|
|
||||||
|
SpillRegisters();
|
||||||
|
|
||||||
|
// TODO(benvanik): remove once fixed: https://code.google.com/p/asmjit/issues/detail?id=86
|
||||||
|
GpVar arg1 = c.newGpVar(kX86VarTypeGpq);
|
||||||
|
c.mov(arg1, imm(vr0));
|
||||||
|
GpVar arg2 = c.newGpVar(kX86VarTypeGpq);
|
||||||
|
c.mov(arg2, imm(vr1));
|
||||||
|
GpVar arg3 = c.newGpVar(kX86VarTypeGpq);
|
||||||
|
c.mov(arg3, imm(vr2));
|
||||||
|
GpVar arg4 = c.newGpVar(kX86VarTypeGpq);
|
||||||
|
c.mov(arg4, imm(vr3));
|
||||||
|
GpVar arg5 = c.newGpVar(kX86VarTypeGpq);
|
||||||
|
c.mov(arg5, imm(vr4));
|
||||||
|
X86CompilerFuncCall* call = c.call(global_exports_.XeTraceVR);
|
||||||
|
call->setPrototype(kX86FuncConvDefault,
|
||||||
|
FuncBuilder6<void, void*,
|
||||||
|
uint64_t, uint64_t, uint64_t, uint64_t, uint64_t>());
|
||||||
|
call->setArgument(0, c.getGpArg(0));
|
||||||
|
call->setArgument(1, arg1);
|
||||||
|
call->setArgument(2, arg2);
|
||||||
|
call->setArgument(3, arg3);
|
||||||
|
call->setArgument(4, arg4);
|
||||||
|
call->setArgument(5, arg5);
|
||||||
|
|
||||||
|
for (int n = 0; n < 2; n++) {
|
||||||
|
c.nop();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
int X64Emitter::GenerateIndirectionBranch(uint32_t cia, GpVar& target,
|
int X64Emitter::GenerateIndirectionBranch(uint32_t cia, GpVar& target,
|
||||||
bool lk, bool likely_local) {
|
bool lk, bool likely_local) {
|
||||||
X86Compiler& c = compiler_;
|
X86Compiler& c = compiler_;
|
||||||
|
@ -1221,9 +1267,9 @@ void X64Emitter::FillRegisters() {
|
||||||
if (FLAGS_annotate_disassembly) {
|
if (FLAGS_annotate_disassembly) {
|
||||||
c.comment("Filling vr%d", n);
|
c.comment("Filling vr%d", n);
|
||||||
}
|
}
|
||||||
c.movq(locals_.vr[n],
|
c.movaps(locals_.vr[n],
|
||||||
xmmword_ptr(c.getGpArg(0),
|
xmmword_ptr(c.getGpArg(0),
|
||||||
offsetof(xe_ppc_state_t, v) + 16 * n));
|
offsetof(xe_ppc_state_t, v) + 16 * n));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1324,9 +1370,9 @@ void X64Emitter::SpillRegisters() {
|
||||||
if (FLAGS_annotate_disassembly) {
|
if (FLAGS_annotate_disassembly) {
|
||||||
c.comment("Spilling vr%d", n);
|
c.comment("Spilling vr%d", n);
|
||||||
}
|
}
|
||||||
c.movq(xmmword_ptr(c.getGpArg(0),
|
c.movaps(xmmword_ptr(c.getGpArg(0),
|
||||||
offsetof(xe_ppc_state_t, v) + 16 * n),
|
offsetof(xe_ppc_state_t, v) + 16 * n),
|
||||||
v);
|
v);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1669,8 +1715,8 @@ XmmVar X64Emitter::vr_value(uint32_t n) {
|
||||||
return locals_.vr[n];
|
return locals_.vr[n];
|
||||||
} else {
|
} else {
|
||||||
XmmVar value(c.newXmmVar());
|
XmmVar value(c.newXmmVar());
|
||||||
c.movq(value,
|
c.movaps(value,
|
||||||
xmmword_ptr(c.getGpArg(0), offsetof(xe_ppc_state_t, v) + 16 * n));
|
xmmword_ptr(c.getGpArg(0), offsetof(xe_ppc_state_t, v) + 16 * n));
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1680,10 +1726,10 @@ void X64Emitter::update_vr_value(uint32_t n, XmmVar& value) {
|
||||||
XEASSERT(n >= 0 && n < 128);
|
XEASSERT(n >= 0 && n < 128);
|
||||||
if (FLAGS_cache_registers) {
|
if (FLAGS_cache_registers) {
|
||||||
XEASSERT(locals_.vr[n].getId() != kInvalidValue);
|
XEASSERT(locals_.vr[n].getId() != kInvalidValue);
|
||||||
c.movq(locals_.vr[n], value);
|
c.movaps(locals_.vr[n], value);
|
||||||
} else {
|
} else {
|
||||||
c.movq(xmmword_ptr(c.getGpArg(0), offsetof(xe_ppc_state_t, v) + 16 * n),
|
c.movaps(xmmword_ptr(c.getGpArg(0), offsetof(xe_ppc_state_t, v) + 16 * n),
|
||||||
value);
|
value);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1793,7 +1839,7 @@ XmmVar X64Emitter::ReadMemoryXmm(
|
||||||
GpVar real_address = TouchMemoryAddress(cia, addr);
|
GpVar real_address = TouchMemoryAddress(cia, addr);
|
||||||
|
|
||||||
XmmVar value(c.newXmmVar());
|
XmmVar value(c.newXmmVar());
|
||||||
c.movq(value, xmmword_ptr(real_address));
|
c.movaps(value, xmmword_ptr(real_address));
|
||||||
|
|
||||||
// Byte swap.
|
// Byte swap.
|
||||||
// http://www.asmcommunity.net/forums/topic/?id=29743
|
// http://www.asmcommunity.net/forums/topic/?id=29743
|
||||||
|
@ -1874,7 +1920,7 @@ void X64Emitter::WriteMemoryXmm(
|
||||||
c.psllw(value, imm(8));
|
c.psllw(value, imm(8));
|
||||||
c.por(value, temp);
|
c.por(value, temp);
|
||||||
|
|
||||||
c.movq(xmmword_ptr(real_address), value);
|
c.movaps(xmmword_ptr(real_address), value);
|
||||||
}
|
}
|
||||||
|
|
||||||
GpVar X64Emitter::get_uint64(uint64_t value) {
|
GpVar X64Emitter::get_uint64(uint64_t value) {
|
||||||
|
|
|
@ -36,6 +36,8 @@ public:
|
||||||
void Lock();
|
void Lock();
|
||||||
void Unlock();
|
void Unlock();
|
||||||
|
|
||||||
|
uint32_t cpu_feature_mask() const { return cpu_feature_mask_; }
|
||||||
|
|
||||||
int PrepareFunction(sdb::FunctionSymbol* symbol);
|
int PrepareFunction(sdb::FunctionSymbol* symbol);
|
||||||
int MakeFunction(sdb::FunctionSymbol* symbol);
|
int MakeFunction(sdb::FunctionSymbol* symbol);
|
||||||
|
|
||||||
|
@ -53,6 +55,8 @@ public:
|
||||||
void TraceInstruction(ppc::InstrData& i);
|
void TraceInstruction(ppc::InstrData& i);
|
||||||
void TraceInvalidInstruction(ppc::InstrData& i);
|
void TraceInvalidInstruction(ppc::InstrData& i);
|
||||||
void TraceBranch(uint32_t cia);
|
void TraceBranch(uint32_t cia);
|
||||||
|
void TraceVR(uint32_t vr0, uint32_t vr1 = UINT_MAX, uint32_t vr2 = UINT_MAX,
|
||||||
|
uint32_t vr3 = UINT_MAX, uint32_t vr4 = UINT_MAX);
|
||||||
|
|
||||||
int GenerateIndirectionBranch(uint32_t cia, AsmJit::GpVar& target,
|
int GenerateIndirectionBranch(uint32_t cia, AsmJit::GpVar& target,
|
||||||
bool lk, bool likely_local);
|
bool lk, bool likely_local);
|
||||||
|
@ -128,6 +132,7 @@ private:
|
||||||
xe_memory_ref memory_;
|
xe_memory_ref memory_;
|
||||||
GlobalExports global_exports_;
|
GlobalExports global_exports_;
|
||||||
xe_mutex_t* lock_;
|
xe_mutex_t* lock_;
|
||||||
|
uint32_t cpu_feature_mask_;
|
||||||
|
|
||||||
void* gpu_this_;
|
void* gpu_this_;
|
||||||
void* gpu_read_;
|
void* gpu_read_;
|
||||||
|
|
|
@ -100,8 +100,29 @@ static const BitDescription x86Features[] = {
|
||||||
int X64JIT::CheckProcessor() {
|
int X64JIT::CheckProcessor() {
|
||||||
const CpuInfo* cpu = CpuInfo::getGlobal();
|
const CpuInfo* cpu = CpuInfo::getGlobal();
|
||||||
const X86CpuInfo* x86Cpu = static_cast<const X86CpuInfo*>(cpu);
|
const X86CpuInfo* x86Cpu = static_cast<const X86CpuInfo*>(cpu);
|
||||||
|
const uint32_t mask = cpu->getFeatures();
|
||||||
|
|
||||||
|
// TODO(benvanik): ensure features we want are supported.
|
||||||
|
|
||||||
|
// TODO(benvanik): check for SSE modes we use.
|
||||||
|
if (!(mask & kX86FeatureSse3)) {
|
||||||
|
XELOGE("CPU does not support SSE3+ instructions!");
|
||||||
|
DumpCPUInfo();
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
if (!(mask & kX86FeatureSse41)) {
|
||||||
|
XELOGW("CPU does not support SSE4.1+ instructions, performance degraded!");
|
||||||
|
DumpCPUInfo();
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void X64JIT::DumpCPUInfo() {
|
||||||
|
const CpuInfo* cpu = CpuInfo::getGlobal();
|
||||||
|
const X86CpuInfo* x86Cpu = static_cast<const X86CpuInfo*>(cpu);
|
||||||
|
const uint32_t mask = cpu->getFeatures();
|
||||||
|
|
||||||
#if 0
|
|
||||||
XELOGCPU("Processor Info:");
|
XELOGCPU("Processor Info:");
|
||||||
XELOGCPU(" Vendor string : %s", cpu->getVendorString());
|
XELOGCPU(" Vendor string : %s", cpu->getVendorString());
|
||||||
XELOGCPU(" Brand string : %s", cpu->getBrandString());
|
XELOGCPU(" Brand string : %s", cpu->getBrandString());
|
||||||
|
@ -117,17 +138,11 @@ int X64JIT::CheckProcessor() {
|
||||||
XELOGCPU(" Max logical Processors: %u", x86Cpu->getMaxLogicalProcessors());
|
XELOGCPU(" Max logical Processors: %u", x86Cpu->getMaxLogicalProcessors());
|
||||||
XELOGCPU(" APIC Physical ID : %u", x86Cpu->getApicPhysicalId());
|
XELOGCPU(" APIC Physical ID : %u", x86Cpu->getApicPhysicalId());
|
||||||
XELOGCPU(" Features:");
|
XELOGCPU(" Features:");
|
||||||
uint32_t mask = cpu->getFeatures();
|
|
||||||
for (const BitDescription* d = x86Features; d->mask; d++) {
|
for (const BitDescription* d = x86Features; d->mask; d++) {
|
||||||
if (mask & d->mask) {
|
if (mask & d->mask) {
|
||||||
XELOGCPU(" %s", d->description);
|
XELOGCPU(" %s", d->description);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
// TODO(benvanik): ensure features we want are supported.
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int X64JIT::InitModule(ExecModule* module) {
|
int X64JIT::InitModule(ExecModule* module) {
|
||||||
|
|
|
@ -41,6 +41,7 @@ public:
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
int CheckProcessor();
|
int CheckProcessor();
|
||||||
|
void DumpCPUInfo();
|
||||||
|
|
||||||
X64Emitter* emitter_;
|
X64Emitter* emitter_;
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in New Issue