A lot of progress on AltiVec instructions.
Some results are still wrong, and some instructions are not decoding correctly.
This commit is contained in:
parent
3662118246
commit
26ec76ef35
|
@ -111,6 +111,49 @@ void _cdecl XeTraceBranch(
|
|||
(uint32_t)cia, (uint32_t)target_ia);
|
||||
}
|
||||
|
||||
// Trace hook that logs the current instruction address followed by the
// contents of up to five vector registers. Each register is printed twice:
// once as four 32-bit hex lanes (ix/iy/iz/iw) and once as four floats
// (x/y/z/w).
//
// state: PPC state whose `cia` and `v[]` registers are read.
// vr0:   index of the first register; always logged.
// vr1..vr4: optional additional register indices. A value equal to UINT_MAX
//           marks the slot as unused and it is skipped.
void _cdecl XeTraceVR(
    xe_ppc_state_t* state, uint64_t vr0, uint64_t vr1, uint64_t vr2,
    uint64_t vr3, uint64_t vr4) {
  char buffer[2048];
  buffer[0] = 0;
  int offset = 0;

  // The explicit cast keeps the variadic argument width matched to "%X"
  // whatever the declared width of cia is (XeTraceBranch casts its addresses
  // the same way).
  offset += xesnprintfa(buffer + offset, XECOUNT(buffer) - offset,
                        "%.8X:", (uint32_t)state->cia);

  const uint64_t vrs[] = { vr0, vr1, vr2, vr3, vr4 };
  for (size_t n = 0; n < XECOUNT(vrs); n++) {
    const uint64_t vr = vrs[n];
    // Only the optional slots use the UINT_MAX sentinel; vr0 is always
    // printed.
    if (n != 0 && vr == UINT_MAX) {
      continue;
    }
    // The index arrives as a 64-bit value but is consumed by "%d", so it is
    // narrowed explicitly; passing a uint64_t to "%d" through varargs is a
    // type mismatch. The float lanes are promoted to double by the call.
    offset += xesnprintfa(buffer + offset, XECOUNT(buffer) - offset,
        "\nvr%.3d=[%.8X, %.8X, %.8X, %.8X] [%g, %g, %g, %g]", (int)vr,
        state->v[vr].ix, state->v[vr].iy, state->v[vr].iz, state->v[vr].iw,
        state->v[vr].x, state->v[vr].y, state->v[vr].z, state->v[vr].w);
  }

  uint32_t thread_id = state->thread_state->thread_id();
  xe_log_line("", thread_id, "XeTraceVR", 't', buffer);
}
|
||||
|
||||
void _cdecl XeTraceInstruction(
|
||||
xe_ppc_state_t* state, uint64_t cia, uint64_t data) {
|
||||
char buffer[2048];
|
||||
|
@ -184,5 +227,6 @@ void xe::cpu::GetGlobalExports(GlobalExports* global_exports) {
|
|||
global_exports->XeTraceKernelCall = XeTraceKernelCall;
|
||||
global_exports->XeTraceUserCall = XeTraceUserCall;
|
||||
global_exports->XeTraceBranch = XeTraceBranch;
|
||||
global_exports->XeTraceVR = XeTraceVR;
|
||||
global_exports->XeTraceInstruction = XeTraceInstruction;
|
||||
}
|
||||
|
|
|
@ -39,6 +39,9 @@ typedef struct {
|
|||
sdb::FunctionSymbol* fn);
|
||||
void (_cdecl *XeTraceBranch)(
|
||||
xe_ppc_state_t* state, uint64_t cia, uint64_t target_ia);
|
||||
void (_cdecl *XeTraceVR)(
|
||||
xe_ppc_state_t* state, uint64_t vr0, uint64_t vr1, uint64_t vr2,
|
||||
uint64_t vr3, uint64_t vr4);
|
||||
void (_cdecl *XeTraceInstruction)(
|
||||
xe_ppc_state_t* state, uint64_t cia, uint64_t data);
|
||||
} GlobalExports;
|
||||
|
|
|
@ -48,7 +48,14 @@ typedef struct XECACHEALIGN xe_float4 {
|
|||
float z;
|
||||
float w;
|
||||
};
|
||||
struct {
|
||||
uint32_t ix;
|
||||
uint32_t iy;
|
||||
uint32_t iz;
|
||||
uint32_t iw;
|
||||
};
|
||||
float f4[4];
|
||||
uint32_t i4[4];
|
||||
struct {
|
||||
uint64_t low;
|
||||
uint64_t high;
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -74,6 +74,11 @@ X64Emitter::X64Emitter(xe_memory_ref memory) :
|
|||
assembler_.setLogger(logger_);
|
||||
compiler_.setLogger(logger_);
|
||||
}
|
||||
|
||||
// Grab CPU feature mask so we can quickly check it in emitter code.
|
||||
const CpuInfo* cpu = CpuInfo::getGlobal();
|
||||
const X86CpuInfo* x86Cpu = static_cast<const X86CpuInfo*>(cpu);
|
||||
cpu_feature_mask_ = cpu->getFeatures();
|
||||
}
|
||||
|
||||
X64Emitter::~X64Emitter() {
|
||||
|
@ -906,6 +911,47 @@ void X64Emitter::TraceBranch(uint32_t cia) {
|
|||
}
|
||||
}
|
||||
|
||||
void X64Emitter::TraceVR(uint32_t vr0, uint32_t vr1, uint32_t vr2,
|
||||
uint32_t vr3, uint32_t vr4) {
|
||||
X86Compiler& c = compiler_;
|
||||
|
||||
for (int n = 0; n < 5; n++) {
|
||||
c.nop();
|
||||
}
|
||||
|
||||
if (FLAGS_annotate_disassembly) {
|
||||
c.comment("XeTraceVR (+spill)");
|
||||
}
|
||||
|
||||
SpillRegisters();
|
||||
|
||||
// TODO(benvanik): remove once fixed: https://code.google.com/p/asmjit/issues/detail?id=86
|
||||
GpVar arg1 = c.newGpVar(kX86VarTypeGpq);
|
||||
c.mov(arg1, imm(vr0));
|
||||
GpVar arg2 = c.newGpVar(kX86VarTypeGpq);
|
||||
c.mov(arg2, imm(vr1));
|
||||
GpVar arg3 = c.newGpVar(kX86VarTypeGpq);
|
||||
c.mov(arg3, imm(vr2));
|
||||
GpVar arg4 = c.newGpVar(kX86VarTypeGpq);
|
||||
c.mov(arg4, imm(vr3));
|
||||
GpVar arg5 = c.newGpVar(kX86VarTypeGpq);
|
||||
c.mov(arg5, imm(vr4));
|
||||
X86CompilerFuncCall* call = c.call(global_exports_.XeTraceVR);
|
||||
call->setPrototype(kX86FuncConvDefault,
|
||||
FuncBuilder6<void, void*,
|
||||
uint64_t, uint64_t, uint64_t, uint64_t, uint64_t>());
|
||||
call->setArgument(0, c.getGpArg(0));
|
||||
call->setArgument(1, arg1);
|
||||
call->setArgument(2, arg2);
|
||||
call->setArgument(3, arg3);
|
||||
call->setArgument(4, arg4);
|
||||
call->setArgument(5, arg5);
|
||||
|
||||
for (int n = 0; n < 2; n++) {
|
||||
c.nop();
|
||||
}
|
||||
}
|
||||
|
||||
int X64Emitter::GenerateIndirectionBranch(uint32_t cia, GpVar& target,
|
||||
bool lk, bool likely_local) {
|
||||
X86Compiler& c = compiler_;
|
||||
|
@ -1221,9 +1267,9 @@ void X64Emitter::FillRegisters() {
|
|||
if (FLAGS_annotate_disassembly) {
|
||||
c.comment("Filling vr%d", n);
|
||||
}
|
||||
c.movq(locals_.vr[n],
|
||||
xmmword_ptr(c.getGpArg(0),
|
||||
offsetof(xe_ppc_state_t, v) + 16 * n));
|
||||
c.movaps(locals_.vr[n],
|
||||
xmmword_ptr(c.getGpArg(0),
|
||||
offsetof(xe_ppc_state_t, v) + 16 * n));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1324,9 +1370,9 @@ void X64Emitter::SpillRegisters() {
|
|||
if (FLAGS_annotate_disassembly) {
|
||||
c.comment("Spilling vr%d", n);
|
||||
}
|
||||
c.movq(xmmword_ptr(c.getGpArg(0),
|
||||
offsetof(xe_ppc_state_t, v) + 16 * n),
|
||||
v);
|
||||
c.movaps(xmmword_ptr(c.getGpArg(0),
|
||||
offsetof(xe_ppc_state_t, v) + 16 * n),
|
||||
v);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1669,8 +1715,8 @@ XmmVar X64Emitter::vr_value(uint32_t n) {
|
|||
return locals_.vr[n];
|
||||
} else {
|
||||
XmmVar value(c.newXmmVar());
|
||||
c.movq(value,
|
||||
xmmword_ptr(c.getGpArg(0), offsetof(xe_ppc_state_t, v) + 16 * n));
|
||||
c.movaps(value,
|
||||
xmmword_ptr(c.getGpArg(0), offsetof(xe_ppc_state_t, v) + 16 * n));
|
||||
return value;
|
||||
}
|
||||
}
|
||||
|
@ -1680,10 +1726,10 @@ void X64Emitter::update_vr_value(uint32_t n, XmmVar& value) {
|
|||
XEASSERT(n >= 0 && n < 128);
|
||||
if (FLAGS_cache_registers) {
|
||||
XEASSERT(locals_.vr[n].getId() != kInvalidValue);
|
||||
c.movq(locals_.vr[n], value);
|
||||
c.movaps(locals_.vr[n], value);
|
||||
} else {
|
||||
c.movq(xmmword_ptr(c.getGpArg(0), offsetof(xe_ppc_state_t, v) + 16 * n),
|
||||
value);
|
||||
c.movaps(xmmword_ptr(c.getGpArg(0), offsetof(xe_ppc_state_t, v) + 16 * n),
|
||||
value);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1793,7 +1839,7 @@ XmmVar X64Emitter::ReadMemoryXmm(
|
|||
GpVar real_address = TouchMemoryAddress(cia, addr);
|
||||
|
||||
XmmVar value(c.newXmmVar());
|
||||
c.movq(value, xmmword_ptr(real_address));
|
||||
c.movaps(value, xmmword_ptr(real_address));
|
||||
|
||||
// Byte swap.
|
||||
// http://www.asmcommunity.net/forums/topic/?id=29743
|
||||
|
@ -1874,7 +1920,7 @@ void X64Emitter::WriteMemoryXmm(
|
|||
c.psllw(value, imm(8));
|
||||
c.por(value, temp);
|
||||
|
||||
c.movq(xmmword_ptr(real_address), value);
|
||||
c.movaps(xmmword_ptr(real_address), value);
|
||||
}
|
||||
|
||||
GpVar X64Emitter::get_uint64(uint64_t value) {
|
||||
|
|
|
@ -36,6 +36,8 @@ public:
|
|||
void Lock();
|
||||
void Unlock();
|
||||
|
||||
uint32_t cpu_feature_mask() const { return cpu_feature_mask_; }
|
||||
|
||||
int PrepareFunction(sdb::FunctionSymbol* symbol);
|
||||
int MakeFunction(sdb::FunctionSymbol* symbol);
|
||||
|
||||
|
@ -53,6 +55,8 @@ public:
|
|||
void TraceInstruction(ppc::InstrData& i);
|
||||
void TraceInvalidInstruction(ppc::InstrData& i);
|
||||
void TraceBranch(uint32_t cia);
|
||||
void TraceVR(uint32_t vr0, uint32_t vr1 = UINT_MAX, uint32_t vr2 = UINT_MAX,
|
||||
uint32_t vr3 = UINT_MAX, uint32_t vr4 = UINT_MAX);
|
||||
|
||||
int GenerateIndirectionBranch(uint32_t cia, AsmJit::GpVar& target,
|
||||
bool lk, bool likely_local);
|
||||
|
@ -128,6 +132,7 @@ private:
|
|||
xe_memory_ref memory_;
|
||||
GlobalExports global_exports_;
|
||||
xe_mutex_t* lock_;
|
||||
uint32_t cpu_feature_mask_;
|
||||
|
||||
void* gpu_this_;
|
||||
void* gpu_read_;
|
||||
|
|
|
@ -100,8 +100,29 @@ static const BitDescription x86Features[] = {
|
|||
int X64JIT::CheckProcessor() {
|
||||
const CpuInfo* cpu = CpuInfo::getGlobal();
|
||||
const X86CpuInfo* x86Cpu = static_cast<const X86CpuInfo*>(cpu);
|
||||
const uint32_t mask = cpu->getFeatures();
|
||||
|
||||
// TODO(benvanik): ensure features we want are supported.
|
||||
|
||||
// TODO(benvanik): check for SSE modes we use.
|
||||
if (!(mask & kX86FeatureSse3)) {
|
||||
XELOGE("CPU does not support SSE3+ instructions!");
|
||||
DumpCPUInfo();
|
||||
return 1;
|
||||
}
|
||||
if (!(mask & kX86FeatureSse41)) {
|
||||
XELOGW("CPU does not support SSE4.1+ instructions, performance degraded!");
|
||||
DumpCPUInfo();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void X64JIT::DumpCPUInfo() {
|
||||
const CpuInfo* cpu = CpuInfo::getGlobal();
|
||||
const X86CpuInfo* x86Cpu = static_cast<const X86CpuInfo*>(cpu);
|
||||
const uint32_t mask = cpu->getFeatures();
|
||||
|
||||
#if 0
|
||||
XELOGCPU("Processor Info:");
|
||||
XELOGCPU(" Vendor string : %s", cpu->getVendorString());
|
||||
XELOGCPU(" Brand string : %s", cpu->getBrandString());
|
||||
|
@ -117,17 +138,11 @@ int X64JIT::CheckProcessor() {
|
|||
XELOGCPU(" Max logical Processors: %u", x86Cpu->getMaxLogicalProcessors());
|
||||
XELOGCPU(" APIC Physical ID : %u", x86Cpu->getApicPhysicalId());
|
||||
XELOGCPU(" Features:");
|
||||
uint32_t mask = cpu->getFeatures();
|
||||
for (const BitDescription* d = x86Features; d->mask; d++) {
|
||||
if (mask & d->mask) {
|
||||
XELOGCPU(" %s", d->description);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// TODO(benvanik): ensure features we want are supported.
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int X64JIT::InitModule(ExecModule* module) {
|
||||
|
|
|
@ -41,6 +41,7 @@ public:
|
|||
|
||||
protected:
|
||||
int CheckProcessor();
|
||||
void DumpCPUInfo();
|
||||
|
||||
X64Emitter* emitter_;
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue