A lot of progress on AltiVec instructions.

Some results are still wrong, and some instructions are not decoding correctly.
This commit is contained in:
Ben Vanik 2013-09-29 21:05:48 -07:00
parent 3662118246
commit 26ec76ef35
8 changed files with 655 additions and 331 deletions

View File

@ -111,6 +111,49 @@ void _cdecl XeTraceBranch(
(uint32_t)cia, (uint32_t)target_ia);
}
void _cdecl XeTraceVR(
xe_ppc_state_t* state, uint64_t vr0, uint64_t vr1, uint64_t vr2,
uint64_t vr3, uint64_t vr4) {
char buffer[2048];
buffer[0] = 0;
int offset = 0;
offset += xesnprintfa(buffer + offset, XECOUNT(buffer) - offset,
"%.8X:", state->cia);
offset += xesnprintfa(buffer + offset, XECOUNT(buffer) - offset,
"\nvr%.3d=[%.8X, %.8X, %.8X, %.8X] [%g, %g, %g, %g]", vr0,
state->v[vr0].ix, state->v[vr0].iy, state->v[vr0].iz, state->v[vr0].iw,
state->v[vr0].x, state->v[vr0].y, state->v[vr0].z, state->v[vr0].w);
if (vr1 != UINT_MAX) {
offset += xesnprintfa(buffer + offset, XECOUNT(buffer) - offset,
"\nvr%.3d=[%.8X, %.8X, %.8X, %.8X] [%g, %g, %g, %g]", vr1,
state->v[vr1].ix, state->v[vr1].iy, state->v[vr1].iz, state->v[vr1].iw,
state->v[vr1].x, state->v[vr1].y, state->v[vr1].z, state->v[vr1].w);
}
if (vr2 != UINT_MAX) {
offset += xesnprintfa(buffer + offset, XECOUNT(buffer) - offset,
"\nvr%.3d=[%.8X, %.8X, %.8X, %.8X] [%g, %g, %g, %g]", vr2,
state->v[vr2].ix, state->v[vr2].iy, state->v[vr2].iz, state->v[vr2].iw,
state->v[vr2].x, state->v[vr2].y, state->v[vr2].z, state->v[vr2].w);
}
if (vr3 != UINT_MAX) {
offset += xesnprintfa(buffer + offset, XECOUNT(buffer) - offset,
"\nvr%.3d=[%.8X, %.8X, %.8X, %.8X] [%g, %g, %g, %g]", vr3,
state->v[vr3].ix, state->v[vr3].iy, state->v[vr3].iz, state->v[vr3].iw,
state->v[vr3].x, state->v[vr3].y, state->v[vr3].z, state->v[vr3].w);
}
if (vr4 != UINT_MAX) {
offset += xesnprintfa(buffer + offset, XECOUNT(buffer) - offset,
"\nvr%.3d=[%.8X, %.8X, %.8X, %.8X] [%g, %g, %g, %g]", vr4,
state->v[vr4].ix, state->v[vr4].iy, state->v[vr4].iz, state->v[vr4].iw,
state->v[vr4].x, state->v[vr4].y, state->v[vr4].z, state->v[vr4].w);
}
uint32_t thread_id = state->thread_state->thread_id();
xe_log_line("", thread_id, "XeTraceVR", 't', buffer);
}
void _cdecl XeTraceInstruction(
xe_ppc_state_t* state, uint64_t cia, uint64_t data) {
char buffer[2048];
@ -184,5 +227,6 @@ void xe::cpu::GetGlobalExports(GlobalExports* global_exports) {
global_exports->XeTraceKernelCall = XeTraceKernelCall;
global_exports->XeTraceUserCall = XeTraceUserCall;
global_exports->XeTraceBranch = XeTraceBranch;
global_exports->XeTraceVR = XeTraceVR;
global_exports->XeTraceInstruction = XeTraceInstruction;
}

View File

@ -39,6 +39,9 @@ typedef struct {
sdb::FunctionSymbol* fn);
void (_cdecl *XeTraceBranch)(
xe_ppc_state_t* state, uint64_t cia, uint64_t target_ia);
// Trace hook for vector registers: receives the PPC state plus up to five
// register indices (vr0..vr4) to dump.
void (_cdecl *XeTraceVR)(
xe_ppc_state_t* state, uint64_t vr0, uint64_t vr1, uint64_t vr2,
uint64_t vr3, uint64_t vr4);
void (_cdecl *XeTraceInstruction)(
xe_ppc_state_t* state, uint64_t cia, uint64_t data);
} GlobalExports;

View File

@ -48,7 +48,14 @@ typedef struct XECACHEALIGN xe_float4 {
float z;
float w;
};
struct {
uint32_t ix;
uint32_t iy;
uint32_t iz;
uint32_t iw;
};
float f4[4];
uint32_t i4[4];
struct {
uint64_t low;
uint64_t high;

File diff suppressed because it is too large Load Diff

View File

@ -74,6 +74,11 @@ X64Emitter::X64Emitter(xe_memory_ref memory) :
assembler_.setLogger(logger_);
compiler_.setLogger(logger_);
}
// Grab CPU feature mask so we can quickly check it in emitter code.
const CpuInfo* cpu = CpuInfo::getGlobal();
const X86CpuInfo* x86Cpu = static_cast<const X86CpuInfo*>(cpu);
cpu_feature_mask_ = cpu->getFeatures();
}
X64Emitter::~X64Emitter() {
@ -906,6 +911,47 @@ void X64Emitter::TraceBranch(uint32_t cia) {
}
}
void X64Emitter::TraceVR(uint32_t vr0, uint32_t vr1, uint32_t vr2,
uint32_t vr3, uint32_t vr4) {
X86Compiler& c = compiler_;
for (int n = 0; n < 5; n++) {
c.nop();
}
if (FLAGS_annotate_disassembly) {
c.comment("XeTraceVR (+spill)");
}
SpillRegisters();
// TODO(benvanik): remove once fixed: https://code.google.com/p/asmjit/issues/detail?id=86
GpVar arg1 = c.newGpVar(kX86VarTypeGpq);
c.mov(arg1, imm(vr0));
GpVar arg2 = c.newGpVar(kX86VarTypeGpq);
c.mov(arg2, imm(vr1));
GpVar arg3 = c.newGpVar(kX86VarTypeGpq);
c.mov(arg3, imm(vr2));
GpVar arg4 = c.newGpVar(kX86VarTypeGpq);
c.mov(arg4, imm(vr3));
GpVar arg5 = c.newGpVar(kX86VarTypeGpq);
c.mov(arg5, imm(vr4));
X86CompilerFuncCall* call = c.call(global_exports_.XeTraceVR);
call->setPrototype(kX86FuncConvDefault,
FuncBuilder6<void, void*,
uint64_t, uint64_t, uint64_t, uint64_t, uint64_t>());
call->setArgument(0, c.getGpArg(0));
call->setArgument(1, arg1);
call->setArgument(2, arg2);
call->setArgument(3, arg3);
call->setArgument(4, arg4);
call->setArgument(5, arg5);
for (int n = 0; n < 2; n++) {
c.nop();
}
}
int X64Emitter::GenerateIndirectionBranch(uint32_t cia, GpVar& target,
bool lk, bool likely_local) {
X86Compiler& c = compiler_;
@ -1221,9 +1267,9 @@ void X64Emitter::FillRegisters() {
if (FLAGS_annotate_disassembly) {
c.comment("Filling vr%d", n);
}
c.movq(locals_.vr[n],
xmmword_ptr(c.getGpArg(0),
offsetof(xe_ppc_state_t, v) + 16 * n));
c.movaps(locals_.vr[n],
xmmword_ptr(c.getGpArg(0),
offsetof(xe_ppc_state_t, v) + 16 * n));
}
}
}
@ -1324,9 +1370,9 @@ void X64Emitter::SpillRegisters() {
if (FLAGS_annotate_disassembly) {
c.comment("Spilling vr%d", n);
}
c.movq(xmmword_ptr(c.getGpArg(0),
offsetof(xe_ppc_state_t, v) + 16 * n),
v);
c.movaps(xmmword_ptr(c.getGpArg(0),
offsetof(xe_ppc_state_t, v) + 16 * n),
v);
}
}
}
@ -1669,8 +1715,8 @@ XmmVar X64Emitter::vr_value(uint32_t n) {
return locals_.vr[n];
} else {
XmmVar value(c.newXmmVar());
c.movq(value,
xmmword_ptr(c.getGpArg(0), offsetof(xe_ppc_state_t, v) + 16 * n));
c.movaps(value,
xmmword_ptr(c.getGpArg(0), offsetof(xe_ppc_state_t, v) + 16 * n));
return value;
}
}
@ -1680,10 +1726,10 @@ void X64Emitter::update_vr_value(uint32_t n, XmmVar& value) {
XEASSERT(n >= 0 && n < 128);
if (FLAGS_cache_registers) {
XEASSERT(locals_.vr[n].getId() != kInvalidValue);
c.movq(locals_.vr[n], value);
c.movaps(locals_.vr[n], value);
} else {
c.movq(xmmword_ptr(c.getGpArg(0), offsetof(xe_ppc_state_t, v) + 16 * n),
value);
c.movaps(xmmword_ptr(c.getGpArg(0), offsetof(xe_ppc_state_t, v) + 16 * n),
value);
}
}
@ -1793,7 +1839,7 @@ XmmVar X64Emitter::ReadMemoryXmm(
GpVar real_address = TouchMemoryAddress(cia, addr);
XmmVar value(c.newXmmVar());
c.movq(value, xmmword_ptr(real_address));
c.movaps(value, xmmword_ptr(real_address));
// Byte swap.
// http://www.asmcommunity.net/forums/topic/?id=29743
@ -1874,7 +1920,7 @@ void X64Emitter::WriteMemoryXmm(
c.psllw(value, imm(8));
c.por(value, temp);
c.movq(xmmword_ptr(real_address), value);
c.movaps(xmmword_ptr(real_address), value);
}
GpVar X64Emitter::get_uint64(uint64_t value) {

View File

@ -36,6 +36,8 @@ public:
void Lock();
void Unlock();
// Returns the cached CPU feature bits stored in cpu_feature_mask_.
uint32_t cpu_feature_mask() const { return cpu_feature_mask_; }
int PrepareFunction(sdb::FunctionSymbol* symbol);
int MakeFunction(sdb::FunctionSymbol* symbol);
@ -53,6 +55,8 @@ public:
void TraceInstruction(ppc::InstrData& i);
void TraceInvalidInstruction(ppc::InstrData& i);
void TraceBranch(uint32_t cia);
// Emits a trace call that dumps up to five vector registers. vr0 is required;
// any of vr1..vr4 left at the UINT_MAX default is treated as unused.
void TraceVR(uint32_t vr0, uint32_t vr1 = UINT_MAX, uint32_t vr2 = UINT_MAX,
uint32_t vr3 = UINT_MAX, uint32_t vr4 = UINT_MAX);
int GenerateIndirectionBranch(uint32_t cia, AsmJit::GpVar& target,
bool lk, bool likely_local);
@ -128,6 +132,7 @@ private:
xe_memory_ref memory_;
GlobalExports global_exports_;
xe_mutex_t* lock_;
// Host CPU feature bits, exposed through cpu_feature_mask().
uint32_t cpu_feature_mask_;
void* gpu_this_;
void* gpu_read_;

View File

@ -100,8 +100,29 @@ static const BitDescription x86Features[] = {
// Checks that the host CPU offers the instruction set extensions the JIT
// relies on.
//
// Returns 1 (after logging an error and dumping CPU info) when SSE3 is not
// available, otherwise 0. Missing SSE4.1 is not fatal: it only produces a
// warning and a CPU info dump.
int X64JIT::CheckProcessor() {
  const CpuInfo* cpu = CpuInfo::getGlobal();
  const uint32_t mask = cpu->getFeatures();

  // TODO(benvanik): ensure features we want are supported.
  // TODO(benvanik): check for SSE modes we use.

  // SSE3 is a hard requirement.
  if (!(mask & kX86FeatureSse3)) {
    XELOGE("CPU does not support SSE3+ instructions!");
    DumpCPUInfo();
    return 1;
  }

  // SSE4.1 is optional; without it we continue, but warn.
  if (!(mask & kX86FeatureSse41)) {
    XELOGW("CPU does not support SSE4.1+ instructions, performance degraded!");
    DumpCPUInfo();
  }

  return 0;
}
void X64JIT::DumpCPUInfo() {
const CpuInfo* cpu = CpuInfo::getGlobal();
const X86CpuInfo* x86Cpu = static_cast<const X86CpuInfo*>(cpu);
const uint32_t mask = cpu->getFeatures();
#if 0
XELOGCPU("Processor Info:");
XELOGCPU(" Vendor string : %s", cpu->getVendorString());
XELOGCPU(" Brand string : %s", cpu->getBrandString());
@ -117,17 +138,11 @@ int X64JIT::CheckProcessor() {
XELOGCPU(" Max logical Processors: %u", x86Cpu->getMaxLogicalProcessors());
XELOGCPU(" APIC Physical ID : %u", x86Cpu->getApicPhysicalId());
XELOGCPU(" Features:");
uint32_t mask = cpu->getFeatures();
for (const BitDescription* d = x86Features; d->mask; d++) {
if (mask & d->mask) {
XELOGCPU(" %s", d->description);
}
}
#endif
// TODO(benvanik): ensure features we want are supported.
return 0;
}
int X64JIT::InitModule(ExecModule* module) {

View File

@ -41,6 +41,7 @@ public:
protected:
int CheckProcessor();
// Logs details about the host processor. CheckProcessor() calls this when a
// wanted CPU feature is missing.
void DumpCPUInfo();
X64Emitter* emitter_;
};