windows build fixes

fixed OP_LOAD_LOCAL / OP_STORE_LOCAL on windows
use r14 / r15 for memory and guest ctx pointers
updated SH4 builder to emit consistent IR regardless of function parameter evaluation order
This commit is contained in:
Anthony Pesch 2016-03-26 14:34:26 -07:00
parent 80dc179ccf
commit f72f05a437
23 changed files with 679 additions and 516 deletions

View File

@ -6,6 +6,49 @@
#include <stdint.h>
#include <stdlib.h>
//
// cross-platform Berkeley sockets shim
//
#if PLATFORM_WINDOWS
#include <winsock2.h>
#include <ws2tcpip.h>
typedef int socklen_t;
typedef u_long ioctlarg_t;
#define sockerrno WSAGetLastError()
#define SHUT_RD SD_RECEIVE
#define SHUT_WR SD_SEND
#define SHUT_RDWR SD_BOTH
#else
#include <sys/socket.h>
#include <errno.h>
#include <netdb.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/time.h>
#include <unistd.h>
typedef int SOCKET;
typedef int ioctlarg_t;
#define INVALID_SOCKET -1
#define SOCKET_ERROR -1
#define closesocket close
#define ioctlsocket ioctl
#define sockerrno errno
#endif
//
// target machine interface
//
@ -111,8 +154,8 @@ typedef struct {
typedef struct {
gdb_target_t target;
int listen;
int client;
SOCKET listen;
SOCKET client;
gdb_connection_t conn;
} gdb_server_t;
@ -140,56 +183,21 @@ void gdb_server_destroy(gdb_server_t *sv);
#define GDB_SERVER_MALLOC malloc
#endif
#ifndef GDB_SERVER_ALLOCA
#if PLATFORM_WINDOWS
#include <malloc.h>
#define GDB_SERVER_ALLOCA _alloca
#else
#define GDB_SERVER_ALLOCA alloca
#endif
#endif
#ifndef GDB_SERVER_FREE
#define GDB_SERVER_FREE free
#endif
#define GDB_SERVER_UNUSED(x) ((void)x)
//
// cross-platform Berkeley sockets shim
//
#if PLATFORM_WINDOWS
#include <winsock2.h>
#include <ws2tcpip.h>
typedef int socklen_t;
typedef u_long ioctlarg_t;
#define EAGAIN WSAEWOULDBLOCK
#define EADDRNOTAVAIL WSAEADDRNOTAVAIL
#define EAFNOSUPPORT WSAEAFNOSUPPORT
#define ECONNRESET WSAECONNRESET
#define sockerrno WSAGetLastError()
#else
#include <sys/socket.h>
#include <errno.h>
#include <netdb.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/time.h>
#include <unistd.h>
typedef int SOCKET;
typedef int ioctlarg_t;
#define INVALID_SOCKET -1
#define SOCKET_ERROR -1
#define closesocket close
#define ioctlsocket ioctl
#define sockerrno errno
#endif
//
// gdb server implementation
//
@ -356,7 +364,7 @@ static int gdb_server_create_listen(gdb_server_t *sv, int port) {
// enable reusing of the address / port
int on = 1;
if (setsockopt(sv->listen, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) ==
if (setsockopt(sv->listen, SOL_SOCKET, SO_REUSEADDR, reinterpret_cast<char *>(&on), sizeof(on)) ==
SOCKET_ERROR) {
GDB_SERVER_LOG("Failed to set socket options for gdb socket");
ret = -1;
@ -430,7 +438,7 @@ static void gdb_server_accept_client(gdb_server_t *sv) {
t.tv_sec = 0;
t.tv_usec = 0;
if (select(sv->listen + 1, &fd_read, NULL, NULL, &t) == SOCKET_ERROR) {
if (select(static_cast<int>(sv->listen + 1), &fd_read, NULL, NULL, &t) == SOCKET_ERROR) {
return;
}
@ -478,7 +486,7 @@ static int gdb_server_data_available(gdb_server_t *sv) {
t.tv_sec = 0;
t.tv_usec = 0;
if (select(sv->client + 1, &fd_read, NULL, NULL, &t) == SOCKET_ERROR) {
if (select(static_cast<int>(sv->client + 1), &fd_read, NULL, NULL, &t) == SOCKET_ERROR) {
return -1;
}
@ -765,7 +773,7 @@ static int gdb_server_handle_m(gdb_server_t *sv, const char *data) {
data = parse_hex(&data[1], &length);
// read bytes from the target
uint8_t *memory = (uint8_t *)alloca(length);
uint8_t *memory = (uint8_t *)GDB_SERVER_ALLOCA(length);
memset(memory, 0, length);
sv->target.read_mem(sv->target.ctx, addr, memory, length);

View File

@ -9,9 +9,7 @@ namespace re {
template <typename T>
class array {
public:
array(int size = 8) : data_(nullptr), size_(0), capacity_(0) {
Resize(size);
}
array(int size = 8) : data_(nullptr), size_(0), capacity_(0) { Resize(size); }
~array() { free(data_); }
array(array const &) = delete;

View File

@ -11,7 +11,7 @@
#define ANSI_COLOR_RESET "\x1b[0m"
void Log(LogLevel level, const char *format, ...) {
static char buffer[10240];
static char buffer[0x20000];
va_list args;
va_start(args, format);

View File

@ -3,6 +3,10 @@
namespace re {
#if PLATFORM_WINDOWS
#define alloca _alloca
#endif
template <typename T>
T load(const void *ptr) {
return *reinterpret_cast<const T *>(ptr);

View File

@ -9,7 +9,7 @@
namespace re {
#ifndef HAVE_STRCASECMP
#define strcasecmp stricmp
#define strcasecmp _stricmp
#endif
#ifndef HAVE_STRNSTR

View File

@ -110,7 +110,8 @@ void Tracer::OnPaint(bool show_main_menu) {
RenderContextMenu();
// clamp surfaces to the last surface belonging to the current param
int last_idx = rctx_.surfs.size() - 1;
int n = static_cast<int>(rctx_.surfs.size());
int last_idx = n - 1;
if (current_offset_ != INVALID_OFFSET) {
const auto &param_entry = rctx_.param_map[current_offset_];
@ -118,9 +119,10 @@ void Tracer::OnPaint(bool show_main_menu) {
}
// render the context
rb_.BeginSurfaces(rctx_.projection, rctx_.verts.data(), rctx_.verts.size());
rb_.BeginSurfaces(rctx_.projection, rctx_.verts.data(),
static_cast<int>(rctx_.verts.size()));
for (int i = 0, n = rctx_.surfs.size(); i < n; i++) {
for (int i = 0; i < n; i++) {
int idx = rctx_.sorted_surfs[i];
// if this surface comes after the current parameter, ignore it

View File

@ -8,6 +8,9 @@ using namespace re::hw;
using namespace re::hw::aica;
using namespace re::hw::holly;
template <>
uint32_t AICA::ReadWave(uint32_t addr);
AICA::AICA(Dreamcast &dc)
: Device(dc),
MemoryInterface(this),
@ -64,29 +67,32 @@ void AICA::WriteRegister(uint32_t addr, uint32_t value) {
template <typename T>
T AICA::ReadWave(uint32_t addr) {
if (sizeof(T) == 4) {
// FIXME temp hacks to get Crazy Taxi 1 booting
if (addr == 0x104 || addr == 0x284 || addr == 0x288) {
return static_cast<T>(0x54494e49);
}
// FIXME temp hacks to get Crazy Taxi 2 booting
if (addr == 0x5c) {
return static_cast<T>(0x54494e49);
}
// FIXME temp hacks to get PoP booting
if (addr == 0xb200 || addr == 0xb210 || addr == 0xb220 || addr == 0xb230 ||
addr == 0xb240 || addr == 0xb250 || addr == 0xb260 || addr == 0xb270 ||
addr == 0xb280 || addr == 0xb290 || addr == 0xb2a0 || addr == 0xb2b0 ||
addr == 0xb2c0 || addr == 0xb2d0 || addr == 0xb2e0 || addr == 0xb2f0 ||
addr == 0xb300 || addr == 0xb310 || addr == 0xb320 || addr == 0xb330 ||
addr == 0xb340 || addr == 0xb350 || addr == 0xb360 || addr == 0xb370 ||
addr == 0xb380 || addr == 0xb390 || addr == 0xb3a0 || addr == 0xb3b0 ||
addr == 0xb3c0 || addr == 0xb3d0 || addr == 0xb3e0 || addr == 0xb3f0) {
return static_cast<T>(0x0);
}
return re::load<T>(&wave_ram_[addr]);
}
template <>
uint32_t AICA::ReadWave(uint32_t addr) {
// FIXME temp hacks to get Crazy Taxi 1 booting
if (addr == 0x104 || addr == 0x284 || addr == 0x288) {
return 0x54494e49;
}
// FIXME temp hacks to get Crazy Taxi 2 booting
if (addr == 0x5c) {
return 0x54494e49;
}
// FIXME temp hacks to get PoP booting
if (addr == 0xb200 || addr == 0xb210 || addr == 0xb220 || addr == 0xb230 ||
addr == 0xb240 || addr == 0xb250 || addr == 0xb260 || addr == 0xb270 ||
addr == 0xb280 || addr == 0xb290 || addr == 0xb2a0 || addr == 0xb2b0 ||
addr == 0xb2c0 || addr == 0xb2d0 || addr == 0xb2e0 || addr == 0xb2f0 ||
addr == 0xb300 || addr == 0xb310 || addr == 0xb320 || addr == 0xb330 ||
addr == 0xb340 || addr == 0xb350 || addr == 0xb360 || addr == 0xb370 ||
addr == 0xb380 || addr == 0xb390 || addr == 0xb3a0 || addr == 0xb3b0 ||
addr == 0xb3c0 || addr == 0xb3d0 || addr == 0xb3e0 || addr == 0xb3f0) {
return 0x0;
}
return re::load<T>(&wave_ram_[addr]);
return re::load<uint32_t>(&wave_ram_[addr]);
}
template <typename T>

View File

@ -81,18 +81,18 @@ void Debugger::gdb_server_step(void *data) {
void Debugger::gdb_server_add_bp(void *data, int type, intmax_t addr) {
Debugger *debugger = reinterpret_cast<Debugger *>(data);
debugger->debug_->AddBreakpoint(type, addr);
debugger->debug_->AddBreakpoint(type, static_cast<uint32_t>(addr));
}
void Debugger::gdb_server_rem_bp(void *data, int type, intmax_t addr) {
Debugger *debugger = reinterpret_cast<Debugger *>(data);
debugger->debug_->RemoveBreakpoint(type, addr);
debugger->debug_->RemoveBreakpoint(type, static_cast<uint32_t>(addr));
}
void Debugger::gdb_server_read_mem(void *data, intmax_t addr, uint8_t *buffer,
int size) {
Debugger *debugger = reinterpret_cast<Debugger *>(data);
debugger->debug_->ReadMemory(addr, buffer, size);
debugger->debug_->ReadMemory(static_cast<uint32_t>(addr), buffer, size);
}
void Debugger::gdb_server_read_reg(void *data, int n, intmax_t *value,

View File

@ -196,7 +196,7 @@ void GDROM::TriggerEvent(GDEvent ev, intptr_t arg0, intptr_t arg1) {
int num_sectors = std::min(cdreq_.num_sectors, max_pio_sectors);
pio_size_ = ReadSectors(cdreq_.first_sector, cdreq_.sector_format,
cdreq_.sector_mask, num_sectors, pio_buffer_,
sizeof(pio_buffer_));
static_cast<int>(sizeof(pio_buffer_)));
pio_head_ = 0;
// update sector read state

View File

@ -7,6 +7,7 @@
#include "hw/holly/trace.h"
#include "hw/dreamcast.h"
#include "hw/memory.h"
#include "sys/filesystem.h"
using namespace re;
using namespace re::hw;

View File

@ -105,8 +105,7 @@ TextureKey TextureProvider::GetTextureKey(const TSP &tsp, const TCW &tcw) {
return ((uint64_t)tsp.full << 32) | tcw.full;
}
TileRenderer::TileRenderer(renderer::Backend &rb,
TextureProvider &texture_provider)
TileRenderer::TileRenderer(Backend &rb, TextureProvider &texture_provider)
: rb_(rb), texture_provider_(texture_provider) {}
void TileRenderer::ParseContext(const TileContext &tctx,
@ -164,8 +163,7 @@ void TileRenderer::ParseContext(const TileContext &tctx,
// map ta parameters to their translated surfaces / vertices
if (map_params) {
int offset = static_cast<int>(data - tctx.data);
rctx->param_map[offset] = {static_cast<int>(rctx->surfs.size()),
static_cast<int>(rctx->verts.size())};
rctx->param_map[offset] = {rctx->surfs.size(), rctx->verts.size()};
}
data += TileAccelerator::GetParamSize(pcw, vertex_type_);
@ -175,10 +173,14 @@ void TileRenderer::ParseContext(const TileContext &tctx,
}
void TileRenderer::RenderContext(const TileRenderContext &rctx) {
rb_.BeginSurfaces(rctx_.projection, rctx_.verts.data(), rctx_.verts.size());
auto &surfs = rctx_.surfs;
auto &verts = rctx_.verts;
auto &sorted_surfs = rctx_.sorted_surfs;
for (int i = 0, n = rctx_.surfs.size(); i < n; i++) {
rb_.DrawSurface(rctx_.surfs[rctx_.sorted_surfs[i]]);
rb_.BeginSurfaces(rctx_.projection, verts.data(), verts.size());
for (int i = 0, n = surfs.size(); i < n; i++) {
rb_.DrawSurface(surfs[sorted_surfs[i]]);
}
rb_.EndSurfaces();
@ -205,17 +207,20 @@ void TileRenderer::Reset(TileRenderContext *rctx) {
}
Surface &TileRenderer::AllocSurf(TileRenderContext *rctx, bool copy_from_prev) {
int id = rctx->surfs.size();
rctx->surfs.Resize(id + 1);
Surface &surf = rctx->surfs[id];
auto &surfs = rctx->surfs;
int id = surfs.size();
surfs.Resize(id + 1);
Surface &surf = surfs[id];
// either reset the surface state, or copy the state from the previous surface
if (copy_from_prev) {
new (&surf) Surface(rctx->surfs[id - 1]);
new (&surf) Surface(surfs[id - 1]);
} else {
new (&surf) Surface();
}
// start verts at the end
surf.first_vert = rctx->verts.size();
surf.num_verts = 0;
@ -227,14 +232,17 @@ Surface &TileRenderer::AllocSurf(TileRenderContext *rctx, bool copy_from_prev) {
}
Vertex &TileRenderer::AllocVert(TileRenderContext *rctx) {
int id = rctx->verts.size();
rctx->verts.Resize(id + 1);
Vertex &v = rctx->verts[id];
auto &surfs = rctx->surfs;
auto &verts = rctx->verts;
int id = verts.size();
verts.Resize(id + 1);
Vertex &v = verts[id];
new (&v) Vertex();
// update vertex count on the current surface
Surface &surf = rctx->surfs.back();
Surface &surf = surfs.back();
surf.num_verts++;
return v;
@ -670,32 +678,36 @@ void TileRenderer::ParseEndOfList(const TileContext &tctx,
const uint8_t *data) {
DiscardIncompleteSurf(rctx);
auto &surfs = rctx->surfs;
auto &verts = rctx->verts;
auto &sorted_surfs = rctx->sorted_surfs;
int first_surf_to_sort = last_sorted_surf_;
int num_surfs_to_sort = rctx->surfs.size() - last_sorted_surf_;
int num_surfs_to_sort = surfs.size() - last_sorted_surf_;
// sort transparent polys by their z value, from back to front. remember, in
// dreamcast coordinates smaller z values are further away from the camera
if ((list_type_ == TA_LIST_TRANSLUCENT ||
list_type_ == TA_LIST_TRANSLUCENT_MODVOL) &&
tctx.autosort) {
int *first = &rctx->sorted_surfs[first_surf_to_sort];
int *last = &rctx->sorted_surfs[first_surf_to_sort + num_surfs_to_sort];
int *first = &sorted_surfs[first_surf_to_sort];
int *last = &sorted_surfs[first_surf_to_sort + num_surfs_to_sort];
std::sort(first, last, [&](int a, int b) {
Surface *surfa = &rctx->surfs[a];
Surface *surfb = &rctx->surfs[b];
Surface &surfa = surfs[a];
Surface &surfb = surfs[b];
float minza = std::numeric_limits<float>::max();
for (int i = 0; i < surfa->num_verts; i++) {
Vertex *v = &rctx->verts[surfa->first_vert + i];
if (v->xyz[2] < minza) {
minza = v->xyz[2];
for (int i = 0, n = surfa.num_verts; i < n; i++) {
Vertex &v = verts[surfa.first_vert + i];
if (v.xyz[2] < minza) {
minza = v.xyz[2];
}
}
float minzb = std::numeric_limits<float>::max();
for (int i = 0; i < surfb->num_verts; i++) {
Vertex *v = &rctx->verts[surfb->first_vert + i];
if (v->xyz[2] < minzb) {
minzb = v->xyz[2];
for (int i = 0, n = surfb.num_verts; i < n; i++) {
Vertex &v = verts[surfb.first_vert + i];
if (v.xyz[2] < minzb) {
minzb = v.xyz[2];
}
}
@ -705,7 +717,7 @@ void TileRenderer::ParseEndOfList(const TileContext &tctx,
last_poly_ = nullptr;
last_vertex_ = nullptr;
last_sorted_surf_ = static_cast<int>(rctx->surfs.size());
last_sorted_surf_ = surfs.size();
}
// Vertices coming into the TA are in window space, with the Z component being
@ -716,17 +728,18 @@ void TileRenderer::ParseEndOfList(const TileContext &tctx,
// in order to perspective correct the texture mapping.
void TileRenderer::FillProjectionMatrix(const TileContext &tctx,
TileRenderContext *rctx) {
auto &verts = rctx->verts;
float znear = std::numeric_limits<float>::min();
float zfar = std::numeric_limits<float>::max();
// Z component is 1/W, so +Z is into the screen
for (int i = 0, n = rctx->verts.size(); i < n; i++) {
Vertex *v = &rctx->verts[i];
if (v->xyz[2] > znear) {
znear = v->xyz[2];
for (int i = 0, n = verts.size(); i < n; i++) {
Vertex &v = verts[i];
if (v.xyz[2] > znear) {
znear = v.xyz[2];
}
if (v->xyz[2] < zfar) {
zfar = v->xyz[2];
if (v.xyz[2] < zfar) {
zfar = v.xyz[2];
}
}

View File

@ -107,19 +107,18 @@ Memory::Memory(Machine &machine)
physical_base_(nullptr),
virtual_base_(nullptr),
protected_base_(nullptr) {
regions_ = new MemoryRegion[MAX_REGIONS]();
num_regions_ = 1; // 0 page is reserved
regions_ = new MemoryRegion[MAX_REGIONS]();
pages_ = new PageEntry[NUM_PAGES]();
}
Memory::~Memory() {
delete[] regions_;
delete[] pages_;
Unmap();
DestroySharedMemory();
delete[] regions_;
delete[] pages_;
}
bool Memory::Init() {

View File

@ -220,8 +220,8 @@ class Memory {
sys::SharedMemoryHandle shmem_;
// physical regions of memory
MemoryRegion *regions_;
int num_regions_;
MemoryRegion *regions_;
// map virtual addresses -> physical addresses
PageEntry *pages_;

View File

@ -7,10 +7,10 @@
namespace re {
namespace hw {
static const uint32_t R = 0x1;
static const uint32_t W = 0x2;
static const uint32_t RW = 0x3;
static const uint32_t UNDEFINED = 0x0;
static const uint8_t R = 0x1;
static const uint8_t W = 0x2;
static const uint8_t RW = 0x3;
static const uint8_t UNDEFINED = 0x0;
struct Register;

View File

@ -122,7 +122,7 @@ void SH4::Run(const std::chrono::nanoseconds &delta) {
s_current_cpu = this;
// each block's epilog will decrement the remaining cycles as they run
ctx_.num_cycles = cycles;
ctx_.num_cycles = static_cast<int>(cycles);
while (ctx_.num_cycles > 0) {
SH4BlockEntry *block = code_cache_->GetBlock(ctx_.pc);
@ -748,9 +748,9 @@ uint32_t SH4::TimerCount(int n) {
int64_t freq = PERIPHERAL_CLOCK_FREQ >> PERIPHERAL_SCALE[tcr & 7];
std::chrono::nanoseconds remaining = scheduler_->RemainingTime(handle);
int64_t cycles = static_cast<uint32_t>(NANO_TO_CYCLES(remaining, freq));
int64_t cycles = NANO_TO_CYCLES(remaining, freq);
return cycles;
return static_cast<uint32_t>(cycles);
}
void SH4::RescheduleTimer(int n, uint32_t tcnt, uint32_t tcr) {

View File

@ -22,34 +22,34 @@ namespace backend {
namespace x64 {
// x64 register layout
// %rax %eax %ax %al <-- temporary
// %rcx %ecx %cx %cl <-- argument
// %rdx %edx %dx %dl <-- argument
// %rbx %ebx %bx %bl <-- available, callee saved
// %rsp %esp %sp %spl <-- reserved
// %rbp %ebp %bp %bpl <-- available, callee saved
// %rsi %esi %si %sil <-- argument
// %rdi %edi %di %dil <-- argument
// %r8 %r8d %r8w %r8b <-- argument
// %r9 %r9d %r9w %r9b <-- argument
// %r10 %r10d %r10w %r10b <-- available, not callee saved
// %r11 %r11d %r11w %r11b <-- available, not callee saved
// %r12 %r12d %r12w %r12b <-- available, callee saved
// %r13 %r13d %r13w %r13b <-- available, callee saved
// %r14 %r14d %r14w %r14b <-- available, callee saved
// %r15 %r15d %r15w %r15b <-- available, callee saved
// %rax %eax %ax %al <-- both: temporary
// %rcx %ecx %cx %cl <-- both: argument
// %rdx %edx %dx %dl <-- both: argument
// %rbx %ebx %bx %bl <-- both: available (callee saved)
// %rsp %esp %sp %spl <-- both: reserved
// %rbp %ebp %bp %bpl <-- both: available (callee saved)
// %rsi %esi %si %sil <-- msvc: available (callee saved), amd64: argument
// %rdi %edi %di %dil <-- msvc: available (callee saved), amd64: argument
// %r8 %r8d %r8w %r8b <-- both: argument
// %r9 %r9d %r9w %r9b <-- both: argument
// %r10 %r10d %r10w %r10b <-- both: available (not callee saved)
// %r11 %r11d %r11w %r11b <-- both: available (not callee saved)
// %r12 %r12d %r12w %r12b <-- both: available (callee saved)
// %r13 %r13d %r13w %r13b <-- both: available (callee saved)
// %r14 %r14d %r14w %r14b <-- both: available (callee saved)
// %r15 %r15d %r15w %r15b <-- both: available (callee saved)
// msvc calling convention uses rcx, rdx, r8 and r9 for arguments
// amd64 calling convention uses rdi, rsi, rdx, rcx, r8 and r9 for arguments
// msvc calling convention uses rcx, rdx, r8, r9 for arguments
// amd64 calling convention uses rdi, rsi, rdx, rcx, r8, r9 for arguments
// both use the same xmm registers for floating point arguments
// our largest function call uses only 3 arguments, leaving rdi, rsi and r9
// available on msvc and rcx, r8 and r9 available on amd64
// our largest function call uses only 3 arguments
// msvc is left with rax, rdi, rsi, r9-r11 available
// amd64 is left with rax, rcx, r8-r11 available
// rax is used as a scratch register, while rdi/r8, r9 and xmm1 are used for
// storing
// a constant in case the constant propagation pass didn't eliminate it
// rax is used as a scratch register
// r10, r11, xmm1 are used for constants not eliminated by constant propagation
// r14, r15 are reserved for the context and memory pointers
// rsi is left unused on msvc and rcx is left unused on amd64
const Register x64_registers[] = {
{"rbx", ir::VALUE_INT_MASK,
reinterpret_cast<const void *>(&Xbyak::util::rbx)},
@ -59,10 +59,10 @@ const Register x64_registers[] = {
reinterpret_cast<const void *>(&Xbyak::util::r12)},
{"r13", ir::VALUE_INT_MASK,
reinterpret_cast<const void *>(&Xbyak::util::r13)},
{"r14", ir::VALUE_INT_MASK,
reinterpret_cast<const void *>(&Xbyak::util::r14)},
{"r15", ir::VALUE_INT_MASK,
reinterpret_cast<const void *>(&Xbyak::util::r15)},
// {"r14", ir::VALUE_INT_MASK,
// reinterpret_cast<const void *>(&Xbyak::util::r14)},
// {"r15", ir::VALUE_INT_MASK,
// reinterpret_cast<const void *>(&Xbyak::util::r15)},
{"xmm6", ir::VALUE_FLOAT_MASK,
reinterpret_cast<const void *>(&Xbyak::util::xmm6)},
{"xmm7", ir::VALUE_FLOAT_MASK,
@ -82,15 +82,13 @@ const int x64_num_registers = sizeof(x64_registers) / sizeof(Register);
const int x64_arg0_idx = Xbyak::Operand::RCX;
const int x64_arg1_idx = Xbyak::Operand::RDX;
const int x64_arg2_idx = Xbyak::Operand::R8;
const int x64_tmp0_idx = Xbyak::Operand::RDI;
const int x64_tmp1_idx = Xbyak::Operand::R9;
#else
const int x64_arg0_idx = Xbyak::Operand::RDI;
const int x64_arg1_idx = Xbyak::Operand::RSI;
const int x64_arg2_idx = Xbyak::Operand::RDX;
const int x64_tmp0_idx = Xbyak::Operand::R8;
const int x64_tmp1_idx = Xbyak::Operand::R9;
#endif
const int x64_tmp0_idx = Xbyak::Operand::R10;
const int x64_tmp1_idx = Xbyak::Operand::R11;
}
}
}
@ -240,8 +238,6 @@ void X64Backend::AssembleThunks() {
Xbyak::Reg64 dst(i);
e.call(e.rax);
e.mov(dst, e.rax);
e.mov(e.r10, reinterpret_cast<uint64_t>(guest_ctx_));
e.mov(e.r11, reinterpret_cast<uint64_t>(memory_.protected_base()));
e.add(e.rsp, STACK_SHADOW_SPACE + 8);
e.ret();
}
@ -253,8 +249,6 @@ void X64Backend::AssembleThunks() {
store_thunk_ = e.getCurr<SlowmemThunk>();
e.call(e.rax);
e.mov(e.r10, reinterpret_cast<uint64_t>(guest_ctx_));
e.mov(e.r11, reinterpret_cast<uint64_t>(memory_.protected_base()));
e.add(e.rsp, STACK_SHADOW_SPACE + 8);
e.ret();
}

View File

@ -41,18 +41,15 @@ static bool IsCalleeSaved(const Xbyak::Reg &reg) {
false, // RCX
false, // RDX
true, // RBX
#if PLATFORM_WINDOWS
true, // RSP
#else
false, // RSP
#endif
true, // RBP
true, // RBP
#if PLATFORM_WINDOWS
true, // RSI
true, // RDI
#else
false, // RSI
#endif
false, // RDI
#endif
false, // R8
false, // R9
false, // R10
@ -138,7 +135,11 @@ void X64Emitter::EmitProlog(IRBuilder &builder, int *out_stack_size) {
}
// push the callee-saved registers which have been modified
int pushed = 0;
int pushed = 2;
// always used by guest ctx and memory pointers
push(r15);
push(r14);
for (int i = 0; i < x64_num_registers; i++) {
const Xbyak::Reg &reg =
@ -160,8 +161,8 @@ void X64Emitter::EmitProlog(IRBuilder &builder, int *out_stack_size) {
sub(rsp, stack_size);
// copy guest context and memory base to their dedicated registers
mov(r10, reinterpret_cast<uint64_t>(guest_ctx_));
mov(r11, reinterpret_cast<uint64_t>(memory_->protected_base()));
mov(r14, reinterpret_cast<uint64_t>(guest_ctx_));
mov(r15, reinterpret_cast<uint64_t>(memory_->protected_base()));
*out_stack_size = stack_size;
}
@ -192,6 +193,10 @@ void X64Emitter::EmitEpilog(IRBuilder &builder, int stack_size) {
}
}
// pop r14 and r15
pop(r14);
pop(r15);
ret();
}
@ -481,23 +486,19 @@ EMITTER(LOAD_GUEST) {
e.mov(arg1, a);
e.call(reinterpret_cast<void *>(fn));
e.mov(result, e.rax);
// restore context register
e.mov(e.r10, reinterpret_cast<uint64_t>(e.guest_ctx()));
e.mov(e.r11, reinterpret_cast<uint64_t>(e.memory()->protected_base()));
} else {
switch (instr->type()) {
case VALUE_I8:
e.mov(result, e.byte[a.cvt64() + e.r11]);
e.mov(result, e.byte[a.cvt64() + e.r15]);
break;
case VALUE_I16:
e.mov(result, e.word[a.cvt64() + e.r11]);
e.mov(result, e.word[a.cvt64() + e.r15]);
break;
case VALUE_I32:
e.mov(result, e.dword[a.cvt64() + e.r11]);
e.mov(result, e.dword[a.cvt64() + e.r15]);
break;
case VALUE_I64:
e.mov(result, e.qword[a.cvt64() + e.r11]);
e.mov(result, e.qword[a.cvt64() + e.r15]);
break;
default:
LOG_FATAL("Unexpected load result type");
@ -577,23 +578,19 @@ EMITTER(STORE_GUEST) {
e.mov(arg1, a);
e.mov(arg2, b);
e.call(reinterpret_cast<void *>(fn));
// restore context register
e.mov(e.r10, reinterpret_cast<uint64_t>(e.guest_ctx()));
e.mov(e.r11, reinterpret_cast<uint64_t>(e.memory()->protected_base()));
} else {
switch (instr->arg1()->type()) {
case VALUE_I8:
e.mov(e.byte[a.cvt64() + e.r11], b);
e.mov(e.byte[a.cvt64() + e.r15], b);
break;
case VALUE_I16:
e.mov(e.word[a.cvt64() + e.r11], b);
e.mov(e.word[a.cvt64() + e.r15], b);
break;
case VALUE_I32:
e.mov(e.dword[a.cvt64() + e.r11], b);
e.mov(e.dword[a.cvt64() + e.r15], b);
break;
case VALUE_I64:
e.mov(e.qword[a.cvt64() + e.r11], b);
e.mov(e.qword[a.cvt64() + e.r15], b);
break;
default:
LOG_FATAL("Unexpected store value type");
@ -610,10 +607,10 @@ EMITTER(LOAD_CONTEXT) {
switch (instr->type()) {
case VALUE_F32:
e.movss(result, e.dword[e.r10 + offset]);
e.movss(result, e.dword[e.r14 + offset]);
break;
case VALUE_F64:
e.movsd(result, e.qword[e.r10 + offset]);
e.movsd(result, e.qword[e.r14 + offset]);
break;
default:
LOG_FATAL("Unexpected result type");
@ -624,16 +621,16 @@ EMITTER(LOAD_CONTEXT) {
switch (instr->type()) {
case VALUE_I8:
e.mov(result, e.byte[e.r10 + offset]);
e.mov(result, e.byte[e.r14 + offset]);
break;
case VALUE_I16:
e.mov(result, e.word[e.r10 + offset]);
e.mov(result, e.word[e.r14 + offset]);
break;
case VALUE_I32:
e.mov(result, e.dword[e.r10 + offset]);
e.mov(result, e.dword[e.r14 + offset]);
break;
case VALUE_I64:
e.mov(result, e.qword[e.r10 + offset]);
e.mov(result, e.qword[e.r14 + offset]);
break;
default:
LOG_FATAL("Unexpected result type");
@ -648,18 +645,18 @@ EMITTER(STORE_CONTEXT) {
if (instr->arg1()->constant()) {
switch (instr->arg1()->type()) {
case VALUE_I8:
e.mov(e.byte[e.r10 + offset], instr->arg1()->i8());
e.mov(e.byte[e.r14 + offset], instr->arg1()->i8());
break;
case VALUE_I16:
e.mov(e.word[e.r10 + offset], instr->arg1()->i16());
e.mov(e.word[e.r14 + offset], instr->arg1()->i16());
break;
case VALUE_I32:
case VALUE_F32:
e.mov(e.dword[e.r10 + offset], instr->arg1()->i32());
e.mov(e.dword[e.r14 + offset], instr->arg1()->i32());
break;
case VALUE_I64:
case VALUE_F64:
e.mov(e.qword[e.r10 + offset], instr->arg1()->i64());
e.mov(e.qword[e.r14 + offset], instr->arg1()->i64());
break;
default:
LOG_FATAL("Unexpected value type");
@ -671,10 +668,10 @@ EMITTER(STORE_CONTEXT) {
switch (instr->arg1()->type()) {
case VALUE_F32:
e.movss(e.dword[e.r10 + offset], src);
e.movss(e.dword[e.r14 + offset], src);
break;
case VALUE_F64:
e.movsd(e.qword[e.r10 + offset], src);
e.movsd(e.qword[e.r14 + offset], src);
break;
default:
LOG_FATAL("Unexpected value type");
@ -685,16 +682,16 @@ EMITTER(STORE_CONTEXT) {
switch (instr->arg1()->type()) {
case VALUE_I8:
e.mov(e.byte[e.r10 + offset], src);
e.mov(e.byte[e.r14 + offset], src);
break;
case VALUE_I16:
e.mov(e.word[e.r10 + offset], src);
e.mov(e.word[e.r14 + offset], src);
break;
case VALUE_I32:
e.mov(e.dword[e.r10 + offset], src);
e.mov(e.dword[e.r14 + offset], src);
break;
case VALUE_I64:
e.mov(e.qword[e.r10 + offset], src);
e.mov(e.qword[e.r14 + offset], src);
break;
default:
LOG_FATAL("Unexpected value type");
@ -705,7 +702,7 @@ EMITTER(STORE_CONTEXT) {
}
EMITTER(LOAD_LOCAL) {
int offset = instr->arg0()->i32();
int offset = STACK_OFFSET_LOCALS + instr->arg0()->i32();
if (IsFloatType(instr->type())) {
const Xbyak::Xmm result = e.GetXMMRegister(instr);
@ -745,7 +742,7 @@ EMITTER(LOAD_LOCAL) {
}
EMITTER(STORE_LOCAL) {
int offset = instr->arg0()->i32();
int offset = STACK_OFFSET_LOCALS + instr->arg0()->i32();
CHECK(!instr->arg1()->constant());
@ -1430,8 +1427,4 @@ EMITTER(CALL_EXTERNAL) {
// call the external function
e.CopyOperand(instr->arg0(), e.rax);
e.call(e.rax);
// restore context register
e.mov(e.r10, reinterpret_cast<uint64_t>(e.guest_ctx()));
e.mov(e.r11, reinterpret_cast<uint64_t>(e.memory()->protected_base()));
}

View File

@ -20,8 +20,7 @@ enum {
#else
STACK_SHADOW_SPACE = 0,
#endif
STACK_OFFSET_GUEST_CONTEXT = STACK_SHADOW_SPACE,
STACK_OFFSET_LOCALS = STACK_SHADOW_SPACE + 8,
STACK_OFFSET_LOCALS = STACK_SHADOW_SPACE,
STACK_SIZE = STACK_OFFSET_LOCALS
};

File diff suppressed because it is too large Load Diff

View File

@ -17,6 +17,7 @@ void PassRunner::Run(IRBuilder &builder, bool debug) {
if (debug) {
LOG_INFO("Original:");
builder.Dump();
LOG_INFO("");
}
for (auto &pass : passes_) {
@ -25,6 +26,7 @@ void PassRunner::Run(IRBuilder &builder, bool debug) {
if (debug) {
LOG_INFO("%s:", pass->name());
builder.Dump();
LOG_INFO("");
}
}
}

View File

@ -1,3 +1,4 @@
#include "core/log.h"
#include "sys/network.h"
bool Network::Init() {

View File

@ -1698,7 +1698,7 @@ void MicroProfileImpl::DrawText(int x, int y, uint32_t color,
float fx = static_cast<float>(x);
float fy = static_cast<float>(y);
float fy2 = fy + (MICROPROFILE_TEXT_HEIGHT + 1);
int text_len = strlen(text);
int text_len = static_cast<int>(strlen(text));
Vertex2D *vertex = AllocVertices({PRIM_TRIANGLES,
font_tex_,

View File

@ -31,8 +31,8 @@ enum {
struct SH4Test {
const char *name;
const uint8_t *buffer;
uint32_t buffer_size;
uint32_t buffer_offset;
int buffer_size;
int buffer_offset;
SH4Context in;
SH4Context out;
};
@ -179,8 +179,11 @@ void RunSH4Test(const SH4Test &test) {
// setup initial stack pointer
sh4->ctx_.r[15] = 0x8d000000;
// load binary
dc->memory->Memcpy(0x8c010000, test.buffer, test.buffer_size);
// load binary. note, Memory::Memcpy only supports 4 byte aligned sizes
int aligned_size = re::align_up(test.buffer_size, 4);
uint8_t *aligned_buffer = reinterpret_cast<uint8_t *>(alloca(aligned_size));
memcpy(aligned_buffer, test.buffer, test.buffer_size);
dc->memory->Memcpy(0x8c010000, aligned_buffer, aligned_size);
// skip to the test's offset
sh4->SetPC(0x8c010000 + test.buffer_offset);