2010-08-09 13:28:56 +00:00
|
|
|
/*
|
2016-09-14 11:55:53 +00:00
|
|
|
libco.x86 (2016-09-14)
|
2010-08-09 13:28:56 +00:00
|
|
|
author: byuu
|
|
|
|
license: public domain
|
|
|
|
*/
|
|
|
|
|
|
|
|
#define LIBCO_C
|
|
|
|
#include "libco.h"
|
2016-09-14 11:55:53 +00:00
|
|
|
#include "settings.h"
|
Update to v094r29 release.
byuu says:
Note: for Windows users, please go to nall/intrinsics.hpp line 60 and
correct the typo from "DISPLAY_WINDOW" to "DISPLAY_WINDOWS" before
compiling, otherwise things won't work at all.
This will be a really major WIP for the core SNES emulation, so please
test as thoroughly as possible.
I rewrote the 65816 CPU core's dispatcher from a jump table to a switch
table. This was so that I could pass class variables as parameters to
opcodes without crazy theatrics.
With that, I killed the regs.r[N] stuff, the flag_t operator|=, &=, ^=
stuff, and all of the template versions of opcodes.
I also removed some stupid pointless flag tests in xcn and pflag that
would always be true.
I sure hope that AWJ is happy with this; because this change was so that
my flag assignments and branch tests won't need to build regs.P into
a full 8-bit variable anymore.
It does of course incur a slight performance hit when you pass in
variables by-value to functions, but it should help with binary size
(and thus cache) by reducing a lot of extra functions. (I know I could
have used template parameters for some things even with a switch table,
but chose not to for the aforementioned reasons.)
Overall, it's about a ~1% speedup from the previous build. The CPU core
instructions were never a bottleneck, but I did want to fix the P flag
building stuff because that really was a dumb mistake v_v'
2015-06-22 13:31:49 +00:00
|
|
|
|
2010-08-09 13:28:56 +00:00
|
|
|
#include <assert.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
|
|
|
#ifdef __cplusplus
|
|
|
|
extern "C" {
|
|
|
|
#endif
|
|
|
|
|
Update to v094r29 release.
byuu says:
Note: for Windows users, please go to nall/intrinsics.hpp line 60 and
correct the typo from "DISPLAY_WINDOW" to "DISPLAY_WINDOWS" before
compiling, otherwise things won't work at all.
This will be a really major WIP for the core SNES emulation, so please
test as thoroughly as possible.
I rewrote the 65816 CPU core's dispatcher from a jump table to a switch
table. This was so that I could pass class variables as parameters to
opcodes without crazy theatrics.
With that, I killed the regs.r[N] stuff, the flag_t operator|=, &=, ^=
stuff, and all of the template versions of opcodes.
I also removed some stupid pointless flag tests in xcn and pflag that
would always be true.
I sure hope that AWJ is happy with this; because this change was so that
my flag assignments and branch tests won't need to build regs.P into
a full 8-bit variable anymore.
It does of course incur a slight performance hit when you pass in
variables by-value to functions, but it should help with binary size
(and thus cache) by reducing a lot of extra functions. (I know I could
have used template parameters for some things even with a switch table,
but chose not to for the aforementioned reasons.)
Overall, it's about a ~1% speedup from the previous build. The CPU core
instructions were never a bottleneck, but I did want to fix the P flag
building stuff because that really was a dumb mistake v_v'
2015-06-22 13:31:49 +00:00
|
|
|
/*
  fastcall: compiler-specific spelling of the fastcall calling convention.
  It is applied to the co_swap function pointer so that the hand-assembled
  swap routine is invoked with the convention it was written for.
*/
#if defined(__clang__) || defined(__GNUC__)
  #define fastcall __attribute__((fastcall))
#elif defined(_MSC_VER)
  #define fastcall __fastcall
#else
  #error "libco: please define fastcall macro"
#endif
|
|
|
|
|
|
|
|
static thread_local long co_active_buffer[64];
|
|
|
|
static thread_local cothread_t co_active_handle = 0;
|
|
|
|
static void (fastcall *co_swap)(cothread_t, cothread_t) = 0;
|
|
|
|
|
2016-09-14 11:55:53 +00:00
|
|
|
#ifdef LIBCO_MPROTECT
|
|
|
|
alignas(4096)
|
|
|
|
#else
|
|
|
|
section(text)
|
|
|
|
#endif
|
Update to v094r29 release.
byuu says:
Note: for Windows users, please go to nall/intrinsics.hpp line 60 and
correct the typo from "DISPLAY_WINDOW" to "DISPLAY_WINDOWS" before
compiling, otherwise things won't work at all.
This will be a really major WIP for the core SNES emulation, so please
test as thoroughly as possible.
I rewrote the 65816 CPU core's dispatcher from a jump table to a switch
table. This was so that I could pass class variables as parameters to
opcodes without crazy theatrics.
With that, I killed the regs.r[N] stuff, the flag_t operator|=, &=, ^=
stuff, and all of the template versions of opcodes.
I also removed some stupid pointless flag tests in xcn and pflag that
would always be true.
I sure hope that AWJ is happy with this; because this change was so that
my flag assignments and branch tests won't need to build regs.P into
a full 8-bit variable anymore.
It does of course incur a slight performance hit when you pass in
variables by-value to functions, but it should help with binary size
(and thus cache) by reducing a lot of extra functions. (I know I could
have used template parameters for some things even with a switch table,
but chose not to for the aforementioned reasons.)
Overall, it's about a ~1% speedup from the previous build. The CPU core
instructions were never a bottleneck, but I did want to fix the P flag
building stuff because that really was a dumb mistake v_v'
2015-06-22 13:31:49 +00:00
|
|
|
/* ABI: fastcall */
|
2016-09-14 11:55:53 +00:00
|
|
|
static const unsigned char co_swap_function[4096] = {
|
Update to v094r29 release.
byuu says:
Note: for Windows users, please go to nall/intrinsics.hpp line 60 and
correct the typo from "DISPLAY_WINDOW" to "DISPLAY_WINDOWS" before
compiling, otherwise things won't work at all.
This will be a really major WIP for the core SNES emulation, so please
test as thoroughly as possible.
I rewrote the 65816 CPU core's dispatcher from a jump table to a switch
table. This was so that I could pass class variables as parameters to
opcodes without crazy theatrics.
With that, I killed the regs.r[N] stuff, the flag_t operator|=, &=, ^=
stuff, and all of the template versions of opcodes.
I also removed some stupid pointless flag tests in xcn and pflag that
would always be true.
I sure hope that AWJ is happy with this; because this change was so that
my flag assignments and branch tests won't need to build regs.P into
a full 8-bit variable anymore.
It does of course incur a slight performance hit when you pass in
variables by-value to functions, but it should help with binary size
(and thus cache) by reducing a lot of extra functions. (I know I could
have used template parameters for some things even with a switch table,
but chose not to for the aforementioned reasons.)
Overall, it's about a ~1% speedup from the previous build. The CPU core
instructions were never a bottleneck, but I did want to fix the P flag
building stuff because that really was a dumb mistake v_v'
2015-06-22 13:31:49 +00:00
|
|
|
0x89, 0x22, /* mov [edx],esp */
|
|
|
|
0x8b, 0x21, /* mov esp,[ecx] */
|
|
|
|
0x58, /* pop eax */
|
|
|
|
0x89, 0x6a, 0x04, /* mov [edx+ 4],ebp */
|
|
|
|
0x89, 0x72, 0x08, /* mov [edx+ 8],esi */
|
|
|
|
0x89, 0x7a, 0x0c, /* mov [edx+12],edi */
|
|
|
|
0x89, 0x5a, 0x10, /* mov [edx+16],ebx */
|
|
|
|
0x8b, 0x69, 0x04, /* mov ebp,[ecx+ 4] */
|
|
|
|
0x8b, 0x71, 0x08, /* mov esi,[ecx+ 8] */
|
|
|
|
0x8b, 0x79, 0x0c, /* mov edi,[ecx+12] */
|
|
|
|
0x8b, 0x59, 0x10, /* mov ebx,[ecx+16] */
|
|
|
|
0xff, 0xe0, /* jmp eax */
|
2010-08-09 13:28:56 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
#ifdef _WIN32
|
|
|
|
#include <windows.h>
|
|
|
|
|
2016-09-14 11:55:53 +00:00
|
|
|
/*
  Windows variant of the one-time setup run from co_create().
  With LIBCO_MPROTECT defined it asks the OS to mark co_swap_function as
  readable and executable; without it this function does nothing.
  The result of VirtualProtect() is not inspected.
*/
static void co_init() {
#ifdef LIBCO_MPROTECT
  DWORD previous_protection;  /* required out-parameter; value is unused */
  VirtualProtect((void*)co_swap_function, sizeof(co_swap_function),
                 PAGE_EXECUTE_READ, &previous_protection);
#endif
}
|
|
|
|
#else
|
|
|
|
#include <unistd.h>
|
|
|
|
#include <sys/mman.h>
|
|
|
|
|
2016-09-14 11:55:53 +00:00
|
|
|
/*
  Non-Windows variant of the one-time setup run from co_create().
  With LIBCO_MPROTECT defined it marks the pages holding co_swap_function
  as readable and executable; without it this function does nothing.
  The result of mprotect() is not inspected.
*/
static void co_init() {
#ifdef LIBCO_MPROTECT
  unsigned long start = (unsigned long)co_swap_function;
  /* distance from the start of the containing page to the array */
  unsigned long offset = start % sysconf(_SC_PAGESIZE);
  /* mprotect() wants a page-aligned address, so begin at the page start
     and extend the length by the same distance */
  unsigned long page_start = start - offset;
  unsigned long length = offset + sizeof(co_swap_function);
  mprotect((void*)page_start, length, PROT_READ | PROT_EXEC);
#endif
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/*
  Landing pad for a cothread whose entrypoint returns.

  co_create() stores this function's address in the topmost slot of a new
  cothread's stack, directly above the entrypoint address, so a return from
  the entrypoint transfers control here. There is nothing further up that
  stack to return to, so this function must never return.
*/
static void crash() {
  assert(0);  /* called only if cothread_t entrypoint returns */
  abort();    /* assert() expands to nothing under NDEBUG; terminate regardless */
}
|
|
|
|
|
|
|
|
cothread_t co_active() {
|
|
|
|
if(!co_active_handle) co_active_handle = &co_active_buffer;
|
|
|
|
return co_active_handle;
|
|
|
|
}
|
|
|
|
|
|
|
|
cothread_t co_create(unsigned int size, void (*entrypoint)(void)) {
|
|
|
|
cothread_t handle;
|
|
|
|
if(!co_swap) {
|
|
|
|
co_init();
|
|
|
|
co_swap = (void (fastcall*)(cothread_t, cothread_t))co_swap_function;
|
|
|
|
}
|
|
|
|
if(!co_active_handle) co_active_handle = &co_active_buffer;
|
Update to v094r29 release.
byuu says:
Note: for Windows users, please go to nall/intrinsics.hpp line 60 and
correct the typo from "DISPLAY_WINDOW" to "DISPLAY_WINDOWS" before
compiling, otherwise things won't work at all.
This will be a really major WIP for the core SNES emulation, so please
test as thoroughly as possible.
I rewrote the 65816 CPU core's dispatcher from a jump table to a switch
table. This was so that I could pass class variables as parameters to
opcodes without crazy theatrics.
With that, I killed the regs.r[N] stuff, the flag_t operator|=, &=, ^=
stuff, and all of the template versions of opcodes.
I also removed some stupid pointless flag tests in xcn and pflag that
would always be true.
I sure hope that AWJ is happy with this; because this change was so that
my flag assignments and branch tests won't need to build regs.P into
a full 8-bit variable anymore.
It does of course incur a slight performance hit when you pass in
variables by-value to functions, but it should help with binary size
(and thus cache) by reducing a lot of extra functions. (I know I could
have used template parameters for some things even with a switch table,
but chose not to for the aforementioned reasons.)
Overall, it's about a ~1% speedup from the previous build. The CPU core
instructions were never a bottleneck, but I did want to fix the P flag
building stuff because that really was a dumb mistake v_v'
2015-06-22 13:31:49 +00:00
|
|
|
size += 256; /* allocate additional space for storage */
|
|
|
|
size &= ~15; /* align stack to 16-byte boundary */
|
2010-08-09 13:28:56 +00:00
|
|
|
|
|
|
|
if(handle = (cothread_t)malloc(size)) {
|
Update to v094r29 release.
byuu says:
Note: for Windows users, please go to nall/intrinsics.hpp line 60 and
correct the typo from "DISPLAY_WINDOW" to "DISPLAY_WINDOWS" before
compiling, otherwise things won't work at all.
This will be a really major WIP for the core SNES emulation, so please
test as thoroughly as possible.
I rewrote the 65816 CPU core's dispatcher from a jump table to a switch
table. This was so that I could pass class variables as parameters to
opcodes without crazy theatrics.
With that, I killed the regs.r[N] stuff, the flag_t operator|=, &=, ^=
stuff, and all of the template versions of opcodes.
I also removed some stupid pointless flag tests in xcn and pflag that
would always be true.
I sure hope that AWJ is happy with this; because this change was so that
my flag assignments and branch tests won't need to build regs.P into
a full 8-bit variable anymore.
It does of course incur a slight performance hit when you pass in
variables by-value to functions, but it should help with binary size
(and thus cache) by reducing a lot of extra functions. (I know I could
have used template parameters for some things even with a switch table,
but chose not to for the aforementioned reasons.)
Overall, it's about a ~1% speedup from the previous build. The CPU core
instructions were never a bottleneck, but I did want to fix the P flag
building stuff because that really was a dumb mistake v_v'
2015-06-22 13:31:49 +00:00
|
|
|
long *p = (long*)((char*)handle + size); /* seek to top of stack */
|
|
|
|
*--p = (long)crash; /* crash if entrypoint returns */
|
|
|
|
*--p = (long)entrypoint; /* start of function */
|
|
|
|
*(long*)handle = (long)p; /* stack pointer */
|
2010-08-09 13:28:56 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return handle;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
  Destroys a cothread by passing its handle to free().
  co_create() obtains each handle from a single malloc() call that holds
  both the saved context and the stack, so this releases all of it.
*/
void co_delete(cothread_t handle) {
  free(handle);
}
|
|
|
|
|
|
|
|
/*
  Suspends the running cothread and resumes the one identified by handle.

  handle becomes the recorded active cothread before the swap routine is
  entered. The swap routine receives (new, previous): it saves the current
  context into previous and continues from the context stored in handle.

  co_swap is bound by co_create(); until then it is 0, so this function
  must not be called before at least one cothread has been created.
*/
void co_switch(cothread_t handle) {
  /* no `register` storage class: it is ill-formed in C++17 and this file
     is also meant to be built as C++ */
  cothread_t co_previous_handle = co_active_handle;
  co_active_handle = handle;
  co_swap(handle, co_previous_handle);
}
|
|
|
|
|
|
|
|
#ifdef __cplusplus
|
|
|
|
}
|
|
|
|
#endif
|