Update to v094r29 release.
byuu says:
Note: for Windows users, please go to nall/intrinsics.hpp line 60 and
correct the typo from "DISPLAY_WINDOW" to "DISPLAY_WINDOWS" before
compiling, otherwise things won't work at all.
This will be a really major WIP for the core SNES emulation, so please
test as thoroughly as possible.
I rewrote the 65816 CPU core's dispatcher from a jump table to a switch
table. This was so that I could pass class variables as parameters to
opcodes without crazy theatrics.
With that, I killed the regs.r[N] stuff, the flag_t operator|=, &=, ^=
stuff, and all of the template versions of opcodes.
I also removed some stupid pointless flag tests in xcn and pflag that
would always be true.
I sure hope that AWJ is happy with this; because this change was so that
my flag assignments and branch tests won't need to build regs.P into
a full 8-bit variable anymore.
It does of course incur a slight performance hit when you pass in
variables by-value to functions, but it should help with binary size
(and thus cache) by reducing a lot of extra functions. (I know I could
have used template parameters for some things even with a switch table,
but chose not to for the aforementioned reasons.)
Overall, it's about a ~1% speedup from the previous build. The CPU core
instructions were never a bottleneck, but I did want to fix the P flag
building stuff because that really was a dumb mistake v_v'
2015-06-22 13:31:49 +00:00
|
|
|
/*
|
2016-09-14 11:55:53 +00:00
|
|
|
libco.arm (2016-09-14)
|
Update to v094r29 release.
byuu says:
Note: for Windows users, please go to nall/intrinsics.hpp line 60 and
correct the typo from "DISPLAY_WINDOW" to "DISPLAY_WINDOWS" before
compiling, otherwise things won't work at all.
This will be a really major WIP for the core SNES emulation, so please
test as thoroughly as possible.
I rewrote the 65816 CPU core's dispatcher from a jump table to a switch
table. This was so that I could pass class variables as parameters to
opcodes without crazy theatrics.
With that, I killed the regs.r[N] stuff, the flag_t operator|=, &=, ^=
stuff, and all of the template versions of opcodes.
I also removed some stupid pointless flag tests in xcn and pflag that
would always be true.
I sure hope that AWJ is happy with this; because this change was so that
my flag assignments and branch tests won't need to build regs.P into
a full 8-bit variable anymore.
It does of course incur a slight performance hit when you pass in
variables by-value to functions, but it should help with binary size
(and thus cache) by reducing a lot of extra functions. (I know I could
have used template parameters for some things even with a switch table,
but chose not to for the aforementioned reasons.)
Overall, it's about a ~1% speedup from the previous build. The CPU core
instructions were never a bottleneck, but I did want to fix the P flag
building stuff because that really was a dumb mistake v_v'
2015-06-22 13:31:49 +00:00
|
|
|
author: byuu
|
|
|
|
license: public domain
|
|
|
|
*/
|
|
|
|
|
|
|
|
#define LIBCO_C
|
|
|
|
#include "libco.h"
|
2016-09-14 11:55:53 +00:00
|
|
|
#include "settings.h"
|
Update to v094r29 release.
byuu says:
Note: for Windows users, please go to nall/intrinsics.hpp line 60 and
correct the typo from "DISPLAY_WINDOW" to "DISPLAY_WINDOWS" before
compiling, otherwise things won't work at all.
This will be a really major WIP for the core SNES emulation, so please
test as thoroughly as possible.
I rewrote the 65816 CPU core's dispatcher from a jump table to a switch
table. This was so that I could pass class variables as parameters to
opcodes without crazy theatrics.
With that, I killed the regs.r[N] stuff, the flag_t operator|=, &=, ^=
stuff, and all of the template versions of opcodes.
I also removed some stupid pointless flag tests in xcn and pflag that
would always be true.
I sure hope that AWJ is happy with this; because this change was so that
my flag assignments and branch tests won't need to build regs.P into
a full 8-bit variable anymore.
It does of course incur a slight performance hit when you pass in
variables by-value to functions, but it should help with binary size
(and thus cache) by reducing a lot of extra functions. (I know I could
have used template parameters for some things even with a switch table,
but chose not to for the aforementioned reasons.)
Overall, it's about a ~1% speedup from the previous build. The CPU core
instructions were never a bottleneck, but I did want to fix the P flag
building stuff because that really was a dumb mistake v_v'
2015-06-22 13:31:49 +00:00
|
|
|
|
|
|
|
#include <assert.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
#include <sys/mman.h>
|
|
|
|
|
|
|
|
#ifdef __cplusplus
|
|
|
|
extern "C" {
|
|
|
|
#endif
|
|
|
|
|
|
|
|
static thread_local unsigned long co_active_buffer[64];
|
|
|
|
static thread_local cothread_t co_active_handle = 0;
|
|
|
|
static void (*co_swap)(cothread_t, cothread_t) = 0;
|
|
|
|
|
2016-09-14 11:55:53 +00:00
|
|
|
#ifdef LIBCO_MPROTECT
|
|
|
|
alignas(4096)
|
|
|
|
#else
|
|
|
|
section(text)
|
|
|
|
#endif
|
|
|
|
static const unsigned long co_swap_function[1024] = {
|
Update to v094r29 release.
byuu says:
Note: for Windows users, please go to nall/intrinsics.hpp line 60 and
correct the typo from "DISPLAY_WINDOW" to "DISPLAY_WINDOWS" before
compiling, otherwise things won't work at all.
This will be a really major WIP for the core SNES emulation, so please
test as thoroughly as possible.
I rewrote the 65816 CPU core's dispatcher from a jump table to a switch
table. This was so that I could pass class variables as parameters to
opcodes without crazy theatrics.
With that, I killed the regs.r[N] stuff, the flag_t operator|=, &=, ^=
stuff, and all of the template versions of opcodes.
I also removed some stupid pointless flag tests in xcn and pflag that
would always be true.
I sure hope that AWJ is happy with this; because this change was so that
my flag assignments and branch tests won't need to build regs.P into
a full 8-bit variable anymore.
It does of course incur a slight performance hit when you pass in
variables by-value to functions, but it should help with binary size
(and thus cache) by reducing a lot of extra functions. (I know I could
have used template parameters for some things even with a switch table,
but chose not to for the aforementioned reasons.)
Overall, it's about a ~1% speedup from the previous build. The CPU core
instructions were never a bottleneck, but I did want to fix the P flag
building stuff because that really was a dumb mistake v_v'
2015-06-22 13:31:49 +00:00
|
|
|
0xe8a16ff0, /* stmia r1!, {r4-r11,sp,lr} */
|
|
|
|
0xe8b0aff0, /* ldmia r0!, {r4-r11,sp,pc} */
|
|
|
|
0xe12fff1e, /* bx lr */
|
|
|
|
};
|
|
|
|
|
2016-09-14 11:55:53 +00:00
|
|
|
static void co_init() {
|
|
|
|
#ifdef LIBCO_MPROTECT
|
Update to v094r29 release.
byuu says:
Note: for Windows users, please go to nall/intrinsics.hpp line 60 and
correct the typo from "DISPLAY_WINDOW" to "DISPLAY_WINDOWS" before
compiling, otherwise things won't work at all.
This will be a really major WIP for the core SNES emulation, so please
test as thoroughly as possible.
I rewrote the 65816 CPU core's dispatcher from a jump table to a switch
table. This was so that I could pass class variables as parameters to
opcodes without crazy theatrics.
With that, I killed the regs.r[N] stuff, the flag_t operator|=, &=, ^=
stuff, and all of the template versions of opcodes.
I also removed some stupid pointless flag tests in xcn and pflag that
would always be true.
I sure hope that AWJ is happy with this; because this change was so that
my flag assignments and branch tests won't need to build regs.P into
a full 8-bit variable anymore.
It does of course incur a slight performance hit when you pass in
variables by-value to functions, but it should help with binary size
(and thus cache) by reducing a lot of extra functions. (I know I could
have used template parameters for some things even with a switch table,
but chose not to for the aforementioned reasons.)
Overall, it's about a ~1% speedup from the previous build. The CPU core
instructions were never a bottleneck, but I did want to fix the P flag
building stuff because that really was a dumb mistake v_v'
2015-06-22 13:31:49 +00:00
|
|
|
unsigned long addr = (unsigned long)co_swap_function;
|
|
|
|
unsigned long base = addr - (addr % sysconf(_SC_PAGESIZE));
|
|
|
|
unsigned long size = (addr - base) + sizeof co_swap_function;
|
2016-09-14 11:55:53 +00:00
|
|
|
mprotect((void*)base, size, PROT_READ | PROT_EXEC);
|
|
|
|
#endif
|
Update to v094r29 release.
byuu says:
Note: for Windows users, please go to nall/intrinsics.hpp line 60 and
correct the typo from "DISPLAY_WINDOW" to "DISPLAY_WINDOWS" before
compiling, otherwise things won't work at all.
This will be a really major WIP for the core SNES emulation, so please
test as thoroughly as possible.
I rewrote the 65816 CPU core's dispatcher from a jump table to a switch
table. This was so that I could pass class variables as parameters to
opcodes without crazy theatrics.
With that, I killed the regs.r[N] stuff, the flag_t operator|=, &=, ^=
stuff, and all of the template versions of opcodes.
I also removed some stupid pointless flag tests in xcn and pflag that
would always be true.
I sure hope that AWJ is happy with this; because this change was so that
my flag assignments and branch tests won't need to build regs.P into
a full 8-bit variable anymore.
It does of course incur a slight performance hit when you pass in
variables by-value to functions, but it should help with binary size
(and thus cache) by reducing a lot of extra functions. (I know I could
have used template parameters for some things even with a switch table,
but chose not to for the aforementioned reasons.)
Overall, it's about a ~1% speedup from the previous build. The CPU core
instructions were never a bottleneck, but I did want to fix the P flag
building stuff because that really was a dumb mistake v_v'
2015-06-22 13:31:49 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
cothread_t co_active() {
|
|
|
|
if(!co_active_handle) co_active_handle = &co_active_buffer;
|
|
|
|
return co_active_handle;
|
|
|
|
}
|
|
|
|
|
|
|
|
cothread_t co_create(unsigned int size, void (*entrypoint)(void)) {
|
|
|
|
unsigned long* handle = 0;
|
|
|
|
if(!co_swap) {
|
|
|
|
co_init();
|
|
|
|
co_swap = (void (*)(cothread_t, cothread_t))co_swap_function;
|
|
|
|
}
|
|
|
|
if(!co_active_handle) co_active_handle = &co_active_buffer;
|
|
|
|
size += 256;
|
|
|
|
size &= ~15;
|
|
|
|
|
|
|
|
if(handle = (unsigned long*)malloc(size)) {
|
|
|
|
unsigned long* p = (unsigned long*)((unsigned char*)handle + size);
|
|
|
|
handle[8] = (unsigned long)p;
|
|
|
|
handle[9] = (unsigned long)entrypoint;
|
|
|
|
}
|
|
|
|
|
|
|
|
return handle;
|
|
|
|
}
|
|
|
|
|
|
|
|
void co_delete(cothread_t handle) {
|
|
|
|
free(handle);
|
|
|
|
}
|
|
|
|
|
|
|
|
void co_switch(cothread_t handle) {
|
|
|
|
cothread_t co_previous_handle = co_active_handle;
|
|
|
|
co_swap(co_active_handle = handle, co_previous_handle);
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef __cplusplus
|
|
|
|
}
|
|
|
|
#endif
|