Update to bsnes v053 release.

This release greatly polishes the user interface, adds a new cheat code search utility, adds the snesfilter library, and adds Qt-based GUI support to both snesfilter and snesreader. snesfilter gains 2xSaI, Super 2xSaI and Super Eagle support, plus full configuration for both the NTSC and scanline filters; and snesreader gains support support for multi-file ROM archives (eg GoodMerge sets.)
Statically linking Qt to bsnes, snesfilter and snesreader would be too prohibitive size-wise (~10MB or so.) I have to link dynamically so that all three can share the same Qt runtime, which gets all of bsnes and its modules to ~1MB (including the debugger build); and Qt itself to about ~2.5MB.
However, there is some bad news. There's a serious bug in MinGW 4.4+, where it is not generating profile-guided input files (*.gcno files.) There is also a serious bug in Qt 4.5.2/Windows when using dynamic linking: the library is hanging indefinitely, forcing me to manually terminate the process upon exit. This prevents the creation of profile-guided output files (*.gcda files.) It would be tough enough to work around one, but facing both of these issues at once is too much.
I'm afraid I have no choice but to disable profile-guided optimizations until these issues can be addressed. I did not know about these bugs until trying to build the official v053 release, so it's too late to revert to an all-in-one binary now. And I'm simply not willing to stop releasing new builds because of bugs in third-party software. As soon as I can work around this, I'll post a new optimized binary. In the mean time, despite the fact that this release is actually more optimized, please understand that the Windows binary will run approximately ~10% slower than previous releases. I recommend keeping v052 for now if you need the performance. Linux and OS X users are unaffected.
Changelog:
    - save RAM is initialized to 0xff again to work around Ken Griffey Jr Baseball issue
    - libco adds assembly-optimized targets for Win64 and PPC-ELF [the latter courtesy of Kernigh]
    - libco/x86 and libco/amd64 use pre-assembled blocks now, obviates need for custom compilation flags
    - added a new cheat code search utility to the tools menu
    - separated filters from main bsnes binary to libsnesfilter / snesfilter.dll
    - added 2xSaI, Super 2xSaI and Super Eagle filters [kode54]
    - added full configuration settings for NTSC and scanline filters (12+ new options)
    - further optimized HQ2x filter [blargg]
    - added Vsync support to the Mac OS X OpenGL driver
    - added folder creation button to custom file load dialog
    - fixed a few oddities with loading of "game folders" (see older news for an explanation on what this is)
    - updated to blargg's file_extractor v1.0.0
    - added full support for multi-file archives (eg GoodMerge sets)
    - split multi-cart loading again (BS-X, Sufami Turbo, etc) as required for multi-file support
    - cleaned up handling of file placement detection for save files (.srm, .cht, etc)
    - file load dialog now remembers your previous folder path across runs even without a custom games folder assigned
    - windows now save their exact positioning and size across runs, they no longer forcibly center
    - menus now have radio button and check box icons where appropriate
    - debugger's hex editor now has a working scrollbar widget
    - added resize splitter to settings and tools windows
    - worked around Qt style sheet bug where subclassed widgets were not properly applying style properties
This commit is contained in:
byuu 2009-10-18 17:33:04 +00:00
parent b45ff0433e
commit e6e19a7c89
6 changed files with 484 additions and 151 deletions

104
amd64.c Normal file
View File

@ -0,0 +1,104 @@
/*
libco.amd64 (2009-10-12)
author: byuu
license: public domain
*/
#define LIBCO_C
#include "libco.h"
#include <assert.h>
#include <stdlib.h>
#ifdef __cplusplus
extern "C" {
#endif
static thread_local long long co_active_buffer[64];
static thread_local cothread_t co_active_handle = 0;
static void (*co_swap)(cothread_t, cothread_t) = 0;
#ifdef _WIN32
//ABI: Win64
static unsigned char co_swap_function[] = {
0x48, 0x89, 0x22, 0x48, 0x8B, 0x21, 0x58, 0x48, 0x89, 0x6A, 0x08, 0x48, 0x89, 0x72, 0x10, 0x48,
0x89, 0x7A, 0x18, 0x48, 0x89, 0x5A, 0x20, 0x4C, 0x89, 0x62, 0x28, 0x4C, 0x89, 0x6A, 0x30, 0x4C,
0x89, 0x72, 0x38, 0x4C, 0x89, 0x7A, 0x40, 0x48, 0x81, 0xC2, 0x80, 0x00, 0x00, 0x00, 0x48, 0x83,
0xE2, 0xF0, 0x0F, 0x29, 0x32, 0x0F, 0x29, 0x7A, 0x10, 0x44, 0x0F, 0x29, 0x42, 0x20, 0x44, 0x0F,
0x29, 0x4A, 0x30, 0x44, 0x0F, 0x29, 0x52, 0x40, 0x44, 0x0F, 0x29, 0x5A, 0x50, 0x44, 0x0F, 0x29,
0x62, 0x60, 0x44, 0x0F, 0x29, 0x6A, 0x70, 0x44, 0x0F, 0x29, 0xB2, 0x80, 0x00, 0x00, 0x00, 0x44,
0x0F, 0x29, 0xBA, 0x90, 0x00, 0x00, 0x00, 0x48, 0x8B, 0x69, 0x08, 0x48, 0x8B, 0x71, 0x10, 0x48,
0x8B, 0x79, 0x18, 0x48, 0x8B, 0x59, 0x20, 0x4C, 0x8B, 0x61, 0x28, 0x4C, 0x8B, 0x69, 0x30, 0x4C,
0x8B, 0x71, 0x38, 0x4C, 0x8B, 0x79, 0x40, 0x48, 0x81, 0xC1, 0x80, 0x00, 0x00, 0x00, 0x48, 0x83,
0xE1, 0xF0, 0x0F, 0x29, 0x31, 0x0F, 0x29, 0x79, 0x10, 0x44, 0x0F, 0x29, 0x41, 0x20, 0x44, 0x0F,
0x29, 0x49, 0x30, 0x44, 0x0F, 0x29, 0x51, 0x40, 0x44, 0x0F, 0x29, 0x59, 0x50, 0x44, 0x0F, 0x29,
0x61, 0x60, 0x44, 0x0F, 0x29, 0x69, 0x70, 0x44, 0x0F, 0x29, 0xB1, 0x80, 0x00, 0x00, 0x00, 0x44,
0x0F, 0x29, 0xB9, 0x90, 0x00, 0x00, 0x00, 0xFF, 0xE0,
};
#include <windows.h>
void co_init() {
DWORD old_privileges;
VirtualProtect(co_swap_function, sizeof co_swap_function, PAGE_EXECUTE_READWRITE, &old_privileges);
}
#else
//ABI: SystemV
static unsigned char co_swap_function[] = {
0x48, 0x89, 0x26, 0x48, 0x8B, 0x27, 0x58, 0x48, 0x89, 0x6E, 0x08, 0x48, 0x89, 0x5E, 0x10, 0x4C,
0x89, 0x66, 0x18, 0x4C, 0x89, 0x6E, 0x20, 0x4C, 0x89, 0x76, 0x28, 0x4C, 0x89, 0x7E, 0x30, 0x48,
0x8B, 0x6F, 0x08, 0x48, 0x8B, 0x5F, 0x10, 0x4C, 0x8B, 0x67, 0x18, 0x4C, 0x8B, 0x6F, 0x20, 0x4C,
0x8B, 0x77, 0x28, 0x4C, 0x8B, 0x7F, 0x30, 0xFF, 0xE0,
};
#include <unistd.h>
#include <sys/mman.h>
void co_init() {
unsigned long long addr = (unsigned long long)co_swap_function;
unsigned long long base = addr - (addr % sysconf(_SC_PAGESIZE));
unsigned long long size = (addr - base) + sizeof co_swap_function;
mprotect((void*)base, size, PROT_READ | PROT_WRITE | PROT_EXEC);
}
#endif
static void crash() {
assert(0); /* called only if cothread_t entrypoint returns */
}
cothread_t co_active() {
if(!co_active_handle) co_active_handle = &co_active_buffer;
return co_active_handle;
}
cothread_t co_create(unsigned int size, void (*entrypoint)(void)) {
cothread_t handle;
if(!co_swap) {
co_init();
co_swap = (void (*)(cothread_t, cothread_t))co_swap_function;
}
if(!co_active_handle) co_active_handle = &co_active_buffer;
size += 512; /* allocate additional space for storage */
size &= ~15; /* align stack to 16-byte boundary */
if(handle = (cothread_t)malloc(size)) {
long long *p = (long long*)((char*)handle + size); /* seek to top of stack */
*--p = (long long)crash; /* crash if entrypoint returns */
*--p = (long long)entrypoint; /* start of function */
*(long long*)handle = (long long)p; /* stack pointer */
}
return handle;
}
void co_delete(cothread_t handle) {
free(handle);
}
void co_switch(cothread_t handle) {
register cothread_t co_previous_handle = co_active_handle;
co_swap(co_active_handle = handle, co_previous_handle);
}
#ifdef __cplusplus
}
#endif

10
libco.c
View File

@ -6,15 +6,17 @@
#if defined(__GNUC__) && defined(__i386__)
#include "x86.c"
#elif defined(__GNUC__) && defined(__amd64__) && !defined(__MINGW64__)
#include "x86-64.c"
#elif defined(__MINGW64__)
#include "fiber.c"
#elif defined(__GNUC__) && defined(__amd64__)
#include "amd64.c"
#elif defined(__GNUC__) && defined(__powerpc__) && defined(__ELF__)
#include "ppc-elf.c"
#elif defined(__GNUC__)
#include "sjlj.c"
#elif defined(_MSC_VER) && defined(_M_IX86)
#include "x86.c"
#elif defined(_MSC_VER) && defined(_M_AMD64)
#include "amd64.c"
#elif defined(_MSC_VER)
#include "fiber.c"
#else
#error "libco: unsupported processor, compiler or operating system"

View File

@ -1,6 +1,6 @@
/*
libco
version: 0.13 rc2 (2008-01-28)
version: 0.15 (2009-10-12)
license: public domain
*/

325
ppc-elf.c Normal file
View File

@ -0,0 +1,325 @@
/*
* libco.ppc-elf
* author: Kernigh
* license: public domain
*
* PowerPC 32-bit ELF implementation of libco (for compile with GCC),
* ported from PowerPC Mac OS X implementation (ppc.s) by Vas Crabb.
* This ELF version works for OpenBSD, and might also work for FreeBSD,
* NetBSD and Linux.
*
* Note 1: This implementation does not handle the AltiVec/VMX
* registers, because the ELF ABI does not mention them,
* and my OpenBSD system is not using them.
*
* Note 2: If you want position-independent code, then you must
* define __PIC__. gcc -fpic or -fPIC defines __PIC__, but
* gcc -fpie or -fPIE might not. If you want to use -fpie
* or -fPIE, then you might need a manual definition:
* gcc -fpie -D__PIC__=1
* gcc -fPIE -D__PIC__=2
*
* The ELF ABI is "System V Application Binary Interface, PowerPC
* Processor Supplement", which you can get from
* <http://refspecs.linux-foundation.org/elf/elfspec_ppc.pdf>
* (PDF file, hosted by Linux Foundation).
*
* ELF and Mac OS X use similar conventions to allocate the registers,
* and to pass arguments and return values through registers. The main
* differences are that ELF has a slightly different stack format, that
* symbols are different (and without an extra underscore at the start),
* and that the assembly syntax is different.
*
* A function may destroy the values of volatile registers, but must
* preserve the values of nonvolatile registers. So the co_switch()
* function only saves the nonvolatile registers.
*
* [nonvolatile registers in ELF]
* %r1, %r14..%r31
* %f14..%f31
* %cr2..%cr4 in cr
*
* [volatile registers in ELF]
* %r0, %r3..%r10
* %f0..%f13
* %cr0, %cr1, %cr5..%cr7 in cr
* ctr, lr, xer
*
* lr (link register) is volatile, but it contains the return address,
* so co_switch must save lr.
*
* %r13 is the small data pointer. This is constant across threads, so
* co_switch() does not touch %r13.
*
* %r2 is a reserved register, so co_switch() does not touch %r2. Some
* systems might borrow an idea from the PowerPC Embedded ABI, and might
* use %r2 as a small read-only data pointer, which is constant across
* threads.
*/
#ifdef __cplusplus
extern "C" {
#endif
typedef void * cothread_t;
/*
* co_active_context is either in a global offset table (if we are
* compiling -fPIC or -fPIE) or has an absolute position.
*/
static void *co_main_stack_pointer;
static cothread_t co_active_context = &co_main_stack_pointer;
extern cothread_t co_active() {
return co_active_context;
}
/*
* Embedded assembly.
*
* We are not using the percent-sign substitution feature,
* so we must write "%r1", not "%%r1".
*
* We always write 'bl malloc@plt', not 'bl malloc'. The '@plt'
* is necessary in position-indepent code and seems to have no
* significant effect in fixed-position code.
*
* We never use the 'lmw' or 'stmw' instructions. The ELF ABI
* mentions that these instructions "are usually slower than
* a sequence of other instructions that have the same effect."
* We instead use sequences of 'lwz' or 'stz' instructions.
*/
__asm__("\n"
"### embedded assembly \n"
".section \".text\" \n"
" .balign 4 \n"
" \n"
/*
* void co_switch(co_thread to %r3)
*
* Allocate our stack frame of 240 bytes:
* Old New Value
* 4(%r1) 244(%r1) return address, used by us
* 0(%r1) 240(%r1) frame pointer
* 232(%r1) %f31
* 224(%r1) %f30
* ...
* 96(%r1) %f14
* 92(%r1) %r31
* 88(%r1) %r30
* ...
* 24(%r1) %r14
* 20(%r1) condition register
* 8(%r1) padding of 12 bytes
* 4(%r1) return address, never used
* 0(%r1) frame pointer
*
* Save our registers in our stack frame.
* Save our stack pointer in 0(%r4).
* Switch to the stack of the other thread.
* Restore registers and return.
*/
" .globl co_switch \n"
" .type co_switch, @function \n"
"co_switch: \n"
" mflr %r0 # %r0 = return address \n"
" mfcr %r9 # %r9 = condition register \n"
" stwu %r1, -240(%r1) # allocate stack frame \n"
" \n"
" stw %r0, 244(%r1) # save return address \n"
" stfd %f31, 232(%r1) # save floating-point regs \n"
" stfd %f30, 224(%r1) \n"
" stfd %f29, 216(%r1) \n"
" stfd %f28, 208(%r1) \n"
" stfd %f27, 200(%r1) \n"
" stfd %f26, 192(%r1) \n"
" stfd %f25, 184(%r1) \n"
" stfd %f24, 176(%r1) \n"
" stfd %f23, 168(%r1) \n"
" stfd %f22, 160(%r1) \n"
" stfd %f21, 152(%r1) \n"
" stfd %f20, 144(%r1) \n"
" stfd %f19, 136(%r1) \n"
" stfd %f18, 128(%r1) \n"
" stfd %f17, 120(%r1) \n"
" stfd %f16, 112(%r1) \n"
" stfd %f16, 104(%r1) \n"
" stfd %f14, 96(%r1) \n"
" stw %r31, 92(%r1) # save general-purpose regs \n"
" stw %r30, 88(%r1) \n"
" stw %r29, 84(%r1) \n"
" stw %r28, 80(%r1) \n"
" stw %r27, 76(%r1) \n"
" stw %r26, 72(%r1) \n"
" stw %r25, 68(%r1) \n"
" stw %r24, 64(%r1) \n"
" stw %r23, 60(%r1) \n"
" stw %r22, 56(%r1) \n"
" stw %r21, 52(%r1) \n"
" stw %r20, 48(%r1) \n"
" stw %r19, 44(%r1) \n"
" stw %r18, 40(%r1) \n"
" stw %r17, 36(%r1) \n"
" stw %r16, 32(%r1) \n"
" stw %r15, 28(%r1) \n"
" stw %r14, 24(%r1) \n"
" stw %r9, 20(%r1) # save condition reg \n"
" \n"
" # save current context, set new context \n"
" # %r4 = co_active_context \n"
" # co_active_context = %r3 \n"
#if __PIC__ == 2
" # position-independent code, large model (-fPIC) \n"
" bl _GLOBAL_OFFSET_TABLE_@local-4 \n"
" mflr %r8 # %r8 = address of got \n"
" addis %r7, %r8, co_active_context@got@ha \n"
" lwz %r6, co_active_context@got@l(%r7) \n"
" lwz %r4, 0(%r6) \n"
" stw %r3, 0(%r6) \n"
#elif __PIC__ == 1
" # position-independent code, small model (-fpic) \n"
" bl _GLOBAL_OFFSET_TABLE_@local-4 \n"
" mflr %r8 # %r8 = address of got \n"
" lwz %r7, co_active_context@got(%r8) \n"
" lwz %r4, 0(%r7) \n"
" stw %r3, 0(%r7) \n"
#else
" # fixed-position code \n"
" lis %r8, co_active_context@ha \n"
" lwz %r4, co_active_context@l(%r8) \n"
" stw %r3, co_active_context@l(%r8) \n"
#endif
" \n"
" # save current stack pointer \n"
" stw %r1, 0(%r4) \n"
" # get new stack pointer \n"
" lwz %r1, 0(%r3) \n"
" \n"
" lwz %r0, 244(%r1) # get return address \n"
" lfd %f31, 232(%r1) # restore floating-point regs \n"
" lfd %f30, 224(%r1) \n"
" lfd %f29, 216(%r1) \n"
" lfd %f28, 208(%r1) \n"
" lfd %f27, 200(%r1) \n"
" lfd %f26, 192(%r1) \n"
" lfd %f25, 184(%r1) \n"
" lfd %f24, 176(%r1) \n"
" lfd %f23, 168(%r1) \n"
" lfd %f22, 160(%r1) \n"
" lfd %f21, 152(%r1) \n"
" lfd %f20, 144(%r1) \n"
" lfd %f19, 136(%r1) \n"
" lfd %f18, 128(%r1) \n"
" lfd %f17, 120(%r1) \n"
" lfd %f16, 112(%r1) \n"
" lfd %f16, 104(%r1) \n"
" lfd %f14, 96(%r1) \n"
" lwz %r31, 92(%r1) # restore general-purpose regs \n"
" lwz %r30, 88(%r1) \n"
" lwz %r29, 84(%r1) \n"
" lwz %r28, 80(%r1) \n"
" lwz %r27, 76(%r1) \n"
" lwz %r26, 72(%r1) \n"
" lwz %r25, 68(%r1) \n"
" lwz %r24, 64(%r1) \n"
" lwz %r23, 60(%r1) \n"
" lwz %r22, 56(%r1) \n"
" lwz %r21, 52(%r1) \n"
" lwz %r20, 48(%r1) \n"
" lwz %r19, 44(%r1) \n"
" lwz %r18, 40(%r1) \n"
" lwz %r17, 36(%r1) \n"
" lwz %r16, 32(%r1) \n"
" lwz %r15, 28(%r1) \n"
" lwz %r14, 24(%r1) \n"
" lwz %r9, 20(%r1) # get condition reg \n"
" \n"
" addi %r1, %r1, 240 # free stack frame \n"
" mtlr %r0 # restore return address \n"
" mtcr %r9 # restore condition register \n"
" blr # return \n"
" .size co_switch, . - co_switch \n"
" \n"
/*
* cothread_t %r3 co_create(unsigned int stack_size %r3,
* void (*coentry %r4)())
*
* Allocate a new stack, such that when you co_switch to that
* stack, then co_switch returns to coentry.
*/
" .globl co_create \n"
" .type co_create, @function \n"
"co_create: \n"
" mflr %r0 # %r0 = return address \n"
" stwu %r1, -16(%r1) # allocate my stack frame \n"
" stw %r0, 20(%r1) # save return address \n"
" stw %r31, 12(%r1) # save %r31 \n"
" stw %r30, 8(%r1) # save %r30 \n"
" \n"
" mr %r30, %r3 # %r30 = stack_size \n"
" mr %r31, %r4 # %r31 = coentry \n"
" \n"
" # Call malloc(stack_size %r3) to allocate stack; \n"
" # malloc() probably uses good alignment. \n"
" # \n"
" bl malloc@plt # returns %r3 = low end \n"
" cmpwi %r3, 0 # if returned NULL, \n"
" beq- 1f # then abort \n"
" \n"
" # we return %r3 = low end of stack \n"
" add %r4, %r3, %r30 # %r4 = high end of stack \n"
" \n"
" # uncomment if malloc() uses wrong alignment \n"
" #rlwinm %r4,%r4,0,0,27 # force 16-byte alignment \n"
" \n"
/*
* Allocate two stack frames:
* 16 bytes for stack frame with return address
* 240 bytes for co_switch stack frame
*
* Old New Value
* -8(%r4) 248(%r5) padding of 8 bytes
* -12(%r4) 244(%r5) return address = coentry
* -16(%r4) 240(%r5) frame pointer = NULL
* 232(%r5) %f31 = 0
* ...
* 20(%r5) condition register = 0
* 0(%r5) frame pointer
*/
" li %r9, (240-20)/4+1 \n"
" addi %r5, %r4, -16 # allocate first stack frame \n"
" li %r0, 0 \n"
" stwu %r5, -240(%r5) # allocate second stack frame \n"
" li %r8, 20 \n"
" mtctr %r9 # loop %r9 times \n"
"2: # loop to store zero to 20(%r5) through 240(%r5) \n"
" stwx %r0, %r5, %r8 \n"
" addi %r8, %r8, 4 # index += 4 \n"
" bdnz+ 2b # ctr -= 1, branch if nonzero \n"
" \n"
" stw %r31, 244(%r5) # return address = coentry \n"
" stw %r5, 0(%r3) # save stack pointer \n"
" \n"
" lwz %r0, 20(%r1) # get return address \n"
" lwz %r31, 12(%r1) # restore %r31 \n"
" lwz %r30, 8(%r1) # restore %r30 \n"
" mtlr %r0 # restore return address \n"
" addi %r1, %r1, 16 # free stack frame \n"
" blr # return \n"
" \n"
"1: b abort@plt # branch 1f to abort \n"
" .size co_create, . - co_create \n"
" \n"
/*
* void co_delete(cothread_t) => void free(void *)
*/
" .globl co_delete \n"
" .type co_delete, @function \n"
"co_delete: \n"
" b free@plt \n"
" \n"
);
#ifdef __cplusplus
}
#endif

View File

@ -1,81 +0,0 @@
/*
libco.x86-64 (2008-01-28)
author: byuu
license: public domain
*/
#define LIBCO_C
#include "libco.h"
#include <assert.h>
#include <stdlib.h>
#ifdef __cplusplus
extern "C" {
#endif
static thread_local long co_active_buffer[32];
static thread_local cothread_t co_active_ = 0;
static void crash() {
assert(0); /* called only if cothread_t entrypoint returns */
}
cothread_t co_active() {
if(!co_active_) co_active_ = &co_active_buffer;
return co_active_;
}
cothread_t co_create(unsigned int size, void (*entrypoint)(void)) {
cothread_t handle;
assert(sizeof(long) == 8);
if(!co_active_) co_active_ = &co_active_buffer;
size += 128; /* allocate additional space for storage */
size &= ~15; /* align stack to 16-byte boundary */
if(handle = (cothread_t)calloc(size, 1)) {
long *p = (long*)((char*)handle + size); /* seek to top of stack */
*--p = (long)crash; /* crash if entrypoint returns */
*--p = (long)entrypoint; /* start of function */
*(long*)handle = (long)p; /* stack pointer */
}
return handle;
}
void co_delete(cothread_t handle) {
free(handle);
}
void co_switch(cothread_t to) {
register long stack = *(long*)to; /* stack[0] = "to" thread entry point */
register cothread_t from = co_active_;
co_active_ = to;
__asm__ __volatile__(
"movq %%rsp,(%1) \n\t" /* save old stack pointer */
"movq (%0),%%rsp \n\t" /* load new stack pointer */
"addq $8,%%rsp \n\t" /* "pop" return address off stack */
"movq %%rbp, 8(%1) \n\t" /* backup non-volatile registers */
"movq %%rbx,16(%1) \n\t"
"movq %%r12,24(%1) \n\t"
"movq %%r13,32(%1) \n\t"
"movq %%r14,40(%1) \n\t"
"movq %%r15,48(%1) \n\t"
"movq 8(%0),%%rbp \n\t" /* restore non-volatile registers */
"movq 16(%0),%%rbx \n\t"
"movq 24(%0),%%r12 \n\t"
"movq 32(%0),%%r13 \n\t"
"movq 40(%0),%%r14 \n\t"
"movq 48(%0),%%r15 \n\t"
"jmp *(%2) \n\t" /* jump into "to" thread */
: /* no outputs */
: "r" (to), "r" (from), "r" (stack)
);
}
#ifdef __cplusplus
}
#endif

113
x86.c
View File

@ -1,5 +1,5 @@
/*
libco.x86 (2008-01-28)
libco.x86 (2009-10-12)
author: byuu
license: public domain
*/
@ -13,26 +13,63 @@
extern "C" {
#endif
static thread_local long co_active_buffer[32];
static thread_local cothread_t co_active_ = 0;
#if defined(_MSC_VER)
#define fastcall __fastcall
#elif defined(__GNUC__)
#define fastcall __attribute__((fastcall))
#else
#error "libco: please define fastcall macro"
#endif
static thread_local long co_active_buffer[64];
static thread_local cothread_t co_active_handle = 0;
static void (fastcall *co_swap)(cothread_t, cothread_t) = 0;
//ABI: fastcall
static unsigned char co_swap_function[] = {
0x89, 0x22, 0x8B, 0x21, 0x58, 0x89, 0x6A, 0x04, 0x89, 0x72, 0x08, 0x89, 0x7A, 0x0C, 0x89, 0x5A,
0x10, 0x8B, 0x69, 0x04, 0x8B, 0x71, 0x08, 0x8B, 0x79, 0x0C, 0x8B, 0x59, 0x10, 0xFF, 0xE0,
};
#ifdef _WIN32
#include <windows.h>
void co_init() {
DWORD old_privileges;
VirtualProtect(co_swap_function, sizeof co_swap_function, PAGE_EXECUTE_READWRITE, &old_privileges);
}
#else
#include <unistd.h>
#include <sys/mman.h>
void co_init() {
unsigned long addr = (unsigned long)co_swap_function;
unsigned long base = addr - (addr % sysconf(_SC_PAGESIZE));
unsigned long size = (addr - base) + sizeof co_swap_function;
mprotect((void*)base, size, PROT_READ | PROT_WRITE | PROT_EXEC);
}
#endif
static void crash() {
assert(0); /* called only if cothread_t entrypoint returns */
}
cothread_t co_active() {
if(!co_active_) co_active_ = &co_active_buffer;
return co_active_;
if(!co_active_handle) co_active_handle = &co_active_buffer;
return co_active_handle;
}
cothread_t co_create(unsigned int size, void (*entrypoint)(void)) {
cothread_t handle;
assert(sizeof(long) == 4);
if(!co_active_) co_active_ = &co_active_buffer;
size += 128; /* allocate additional space for storage */
if(!co_swap) {
co_init();
co_swap = (void (fastcall*)(cothread_t, cothread_t))co_swap_function;
}
if(!co_active_handle) co_active_handle = &co_active_buffer;
size += 256; /* allocate additional space for storage */
size &= ~15; /* align stack to 16-byte boundary */
if(handle = (cothread_t)calloc(size, 1)) {
if(handle = (cothread_t)malloc(size)) {
long *p = (long*)((char*)handle + size); /* seek to top of stack */
*--p = (long)crash; /* crash if entrypoint returns */
*--p = (long)entrypoint; /* start of function */
@ -46,65 +83,11 @@ void co_delete(cothread_t handle) {
free(handle);
}
#if defined(__GNUC__)
void co_switch(cothread_t to) {
register long stack = *(long*)to; /* stack[0] = "to" thread entry point */
register cothread_t from = co_active_;
co_active_ = to;
__asm__ __volatile__(
"movl %%esp,(%1) \n\t" /* save old stack pointer */
"movl (%0),%%esp \n\t" /* load new stack pointer */
"addl $4,%%esp \n\t" /* "pop" return address off stack */
"movl %%ebp, 4(%1) \n\t" /* backup non-volatile registers */
"movl %%esi, 8(%1) \n\t"
"movl %%edi,12(%1) \n\t"
"movl %%ebx,16(%1) \n\t"
"movl 4(%0),%%ebp \n\t" /* restore non-volatile registers */
"movl 8(%0),%%esi \n\t"
"movl 12(%0),%%edi \n\t"
"movl 16(%0),%%ebx \n\t"
"jmp *(%2) \n\t" /* jump into "to" thread */
: /* no outputs */
: "r" (to), "r" (from), "r" (stack)
);
}
#elif defined(_MSC_VER)
__declspec(naked) __declspec(noinline)
static void __fastcall co_swap(register cothread_t to, register cothread_t from) {
/* ecx = to, edx = from */
__asm {
mov [edx],esp
mov esp,[ecx]
pop eax
mov [edx+ 4],ebp
mov [edx+ 8],esi
mov [edx+12],edi
mov [edx+16],ebx
mov ebp,[ecx+ 4]
mov esi,[ecx+ 8]
mov edi,[ecx+12]
mov ebx,[ecx+16]
jmp eax
}
}
void co_switch(cothread_t handle) {
register cothread_t co_prev_ = co_active_;
co_swap(co_active_ = handle, co_prev_);
register cothread_t co_previous_handle = co_active_handle;
co_swap(co_active_handle = handle, co_previous_handle);
}
#endif
#ifdef __cplusplus
}
#endif