diff --git a/amd64.c b/amd64.c
new file mode 100644
index 00000000..5f1cfca9
--- /dev/null
+++ b/amd64.c
@@ -0,0 +1,104 @@
+/*
+  libco.amd64 (2009-10-12)
+  author: byuu
+  license: public domain
+*/
+
+#define LIBCO_C
+#include "libco.h"
+#include <assert.h>
+#include <stdlib.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static thread_local long long co_active_buffer[64];
+static thread_local cothread_t co_active_handle = 0;
+static void (*co_swap)(cothread_t, cothread_t) = 0;
+
+#ifdef _WIN32
+  //ABI: Win64
+  static unsigned char co_swap_function[] = {
+    0x48, 0x89, 0x22, 0x48, 0x8B, 0x21, 0x58, 0x48, 0x89, 0x6A, 0x08, 0x48, 0x89, 0x72, 0x10, 0x48,
+    0x89, 0x7A, 0x18, 0x48, 0x89, 0x5A, 0x20, 0x4C, 0x89, 0x62, 0x28, 0x4C, 0x89, 0x6A, 0x30, 0x4C,
+    0x89, 0x72, 0x38, 0x4C, 0x89, 0x7A, 0x40, 0x48, 0x81, 0xC2, 0x80, 0x00, 0x00, 0x00, 0x48, 0x83,
+    0xE2, 0xF0, 0x0F, 0x29, 0x32, 0x0F, 0x29, 0x7A, 0x10, 0x44, 0x0F, 0x29, 0x42, 0x20, 0x44, 0x0F,
+    0x29, 0x4A, 0x30, 0x44, 0x0F, 0x29, 0x52, 0x40, 0x44, 0x0F, 0x29, 0x5A, 0x50, 0x44, 0x0F, 0x29,
+    0x62, 0x60, 0x44, 0x0F, 0x29, 0x6A, 0x70, 0x44, 0x0F, 0x29, 0xB2, 0x80, 0x00, 0x00, 0x00, 0x44,
+    0x0F, 0x29, 0xBA, 0x90, 0x00, 0x00, 0x00, 0x48, 0x8B, 0x69, 0x08, 0x48, 0x8B, 0x71, 0x10, 0x48,
+    0x8B, 0x79, 0x18, 0x48, 0x8B, 0x59, 0x20, 0x4C, 0x8B, 0x61, 0x28, 0x4C, 0x8B, 0x69, 0x30, 0x4C,
+    0x8B, 0x71, 0x38, 0x4C, 0x8B, 0x79, 0x40, 0x48, 0x81, 0xC1, 0x80, 0x00, 0x00, 0x00, 0x48, 0x83,
+    0xE1, 0xF0, 0x0F, 0x28, 0x31, 0x0F, 0x28, 0x79, 0x10, 0x44, 0x0F, 0x28, 0x41, 0x20, 0x44, 0x0F, /* bugfix: restores use 0x28 (movaps load); 0x29 (store) never restored xmm6-15 */
+    0x28, 0x49, 0x30, 0x44, 0x0F, 0x28, 0x51, 0x40, 0x44, 0x0F, 0x28, 0x59, 0x50, 0x44, 0x0F, 0x28,
+    0x61, 0x60, 0x44, 0x0F, 0x28, 0x69, 0x70, 0x44, 0x0F, 0x28, 0xB1, 0x80, 0x00, 0x00, 0x00, 0x44,
+    0x0F, 0x28, 0xB9, 0x90, 0x00, 0x00, 0x00, 0xFF, 0xE0,
+  };
+
+  #include <windows.h>
+
+  void co_init() {
+    DWORD old_privileges;
+    VirtualProtect(co_swap_function, sizeof co_swap_function, PAGE_EXECUTE_READWRITE, &old_privileges);
+  }
+#else
+  //ABI: SystemV
+  static unsigned char co_swap_function[] = {
+    0x48, 0x89, 0x26, 0x48, 0x8B, 0x27, 0x58, 0x48, 0x89, 0x6E, 0x08, 0x48, 0x89, 0x5E, 0x10, 0x4C,
+    0x89, 0x66, 0x18, 0x4C, 0x89, 0x6E, 0x20, 0x4C, 0x89, 0x76, 0x28, 0x4C, 0x89, 0x7E, 0x30, 0x48,
+    0x8B, 0x6F, 0x08, 0x48, 0x8B, 0x5F, 0x10, 0x4C, 0x8B, 0x67, 0x18, 0x4C, 0x8B, 0x6F, 0x20, 0x4C,
+    0x8B, 0x77, 0x28, 0x4C, 0x8B, 0x7F, 0x30, 0xFF, 0xE0,
+  };
+
+  #include <unistd.h>
+  #include <sys/mman.h>
+
+  void co_init() {
+    unsigned long long addr = (unsigned long long)co_swap_function;
+    unsigned long long base = addr - (addr % sysconf(_SC_PAGESIZE));
+    unsigned long long size = (addr - base) + sizeof co_swap_function;
+    mprotect((void*)base, size, PROT_READ | PROT_WRITE | PROT_EXEC);
+  }
+#endif
+
+static void crash() {
+  assert(0);  /* called only if cothread_t entrypoint returns */
+}
+
+cothread_t co_active() {
+  if(!co_active_handle) co_active_handle = &co_active_buffer;
+  return co_active_handle;
+}
+
+cothread_t co_create(unsigned int size, void (*entrypoint)(void)) {
+  cothread_t handle;
+  if(!co_swap) {
+    co_init();
+    co_swap = (void (*)(cothread_t, cothread_t))co_swap_function;
+  }
+  if(!co_active_handle) co_active_handle = &co_active_buffer;
+  size += 512;  /* allocate additional space for storage */
+  size &= ~15;  /* align stack to 16-byte boundary */
+
+  if(handle = (cothread_t)malloc(size)) {
+    long long *p = (long long*)((char*)handle + size);  /* seek to top of stack */
+    *--p = (long long)crash;                            /* crash if entrypoint returns */
+    *--p = (long long)entrypoint;                       /* start of function */
+    *(long long*)handle = (long long)p;                 /* stack pointer */
+  }
+
+  return handle;
+}
+
+void co_delete(cothread_t handle) {
+  free(handle);
+}
+
+void co_switch(cothread_t handle) {
+  register cothread_t co_previous_handle = co_active_handle;
+  co_swap(co_active_handle = handle, co_previous_handle);
+}
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/libco.c b/libco.c
index 604b37df..dd020fe3 100644
--- a/libco.c
+++ b/libco.c
@@ -6,15 +6,17 @@
 
 #if defined(__GNUC__) && defined(__i386__)
   #include "x86.c"
-#elif defined(__GNUC__) && defined(__amd64__) && !defined(__MINGW64__)
-  #include "x86-64.c"
-#elif defined(__MINGW64__)
-  #include "fiber.c"
+#elif defined(__GNUC__) && defined(__amd64__)
+  #include "amd64.c"
+#elif defined(__GNUC__) && defined(__powerpc__) && defined(__ELF__)
+  #include "ppc-elf.c"
 #elif defined(__GNUC__)
   #include "sjlj.c"
 #elif defined(_MSC_VER) && defined(_M_IX86)
   #include "x86.c"
 #elif defined(_MSC_VER) && defined(_M_AMD64)
+  #include "amd64.c"
+#elif defined(_MSC_VER)
   #include "fiber.c"
 #else
   #error "libco: unsupported processor, compiler or operating system"
diff --git a/libco.h b/libco.h
index d8348c4e..b1b49a29 100644
--- a/libco.h
+++ b/libco.h
@@ -1,6 +1,6 @@
 /*
   libco
-  version: 0.13 rc2 (2008-01-28)
+  version: 0.15 (2009-10-12)
   license: public domain
 */
 
diff --git a/ppc-elf.c b/ppc-elf.c
new file mode 100644
index 00000000..5740f77f
--- /dev/null
+++ b/ppc-elf.c
@@ -0,0 +1,325 @@
+/*
+ * libco.ppc-elf
+ * author: Kernigh
+ * license: public domain
+ *
+ * PowerPC 32-bit ELF implementation of libco (for compile with GCC),
+ * ported from PowerPC Mac OS X implementation (ppc.s) by Vas Crabb.
+ * This ELF version works for OpenBSD, and might also work for FreeBSD,
+ * NetBSD and Linux.
+ *
+ * Note 1: This implementation does not handle the AltiVec/VMX
+ *         registers, because the ELF ABI does not mention them,
+ *         and my OpenBSD system is not using them.
+ *
+ * Note 2: If you want position-independent code, then you must
+ *         define __PIC__. gcc -fpic or -fPIC defines __PIC__, but
+ *         gcc -fpie or -fPIE might not. If you want to use -fpie
+ *         or -fPIE, then you might need a manual definition:
+ *         gcc -fpie -D__PIC__=1
+ *         gcc -fPIE -D__PIC__=2
+ *
+ * The ELF ABI is "System V Application Binary Interface, PowerPC
+ * Processor Supplement", which you can get from
+ * <http://refspecs.linux-foundation.org/elf/elfspec_ppc.pdf>
+ * (PDF file, hosted by Linux Foundation).
+ *
+ * ELF and Mac OS X use similar conventions to allocate the registers,
+ * and to pass arguments and return values through registers. The main
+ * differences are that ELF has a slightly different stack format, that
+ * symbols are different (and without an extra underscore at the start),
+ * and that the assembly syntax is different.
+ *
+ * A function may destroy the values of volatile registers, but must
+ * preserve the values of nonvolatile registers. So the co_switch()
+ * function only saves the nonvolatile registers.
+ *
+ * [nonvolatile registers in ELF]
+ *   %r1, %r14..%r31
+ *   %f14..%f31
+ *   %cr2..%cr4 in cr
+ *
+ * [volatile registers in ELF]
+ *   %r0, %r3..%r10
+ *   %f0..%f13
+ *   %cr0, %cr1, %cr5..%cr7 in cr
+ *   ctr, lr, xer
+ *
+ * lr (link register) is volatile, but it contains the return address,
+ * so co_switch must save lr.
+ *
+ * %r13 is the small data pointer. This is constant across threads, so
+ * co_switch() does not touch %r13.
+ *
+ * %r2 is a reserved register, so co_switch() does not touch %r2. Some
+ * systems might borrow an idea from the PowerPC Embedded ABI, and might
+ * use %r2 as a small read-only data pointer, which is constant across
+ * threads.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef void * cothread_t;
+
+/*
+ * co_active_context is either in a global offset table (if we are
+ * compiling -fPIC or -fPIE) or has an absolute position.
+ */
+static void *co_main_stack_pointer;
+static cothread_t co_active_context = &co_main_stack_pointer;
+
+extern cothread_t co_active() {
+  return co_active_context;
+}
+
+/*
+ * Embedded assembly.
+ *
+ * We are not using the percent-sign substitution feature,
+ * so we must write "%r1", not "%%r1".
+ *
+ * We always write 'bl malloc@plt', not 'bl malloc'. The '@plt'
+ * is necessary in position-independent code and seems to have no
+ * significant effect in fixed-position code.
+ *
+ * We never use the 'lmw' or 'stmw' instructions. The ELF ABI
+ * mentions that these instructions "are usually slower than
+ * a sequence of other instructions that have the same effect."
+ * We instead use sequences of 'lwz' or 'stw' instructions.
+ */
+__asm__("\n"
+"### embedded assembly \n"
+".section \".text\" \n"
+"  .balign 4 \n"
+" \n"
+/*
+ * void co_switch(co_thread to %r3)
+ *
+ * Allocate our stack frame of 240 bytes:
+ *     Old        New        Value
+ *   4(%r1)    244(%r1)    return address, used by us
+ *   0(%r1)    240(%r1)    frame pointer
+ *             232(%r1)    %f31
+ *             224(%r1)    %f30
+ *             ...
+ *              96(%r1)    %f14
+ *              92(%r1)    %r31
+ *              88(%r1)    %r30
+ *             ...
+ *              24(%r1)    %r14
+ *              20(%r1)    condition register
+ *               8(%r1)    padding of 12 bytes
+ *               4(%r1)    return address, never used
+ *               0(%r1)    frame pointer
+ *
+ * Save our registers in our stack frame.
+ * Save our stack pointer in 0(%r4).
+ * Switch to the stack of the other thread.
+ * Restore registers and return.
+ */
+"  .globl co_switch \n"
+"  .type co_switch, @function \n"
+"co_switch: \n"
+"  mflr %r0                 # %r0 = return address \n"
+"  mfcr %r9                 # %r9 = condition register \n"
+"  stwu %r1, -240(%r1)      # allocate stack frame \n"
+" \n"
+"  stw %r0, 244(%r1)        # save return address \n"
+"  stfd %f31, 232(%r1)      # save floating-point regs \n"
+"  stfd %f30, 224(%r1) \n"
+"  stfd %f29, 216(%r1) \n"
+"  stfd %f28, 208(%r1) \n"
+"  stfd %f27, 200(%r1) \n"
+"  stfd %f26, 192(%r1) \n"
+"  stfd %f25, 184(%r1) \n"
+"  stfd %f24, 176(%r1) \n"
+"  stfd %f23, 168(%r1) \n"
+"  stfd %f22, 160(%r1) \n"
+"  stfd %f21, 152(%r1) \n"
+"  stfd %f20, 144(%r1) \n"
+"  stfd %f19, 136(%r1) \n"
+"  stfd %f18, 128(%r1) \n"
+"  stfd %f17, 120(%r1) \n"
+"  stfd %f16, 112(%r1) \n"
+"  stfd %f15, 104(%r1)      # bugfix: was %f16, so %f15 was never saved \n"
+"  stfd %f14, 96(%r1) \n"
+"  stw %r31, 92(%r1)        # save general-purpose regs \n"
+"  stw %r30, 88(%r1) \n"
+"  stw %r29, 84(%r1) \n"
+"  stw %r28, 80(%r1) \n"
+"  stw %r27, 76(%r1) \n"
+"  stw %r26, 72(%r1) \n"
+"  stw %r25, 68(%r1) \n"
+"  stw %r24, 64(%r1) \n"
+"  stw %r23, 60(%r1) \n"
+"  stw %r22, 56(%r1) \n"
+"  stw %r21, 52(%r1) \n"
+"  stw %r20, 48(%r1) \n"
+"  stw %r19, 44(%r1) \n"
+"  stw %r18, 40(%r1) \n"
+"  stw %r17, 36(%r1) \n"
+"  stw %r16, 32(%r1) \n"
+"  stw %r15, 28(%r1) \n"
+"  stw %r14, 24(%r1) \n"
+"  stw %r9, 20(%r1)         # save condition reg \n"
+" \n"
+"  # save current context, set new context \n"
+"  # %r4 = co_active_context \n"
+"  # co_active_context = %r3 \n"
+#if __PIC__ == 2
+"  # position-independent code, large model (-fPIC) \n"
+"  bl _GLOBAL_OFFSET_TABLE_@local-4 \n"
+"  mflr %r8                 # %r8 = address of got \n"
+"  addis %r7, %r8, co_active_context@got@ha \n"
+"  lwz %r6, co_active_context@got@l(%r7) \n"
+"  lwz %r4, 0(%r6) \n"
+"  stw %r3, 0(%r6) \n"
+#elif __PIC__ == 1
+"  # position-independent code, small model (-fpic) \n"
+"  bl _GLOBAL_OFFSET_TABLE_@local-4 \n"
+"  mflr %r8                 # %r8 = address of got \n"
+"  lwz %r7, co_active_context@got(%r8) \n"
+"  lwz %r4, 0(%r7) \n"
+"  stw %r3, 0(%r7) \n"
+#else
+"  # fixed-position code \n"
+"  lis %r8, co_active_context@ha \n"
+"  lwz %r4, co_active_context@l(%r8) \n"
+"  stw %r3, co_active_context@l(%r8) \n"
+#endif
+" \n"
+"  # save current stack pointer \n"
+"  stw %r1, 0(%r4) \n"
+"  # get new stack pointer \n"
+"  lwz %r1, 0(%r3) \n"
+" \n"
+"  lwz %r0, 244(%r1)        # get return address \n"
+"  lfd %f31, 232(%r1)       # restore floating-point regs \n"
+"  lfd %f30, 224(%r1) \n"
+"  lfd %f29, 216(%r1) \n"
+"  lfd %f28, 208(%r1) \n"
+"  lfd %f27, 200(%r1) \n"
+"  lfd %f26, 192(%r1) \n"
+"  lfd %f25, 184(%r1) \n"
+"  lfd %f24, 176(%r1) \n"
+"  lfd %f23, 168(%r1) \n"
+"  lfd %f22, 160(%r1) \n"
+"  lfd %f21, 152(%r1) \n"
+"  lfd %f20, 144(%r1) \n"
+"  lfd %f19, 136(%r1) \n"
+"  lfd %f18, 128(%r1) \n"
+"  lfd %f17, 120(%r1) \n"
+"  lfd %f16, 112(%r1) \n"
+"  lfd %f15, 104(%r1)       # bugfix: was %f16, so %f15 was never restored \n"
+"  lfd %f14, 96(%r1) \n"
+"  lwz %r31, 92(%r1)        # restore general-purpose regs \n"
+"  lwz %r30, 88(%r1) \n"
+"  lwz %r29, 84(%r1) \n"
+"  lwz %r28, 80(%r1) \n"
+"  lwz %r27, 76(%r1) \n"
+"  lwz %r26, 72(%r1) \n"
+"  lwz %r25, 68(%r1) \n"
+"  lwz %r24, 64(%r1) \n"
+"  lwz %r23, 60(%r1) \n"
+"  lwz %r22, 56(%r1) \n"
+"  lwz %r21, 52(%r1) \n"
+"  lwz %r20, 48(%r1) \n"
+"  lwz %r19, 44(%r1) \n"
+"  lwz %r18, 40(%r1) \n"
+"  lwz %r17, 36(%r1) \n"
+"  lwz %r16, 32(%r1) \n"
+"  lwz %r15, 28(%r1) \n"
+"  lwz %r14, 24(%r1) \n"
+"  lwz %r9, 20(%r1)         # get condition reg \n"
+" \n"
+"  addi %r1, %r1, 240       # free stack frame \n"
+"  mtlr %r0                 # restore return address \n"
+"  mtcr %r9                 # restore condition register \n"
+"  blr                      # return \n"
+"  .size co_switch, . - co_switch \n"
+" \n"
+/*
+ * cothread_t %r3 co_create(unsigned int stack_size %r3,
+ *                          void (*coentry %r4)())
+ *
+ * Allocate a new stack, such that when you co_switch to that
+ * stack, then co_switch returns to coentry.
+ */
+"  .globl co_create \n"
+"  .type co_create, @function \n"
+"co_create: \n"
+"  mflr %r0                 # %r0 = return address \n"
+"  stwu %r1, -16(%r1)       # allocate my stack frame \n"
+"  stw %r0, 20(%r1)         # save return address \n"
+"  stw %r31, 12(%r1)        # save %r31 \n"
+"  stw %r30, 8(%r1)         # save %r30 \n"
+" \n"
+"  mr %r30, %r3             # %r30 = stack_size \n"
+"  mr %r31, %r4             # %r31 = coentry \n"
+" \n"
+"  # Call malloc(stack_size %r3) to allocate stack; \n"
+"  # malloc() probably uses good alignment. \n"
+"  # \n"
+"  bl malloc@plt            # returns %r3 = low end \n"
+"  cmpwi %r3, 0             # if returned NULL, \n"
+"  beq- 1f                  # then abort \n"
+" \n"
+"  # we return %r3 = low end of stack \n"
+"  add %r4, %r3, %r30       # %r4 = high end of stack \n"
+" \n"
+"  # uncomment if malloc() uses wrong alignment \n"
+"  #rlwinm %r4,%r4,0,0,27   # force 16-byte alignment \n"
+" \n"
+  /*
+   * Allocate two stack frames:
+   *   16 bytes for stack frame with return address
+   *   240 bytes for co_switch stack frame
+   *
+   *     Old        New        Value
+   *   -8(%r4)    248(%r5)    padding of 8 bytes
+   *  -12(%r4)    244(%r5)    return address = coentry
+   *  -16(%r4)    240(%r5)    frame pointer = NULL
+   *             232(%r5)    %f31 = 0
+   *             ...
+   *              20(%r5)    condition register = 0
+   *               0(%r5)    frame pointer
+   */
+"  li %r9, (240-20)/4+1 \n"
+"  addi %r5, %r4, -16       # allocate first stack frame \n"
+"  li %r0, 0 \n"
+"  stwu %r5, -240(%r5)      # allocate second stack frame \n"
+"  li %r8, 20 \n"
+"  mtctr %r9                # loop %r9 times \n"
+"2: # loop to store zero to 20(%r5) through 240(%r5) \n"
+"  stwx %r0, %r5, %r8 \n"
+"  addi %r8, %r8, 4         # index += 4 \n"
+"  bdnz+ 2b                 # ctr -= 1, branch if nonzero \n"
+" \n"
+"  stw %r31, 244(%r5)       # return address = coentry \n"
+"  stw %r5, 0(%r3)          # save stack pointer \n"
+" \n"
+"  lwz %r0, 20(%r1)         # get return address \n"
+"  lwz %r31, 12(%r1)        # restore %r31 \n"
+"  lwz %r30, 8(%r1)         # restore %r30 \n"
+"  mtlr %r0                 # restore return address \n"
+"  addi %r1, %r1, 16        # free stack frame \n"
+"  blr                      # return \n"
+" \n"
+"1: b abort@plt             # branch 1f to abort \n"
+"  .size co_create, . - co_create \n"
+" \n"
+/*
+ * void co_delete(cothread_t) => void free(void *)
+ */
+"  .globl co_delete \n"
+"  .type co_delete, @function \n"
+"co_delete: \n"
+"  b free@plt \n"
+" \n"
+);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/x86-64.c b/x86-64.c
deleted file mode 100644
index 2e2a1131..00000000
--- a/x86-64.c
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
-  libco.x86-64 (2008-01-28)
-  author: byuu
-  license: public domain
-*/
-
-#define LIBCO_C
-#include "libco.h"
-#include <assert.h>
-#include <stdlib.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-static thread_local long co_active_buffer[32];
-static thread_local cothread_t co_active_ = 0;
-
-static void crash() {
-  assert(0);  /* called only if cothread_t entrypoint returns */
-}
-
-cothread_t co_active() {
-  if(!co_active_) co_active_ = &co_active_buffer;
-  return co_active_;
-}
-
-cothread_t co_create(unsigned int size, void (*entrypoint)(void)) {
-  cothread_t handle;
-  assert(sizeof(long) == 8);
-  if(!co_active_) co_active_ = &co_active_buffer;
-  size += 128;  /* allocate additional space for storage */
-  size &= ~15;  /* align stack to 16-byte boundary */
-
-  if(handle = (cothread_t)calloc(size, 1)) {
-    long *p = (long*)((char*)handle + size);  /* seek to top of stack */
-    *--p = (long)crash;                       /* crash if entrypoint returns */
-    *--p = (long)entrypoint;                  /* start of function */
-    *(long*)handle = (long)p;                 /* stack pointer */
-  }
-
-  return handle;
-}
-
-void co_delete(cothread_t handle) {
-  free(handle);
-}
-
-void co_switch(cothread_t to) {
-  register long stack = *(long*)to;  /* stack[0] = "to" thread entry point */
-  register cothread_t from = co_active_;
-  co_active_ = to;
-
-  __asm__ __volatile__(
-    "movq %%rsp,(%1)     \n\t" /* save old stack pointer */
-    "movq (%0),%%rsp     \n\t" /* load new stack pointer */
-    "addq $8,%%rsp       \n\t" /* "pop" return address off stack */
-
-    "movq %%rbp, 8(%1)   \n\t" /* backup non-volatile registers */
-    "movq %%rbx,16(%1)   \n\t"
-    "movq %%r12,24(%1)   \n\t"
-    "movq %%r13,32(%1)   \n\t"
-    "movq %%r14,40(%1)   \n\t"
-    "movq %%r15,48(%1)   \n\t"
-
-    "movq  8(%0),%%rbp   \n\t" /* restore non-volatile registers */
-    "movq 16(%0),%%rbx   \n\t"
-    "movq 24(%0),%%r12   \n\t"
-    "movq 32(%0),%%r13   \n\t"
-    "movq 40(%0),%%r14   \n\t"
-    "movq 48(%0),%%r15   \n\t"
-
-    "jmp *(%2)           \n\t" /* jump into "to" thread */
-    :                          /* no outputs */
-    : "r" (to), "r" (from), "r" (stack)
-  );
-}
-
-#ifdef __cplusplus
-}
-#endif
diff --git a/x86.c b/x86.c
index 3a5507ff..d8f820b0 100644
--- a/x86.c
+++ b/x86.c
@@ -1,5 +1,5 @@
 /*
-  libco.x86 (2008-01-28)
+  libco.x86 (2009-10-12)
   author: byuu
   license: public domain
 */
@@ -13,26 +13,63 @@
 extern "C" {
 #endif
 
-static thread_local long co_active_buffer[32];
-static thread_local cothread_t co_active_ = 0;
+#if defined(_MSC_VER)
+  #define fastcall __fastcall
+#elif defined(__GNUC__)
+  #define fastcall __attribute__((fastcall))
+#else
+  #error "libco: please define fastcall macro"
+#endif
+
+static thread_local long co_active_buffer[64];
+static thread_local cothread_t co_active_handle = 0;
+static void (fastcall *co_swap)(cothread_t, cothread_t) = 0;
+
+//ABI: fastcall
+static unsigned char co_swap_function[] = {
+  0x89, 0x22, 0x8B, 0x21, 0x58, 0x89, 0x6A, 0x04, 0x89, 0x72, 0x08, 0x89, 0x7A, 0x0C, 0x89, 0x5A,
+  0x10, 0x8B, 0x69, 0x04, 0x8B, 0x71, 0x08, 0x8B, 0x79, 0x0C, 0x8B, 0x59, 0x10, 0xFF, 0xE0,
+};
+
+#ifdef _WIN32
+  #include <windows.h>
+
+  void co_init() {
+    DWORD old_privileges;
+    VirtualProtect(co_swap_function, sizeof co_swap_function, PAGE_EXECUTE_READWRITE, &old_privileges);
+  }
+#else
+  #include <unistd.h>
+  #include <sys/mman.h>
+
+  void co_init() {
+    unsigned long addr = (unsigned long)co_swap_function;
+    unsigned long base = addr - (addr % sysconf(_SC_PAGESIZE));
+    unsigned long size = (addr - base) + sizeof co_swap_function;
+    mprotect((void*)base, size, PROT_READ | PROT_WRITE | PROT_EXEC);
+  }
+#endif
 
 static void crash() {
   assert(0);  /* called only if cothread_t entrypoint returns */
 }
 
 cothread_t co_active() {
-  if(!co_active_) co_active_ = &co_active_buffer;
-  return co_active_;
+  if(!co_active_handle) co_active_handle = &co_active_buffer;
+  return co_active_handle;
 }
 
 cothread_t co_create(unsigned int size, void (*entrypoint)(void)) {
   cothread_t handle;
-  assert(sizeof(long) == 4);
-  if(!co_active_) co_active_ = &co_active_buffer;
-  size += 128;  /* allocate additional space for storage */
+  if(!co_swap) {
+    co_init();
+    co_swap = (void (fastcall*)(cothread_t, cothread_t))co_swap_function;
+  }
+  if(!co_active_handle) co_active_handle = &co_active_buffer;
+  size += 256;  /* allocate additional space for storage */
   size &= ~15;  /* align stack to 16-byte boundary */
 
-  if(handle = (cothread_t)calloc(size, 1)) {
+  if(handle = (cothread_t)malloc(size)) {
     long *p = (long*)((char*)handle + size);  /* seek to top of stack */
     *--p = (long)crash;                       /* crash if entrypoint returns */
     *--p = (long)entrypoint;                  /* start of function */
@@ -46,65 +83,11 @@ void co_delete(cothread_t handle) {
   free(handle);
 }
 
-#if defined(__GNUC__)
-
-void co_switch(cothread_t to) {
-  register long stack = *(long*)to;  /* stack[0] = "to" thread entry point */
-  register cothread_t from = co_active_;
-  co_active_ = to;
-
-  __asm__ __volatile__(
-    "movl %%esp,(%1)     \n\t" /* save old stack pointer */
-    "movl (%0),%%esp     \n\t" /* load new stack pointer */
-    "addl $4,%%esp       \n\t" /* "pop" return address off stack */
-
-    "movl %%ebp, 4(%1)   \n\t" /* backup non-volatile registers */
-    "movl %%esi, 8(%1)   \n\t"
-    "movl %%edi,12(%1)   \n\t"
-    "movl %%ebx,16(%1)   \n\t"
-
-    "movl  4(%0),%%ebp   \n\t" /* restore non-volatile registers */
-    "movl  8(%0),%%esi   \n\t"
-    "movl 12(%0),%%edi   \n\t"
-    "movl 16(%0),%%ebx   \n\t"
-
-    "jmp *(%2)           \n\t" /* jump into "to" thread */
-    :                          /* no outputs */
-    : "r" (to), "r" (from), "r" (stack)
-  );
-}
-
-#elif defined(_MSC_VER)
-
-__declspec(naked) __declspec(noinline)
-static void __fastcall co_swap(register cothread_t to, register cothread_t from) {
-  /* ecx = to, edx = from */
-  __asm {
-    mov [edx],esp
-    mov esp,[ecx]
-    pop eax
-
-    mov [edx+ 4],ebp
-    mov [edx+ 8],esi
-    mov [edx+12],edi
-    mov [edx+16],ebx
-
-    mov ebp,[ecx+ 4]
-    mov esi,[ecx+ 8]
-    mov edi,[ecx+12]
-    mov ebx,[ecx+16]
-
-    jmp eax
-  }
-}
-
 void co_switch(cothread_t handle) {
-  register cothread_t co_prev_ = co_active_;
-  co_swap(co_active_ = handle, co_prev_);
+  register cothread_t co_previous_handle = co_active_handle;
+  co_swap(co_active_handle = handle, co_previous_handle);
 }
 
-#endif
-
 #ifdef __cplusplus
 }
 #endif