diff --git a/libco.c b/libco.c index dd020fe3..55676263 100644 --- a/libco.c +++ b/libco.c @@ -8,8 +8,8 @@ #include "x86.c" #elif defined(__GNUC__) && defined(__amd64__) #include "amd64.c" -#elif defined(__GNUC__) && defined(__powerpc__) && defined(__ELF__) - #include "ppc-elf.c" +#elif defined(__GNUC__) && defined(_ARCH_PPC) + #include "ppc.c" #elif defined(__GNUC__) #include "sjlj.c" #elif defined(_MSC_VER) && defined(_M_IX86) diff --git a/libco.h b/libco.h index b1b49a29..deb954fb 100644 --- a/libco.h +++ b/libco.h @@ -1,6 +1,6 @@ /* libco - version: 0.15 (2009-10-12) + version: 0.16 (2010-12-24) license: public domain */ diff --git a/ppc-elf.c b/ppc-elf.c deleted file mode 100644 index 5740f77f..00000000 --- a/ppc-elf.c +++ /dev/null @@ -1,325 +0,0 @@ -/* - * libco.ppc-elf - * author: Kernigh - * license: public domain - * - * PowerPC 32-bit ELF implementation of libco (for compile with GCC), - * ported from PowerPC Mac OS X implementation (ppc.s) by Vas Crabb. - * This ELF version works for OpenBSD, and might also work for FreeBSD, - * NetBSD and Linux. - * - * Note 1: This implementation does not handle the AltiVec/VMX - * registers, because the ELF ABI does not mention them, - * and my OpenBSD system is not using them. - * - * Note 2: If you want position-independent code, then you must - * define __PIC__. gcc -fpic or -fPIC defines __PIC__, but - * gcc -fpie or -fPIE might not. If you want to use -fpie - * or -fPIE, then you might need a manual definition: - * gcc -fpie -D__PIC__=1 - * gcc -fPIE -D__PIC__=2 - * - * The ELF ABI is "System V Application Binary Interface, PowerPC - * Processor Supplement", which you can get from - * - * (PDF file, hosted by Linux Foundation). - * - * ELF and Mac OS X use similar conventions to allocate the registers, - * and to pass arguments and return values through registers. The main - * differences are that ELF has a slightly different stack format, that - * symbols are different (and without an extra underscore at the start), - * and that the assembly syntax is different. - * - * A function may destroy the values of volatile registers, but must - * preserve the values of nonvolatile registers. So the co_switch() - * function only saves the nonvolatile registers. - * - * [nonvolatile registers in ELF] - * %r1, %r14..%r31 - * %f14..%f31 - * %cr2..%cr4 in cr - * - * [volatile registers in ELF] - * %r0, %r3..%r10 - * %f0..%f13 - * %cr0, %cr1, %cr5..%cr7 in cr - * ctr, lr, xer - * - * lr (link register) is volatile, but it contains the return address, - * so co_switch must save lr. - * - * %r13 is the small data pointer. This is constant across threads, so - * co_switch() does not touch %r13. - * - * %r2 is a reserved register, so co_switch() does not touch %r2. Some - * systems might borrow an idea from the PowerPC Embedded ABI, and might - * use %r2 as a small read-only data pointer, which is constant across - * threads. - */ - -#ifdef __cplusplus -extern "C" { -#endif - -typedef void * cothread_t; - -/* - * co_active_context is either in a global offset table (if we are - * compiling -fPIC or -fPIE) or has an absolute position. - */ -static void *co_main_stack_pointer; -static cothread_t co_active_context = &co_main_stack_pointer; - -extern cothread_t co_active() { - return co_active_context; -} - -/* - * Embedded assembly. - * - * We are not using the percent-sign substitution feature, - * so we must write "%r1", not "%%r1". - * - * We always write 'bl malloc@plt', not 'bl malloc'. The '@plt' - * is necessary in position-indepent code and seems to have no - * significant effect in fixed-position code. - * - * We never use the 'lmw' or 'stmw' instructions. The ELF ABI - * mentions that these instructions "are usually slower than - * a sequence of other instructions that have the same effect." - * We instead use sequences of 'lwz' or 'stz' instructions. - */ -__asm__("\n" -"### embedded assembly \n" -".section \".text\" \n" -" .balign 4 \n" -" \n" -/* - * void co_switch(co_thread to %r3) - * - * Allocate our stack frame of 240 bytes: - * Old New Value - * 4(%r1) 244(%r1) return address, used by us - * 0(%r1) 240(%r1) frame pointer - * 232(%r1) %f31 - * 224(%r1) %f30 - * ... - * 96(%r1) %f14 - * 92(%r1) %r31 - * 88(%r1) %r30 - * ... - * 24(%r1) %r14 - * 20(%r1) condition register - * 8(%r1) padding of 12 bytes - * 4(%r1) return address, never used - * 0(%r1) frame pointer - * - * Save our registers in our stack frame. - * Save our stack pointer in 0(%r4). - * Switch to the stack of the other thread. - * Restore registers and return. - */ -" .globl co_switch \n" -" .type co_switch, @function \n" -"co_switch: \n" -" mflr %r0 # %r0 = return address \n" -" mfcr %r9 # %r9 = condition register \n" -" stwu %r1, -240(%r1) # allocate stack frame \n" -" \n" -" stw %r0, 244(%r1) # save return address \n" -" stfd %f31, 232(%r1) # save floating-point regs \n" -" stfd %f30, 224(%r1) \n" -" stfd %f29, 216(%r1) \n" -" stfd %f28, 208(%r1) \n" -" stfd %f27, 200(%r1) \n" -" stfd %f26, 192(%r1) \n" -" stfd %f25, 184(%r1) \n" -" stfd %f24, 176(%r1) \n" -" stfd %f23, 168(%r1) \n" -" stfd %f22, 160(%r1) \n" -" stfd %f21, 152(%r1) \n" -" stfd %f20, 144(%r1) \n" -" stfd %f19, 136(%r1) \n" -" stfd %f18, 128(%r1) \n" -" stfd %f17, 120(%r1) \n" -" stfd %f16, 112(%r1) \n" -" stfd %f16, 104(%r1) \n" -" stfd %f14, 96(%r1) \n" -" stw %r31, 92(%r1) # save general-purpose regs \n" -" stw %r30, 88(%r1) \n" -" stw %r29, 84(%r1) \n" -" stw %r28, 80(%r1) \n" -" stw %r27, 76(%r1) \n" -" stw %r26, 72(%r1) \n" -" stw %r25, 68(%r1) \n" -" stw %r24, 64(%r1) \n" -" stw %r23, 60(%r1) \n" -" stw %r22, 56(%r1) \n" -" stw %r21, 52(%r1) \n" -" stw %r20, 48(%r1) \n" -" stw %r19, 44(%r1) \n" -" stw %r18, 40(%r1) \n" -" stw %r17, 36(%r1) \n" -" stw %r16, 32(%r1) \n" -" stw %r15, 28(%r1) \n" -" stw %r14, 24(%r1) \n" -" stw %r9, 20(%r1) # save condition reg \n" -" \n" -" # save current context, set new context \n" -" # %r4 = co_active_context \n" -" # co_active_context = %r3 \n" -#if __PIC__ == 2 -" # position-independent code, large model (-fPIC) \n" -" bl _GLOBAL_OFFSET_TABLE_@local-4 \n" -" mflr %r8 # %r8 = address of got \n" -" addis %r7, %r8, co_active_context@got@ha \n" -" lwz %r6, co_active_context@got@l(%r7) \n" -" lwz %r4, 0(%r6) \n" -" stw %r3, 0(%r6) \n" -#elif __PIC__ == 1 -" # position-independent code, small model (-fpic) \n" -" bl _GLOBAL_OFFSET_TABLE_@local-4 \n" -" mflr %r8 # %r8 = address of got \n" -" lwz %r7, co_active_context@got(%r8) \n" -" lwz %r4, 0(%r7) \n" -" stw %r3, 0(%r7) \n" -#else -" # fixed-position code \n" -" lis %r8, co_active_context@ha \n" -" lwz %r4, co_active_context@l(%r8) \n" -" stw %r3, co_active_context@l(%r8) \n" -#endif -" \n" -" # save current stack pointer \n" -" stw %r1, 0(%r4) \n" -" # get new stack pointer \n" -" lwz %r1, 0(%r3) \n" -" \n" -" lwz %r0, 244(%r1) # get return address \n" -" lfd %f31, 232(%r1) # restore floating-point regs \n" -" lfd %f30, 224(%r1) \n" -" lfd %f29, 216(%r1) \n" -" lfd %f28, 208(%r1) \n" -" lfd %f27, 200(%r1) \n" -" lfd %f26, 192(%r1) \n" -" lfd %f25, 184(%r1) \n" -" lfd %f24, 176(%r1) \n" -" lfd %f23, 168(%r1) \n" -" lfd %f22, 160(%r1) \n" -" lfd %f21, 152(%r1) \n" -" lfd %f20, 144(%r1) \n" -" lfd %f19, 136(%r1) \n" -" lfd %f18, 128(%r1) \n" -" lfd %f17, 120(%r1) \n" -" lfd %f16, 112(%r1) \n" -" lfd %f16, 104(%r1) \n" -" lfd %f14, 96(%r1) \n" -" lwz %r31, 92(%r1) # restore general-purpose regs \n" -" lwz %r30, 88(%r1) \n" -" lwz %r29, 84(%r1) \n" -" lwz %r28, 80(%r1) \n" -" lwz %r27, 76(%r1) \n" -" lwz %r26, 72(%r1) \n" -" lwz %r25, 68(%r1) \n" -" lwz %r24, 64(%r1) \n" -" lwz %r23, 60(%r1) \n" -" lwz %r22, 56(%r1) \n" -" lwz %r21, 52(%r1) \n" -" lwz %r20, 48(%r1) \n" -" lwz %r19, 44(%r1) \n" -" lwz %r18, 40(%r1) \n" -" lwz %r17, 36(%r1) \n" -" lwz %r16, 32(%r1) \n" -" lwz %r15, 28(%r1) \n" -" lwz %r14, 24(%r1) \n" -" lwz %r9, 20(%r1) # get condition reg \n" -" \n" -" addi %r1, %r1, 240 # free stack frame \n" -" mtlr %r0 # restore return address \n" -" mtcr %r9 # restore condition register \n" -" blr # return \n" -" .size co_switch, . - co_switch \n" -" \n" -/* - * cothread_t %r3 co_create(unsigned int stack_size %r3, - * void (*coentry %r4)()) - * - * Allocate a new stack, such that when you co_switch to that - * stack, then co_switch returns to coentry. - */ -" .globl co_create \n" -" .type co_create, @function \n" -"co_create: \n" -" mflr %r0 # %r0 = return address \n" -" stwu %r1, -16(%r1) # allocate my stack frame \n" -" stw %r0, 20(%r1) # save return address \n" -" stw %r31, 12(%r1) # save %r31 \n" -" stw %r30, 8(%r1) # save %r30 \n" -" \n" -" mr %r30, %r3 # %r30 = stack_size \n" -" mr %r31, %r4 # %r31 = coentry \n" -" \n" -" # Call malloc(stack_size %r3) to allocate stack; \n" -" # malloc() probably uses good alignment. \n" -" # \n" -" bl malloc@plt # returns %r3 = low end \n" -" cmpwi %r3, 0 # if returned NULL, \n" -" beq- 1f # then abort \n" -" \n" -" # we return %r3 = low end of stack \n" -" add %r4, %r3, %r30 # %r4 = high end of stack \n" -" \n" -" # uncomment if malloc() uses wrong alignment \n" -" #rlwinm %r4,%r4,0,0,27 # force 16-byte alignment \n" -" \n" - /* - * Allocate two stack frames: - * 16 bytes for stack frame with return address - * 240 bytes for co_switch stack frame - * - * Old New Value - * -8(%r4) 248(%r5) padding of 8 bytes - * -12(%r4) 244(%r5) return address = coentry - * -16(%r4) 240(%r5) frame pointer = NULL - * 232(%r5) %f31 = 0 - * ... - * 20(%r5) condition register = 0 - * 0(%r5) frame pointer - */ -" li %r9, (240-20)/4+1 \n" -" addi %r5, %r4, -16 # allocate first stack frame \n" -" li %r0, 0 \n" -" stwu %r5, -240(%r5) # allocate second stack frame \n" -" li %r8, 20 \n" -" mtctr %r9 # loop %r9 times \n" -"2: # loop to store zero to 20(%r5) through 240(%r5) \n" -" stwx %r0, %r5, %r8 \n" -" addi %r8, %r8, 4 # index += 4 \n" -" bdnz+ 2b # ctr -= 1, branch if nonzero \n" -" \n" -" stw %r31, 244(%r5) # return address = coentry \n" -" stw %r5, 0(%r3) # save stack pointer \n" -" \n" -" lwz %r0, 20(%r1) # get return address \n" -" lwz %r31, 12(%r1) # restore %r31 \n" -" lwz %r30, 8(%r1) # restore %r30 \n" -" mtlr %r0 # restore return address \n" -" addi %r1, %r1, 16 # free stack frame \n" -" blr # return \n" -" \n" -"1: b abort@plt # branch 1f to abort \n" -" .size co_create, . - co_create \n" -" \n" -/* - * void co_delete(cothread_t) => void free(void *) - */ -" .globl co_delete \n" -" .type co_delete, @function \n" -"co_delete: \n" -" b free@plt \n" -" \n" -); - -#ifdef __cplusplus -} -#endif diff --git a/ppc.c b/ppc.c new file mode 100644 index 00000000..a6028fdb --- /dev/null +++ b/ppc.c @@ -0,0 +1,407 @@ +/* + libco.ppc (2010-10-17) + author: blargg + license: public domain +*/ + +/* PowerPC 32/64 using embedded or external asm, with optional +floating-point and AltiVec save/restore */ + +#define LIBCO_C +#include "libco.h" +#include +#include +#include + +#define LIBCO_MPROTECT (__unix__ && !LIBCO_PPC_ASM) + +#if LIBCO_MPROTECT + #include + #include +#endif + +/* State format (offsets in 32-bit words) + ++0 Pointer to swap code + Rest of function descriptor for entry function ++8 PC ++10 SP + Special regs + GPRs + FPRs + VRs + stack +*/ + +enum { state_size = 1024 }; +enum { above_stack = 2048 }; +enum { stack_align = 256 }; + +static thread_local cothread_t co_active_handle = 0; + +/**** Determine environment ****/ + +#define LIBCO_PPC64 (_ARCH_PPC64 || __PPC64__ || __ppc64__ || __powerpc64__) + +/* Whether function calls are indirect through a descriptor, +or are directly to function */ +#ifndef LIBCO_PPCDESC + #if !_CALL_SYSV && (_CALL_AIX || _CALL_AIXDESC || LIBCO_PPC64) + #define LIBCO_PPCDESC 1 + #endif +#endif + +#ifdef LIBCO_PPC_ASM + + #ifdef __cplusplus + extern "C" + #endif + + /* Swap code is in ppc.S */ + void co_swap_asm( cothread_t, cothread_t ); + #define CO_SWAP_ASM( x, y ) co_swap_asm( x, y ) + +#else + +/* Swap code is here in array. Please leave dieassembly comments, +as they make it easy to see what it does, and reorder instructions +if one wants to see whether that improves performance. */ +static const uint32_t libco_ppc_code [] = { +#if LIBCO_PPC64 + 0x7d000026, /* mfcr r8 */ + 0xf8240028, /* std r1,40(r4) */ + 0x7d2802a6, /* mflr r9 */ + 0xf9c40048, /* std r14,72(r4) */ + 0xf9e40050, /* std r15,80(r4) */ + 0xfa040058, /* std r16,88(r4) */ + 0xfa240060, /* std r17,96(r4) */ + 0xfa440068, /* std r18,104(r4) */ + 0xfa640070, /* std r19,112(r4) */ + 0xfa840078, /* std r20,120(r4) */ + 0xfaa40080, /* std r21,128(r4) */ + 0xfac40088, /* std r22,136(r4) */ + 0xfae40090, /* std r23,144(r4) */ + 0xfb040098, /* std r24,152(r4) */ + 0xfb2400a0, /* std r25,160(r4) */ + 0xfb4400a8, /* std r26,168(r4) */ + 0xfb6400b0, /* std r27,176(r4) */ + 0xfb8400b8, /* std r28,184(r4) */ + 0xfba400c0, /* std r29,192(r4) */ + 0xfbc400c8, /* std r30,200(r4) */ + 0xfbe400d0, /* std r31,208(r4) */ + 0xf9240020, /* std r9,32(r4) */ + 0xe8e30020, /* ld r7,32(r3) */ + 0xe8230028, /* ld r1,40(r3) */ + 0x48000009, /* bl 1 */ + 0x7fe00008, /* trap */ + 0x91040030,/*1:stw r8,48(r4) */ + 0x80c30030, /* lwz r6,48(r3) */ + 0x7ce903a6, /* mtctr r7 */ + 0xe9c30048, /* ld r14,72(r3) */ + 0xe9e30050, /* ld r15,80(r3) */ + 0xea030058, /* ld r16,88(r3) */ + 0xea230060, /* ld r17,96(r3) */ + 0xea430068, /* ld r18,104(r3) */ + 0xea630070, /* ld r19,112(r3) */ + 0xea830078, /* ld r20,120(r3) */ + 0xeaa30080, /* ld r21,128(r3) */ + 0xeac30088, /* ld r22,136(r3) */ + 0xeae30090, /* ld r23,144(r3) */ + 0xeb030098, /* ld r24,152(r3) */ + 0xeb2300a0, /* ld r25,160(r3) */ + 0xeb4300a8, /* ld r26,168(r3) */ + 0xeb6300b0, /* ld r27,176(r3) */ + 0xeb8300b8, /* ld r28,184(r3) */ + 0xeba300c0, /* ld r29,192(r3) */ + 0xebc300c8, /* ld r30,200(r3) */ + 0xebe300d0, /* ld r31,208(r3) */ + 0x7ccff120, /* mtcr r6 */ +#else + 0x7d000026, /* mfcr r8 */ + 0x90240028, /* stw r1,40(r4) */ + 0x7d2802a6, /* mflr r9 */ + 0x91a4003c, /* stw r13,60(r4) */ + 0x91c40040, /* stw r14,64(r4) */ + 0x91e40044, /* stw r15,68(r4) */ + 0x92040048, /* stw r16,72(r4) */ + 0x9224004c, /* stw r17,76(r4) */ + 0x92440050, /* stw r18,80(r4) */ + 0x92640054, /* stw r19,84(r4) */ + 0x92840058, /* stw r20,88(r4) */ + 0x92a4005c, /* stw r21,92(r4) */ + 0x92c40060, /* stw r22,96(r4) */ + 0x92e40064, /* stw r23,100(r4) */ + 0x93040068, /* stw r24,104(r4) */ + 0x9324006c, /* stw r25,108(r4) */ + 0x93440070, /* stw r26,112(r4) */ + 0x93640074, /* stw r27,116(r4) */ + 0x93840078, /* stw r28,120(r4) */ + 0x93a4007c, /* stw r29,124(r4) */ + 0x93c40080, /* stw r30,128(r4) */ + 0x93e40084, /* stw r31,132(r4) */ + 0x91240020, /* stw r9,32(r4) */ + 0x80e30020, /* lwz r7,32(r3) */ + 0x80230028, /* lwz r1,40(r3) */ + 0x48000009, /* bl 1 */ + 0x7fe00008, /* trap */ + 0x91040030,/*1:stw r8,48(r4) */ + 0x80c30030, /* lwz r6,48(r3) */ + 0x7ce903a6, /* mtctr r7 */ + 0x81a3003c, /* lwz r13,60(r3) */ + 0x81c30040, /* lwz r14,64(r3) */ + 0x81e30044, /* lwz r15,68(r3) */ + 0x82030048, /* lwz r16,72(r3) */ + 0x8223004c, /* lwz r17,76(r3) */ + 0x82430050, /* lwz r18,80(r3) */ + 0x82630054, /* lwz r19,84(r3) */ + 0x82830058, /* lwz r20,88(r3) */ + 0x82a3005c, /* lwz r21,92(r3) */ + 0x82c30060, /* lwz r22,96(r3) */ + 0x82e30064, /* lwz r23,100(r3) */ + 0x83030068, /* lwz r24,104(r3) */ + 0x8323006c, /* lwz r25,108(r3) */ + 0x83430070, /* lwz r26,112(r3) */ + 0x83630074, /* lwz r27,116(r3) */ + 0x83830078, /* lwz r28,120(r3) */ + 0x83a3007c, /* lwz r29,124(r3) */ + 0x83c30080, /* lwz r30,128(r3) */ + 0x83e30084, /* lwz r31,132(r3) */ + 0x7ccff120, /* mtcr r6 */ +#endif + +#ifndef LIBCO_PPC_NOFP + 0xd9c400e0, /* stfd f14,224(r4) */ + 0xd9e400e8, /* stfd f15,232(r4) */ + 0xda0400f0, /* stfd f16,240(r4) */ + 0xda2400f8, /* stfd f17,248(r4) */ + 0xda440100, /* stfd f18,256(r4) */ + 0xda640108, /* stfd f19,264(r4) */ + 0xda840110, /* stfd f20,272(r4) */ + 0xdaa40118, /* stfd f21,280(r4) */ + 0xdac40120, /* stfd f22,288(r4) */ + 0xdae40128, /* stfd f23,296(r4) */ + 0xdb040130, /* stfd f24,304(r4) */ + 0xdb240138, /* stfd f25,312(r4) */ + 0xdb440140, /* stfd f26,320(r4) */ + 0xdb640148, /* stfd f27,328(r4) */ + 0xdb840150, /* stfd f28,336(r4) */ + 0xdba40158, /* stfd f29,344(r4) */ + 0xdbc40160, /* stfd f30,352(r4) */ + 0xdbe40168, /* stfd f31,360(r4) */ + 0xc9c300e0, /* lfd f14,224(r3) */ + 0xc9e300e8, /* lfd f15,232(r3) */ + 0xca0300f0, /* lfd f16,240(r3) */ + 0xca2300f8, /* lfd f17,248(r3) */ + 0xca430100, /* lfd f18,256(r3) */ + 0xca630108, /* lfd f19,264(r3) */ + 0xca830110, /* lfd f20,272(r3) */ + 0xcaa30118, /* lfd f21,280(r3) */ + 0xcac30120, /* lfd f22,288(r3) */ + 0xcae30128, /* lfd f23,296(r3) */ + 0xcb030130, /* lfd f24,304(r3) */ + 0xcb230138, /* lfd f25,312(r3) */ + 0xcb430140, /* lfd f26,320(r3) */ + 0xcb630148, /* lfd f27,328(r3) */ + 0xcb830150, /* lfd f28,336(r3) */ + 0xcba30158, /* lfd f29,344(r3) */ + 0xcbc30160, /* lfd f30,352(r3) */ + 0xcbe30168, /* lfd f31,360(r3) */ +#endif + +#ifdef __ALTIVEC__ + 0x7ca042a6, /* mfvrsave r5 */ + 0x39040180, /* addi r8,r4,384 */ + 0x39240190, /* addi r9,r4,400 */ + 0x70a00fff, /* andi. r0,r5,4095 */ + 0x90a40034, /* stw r5,52(r4) */ + 0x4182005c, /* beq- 2 */ + 0x7e8041ce, /* stvx v20,r0,r8 */ + 0x39080020, /* addi r8,r8,32 */ + 0x7ea049ce, /* stvx v21,r0,r9 */ + 0x39290020, /* addi r9,r9,32 */ + 0x7ec041ce, /* stvx v22,r0,r8 */ + 0x39080020, /* addi r8,r8,32 */ + 0x7ee049ce, /* stvx v23,r0,r9 */ + 0x39290020, /* addi r9,r9,32 */ + 0x7f0041ce, /* stvx v24,r0,r8 */ + 0x39080020, /* addi r8,r8,32 */ + 0x7f2049ce, /* stvx v25,r0,r9 */ + 0x39290020, /* addi r9,r9,32 */ + 0x7f4041ce, /* stvx v26,r0,r8 */ + 0x39080020, /* addi r8,r8,32 */ + 0x7f6049ce, /* stvx v27,r0,r9 */ + 0x39290020, /* addi r9,r9,32 */ + 0x7f8041ce, /* stvx v28,r0,r8 */ + 0x39080020, /* addi r8,r8,32 */ + 0x7fa049ce, /* stvx v29,r0,r9 */ + 0x39290020, /* addi r9,r9,32 */ + 0x7fc041ce, /* stvx v30,r0,r8 */ + 0x7fe049ce, /* stvx v31,r0,r9 */ + 0x80a30034,/*2:lwz r5,52(r3) */ + 0x39030180, /* addi r8,r3,384 */ + 0x39230190, /* addi r9,r3,400 */ + 0x70a00fff, /* andi. r0,r5,4095 */ + 0x7ca043a6, /* mtvrsave r5 */ + 0x4d820420, /* beqctr */ + 0x7e8040ce, /* lvx v20,r0,r8 */ + 0x39080020, /* addi r8,r8,32 */ + 0x7ea048ce, /* lvx v21,r0,r9 */ + 0x39290020, /* addi r9,r9,32 */ + 0x7ec040ce, /* lvx v22,r0,r8 */ + 0x39080020, /* addi r8,r8,32 */ + 0x7ee048ce, /* lvx v23,r0,r9 */ + 0x39290020, /* addi r9,r9,32 */ + 0x7f0040ce, /* lvx v24,r0,r8 */ + 0x39080020, /* addi r8,r8,32 */ + 0x7f2048ce, /* lvx v25,r0,r9 */ + 0x39290020, /* addi r9,r9,32 */ + 0x7f4040ce, /* lvx v26,r0,r8 */ + 0x39080020, /* addi r8,r8,32 */ + 0x7f6048ce, /* lvx v27,r0,r9 */ + 0x39290020, /* addi r9,r9,32 */ + 0x7f8040ce, /* lvx v28,r0,r8 */ + 0x39080020, /* addi r8,r8,32 */ + 0x7fa048ce, /* lvx v29,r0,r9 */ + 0x39290020, /* addi r9,r9,32 */ + 0x7fc040ce, /* lvx v30,r0,r8 */ + 0x7fe048ce, /* lvx v31,r0,r9 */ +#endif + + 0x4e800420, /* bctr */ +}; + + #if LIBCO_PPCDESC + /* Function call goes through indirect descriptor */ + #define CO_SWAP_ASM( x, y ) \ + ((void (*)( cothread_t, cothread_t )) (uintptr_t) x)( x, y ) + #else + /* Function call goes directly to code */ + #define CO_SWAP_ASM( x, y ) \ + ((void (*)( cothread_t, cothread_t )) (uintptr_t) libco_ppc_code)( x, y ) + #endif + +#endif + +static uint32_t* co_create_( unsigned size, uintptr_t entry ) +{ + uint32_t* t = (uint32_t*) malloc( size ); + + (void) entry; + + #if LIBCO_PPCDESC + if ( t ) + { + /* Copy entry's descriptor */ + memcpy( t, (void*) entry, sizeof (void*) * 3 ); + + /* Set function pointer to swap routine */ + #ifdef LIBCO_PPC_ASM + *(const void**) t = *(void**) &co_swap_asm; + #else + *(const void**) t = libco_ppc_code; + #endif + } + #endif + + return t; +} + +cothread_t co_create( unsigned int size, void (*entry_)( void ) ) +{ + uintptr_t entry = (uintptr_t) entry_; + uint32_t* t = NULL; + + /* Be sure main thread was successfully allocated */ + if ( co_active() ) + { + size += state_size + above_stack + stack_align; + t = co_create_( size, entry ); + } + + if ( t ) + { + uintptr_t sp; + int shift; + + /* Save current registers into new thread, so that any special ones will + have proper values when thread is begun */ + CO_SWAP_ASM( t, t ); + + #if LIBCO_PPCDESC + /* Get real address */ + entry = (uintptr_t) *(void**) entry; + #endif + + /* Put stack near end of block, and align */ + sp = (uintptr_t) t + size - above_stack; + sp -= sp % stack_align; + + /* On PPC32, we save and restore GPRs as 32 bits. For PPC64, we + save and restore them as 64 bits, regardless of the size the ABI + uses. So, we manually write pointers at the proper size. We always + save and restore at the same address, and since PPC is big-endian, + we must put the low byte first on PPC32. */ + + /* If uintptr_t is 32 bits, >>32 is undefined behavior, so we do two shifts + and don't have to care how many bits uintptr_t is. */ + #if LIBCO_PPC64 + shift = 16; + #else + shift = 0; + #endif + + /* Set up so entry will be called on next swap */ + t [8] = (uint32_t) (entry >> shift >> shift); + t [9] = (uint32_t) entry; + + t [10] = (uint32_t) (sp >> shift >> shift); + t [11] = (uint32_t) sp; + } + + return t; +} + +void co_delete( cothread_t t ) +{ + free( t ); +} + +static void co_init_( void ) +{ + #if LIBCO_MPROTECT + /* TODO: pre- and post-pad PPC code so that this doesn't make other + data executable and writable */ + long page_size = sysconf( _SC_PAGESIZE ); + if ( page_size > 0 ) + { + uintptr_t align = page_size; + uintptr_t begin = (uintptr_t) libco_ppc_code; + uintptr_t end = begin + sizeof libco_ppc_code; + + /* Align beginning and end */ + end += align - 1; + end -= end % align; + begin -= begin % align; + + mprotect( (void*) begin, end - begin, PROT_READ | PROT_WRITE | PROT_EXEC ); + } + #endif + + co_active_handle = co_create_( state_size, (uintptr_t) &co_switch ); +} + +cothread_t co_active() +{ + if ( !co_active_handle ) + co_init_(); + + return co_active_handle; +} + +void co_switch( cothread_t t ) +{ + cothread_t old = co_active_handle; + co_active_handle = t; + + CO_SWAP_ASM( t, old ); +} diff --git a/ppc.s b/ppc.s deleted file mode 100644 index d7f6b758..00000000 --- a/ppc.s +++ /dev/null @@ -1,478 +0,0 @@ -;***** -;libco.ppc (2007-11-29) -;author: Vas Crabb -;license: public domain -; -;cross-platform PowerPC implementation of libco -;special thanks to byuu for writing the original version -; -;[ABI compatibility] -;- gcc; mac os x; ppc -; -;[nonvolatile registers] -;- GPR1, GPR13 - GPR31 -;- FPR14 - FPR31 -;- V20 - V31 -;- VRSAVE, CR2 - CR4 -; -;[volatile registers] -;- GPR0, GPR2 - GPR12 -;- FPR0 - FPR13 -;- V0 - V19 -;- LR, CTR, XER, CR0, CR1, CR5 - CR7 -;***** - - -;Declare some target-specific stuff - - .section __TEXT,__text,regular,pure_instructions - .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32 - .machine ppc - - -;Constants - - .cstring - .align 2 - -_sysctl_altivec: - .ascii "hw.optional.altivec\0" - - -;Declare space for variables - -.lcomm _co_environ,4,2 ;bit 0 = initialised, bit 1 = have Altivec/VMX -.lcomm _co_primary_buffer,1024,2 ;buffer (will be zeroed by loader) - - .data - .align 2 - -_co_active_context: - .long _co_primary_buffer - - - .text - .align 2 - - -;Declare exported names - -.globl _co_active -.globl _co_create -.globl _co_delete -.globl _co_switch - - -;***** -;extern "C" cothread_t co_active(); -;return = GPR3 -;***** - -_co_active: - mflr r0 ;GPR0 = return address - bcl 20,31,L_co_active$spb -L_co_active$spb: - mflr r2 ;GPR2 set for position-independance - addis r3,r2,ha16(_co_active_context-L_co_active$spb) ;get value in GPR3 - lwz r3,lo16(_co_active_context-L_co_active$spb)(r3) - mtlr r0 ;LR = return address - blr ;return - - -;***** -;extern "C" cothread_t co_create(unsigned int heapsize, void (*coentry)()); -;GPR3 = heapsize -;GPR4 = coentry -;return = GPR3 -;***** - -_co_create: - mflr r0 ;GPR0 = return address - stmw r30,-8(r1) ;save GPR30 and GPR31 - stw r0,8(r1) ;save return address - stwu r1,-(2*4+16+24)(r1) ;allocate 16 bytes for locals/parameters - -;create heap space (stack + register storage) - addi r31,r3,1024-24 ;subtract space for linkage - mr r30,r4 ;GPR30 = coentry - addi r3,r3,1024 ;allocate extra memory for contextual info - bl L_malloc$stub ;GPR3 = malloc(heapsize + 1024) - add r4,r3,r31 ;GPR4 points to top-of-stack - rlwinm r5,r4,0,0,27 ;force 16-byte alignment - -;store thread entry point + registers, so that first call to co_switch will execute coentry - stw r30,8(r5) ;store entry point - addi r6,0,2+19+18*2+12*4+1 ;clear for CR, old GPR1, 19 GPRs, 18 FPRs, 12 VRs, VRSAVE - addi r0,0,0 - addi r7,0,4 ;start at 4(GPR5) - mtctr r6 -L_co_create$clear_loop: - stwx r0,r5,r7 ;clear a word - addi r7,r7,-4 ;increment pointer - bdnz L_co_create$clear_loop ;loop - stwu r5,-448(r5) ;store top of stack - -;initialize context memory heap and return - stw r5,0(r3) ;*cothread_t = stack heap pointer (GPR1) - lwz r1,0(r1) ;deallocate stack frame - lwz r8,8(r1) ;fetch return address - lmw r30,-8(r1) ;restore GPR30 and GPR31 - mtlr r8 ;return address in LR - blr ;return - - -;***** -;extern "C" void co_delete(cothread_t cothread); -;GPR3 = cothread -;***** - -_co_delete: - b L_free$stub ;free(GPR3) - - -;***** -;extern "C" void co_switch(cothread_t cothread); -;GPR3 = cothread -;***** -; -;Frame looks like: -; -;Old New Value -; 8(r1) 456(r1) Saved LR -; 4(r1) 452(r1) Saved CR -; 0(r1) 448(r1) Old GPR1 -; -4(r1) 444(r1) Saved GPR31 -; -8(r1) 440(r1) Saved GPR30 -;... ... ... -; -72(r1) 376(r1) Saved GPR14 -; -76(r1) 372(r1) Saved GPR13 -; -80(r1) 368(r1) Saved VRSAVE -; -84(r1) 364(r1) +++ -; -88(r1) 360(r1) Saved FPR31 -; -92(r1) 356(r1) +++ -; -96(r1) 352(r1) Saved FPR30 -;... ... ... -;-212(r1) 236(r1) +++ -;-216(r1) 232(r1) Saved FPR15 -;-220(r1) 228(r1) +++ -;-224(r1) 224(r1) Saved FPR14 -;-228(r1) 220(r1) +++ value -;-232(r1) 216(r1) +++ len -;-236(r1) 212(r1) +++ -;-240(r1) 208(r1) Saved VR31 -;-244(r1) 204(r1) +++ -;-248(r1) 200(r1) +++ -;-252(r1) 196(r1) +++ -;-256(r1) 192(r1) Saved VR30 -;... ... ... -;-388(r1) 60(r1) +++ -;-392(r1) 56(r1) +++ -;-396(r1) 52(r1) +++ -;-400(r1) 48(r1) Saved VR21 -;-404(r1) 44(r1) +++ -;-408(r1) 40(r1) +++ Param 5 (GPR7) -;-412(r1) 36(r1) +++ Param 4 (GPR6) -;-416(r1) 32(r1) Saved VR20 Param 3 (GPR5) -;-420(r1) 28(r1) - Param 2 (GPR4) -;-424(r1) 24(r1) - Param 1 (GPR3) -;-428(r1) 20(r1) - Reserved -;-432(r1) 16(r1) - Reserved -;-436(r1) 12(r1) - Reserved -;-440(r1) 8(r1) - New LR -;-444(r1) 4(r1) - New CR -;-448(r1) 0(r1) Saved GPR1 - - -_co_switch: - stmw r13,-76(r1) ;save preserved GPRs - stfd f14,-224(r1) ;save preserved FPRs - stfd f15,-216(r1) - stfd f16,-208(r1) - stfd f17,-200(r1) - stfd f18,-192(r1) - stfd f19,-184(r1) - stfd f20,-176(r1) - stfd f21,-168(r1) - stfd f22,-160(r1) - stfd f23,-152(r1) - stfd f24,-144(r1) - stfd f25,-136(r1) - stfd f26,-128(r1) - stfd f27,-120(r1) - stfd f28,-112(r1) - stfd f29,-104(r1) - stfd f30,-96(r1) - stfd f31,-88(r1) - mflr r0 ;save return address - stw r0,8(r1) - mfcr r2 ;save condition codes - stw r2,4(r1) - stwu r1,-448(r1) ;create stack frame (save 19 GPRs, 18 FRPs, 12 VRs, VRSAVE) - - mr r30,r3 ;save new context pointer - bcl 20,31,L_co_switch$spb ;get address of co_active_context -L_co_switch$spb: - mflr r31 - - addis r29,r31,ha16(_co_environ-L_co_switch$spb) ;get environment flags - lwz r8,lo16(_co_environ-L_co_switch$spb)(r29) - andis. r9,r8,0x8000 ;is it initialised? - bne+ L_co_switch$initialised - - addi r0,0,4 ;len = sizeof(int) - stw r0,216(r1) - addis r3,r31,ha16(_sysctl_altivec-L_co_switch$spb) ;GPR3 = "hw.optional.altivec" - addi r3,r3,lo16(_sysctl_altivec-L_co_switch$spb) - addi r4,r1,220 ;GPR4 = &value - addi r5,r1,216 ;GPR5 = &len - addi r6,0,0 ;newp = 0 - addi r7,0,0 ;newlen = 0 - bl L_sysctlbyname$stub ;call sysctlbyname - lwz r2,220(r1) ;fetch result - addis r8,0,0x8000 ;set initialised bit - cmpwi cr5,r3,0 ;assume error means not present - cmpwi cr6,r2,0 ;test result - blt- cr5,L_co_switch$store_environ - beq cr6,L_co_switch$store_environ - oris r8,r8,0x4000 ;set the flag to say we have it! -L_co_switch$store_environ: - stw r8,lo16(_co_environ-L_co_switch$spb)(r29) ;store environment flags -L_co_switch$initialised: - - andis. r10,r8,0x4000 ;do we have Altivec/VMX? - beq L_co_switch$save_no_vmx - mfspr r11,256 ;save VRSAVE - andi. r0,r11,0x0FFF ;short-circuit if it's zero - stw r11,368(r1) - beq L_co_switch$save_no_vmx - andi. r0,r11,0x0800 ;check bit 20 - addi r2,0,32 ;starting index - beq L_co_switch$save_skip_vr20 - stvx v20,r1,r2 ;save VR20 -L_co_switch$save_skip_vr20: - addi r2,r2,16 ;stride - andi. r0,r11,0x0400 ;check bit 21 - beq L_co_switch$save_skip_vr21 - stvx v21,r1,r2 ;save VR21 -L_co_switch$save_skip_vr21: - addi r2,r2,16 ;stride - andi. r0,r11,0x0200 ;check bit 22 - beq L_co_switch$save_skip_vr22 - stvx v22,r1,r2 ;save VR22 -L_co_switch$save_skip_vr22: - addi r2,r2,16 ;stride - andi. r0,r11,0x0100 ;check bit 23 - beq L_co_switch$save_skip_vr23 - stvx v23,r1,r2 ;save VR23 -L_co_switch$save_skip_vr23: - addi r2,r2,16 ;stride - andi. r0,r11,0x0080 ;check bit 24 - beq L_co_switch$save_skip_vr24 - stvx v24,r1,r2 ;save VR24 -L_co_switch$save_skip_vr24: - addi r2,r2,16 ;stride - andi. r0,r11,0x0040 ;check bit 25 - beq L_co_switch$save_skip_vr25 - stvx v25,r1,r2 ;save VR25 -L_co_switch$save_skip_vr25: - addi r2,r2,16 ;stride - andi. r0,r11,0x0020 ;check bit 26 - beq L_co_switch$save_skip_vr26 - stvx v26,r1,r2 ;save VR26 -L_co_switch$save_skip_vr26: - addi r2,r2,16 ;stride - andi. r0,r11,0x0010 ;check bit 27 - beq L_co_switch$save_skip_vr27 - stvx v27,r1,r2 ;save VR27 -L_co_switch$save_skip_vr27: - addi r2,r2,16 ;stride - andi. r0,r11,0x0008 ;check bit 28 - beq L_co_switch$save_skip_vr28 - stvx v28,r1,r2 ;save VR28 -L_co_switch$save_skip_vr28: - addi r2,r2,16 ;stride - andi. r0,r11,0x0004 ;check bit 29 - beq L_co_switch$save_skip_vr29 - stvx v29,r1,r2 ;save VR29 -L_co_switch$save_skip_vr29: - addi r2,r2,16 ;stride - andi. r0,r11,0x0002 ;check bit 30 - beq L_co_switch$save_skip_vr30 - stvx v30,r1,r2 ;save VR30 -L_co_switch$save_skip_vr30: - addi r2,r2,16 ;stride - andi. r0,r11,0x0001 ;check bit 31 - beq L_co_switch$save_skip_vr31 - stvx v31,r1,r2 ;save VR31 -L_co_switch$save_skip_vr31: -L_co_switch$save_no_vmx: - - addis r4,r31,ha16(_co_active_context-L_co_switch$spb) ;save current context - lwz r5,lo16(_co_active_context-L_co_switch$spb)(r4) - stw r30,lo16(_co_active_context-L_co_switch$spb)(r4);set new context - stw r1,0(r5) ;save current stack pointer - lwz r1,0(r30) ;get new stack pointer - - andis. r10,r8,0x4000 ;do we have Altivec/VMX? - beq L_co_switch$restore_no_vmx - lwz r11,368(r1) ;restore VRSAVE - andi. r0,r11,0x0FFF ;short-circuit if it's zero - mtspr 256,r11 - beq L_co_switch$restore_no_vmx - andi. r0,r11,0x0800 ;check bit 20 - addi r2,0,32 ;starting index - beq L_co_switch$restore_skip_vr20 - lvx v20,r1,r2 ;restore VR20 -L_co_switch$restore_skip_vr20: - addi r2,r2,16 ;stride - andi. r0,r11,0x0400 ;check bit 21 - beq L_co_switch$restore_skip_vr21 - lvx v21,r1,r2 ;restore VR21 -L_co_switch$restore_skip_vr21: - addi r2,r2,16 ;stride - andi. r0,r11,0x0200 ;check bit 22 - beq L_co_switch$restore_skip_vr22 - lvx v22,r1,r2 ;restore VR22 -L_co_switch$restore_skip_vr22: - addi r2,r2,16 ;stride - andi. r0,r11,0x0100 ;check bit 23 - beq L_co_switch$restore_skip_vr23 - lvx v23,r1,r2 ;restore VR23 -L_co_switch$restore_skip_vr23: - addi r2,r2,16 ;stride - andi. r0,r11,0x0080 ;check bit 24 - beq L_co_switch$restore_skip_vr24 - lvx v24,r1,r2 ;restore VR24 -L_co_switch$restore_skip_vr24: - addi r2,r2,16 ;stride - andi. r0,r11,0x0040 ;check bit 25 - beq L_co_switch$restore_skip_vr25 - lvx v25,r1,r2 ;restore VR25 -L_co_switch$restore_skip_vr25: - addi r2,r2,16 ;stride - andi. r0,r11,0x0020 ;check bit 26 - beq L_co_switch$restore_skip_vr26 - lvx v26,r1,r2 ;restore VR26 -L_co_switch$restore_skip_vr26: - addi r2,r2,16 ;stride - andi. r0,r11,0x0010 ;check bit 27 - beq L_co_switch$restore_skip_vr27 - lvx v27,r1,r2 ;restore VR27 -L_co_switch$restore_skip_vr27: - addi r2,r2,16 ;stride - andi. r0,r11,0x0008 ;check bit 28 - beq L_co_switch$restore_skip_vr28 - lvx v28,r1,r2 ;restore VR28 -L_co_switch$restore_skip_vr28: - addi r2,r2,16 ;stride - andi. r0,r11,0x0004 ;check bit 29 - beq L_co_switch$restore_skip_vr29 - lvx v29,r1,r2 ;restore VR29 -L_co_switch$restore_skip_vr29: - addi r2,r2,16 ;stride - andi. r0,r11,0x0002 ;check bit 30 - beq L_co_switch$restore_skip_vr30 - lvx v30,r1,r2 ;restore VR30 -L_co_switch$restore_skip_vr30: - addi r2,r2,16 ;stride - andi. r0,r11,0x0001 ;check bit 31 - beq L_co_switch$restore_skip_vr31 - lvx v31,r1,r2 ;restore VR31 -L_co_switch$restore_skip_vr31: -L_co_switch$restore_no_vmx: - - lwz r1,0(r1) ;deallocate stack frame - lwz r6,8(r1) ;return address in GPR6 - lwz r7,4(r1) ;condition codes in GPR7 - addi r0,0,0 ;make thread main crash if it returns - lmw r13,-76(r1) ;restore preserved GPRs - lfd f14,-224(r1) ;restore preserved FPRs - lfd f15,-216(r1) - lfd f16,-208(r1) - lfd f17,-200(r1) - lfd f18,-192(r1) - lfd f19,-184(r1) - lfd f20,-176(r1) - lfd f21,-168(r1) - lfd f22,-160(r1) - lfd f23,-152(r1) - lfd f24,-144(r1) - lfd f25,-136(r1) - lfd f26,-128(r1) - lfd f27,-120(r1) - lfd f28,-112(r1) - lfd f29,-104(r1) - lfd f30,-96(r1) - lfd f31,-88(r1) - mtlr r0 - mtctr r6 ;restore return address - mtcrf 32,r7 ;restore preserved condition codes - mtcrf 16,r7 - mtcrf 8,r7 - bctr ;return - - - -;Import external functions - - .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32 - .align 5 -L_malloc$stub: - .indirect_symbol _malloc - mflr r0 - bcl 20,31,L_malloc$spb -L_malloc$spb: - mflr r11 - addis r11,r11,ha16(L_malloc$lazy_ptr-L_malloc$spb) - mtlr r0 - lwzu r12,lo16(L_malloc$lazy_ptr-L_malloc$spb)(r11) - mtctr r12 - bctr - .lazy_symbol_pointer -L_malloc$lazy_ptr: - .indirect_symbol _malloc - .long dyld_stub_binding_helper - - - .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32 - .align 5 -L_free$stub: - .indirect_symbol _free - mflr r0 - bcl 20,31,L_free$spb -L_free$spb: - mflr r11 - addis r11,r11,ha16(L_free$lazy_ptr-L_free$spb) - mtlr r0 - lwzu r12,lo16(L_free$lazy_ptr-L_free$spb)(r11) - mtctr r12 - bctr - .lazy_symbol_pointer -L_free$lazy_ptr: - .indirect_symbol _free - .long dyld_stub_binding_helper - - - .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32 - .align 5 -L_sysctlbyname$stub: - .indirect_symbol _sysctlbyname - mflr r0 - bcl 20,31,L_sysctlbyname$spb -L_sysctlbyname$spb: - mflr r11 - addis r11,r11,ha16(L_sysctlbyname$lazy_ptr-L_sysctlbyname$spb) - mtlr r0 - lwzu r12,lo16(L_sysctlbyname$lazy_ptr-L_sysctlbyname$spb)(r11) - mtctr r12 - bctr - .lazy_symbol_pointer -L_sysctlbyname$lazy_ptr: - .indirect_symbol _sysctlbyname - .long dyld_stub_binding_helper - - -;This needs to be here! - - .subsections_via_symbols - diff --git a/ppc64.s b/ppc64.s deleted file mode 100644 index 2fb048d7..00000000 --- a/ppc64.s +++ /dev/null @@ -1,513 +0,0 @@ -;***** -;libco.ppc64 (2007-12-05) -;author: Vas Crabb -;license: public domain -; -;cross-platform 64-bit PowerPC implementation of libco -;special thanks to byuu for writing the original version -; -;[ABI compatibility] -;- gcc; mac os x; ppc64 -; -;[nonvolatile registers] -;- GPR1, GPR13 - GPR31 -;- FPR14 - FPR31 -;- V20 - V31 -;- VRSAVE, CR2 - CR4 -; -;[volatile registers] -;- GPR0, GPR2 - GPR12 -;- FPR0 - FPR13 -;- V0 - V19 -;- LR, CTR, XER, CR0, CR1, CR5 - CR7 -;***** - - -;Declare some target-specific stuff - - .section __TEXT,__text,regular,pure_instructions - .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32 - .machine ppc64 - - -;Constants - - .cstring - .align 3 - -_sysctl_altivec: - .ascii "hw.optional.altivec\0" - - -;Declare space for variables - -.lcomm _co_environ,4,2 ;bit 0 = initialised, bit 1 = have Altivec/VMX -.lcomm _co_primary_buffer,1024,3 ;buffer (will be zeroed by loader) - - .data - .align 3 - -_co_active_context: - .quad _co_primary_buffer - - - .text - .align 2 - - -;Declare exported names - -.globl _co_active -.globl _co_create -.globl _co_delete -.globl _co_switch - - -;***** -;extern "C" cothread_t co_active(); -;return = GPR3 -;***** - -_co_active: - mflr r0 ;GPR0 = return address - bcl 20,31,L_co_active$spb -L_co_active$spb: - mflr r2 ;GPR2 set for position-independance - addis r3,r2,ha16(_co_active_context-L_co_active$spb) ;get value in GPR3 - ld r3,lo16(_co_active_context-L_co_active$spb)(r3) - mtlr r0 ;LR = return address - blr ;return - - -;***** -;extern "C" cothread_t co_create(unsigned int heapsize, void (*coentry)()); -;GPR3 = heapsize -;GPR4 = coentry -;return = GPR3 -;***** - -_co_create: - mflr r0 ;GPR0 = return address - std r30,-16(r1) ;save GPR30 and GPR31 - std r31,-8(r1) - std r0,16(r1) ;save return address - stdu r1,-(2*8+16+48)(r1) ;allocate 16 bytes for locals/parameters - -;create heap space (stack + register storage) - addi r31,r3,1024-48 ;subtract space for linkage - mr r30,r4 ;GPR30 = coentry - addi r3,r3,1024 ;allocate extra memory for contextual info - bl L_malloc$stub ;GPR3 = malloc(heapsize + 1024) - add r4,r3,r31 ;GPR4 points to top-of-stack - rldicr r5,r4,0,59 ;force 16-byte alignment - -;store thread entry point + registers, so that first call to co_switch will execute coentry - std r30,16(r5) ;store entry point - addi r6,0,2+19+18+12*2+1 ;clear for CR, old GPR1, 19 GPRs, 18 FPRs, 12 VRs, VRSAVE - addi r0,0,0 - addi r7,0,8 ;start at 8(GPR5) - mtctr r6 -L_co_create$clear_loop: - stdx r0,r5,r7 ;clear a double - addi r7,r7,-8 ;increment pointer - bdnz L_co_create$clear_loop ;loop - stdu r5,-544(r5) ;store top of stack - -;initialize context memory heap and return - addis r9,0,0x8000 ;GPR13 not set (system TLS) - std r5,0(r3) ;*cothread_t = stack heap pointer (GPR1) - stw r9,8(r3) ;this is a flag word - ld r1,0(r1) ;deallocate stack frame - ld r8,16(r1) ;fetch return address - ld r30,-16(r1) ;restore GPR30 and GPR31 - ld r31,-8(r1) - mtlr r8 ;return address in LR - blr ;return - - -;***** -;extern "C" void co_delete(cothread_t cothread); -;GPR3 = cothread -;***** - -_co_delete: - b L_free$stub ;free(GPR3) - - -;***** -;extern "C" void co_switch(cothread_t cothread); -;GPR3 = cothread -;***** -; -;Frame looks like: -; -;Old New Value -; 16(r1) 560(r1) Saved LR -; 8(r1) 552(r1) Saved CR -; 0(r1) 544(r1) Old GPR1 -; -8(r1) 536(r1) Saved GPR31 -; -16(r1) 528(r1) Saved GPR30 -;... ... ... -;-144(r1) 400(r1) Saved GPR14 -;-152(r1) 392(r1) Saved GPR13 -;-160(r1) 384(r1) Saved FPR31 -;-168(r1) 376(r1) Saved FPR30 -;... ... ... -;-288(r1) 256(r1) Saved FPR15 -;-296(r1) 248(r1) Saved FPR14 -;-304(r1) 240(r1) Saved VRSAVE -;-312(r1) 232(r1) +++ value -;-320(r1) 224(r1) Saved VR31 len -;-328(r1) 216(r1) +++ -;-336(r1) 208(r1) Saved VR30 -;... ... ... -;-456(r1) 88(r1) +++ -;-464(r1) 80(r1) Saved VR22 Param 5 (GPR7) -;-472(r1) 72(r1) +++ Param 4 (GPR6) -;-480(r1) 64(r1) Saved VR21 Param 3 (GPR5) -;-488(r1) 56(r1) +++ Param 2 (GPR4) -;-496(r1) 48(r1) Saved VR20 Param 1 (GPR3) -;-504(r1) 40(r1) - Reserved -;-512(r1) 32(r1) - Reserved -;-520(r1) 24(r1) - Reserved -;-528(r1) 16(r1) - New LR -;-536(r1) 8(r1) - New CR -;-544(r1) 0(r1) Saved GPR1 - - -_co_switch: - std r13,-152(r1) ;save preserved GPRs - std r14,-144(r1) - std r15,-136(r1) - std r16,-128(r1) - std r17,-120(r1) - std r18,-112(r1) - std r19,-104(r1) - std r20,-96(r1) - std r21,-88(r1) - std r22,-80(r1) - std r23,-72(r1) - std r24,-64(r1) - std r25,-56(r1) - std r26,-48(r1) - std r27,-40(r1) - std r28,-32(r1) - std r29,-24(r1) - std r30,-16(r1) - std r31,-8(r1) - mflr r0 ;save return address - std r0,16(r1) - mfcr r2 ;save condition codes - stw r2,8(r1) - stdu r1,-544(r1) ;create stack frame (save 19 GPRs, 18 FRPs, 12 VRs, VRSAVE) - stfd f14,248(r1) ;save preserved FPRs - stfd f15,256(r1) - stfd f16,264(r1) - stfd f17,272(r1) - stfd f18,280(r1) - stfd f19,288(r1) - stfd f20,296(r1) - stfd f21,304(r1) - stfd f22,312(r1) - stfd f23,320(r1) - stfd f24,328(r1) - stfd f25,336(r1) - stfd f26,344(r1) - stfd f27,352(r1) - stfd f28,360(r1) - stfd f29,368(r1) - stfd f30,376(r1) - stfd f31,384(r1) - - mr r30,r3 ;save new context pointer - bcl 20,31,L_co_switch$spb ;get address of co_active_context -L_co_switch$spb: - mflr r31 - - addis r29,r31,ha16(_co_environ-L_co_switch$spb) ;get environment flags - lwz r8,lo16(_co_environ-L_co_switch$spb)(r29) - andis. r9,r8,0x8000 ;is it initialised? - bne+ L_co_switch$initialised - - addi r0,0,4 ;len = sizeof(int) - std r0,224(r1) - addis r3,r31,ha16(_sysctl_altivec-L_co_switch$spb) ;GPR3 = "hw.optional.altivec" - addi r3,r3,lo16(_sysctl_altivec-L_co_switch$spb) - addi r4,r1,232 ;GPR4 = &value - addi r5,r1,224 ;GPR5 = &len - addi r6,0,0 ;newp = 0 - addi r7,0,0 ;newlen = 0 - bl L_sysctlbyname$stub ;call sysctlbyname - lwz r2,232(r1) ;fetch result - addis r8,0,0x8000 ;set initialised bit - cmpdi cr5,r3,0 ;assume error means not present - cmpwi cr6,r2,0 ;test result - blt- cr5,L_co_switch$store_environ - beq cr6,L_co_switch$store_environ - oris r8,r8,0x4000 ;set the flag to say we have it! -L_co_switch$store_environ: - stw r8,lo16(_co_environ-L_co_switch$spb)(r29) ;store environment flags -L_co_switch$initialised: - - andis. r10,r8,0x4000 ;do we have Altivec/VMX? - beq L_co_switch$save_no_vmx - mfspr r11,256 ;save VRSAVE - andi. r0,r11,0x0FFF ;short-circuit if it's zero - stw r11,240(r1) - beq L_co_switch$save_no_vmx - andi. r0,r11,0x0800 ;check bit 20 - addi r2,0,48 ;starting index - beq L_co_switch$save_skip_vr20 - stvx v20,r1,r2 ;save VR20 -L_co_switch$save_skip_vr20: - addi r2,r2,16 ;stride - andi. r0,r11,0x0400 ;check bit 21 - beq L_co_switch$save_skip_vr21 - stvx v21,r1,r2 ;save VR21 -L_co_switch$save_skip_vr21: - addi r2,r2,16 ;stride - andi. r0,r11,0x0200 ;check bit 22 - beq L_co_switch$save_skip_vr22 - stvx v22,r1,r2 ;save VR22 -L_co_switch$save_skip_vr22: - addi r2,r2,16 ;stride - andi. r0,r11,0x0100 ;check bit 23 - beq L_co_switch$save_skip_vr23 - stvx v23,r1,r2 ;save VR23 -L_co_switch$save_skip_vr23: - addi r2,r2,16 ;stride - andi. r0,r11,0x0080 ;check bit 24 - beq L_co_switch$save_skip_vr24 - stvx v24,r1,r2 ;save VR24 -L_co_switch$save_skip_vr24: - addi r2,r2,16 ;stride - andi. r0,r11,0x0040 ;check bit 25 - beq L_co_switch$save_skip_vr25 - stvx v25,r1,r2 ;save VR25 -L_co_switch$save_skip_vr25: - addi r2,r2,16 ;stride - andi. r0,r11,0x0020 ;check bit 26 - beq L_co_switch$save_skip_vr26 - stvx v26,r1,r2 ;save VR26 -L_co_switch$save_skip_vr26: - addi r2,r2,16 ;stride - andi. r0,r11,0x0010 ;check bit 27 - beq L_co_switch$save_skip_vr27 - stvx v27,r1,r2 ;save VR27 -L_co_switch$save_skip_vr27: - addi r2,r2,16 ;stride - andi. r0,r11,0x0008 ;check bit 28 - beq L_co_switch$save_skip_vr28 - stvx v28,r1,r2 ;save VR28 -L_co_switch$save_skip_vr28: - addi r2,r2,16 ;stride - andi. r0,r11,0x0004 ;check bit 29 - beq L_co_switch$save_skip_vr29 - stvx v29,r1,r2 ;save VR29 -L_co_switch$save_skip_vr29: - addi r2,r2,16 ;stride - andi. r0,r11,0x0002 ;check bit 30 - beq L_co_switch$save_skip_vr30 - stvx v30,r1,r2 ;save VR30 -L_co_switch$save_skip_vr30: - addi r2,r2,16 ;stride - andi. r0,r11,0x0001 ;check bit 31 - beq L_co_switch$save_skip_vr31 - stvx v31,r1,r2 ;save VR31 -L_co_switch$save_skip_vr31: -L_co_switch$save_no_vmx: - - addis r4,r31,ha16(_co_active_context-L_co_switch$spb) ;save current context - ld r5,lo16(_co_active_context-L_co_switch$spb)(r4) - std r30,lo16(_co_active_context-L_co_switch$spb)(r4);set new context - std r1,0(r5) ;save current stack pointer - ld r1,0(r30) ;get new stack pointer - lwz r12,8(r30) ;have we already set GPR13 (system TLS)? - andis. r0,r12,0x8000 - beq+ L_co_switch$gpr13_set - std r13,392(r1) - xoris r12,r12,0x8000 - stw r12,8(r30) -L_co_switch$gpr13_set: - - andis. r10,r8,0x4000 ;do we have Altivec/VMX? - beq L_co_switch$restore_no_vmx - lwz r11,240(r1) ;restore VRSAVE - andi. r0,r11,0x0FFF ;short-circuit if it's zero - mtspr 256,r11 - beq L_co_switch$restore_no_vmx - andi. r0,r11,0x0800 ;check bit 20 - addi r2,0,48 ;starting index - beq L_co_switch$restore_skip_vr20 - lvx v20,r1,r2 ;restore VR20 -L_co_switch$restore_skip_vr20: - addi r2,r2,16 ;stride - andi. r0,r11,0x0400 ;check bit 21 - beq L_co_switch$restore_skip_vr21 - lvx v21,r1,r2 ;restore VR21 -L_co_switch$restore_skip_vr21: - addi r2,r2,16 ;stride - andi. r0,r11,0x0200 ;check bit 22 - beq L_co_switch$restore_skip_vr22 - lvx v22,r1,r2 ;restore VR22 -L_co_switch$restore_skip_vr22: - addi r2,r2,16 ;stride - andi. r0,r11,0x0100 ;check bit 23 - beq L_co_switch$restore_skip_vr23 - lvx v23,r1,r2 ;restore VR23 -L_co_switch$restore_skip_vr23: - addi r2,r2,16 ;stride - andi. r0,r11,0x0080 ;check bit 24 - beq L_co_switch$restore_skip_vr24 - lvx v24,r1,r2 ;restore VR24 -L_co_switch$restore_skip_vr24: - addi r2,r2,16 ;stride - andi. r0,r11,0x0040 ;check bit 25 - beq L_co_switch$restore_skip_vr25 - lvx v25,r1,r2 ;restore VR25 -L_co_switch$restore_skip_vr25: - addi r2,r2,16 ;stride - andi. r0,r11,0x0020 ;check bit 26 - beq L_co_switch$restore_skip_vr26 - lvx v26,r1,r2 ;restore VR26 -L_co_switch$restore_skip_vr26: - addi r2,r2,16 ;stride - andi. r0,r11,0x0010 ;check bit 27 - beq L_co_switch$restore_skip_vr27 - lvx v27,r1,r2 ;restore VR27 -L_co_switch$restore_skip_vr27: - addi r2,r2,16 ;stride - andi. r0,r11,0x0008 ;check bit 28 - beq L_co_switch$restore_skip_vr28 - lvx v28,r1,r2 ;restore VR28 -L_co_switch$restore_skip_vr28: - addi r2,r2,16 ;stride - andi. r0,r11,0x0004 ;check bit 29 - beq L_co_switch$restore_skip_vr29 - lvx v29,r1,r2 ;restore VR29 -L_co_switch$restore_skip_vr29: - addi r2,r2,16 ;stride - andi. r0,r11,0x0002 ;check bit 30 - beq L_co_switch$restore_skip_vr30 - lvx v30,r1,r2 ;restore VR30 -L_co_switch$restore_skip_vr30: - addi r2,r2,16 ;stride - andi. r0,r11,0x0001 ;check bit 31 - beq L_co_switch$restore_skip_vr31 - lvx v31,r1,r2 ;restore VR31 -L_co_switch$restore_skip_vr31: -L_co_switch$restore_no_vmx: - - lfd f14,248(r1) ;restore preserved FPRs - lfd f15,256(r1) - lfd f16,264(r1) - lfd f17,272(r1) - lfd f18,280(r1) - lfd f19,288(r1) - lfd f20,296(r1) - lfd f21,304(r1) - lfd f22,312(r1) - lfd f23,320(r1) - lfd f24,328(r1) - lfd f25,336(r1) - lfd f26,344(r1) - lfd f27,352(r1) - lfd f28,360(r1) - lfd f29,368(r1) - lfd f30,376(r1) - lfd f31,384(r1) - addi r0,0,0 ;make thread main crash if it returns - ld r1,0(r1) ;deallocate stack frame - ld r6,16(r1) ;return address in GPR6 - lwz r7,8(r1) ;condition codes in GPR7 - ld r13,-152(r1) ;restore preserved GPRs - ld r14,-144(r1) - ld r15,-136(r1) - ld r16,-128(r1) - ld r17,-120(r1) - ld r18,-112(r1) - ld r19,-104(r1) - ld r20,-96(r1) - ld r21,-88(r1) - ld r22,-80(r1) - ld r23,-72(r1) - ld r24,-64(r1) - ld r25,-56(r1) - ld r26,-48(r1) - ld r27,-40(r1) - ld r28,-32(r1) - ld r29,-24(r1) - ld r30,-16(r1) - ld r31,-8(r1) - mtlr r0 - mtctr r6 ;restore return address - mtcrf 32,r7 ;restore preserved condition codes - mtcrf 16,r7 - mtcrf 8,r7 - bctr ;return - - - -;Import external functions - - .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32 - .align 5 -L_malloc$stub: - .indirect_symbol _malloc - mflr r0 - bcl 20,31,L_malloc$spb -L_malloc$spb: - mflr r11 - addis r11,r11,ha16(L_malloc$lazy_ptr-L_malloc$spb) - mtlr r0 - ldu r12,lo16(L_malloc$lazy_ptr-L_malloc$spb)(r11) - mtctr r12 - bctr - .lazy_symbol_pointer -L_malloc$lazy_ptr: - .indirect_symbol _malloc - .quad dyld_stub_binding_helper - - - .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32 - .align 5 -L_free$stub: - .indirect_symbol _free - mflr r0 - bcl 20,31,L_free$spb -L_free$spb: - mflr r11 - addis r11,r11,ha16(L_free$lazy_ptr-L_free$spb) - mtlr r0 - ldu r12,lo16(L_free$lazy_ptr-L_free$spb)(r11) - mtctr r12 - bctr - .lazy_symbol_pointer -L_free$lazy_ptr: - .indirect_symbol _free - .quad dyld_stub_binding_helper - - - .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32 - .align 5 -L_sysctlbyname$stub: - .indirect_symbol _sysctlbyname - mflr r0 - bcl 20,31,L_sysctlbyname$spb -L_sysctlbyname$spb: - mflr r11 - addis r11,r11,ha16(L_sysctlbyname$lazy_ptr-L_sysctlbyname$spb) - mtlr r0 - ldu r12,lo16(L_sysctlbyname$lazy_ptr-L_sysctlbyname$spb)(r11) - mtctr r12 - bctr - .lazy_symbol_pointer -L_sysctlbyname$lazy_ptr: - .indirect_symbol _sysctlbyname - .quad dyld_stub_binding_helper - - -;This needs to be here! - - .subsections_via_symbols -