mirror of https://github.com/bsnes-emu/bsnes.git
326 lines
10 KiB
C
326 lines
10 KiB
C
|
/*
|
||
|
* libco.ppc-elf
|
||
|
* author: Kernigh
|
||
|
* license: public domain
|
||
|
*
|
||
|
* PowerPC 32-bit ELF implementation of libco (for compile with GCC),
|
||
|
* ported from PowerPC Mac OS X implementation (ppc.s) by Vas Crabb.
|
||
|
* This ELF version works for OpenBSD, and might also work for FreeBSD,
|
||
|
* NetBSD and Linux.
|
||
|
*
|
||
|
* Note 1: This implementation does not handle the AltiVec/VMX
|
||
|
* registers, because the ELF ABI does not mention them,
|
||
|
* and my OpenBSD system is not using them.
|
||
|
*
|
||
|
* Note 2: If you want position-independent code, then you must
|
||
|
* define __PIC__. gcc -fpic or -fPIC defines __PIC__, but
|
||
|
* gcc -fpie or -fPIE might not. If you want to use -fpie
|
||
|
* or -fPIE, then you might need a manual definition:
|
||
|
* gcc -fpie -D__PIC__=1
|
||
|
* gcc -fPIE -D__PIC__=2
|
||
|
*
|
||
|
* The ELF ABI is "System V Application Binary Interface, PowerPC
|
||
|
* Processor Supplement", which you can get from
|
||
|
* <http://refspecs.linux-foundation.org/elf/elfspec_ppc.pdf>
|
||
|
* (PDF file, hosted by Linux Foundation).
|
||
|
*
|
||
|
* ELF and Mac OS X use similar conventions to allocate the registers,
|
||
|
* and to pass arguments and return values through registers. The main
|
||
|
* differences are that ELF has a slightly different stack format, that
|
||
|
* symbols are different (and without an extra underscore at the start),
|
||
|
* and that the assembly syntax is different.
|
||
|
*
|
||
|
* A function may destroy the values of volatile registers, but must
|
||
|
* preserve the values of nonvolatile registers. So the co_switch()
|
||
|
* function only saves the nonvolatile registers.
|
||
|
*
|
||
|
* [nonvolatile registers in ELF]
|
||
|
* %r1, %r14..%r31
|
||
|
* %f14..%f31
|
||
|
* %cr2..%cr4 in cr
|
||
|
*
|
||
|
* [volatile registers in ELF]
|
||
|
* %r0, %r3..%r10
|
||
|
* %f0..%f13
|
||
|
* %cr0, %cr1, %cr5..%cr7 in cr
|
||
|
* ctr, lr, xer
|
||
|
*
|
||
|
* lr (link register) is volatile, but it contains the return address,
|
||
|
* so co_switch must save lr.
|
||
|
*
|
||
|
* %r13 is the small data pointer. This is constant across threads, so
|
||
|
* co_switch() does not touch %r13.
|
||
|
*
|
||
|
* %r2 is a reserved register, so co_switch() does not touch %r2. Some
|
||
|
* systems might borrow an idea from the PowerPC Embedded ABI, and might
|
||
|
* use %r2 as a small read-only data pointer, which is constant across
|
||
|
* threads.
|
||
|
*/
|
||
|
|
||
|
#ifdef __cplusplus
|
||
|
extern "C" {
|
||
|
#endif
|
||
|
|
||
|
typedef void * cothread_t;

/*
 * Context bookkeeping shared with the embedded assembly in this file.
 *
 * A cothread_t points at a word that holds the saved stack pointer of
 * that context.  co_switch() stores the outgoing stack pointer through
 * co_active_context and then overwrites co_active_context with the
 * context being switched to.
 *
 * co_main_stack_pointer is the slot for the initial (main) context;
 * co_active_context starts out pointing at it.
 *
 * co_active_context is either in a global offset table (if we are
 * compiling -fPIC or -fPIE) or has an absolute position.
 *
 * The only stores to co_active_context are made by the assembly, which
 * the compiler cannot see.  It is marked "used" so that GCC does not
 * treat it as a never-written static (folding co_active() to a
 * constant, or renaming/discarding the symbol the assembly refers to).
 */
static void *co_main_stack_pointer;
static __attribute__((used)) cothread_t co_active_context = &co_main_stack_pointer;

/*
 * Return the handle of the context that is currently running, i.e. the
 * value most recently installed by co_switch(), or the main context if
 * co_switch() has not been called yet.
 */
cothread_t co_active(void) {
  return co_active_context;
}
|
||
|
|
||
|
/*
|
||
|
* Embedded assembly.
|
||
|
*
|
||
|
* We are not using the percent-sign substitution feature,
|
||
|
* so we must write "%r1", not "%%r1".
|
||
|
*
|
||
|
* We always write 'bl malloc@plt', not 'bl malloc'. The '@plt'
|
||
|
* is necessary in position-independent code and seems to have no
|
||
|
* significant effect in fixed-position code.
|
||
|
*
|
||
|
* We never use the 'lmw' or 'stmw' instructions. The ELF ABI
|
||
|
* mentions that these instructions "are usually slower than
|
||
|
* a sequence of other instructions that have the same effect."
|
||
|
* We instead use sequences of 'lwz' or 'stw' instructions.
|
||
|
*/
|
||
|
__asm__("\n"
"### embedded assembly\n"
".section \".text\"\n"
"    .balign 4\n"
"\n"
/*
 * void co_switch(cothread_t to %r3)
 *
 * Allocate our stack frame of 240 bytes:
 *
 *    Old       New       Value
 *    4(%r1)    244(%r1)  return address, used by us
 *    0(%r1)    240(%r1)  frame pointer
 *              232(%r1)  %f31
 *              224(%r1)  %f30
 *              ...
 *              104(%r1)  %f15
 *               96(%r1)  %f14
 *               92(%r1)  %r31
 *               88(%r1)  %r30
 *              ...
 *               24(%r1)  %r14
 *               20(%r1)  condition register
 *                8(%r1)  padding of 12 bytes
 *                4(%r1)  return address, never used
 *                0(%r1)  frame pointer
 *
 * Save our registers in our stack frame.
 * Save our stack pointer in 0(%r4), where %r4 is the context that was
 * active on entry, and make %r3 the active context.
 * Switch to the stack of the other thread.
 * Restore registers and return.
 *
 * Every nonvolatile register (%r14..%r31, %f14..%f31, cr) must be both
 * saved and restored at the same offset; the two lists below mirror
 * each other.
 */
"    .globl co_switch\n"
"    .type  co_switch, @function\n"
"co_switch:\n"
"    mflr  %r0                   # %r0 = return address\n"
"    mfcr  %r9                   # %r9 = condition register\n"
"    stwu  %r1, -240(%r1)        # allocate stack frame\n"
"\n"
"    stw   %r0, 244(%r1)         # save return address\n"
"    stfd  %f31, 232(%r1)        # save floating-point regs\n"
"    stfd  %f30, 224(%r1)\n"
"    stfd  %f29, 216(%r1)\n"
"    stfd  %f28, 208(%r1)\n"
"    stfd  %f27, 200(%r1)\n"
"    stfd  %f26, 192(%r1)\n"
"    stfd  %f25, 184(%r1)\n"
"    stfd  %f24, 176(%r1)\n"
"    stfd  %f23, 168(%r1)\n"
"    stfd  %f22, 160(%r1)\n"
"    stfd  %f21, 152(%r1)\n"
"    stfd  %f20, 144(%r1)\n"
"    stfd  %f19, 136(%r1)\n"
"    stfd  %f18, 128(%r1)\n"
"    stfd  %f17, 120(%r1)\n"
"    stfd  %f16, 112(%r1)\n"
"    stfd  %f15, 104(%r1)\n"
"    stfd  %f14,  96(%r1)\n"
"    stw   %r31, 92(%r1)         # save general-purpose regs\n"
"    stw   %r30, 88(%r1)\n"
"    stw   %r29, 84(%r1)\n"
"    stw   %r28, 80(%r1)\n"
"    stw   %r27, 76(%r1)\n"
"    stw   %r26, 72(%r1)\n"
"    stw   %r25, 68(%r1)\n"
"    stw   %r24, 64(%r1)\n"
"    stw   %r23, 60(%r1)\n"
"    stw   %r22, 56(%r1)\n"
"    stw   %r21, 52(%r1)\n"
"    stw   %r20, 48(%r1)\n"
"    stw   %r19, 44(%r1)\n"
"    stw   %r18, 40(%r1)\n"
"    stw   %r17, 36(%r1)\n"
"    stw   %r16, 32(%r1)\n"
"    stw   %r15, 28(%r1)\n"
"    stw   %r14, 24(%r1)\n"
"    stw   %r9,  20(%r1)         # save condition reg\n"
"\n"
"    # save current context, set new context\n"
"    #   %r4 = co_active_context\n"
"    #   co_active_context = %r3\n"
#if __PIC__ == 2
"    # position-independent code, large model (-fPIC)\n"
"    bl    _GLOBAL_OFFSET_TABLE_@local-4\n"
"    mflr  %r8                   # %r8 = address of got\n"
"    addis %r7, %r8, co_active_context@got@ha\n"
"    lwz   %r6, co_active_context@got@l(%r7)\n"
"    lwz   %r4, 0(%r6)\n"
"    stw   %r3, 0(%r6)\n"
#elif __PIC__ == 1
"    # position-independent code, small model (-fpic)\n"
"    bl    _GLOBAL_OFFSET_TABLE_@local-4\n"
"    mflr  %r8                   # %r8 = address of got\n"
"    lwz   %r7, co_active_context@got(%r8)\n"
"    lwz   %r4, 0(%r7)\n"
"    stw   %r3, 0(%r7)\n"
#else
"    # fixed-position code\n"
"    lis   %r8, co_active_context@ha\n"
"    lwz   %r4, co_active_context@l(%r8)\n"
"    stw   %r3, co_active_context@l(%r8)\n"
#endif
"\n"
"    # save current stack pointer\n"
"    stw   %r1, 0(%r4)\n"
"    # get new stack pointer\n"
"    lwz   %r1, 0(%r3)\n"
"\n"
"    lwz   %r0, 244(%r1)         # get return address\n"
"    lfd   %f31, 232(%r1)        # restore floating-point regs\n"
"    lfd   %f30, 224(%r1)\n"
"    lfd   %f29, 216(%r1)\n"
"    lfd   %f28, 208(%r1)\n"
"    lfd   %f27, 200(%r1)\n"
"    lfd   %f26, 192(%r1)\n"
"    lfd   %f25, 184(%r1)\n"
"    lfd   %f24, 176(%r1)\n"
"    lfd   %f23, 168(%r1)\n"
"    lfd   %f22, 160(%r1)\n"
"    lfd   %f21, 152(%r1)\n"
"    lfd   %f20, 144(%r1)\n"
"    lfd   %f19, 136(%r1)\n"
"    lfd   %f18, 128(%r1)\n"
"    lfd   %f17, 120(%r1)\n"
"    lfd   %f16, 112(%r1)\n"
"    lfd   %f15, 104(%r1)\n"
"    lfd   %f14,  96(%r1)\n"
"    lwz   %r31, 92(%r1)         # restore general-purpose regs\n"
"    lwz   %r30, 88(%r1)\n"
"    lwz   %r29, 84(%r1)\n"
"    lwz   %r28, 80(%r1)\n"
"    lwz   %r27, 76(%r1)\n"
"    lwz   %r26, 72(%r1)\n"
"    lwz   %r25, 68(%r1)\n"
"    lwz   %r24, 64(%r1)\n"
"    lwz   %r23, 60(%r1)\n"
"    lwz   %r22, 56(%r1)\n"
"    lwz   %r21, 52(%r1)\n"
"    lwz   %r20, 48(%r1)\n"
"    lwz   %r19, 44(%r1)\n"
"    lwz   %r18, 40(%r1)\n"
"    lwz   %r17, 36(%r1)\n"
"    lwz   %r16, 32(%r1)\n"
"    lwz   %r15, 28(%r1)\n"
"    lwz   %r14, 24(%r1)\n"
"    lwz   %r9,  20(%r1)         # get condition reg\n"
"\n"
"    addi  %r1, %r1, 240         # free stack frame\n"
"    mtlr  %r0                   # restore return address\n"
"    mtcr  %r9                   # restore condition register\n"
"    blr                         # return\n"
"    .size co_switch, . - co_switch\n"
"\n"
/*
 * cothread_t %r3 co_create(unsigned int stack_size %r3,
 *                          void (*coentry %r4)())
 *
 * Allocate a new stack, such that when you co_switch to that
 * stack, then co_switch returns to coentry.
 *
 * The returned handle is the low end of the malloc()ed block; its
 * first word holds the saved stack pointer of the new context.
 * If malloc() fails, this function calls abort().
 */
"    .globl co_create\n"
"    .type  co_create, @function\n"
"co_create:\n"
"    mflr  %r0                   # %r0 = return address\n"
"    stwu  %r1, -16(%r1)         # allocate my stack frame\n"
"    stw   %r0, 20(%r1)          # save return address\n"
"    stw   %r31, 12(%r1)         # save %r31\n"
"    stw   %r30, 8(%r1)          # save %r30\n"
"\n"
"    mr    %r30, %r3             # %r30 = stack_size\n"
"    mr    %r31, %r4             # %r31 = coentry\n"
"\n"
"    # Call malloc(stack_size %r3) to allocate stack;\n"
"    # malloc() probably uses good alignment.\n"
"    #\n"
"    bl    malloc@plt            # returns %r3 = low end\n"
"    cmpwi %r3, 0                # if returned NULL,\n"
"    beq-  1f                    # then abort\n"
"\n"
"    # we return %r3 = low end of stack\n"
"    add   %r4, %r3, %r30        # %r4 = high end of stack\n"
"\n"
"    # The high end is only 16-byte aligned if both malloc()'s\n"
"    # result and stack_size are; round it down so the new stack\n"
"    # pointer is always 16-byte aligned.\n"
"    rlwinm %r4,%r4,0,0,27       # force 16-byte alignment\n"
"\n"
/*
 * Allocate two stack frames:
 *    16 bytes for stack frame with return address
 *   240 bytes for co_switch stack frame
 *
 *    Old        New        Value
 *    -8(%r4)    248(%r5)   padding of 8 bytes
 *   -12(%r4)    244(%r5)   return address = coentry
 *   -16(%r4)    240(%r5)   frame pointer = NULL
 *               232(%r5)   %f31 = 0
 *               ...
 *                20(%r5)   condition register = 0
 *                 0(%r5)   frame pointer
 */
"    li    %r9, (240-20)/4+1\n"
"    addi  %r5, %r4, -16         # allocate first stack frame\n"
"    li    %r0, 0\n"
"    stwu  %r5, -240(%r5)        # allocate second stack frame\n"
"    li    %r8, 20\n"
"    mtctr %r9                   # loop %r9 times\n"
"2:  # loop to store zero to 20(%r5) through 240(%r5)\n"
"    stwx  %r0, %r5, %r8\n"
"    addi  %r8, %r8, 4           # index += 4\n"
"    bdnz+ 2b                    # ctr -= 1, branch if nonzero\n"
"\n"
"    stw   %r31, 244(%r5)        # return address = coentry\n"
"    stw   %r5, 0(%r3)           # save stack pointer\n"
"\n"
"    lwz   %r0, 20(%r1)          # get return address\n"
"    lwz   %r31, 12(%r1)         # restore %r31\n"
"    lwz   %r30, 8(%r1)          # restore %r30\n"
"    mtlr  %r0                   # restore return address\n"
"    addi  %r1, %r1, 16          # free stack frame\n"
"    blr                         # return\n"
"\n"
"1:  b     abort@plt             # branch 1f to abort\n"
"    .size co_create, . - co_create\n"
"\n"
/*
 * void co_delete(cothread_t) => void free(void *)
 *
 * The handle is the pointer that malloc() returned in co_create, so
 * deleting a context is a tail call to free().
 */
"    .globl co_delete\n"
"    .type  co_delete, @function\n"
"co_delete:\n"
"    b     free@plt\n"
"    .size co_delete, . - co_delete\n"
"\n"
);
|
||
|
|
||
|
#ifdef __cplusplus
|
||
|
}
|
||
|
#endif
|