mirror of https://github.com/bsnes-emu/bsnes.git
479 lines
19 KiB
ArmAsm
Executable File
479 lines
19 KiB
ArmAsm
Executable File
;*****
|
|
;libco.ppc (2007-11-29)
|
|
;author: Vas Crabb
|
|
;license: public domain
|
|
;
|
|
;cross-platform PowerPC implementation of libco
|
|
;special thanks to byuu for writing the original version
|
|
;
|
|
;[ABI compatibility]
|
|
;- gcc; mac os x; ppc
|
|
;
|
|
;[nonvolatile registers]
|
|
;- GPR1, GPR13 - GPR31
|
|
;- FPR14 - FPR31
|
|
;- V20 - V31
|
|
;- VRSAVE, CR2 - CR4
|
|
;
|
|
;[volatile registers]
|
|
;- GPR0, GPR2 - GPR12
|
|
;- FPR0 - FPR13
|
|
;- V0 - V19
|
|
;- LR, CTR, XER, CR0, CR1, CR5 - CR7
|
|
;*****
|
|
|
|
|
|
;Declare some target-specific stuff
|
|
|
|
.section __TEXT,__text,regular,pure_instructions
|
|
.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
|
|
.machine ppc
|
|
|
|
|
|
;Constants
|
|
|
|
.cstring
|
|
.align 2
|
|
|
|
_sysctl_altivec:
|
|
.ascii "hw.optional.altivec\0"
|
|
|
|
|
|
;Declare space for variables
|
|
|
|
.lcomm _co_environ,4,2 ;bit 0 = initialised, bit 1 = have Altivec/VMX
|
|
.lcomm _co_primary_buffer,1024,2 ;buffer (will be zeroed by loader)
|
|
|
|
.data
|
|
.align 2
|
|
|
|
_co_active_context:
|
|
.long _co_primary_buffer
|
|
|
|
|
|
.text
|
|
.align 2
|
|
|
|
|
|
;Declare exported names
|
|
|
|
.globl _co_active
|
|
.globl _co_create
|
|
.globl _co_delete
|
|
.globl _co_switch
|
|
|
|
|
|
;*****
|
|
;extern "C" cothread_t co_active();
|
|
;return = GPR3
|
|
;*****
|
|
|
|
_co_active:
|
|
mflr r0 ;GPR0 = return address
|
|
bcl 20,31,L_co_active$spb
|
|
L_co_active$spb:
|
|
mflr r2 ;GPR2 set for position-independance
|
|
addis r3,r2,ha16(_co_active_context-L_co_active$spb) ;get value in GPR3
|
|
lwz r3,lo16(_co_active_context-L_co_active$spb)(r3)
|
|
mtlr r0 ;LR = return address
|
|
blr ;return
|
|
|
|
|
|
;*****
|
|
;extern "C" cothread_t co_create(unsigned int heapsize, void (*coentry)());
|
|
;GPR3 = heapsize
|
|
;GPR4 = coentry
|
|
;return = GPR3
|
|
;*****
|
|
|
|
_co_create:
|
|
mflr r0 ;GPR0 = return address
|
|
stmw r30,-8(r1) ;save GPR30 and GPR31
|
|
stw r0,8(r1) ;save return address
|
|
stwu r1,-(2*4+16+24)(r1) ;allocate 16 bytes for locals/parameters
|
|
|
|
;create heap space (stack + register storage)
|
|
addi r31,r3,1024-24 ;subtract space for linkage
|
|
mr r30,r4 ;GPR30 = coentry
|
|
addi r3,r3,1024 ;allocate extra memory for contextual info
|
|
bl L_malloc$stub ;GPR3 = malloc(heapsize + 1024)
|
|
add r4,r3,r31 ;GPR4 points to top-of-stack
|
|
rlwinm r5,r4,0,0,27 ;force 16-byte alignment
|
|
|
|
;store thread entry point + registers, so that first call to co_switch will execute coentry
|
|
stw r30,8(r5) ;store entry point
|
|
addi r6,0,2+19+18*2+12*4+1 ;clear for CR, old GPR1, 19 GPRs, 18 FPRs, 12 VRs, VRSAVE
|
|
addi r0,0,0
|
|
addi r7,0,4 ;start at 4(GPR5)
|
|
mtctr r6
|
|
L_co_create$clear_loop:
|
|
stwx r0,r5,r7 ;clear a word
|
|
addi r7,r7,-4 ;increment pointer
|
|
bdnz L_co_create$clear_loop ;loop
|
|
stwu r5,-448(r5) ;store top of stack
|
|
|
|
;initialize context memory heap and return
|
|
stw r5,0(r3) ;*cothread_t = stack heap pointer (GPR1)
|
|
lwz r1,0(r1) ;deallocate stack frame
|
|
lwz r8,8(r1) ;fetch return address
|
|
lmw r30,-8(r1) ;restore GPR30 and GPR31
|
|
mtlr r8 ;return address in LR
|
|
blr ;return
|
|
|
|
|
|
;*****
|
|
;extern "C" void co_delete(cothread_t cothread);
|
|
;GPR3 = cothread
|
|
;*****
|
|
|
|
_co_delete:
|
|
b L_free$stub ;free(GPR3)
|
|
|
|
|
|
;*****
|
|
;extern "C" void co_switch(cothread_t cothread);
|
|
;GPR3 = cothread
|
|
;*****
|
|
;
|
|
;Frame looks like:
|
|
;
|
|
;Old New Value
|
|
; 8(r1) 456(r1) Saved LR
|
|
; 4(r1) 452(r1) Saved CR
|
|
; 0(r1) 448(r1) Old GPR1
|
|
; -4(r1) 444(r1) Saved GPR31
|
|
; -8(r1) 440(r1) Saved GPR30
|
|
;... ... ...
|
|
; -72(r1) 376(r1) Saved GPR14
|
|
; -76(r1) 372(r1) Saved GPR13
|
|
; -80(r1) 368(r1) Saved VRSAVE
|
|
; -84(r1) 364(r1) +++
|
|
; -88(r1) 360(r1) Saved FPR31
|
|
; -92(r1) 356(r1) +++
|
|
; -96(r1) 352(r1) Saved FPR30
|
|
;... ... ...
|
|
;-212(r1) 236(r1) +++
|
|
;-216(r1) 232(r1) Saved FPR15
|
|
;-220(r1) 228(r1) +++
|
|
;-224(r1) 224(r1) Saved FPR14
|
|
;-228(r1) 220(r1) +++ value
|
|
;-232(r1) 216(r1) +++ len
|
|
;-236(r1) 212(r1) +++
|
|
;-240(r1) 208(r1) Saved VR31
|
|
;-244(r1) 204(r1) +++
|
|
;-248(r1) 200(r1) +++
|
|
;-252(r1) 196(r1) +++
|
|
;-256(r1) 192(r1) Saved VR30
|
|
;... ... ...
|
|
;-388(r1) 60(r1) +++
|
|
;-392(r1) 56(r1) +++
|
|
;-396(r1) 52(r1) +++
|
|
;-400(r1) 48(r1) Saved VR21
|
|
;-404(r1) 44(r1) +++
|
|
;-408(r1) 40(r1) +++ Param 5 (GPR7)
|
|
;-412(r1) 36(r1) +++ Param 4 (GPR6)
|
|
;-416(r1) 32(r1) Saved VR20 Param 3 (GPR5)
|
|
;-420(r1) 28(r1) - Param 2 (GPR4)
|
|
;-424(r1) 24(r1) - Param 1 (GPR3)
|
|
;-428(r1) 20(r1) - Reserved
|
|
;-432(r1) 16(r1) - Reserved
|
|
;-436(r1) 12(r1) - Reserved
|
|
;-440(r1) 8(r1) - New LR
|
|
;-444(r1) 4(r1) - New CR
|
|
;-448(r1) 0(r1) Saved GPR1
|
|
|
|
|
|
_co_switch:
|
|
stmw r13,-76(r1) ;save preserved GPRs
|
|
stfd f14,-224(r1) ;save preserved FPRs
|
|
stfd f15,-216(r1)
|
|
stfd f16,-208(r1)
|
|
stfd f17,-200(r1)
|
|
stfd f18,-192(r1)
|
|
stfd f19,-184(r1)
|
|
stfd f20,-176(r1)
|
|
stfd f21,-168(r1)
|
|
stfd f22,-160(r1)
|
|
stfd f23,-152(r1)
|
|
stfd f24,-144(r1)
|
|
stfd f25,-136(r1)
|
|
stfd f26,-128(r1)
|
|
stfd f27,-120(r1)
|
|
stfd f28,-112(r1)
|
|
stfd f29,-104(r1)
|
|
stfd f30,-96(r1)
|
|
stfd f31,-88(r1)
|
|
mflr r0 ;save return address
|
|
stw r0,8(r1)
|
|
mfcr r2 ;save condition codes
|
|
stw r2,4(r1)
|
|
stwu r1,-448(r1) ;create stack frame (save 19 GPRs, 18 FRPs, 12 VRs, VRSAVE)
|
|
|
|
mr r30,r3 ;save new context pointer
|
|
bcl 20,31,L_co_switch$spb ;get address of co_active_context
|
|
L_co_switch$spb:
|
|
mflr r31
|
|
|
|
addis r29,r31,ha16(_co_environ-L_co_switch$spb) ;get environment flags
|
|
lwz r8,lo16(_co_environ-L_co_switch$spb)(r29)
|
|
andis. r9,r8,0x8000 ;is it initialised?
|
|
bne+ L_co_switch$initialised
|
|
|
|
addi r0,0,4 ;len = sizeof(int)
|
|
stw r0,216(r1)
|
|
addis r3,r31,ha16(_sysctl_altivec-L_co_switch$spb) ;GPR3 = "hw.optional.altivec"
|
|
addi r3,r3,lo16(_sysctl_altivec-L_co_switch$spb)
|
|
addi r4,r1,220 ;GPR4 = &value
|
|
addi r5,r1,216 ;GPR5 = &len
|
|
addi r6,0,0 ;newp = 0
|
|
addi r7,0,0 ;newlen = 0
|
|
bl L_sysctlbyname$stub ;call sysctlbyname
|
|
lwz r2,220(r1) ;fetch result
|
|
addis r8,0,0x8000 ;set initialised bit
|
|
cmpwi cr5,r3,0 ;assume error means not present
|
|
cmpwi cr6,r2,0 ;test result
|
|
blt- cr5,L_co_switch$store_environ
|
|
beq cr6,L_co_switch$store_environ
|
|
oris r8,r8,0x4000 ;set the flag to say we have it!
|
|
L_co_switch$store_environ:
|
|
stw r8,lo16(_co_environ-L_co_switch$spb)(r29) ;store environment flags
|
|
L_co_switch$initialised:
|
|
|
|
andis. r10,r8,0x4000 ;do we have Altivec/VMX?
|
|
beq L_co_switch$save_no_vmx
|
|
mfspr r11,256 ;save VRSAVE
|
|
andi. r0,r11,0x0FFF ;short-circuit if it's zero
|
|
stw r11,368(r1)
|
|
beq L_co_switch$save_no_vmx
|
|
andi. r0,r11,0x0800 ;check bit 20
|
|
addi r2,0,32 ;starting index
|
|
beq L_co_switch$save_skip_vr20
|
|
stvx v20,r1,r2 ;save VR20
|
|
L_co_switch$save_skip_vr20:
|
|
addi r2,r2,16 ;stride
|
|
andi. r0,r11,0x0400 ;check bit 21
|
|
beq L_co_switch$save_skip_vr21
|
|
stvx v21,r1,r2 ;save VR21
|
|
L_co_switch$save_skip_vr21:
|
|
addi r2,r2,16 ;stride
|
|
andi. r0,r11,0x0200 ;check bit 22
|
|
beq L_co_switch$save_skip_vr22
|
|
stvx v22,r1,r2 ;save VR22
|
|
L_co_switch$save_skip_vr22:
|
|
addi r2,r2,16 ;stride
|
|
andi. r0,r11,0x0100 ;check bit 23
|
|
beq L_co_switch$save_skip_vr23
|
|
stvx v23,r1,r2 ;save VR23
|
|
L_co_switch$save_skip_vr23:
|
|
addi r2,r2,16 ;stride
|
|
andi. r0,r11,0x0080 ;check bit 24
|
|
beq L_co_switch$save_skip_vr24
|
|
stvx v24,r1,r2 ;save VR24
|
|
L_co_switch$save_skip_vr24:
|
|
addi r2,r2,16 ;stride
|
|
andi. r0,r11,0x0040 ;check bit 25
|
|
beq L_co_switch$save_skip_vr25
|
|
stvx v25,r1,r2 ;save VR25
|
|
L_co_switch$save_skip_vr25:
|
|
addi r2,r2,16 ;stride
|
|
andi. r0,r11,0x0020 ;check bit 26
|
|
beq L_co_switch$save_skip_vr26
|
|
stvx v26,r1,r2 ;save VR26
|
|
L_co_switch$save_skip_vr26:
|
|
addi r2,r2,16 ;stride
|
|
andi. r0,r11,0x0010 ;check bit 27
|
|
beq L_co_switch$save_skip_vr27
|
|
stvx v27,r1,r2 ;save VR27
|
|
L_co_switch$save_skip_vr27:
|
|
addi r2,r2,16 ;stride
|
|
andi. r0,r11,0x0008 ;check bit 28
|
|
beq L_co_switch$save_skip_vr28
|
|
stvx v28,r1,r2 ;save VR28
|
|
L_co_switch$save_skip_vr28:
|
|
addi r2,r2,16 ;stride
|
|
andi. r0,r11,0x0004 ;check bit 29
|
|
beq L_co_switch$save_skip_vr29
|
|
stvx v29,r1,r2 ;save VR29
|
|
L_co_switch$save_skip_vr29:
|
|
addi r2,r2,16 ;stride
|
|
andi. r0,r11,0x0002 ;check bit 30
|
|
beq L_co_switch$save_skip_vr30
|
|
stvx v30,r1,r2 ;save VR30
|
|
L_co_switch$save_skip_vr30:
|
|
addi r2,r2,16 ;stride
|
|
andi. r0,r11,0x0001 ;check bit 31
|
|
beq L_co_switch$save_skip_vr31
|
|
stvx v31,r1,r2 ;save VR31
|
|
L_co_switch$save_skip_vr31:
|
|
L_co_switch$save_no_vmx:
|
|
|
|
addis r4,r31,ha16(_co_active_context-L_co_switch$spb) ;save current context
|
|
lwz r5,lo16(_co_active_context-L_co_switch$spb)(r4)
|
|
stw r30,lo16(_co_active_context-L_co_switch$spb)(r4);set new context
|
|
stw r1,0(r5) ;save current stack pointer
|
|
lwz r1,0(r30) ;get new stack pointer
|
|
|
|
andis. r10,r8,0x4000 ;do we have Altivec/VMX?
|
|
beq L_co_switch$restore_no_vmx
|
|
lwz r11,368(r1) ;restore VRSAVE
|
|
andi. r0,r11,0x0FFF ;short-circuit if it's zero
|
|
mtspr 256,r11
|
|
beq L_co_switch$restore_no_vmx
|
|
andi. r0,r11,0x0800 ;check bit 20
|
|
addi r2,0,32 ;starting index
|
|
beq L_co_switch$restore_skip_vr20
|
|
lvx v20,r1,r2 ;restore VR20
|
|
L_co_switch$restore_skip_vr20:
|
|
addi r2,r2,16 ;stride
|
|
andi. r0,r11,0x0400 ;check bit 21
|
|
beq L_co_switch$restore_skip_vr21
|
|
lvx v21,r1,r2 ;restore VR21
|
|
L_co_switch$restore_skip_vr21:
|
|
addi r2,r2,16 ;stride
|
|
andi. r0,r11,0x0200 ;check bit 22
|
|
beq L_co_switch$restore_skip_vr22
|
|
lvx v22,r1,r2 ;restore VR22
|
|
L_co_switch$restore_skip_vr22:
|
|
addi r2,r2,16 ;stride
|
|
andi. r0,r11,0x0100 ;check bit 23
|
|
beq L_co_switch$restore_skip_vr23
|
|
lvx v23,r1,r2 ;restore VR23
|
|
L_co_switch$restore_skip_vr23:
|
|
addi r2,r2,16 ;stride
|
|
andi. r0,r11,0x0080 ;check bit 24
|
|
beq L_co_switch$restore_skip_vr24
|
|
lvx v24,r1,r2 ;restore VR24
|
|
L_co_switch$restore_skip_vr24:
|
|
addi r2,r2,16 ;stride
|
|
andi. r0,r11,0x0040 ;check bit 25
|
|
beq L_co_switch$restore_skip_vr25
|
|
lvx v25,r1,r2 ;restore VR25
|
|
L_co_switch$restore_skip_vr25:
|
|
addi r2,r2,16 ;stride
|
|
andi. r0,r11,0x0020 ;check bit 26
|
|
beq L_co_switch$restore_skip_vr26
|
|
lvx v26,r1,r2 ;restore VR26
|
|
L_co_switch$restore_skip_vr26:
|
|
addi r2,r2,16 ;stride
|
|
andi. r0,r11,0x0010 ;check bit 27
|
|
beq L_co_switch$restore_skip_vr27
|
|
lvx v27,r1,r2 ;restore VR27
|
|
L_co_switch$restore_skip_vr27:
|
|
addi r2,r2,16 ;stride
|
|
andi. r0,r11,0x0008 ;check bit 28
|
|
beq L_co_switch$restore_skip_vr28
|
|
lvx v28,r1,r2 ;restore VR28
|
|
L_co_switch$restore_skip_vr28:
|
|
addi r2,r2,16 ;stride
|
|
andi. r0,r11,0x0004 ;check bit 29
|
|
beq L_co_switch$restore_skip_vr29
|
|
lvx v29,r1,r2 ;restore VR29
|
|
L_co_switch$restore_skip_vr29:
|
|
addi r2,r2,16 ;stride
|
|
andi. r0,r11,0x0002 ;check bit 30
|
|
beq L_co_switch$restore_skip_vr30
|
|
lvx v30,r1,r2 ;restore VR30
|
|
L_co_switch$restore_skip_vr30:
|
|
addi r2,r2,16 ;stride
|
|
andi. r0,r11,0x0001 ;check bit 31
|
|
beq L_co_switch$restore_skip_vr31
|
|
lvx v31,r1,r2 ;restore VR31
|
|
L_co_switch$restore_skip_vr31:
|
|
L_co_switch$restore_no_vmx:
|
|
|
|
lwz r1,0(r1) ;deallocate stack frame
|
|
lwz r6,8(r1) ;return address in GPR6
|
|
lwz r7,4(r1) ;condition codes in GPR7
|
|
addi r0,0,0 ;make thread main crash if it returns
|
|
lmw r13,-76(r1) ;restore preserved GPRs
|
|
lfd f14,-224(r1) ;restore preserved FPRs
|
|
lfd f15,-216(r1)
|
|
lfd f16,-208(r1)
|
|
lfd f17,-200(r1)
|
|
lfd f18,-192(r1)
|
|
lfd f19,-184(r1)
|
|
lfd f20,-176(r1)
|
|
lfd f21,-168(r1)
|
|
lfd f22,-160(r1)
|
|
lfd f23,-152(r1)
|
|
lfd f24,-144(r1)
|
|
lfd f25,-136(r1)
|
|
lfd f26,-128(r1)
|
|
lfd f27,-120(r1)
|
|
lfd f28,-112(r1)
|
|
lfd f29,-104(r1)
|
|
lfd f30,-96(r1)
|
|
lfd f31,-88(r1)
|
|
mtlr r0
|
|
mtctr r6 ;restore return address
|
|
mtcrf 32,r7 ;restore preserved condition codes
|
|
mtcrf 16,r7
|
|
mtcrf 8,r7
|
|
bctr ;return
|
|
|
|
|
|
|
|
;Import external functions
|
|
|
|
.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
|
|
.align 5
|
|
L_malloc$stub:
|
|
.indirect_symbol _malloc
|
|
mflr r0
|
|
bcl 20,31,L_malloc$spb
|
|
L_malloc$spb:
|
|
mflr r11
|
|
addis r11,r11,ha16(L_malloc$lazy_ptr-L_malloc$spb)
|
|
mtlr r0
|
|
lwzu r12,lo16(L_malloc$lazy_ptr-L_malloc$spb)(r11)
|
|
mtctr r12
|
|
bctr
|
|
.lazy_symbol_pointer
|
|
L_malloc$lazy_ptr:
|
|
.indirect_symbol _malloc
|
|
.long dyld_stub_binding_helper
|
|
|
|
|
|
.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
|
|
.align 5
|
|
L_free$stub:
|
|
.indirect_symbol _free
|
|
mflr r0
|
|
bcl 20,31,L_free$spb
|
|
L_free$spb:
|
|
mflr r11
|
|
addis r11,r11,ha16(L_free$lazy_ptr-L_free$spb)
|
|
mtlr r0
|
|
lwzu r12,lo16(L_free$lazy_ptr-L_free$spb)(r11)
|
|
mtctr r12
|
|
bctr
|
|
.lazy_symbol_pointer
|
|
L_free$lazy_ptr:
|
|
.indirect_symbol _free
|
|
.long dyld_stub_binding_helper
|
|
|
|
|
|
.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
|
|
.align 5
|
|
L_sysctlbyname$stub:
|
|
.indirect_symbol _sysctlbyname
|
|
mflr r0
|
|
bcl 20,31,L_sysctlbyname$spb
|
|
L_sysctlbyname$spb:
|
|
mflr r11
|
|
addis r11,r11,ha16(L_sysctlbyname$lazy_ptr-L_sysctlbyname$spb)
|
|
mtlr r0
|
|
lwzu r12,lo16(L_sysctlbyname$lazy_ptr-L_sysctlbyname$spb)(r11)
|
|
mtctr r12
|
|
bctr
|
|
.lazy_symbol_pointer
|
|
L_sysctlbyname$lazy_ptr:
|
|
.indirect_symbol _sysctlbyname
|
|
.long dyld_stub_binding_helper
|
|
|
|
|
|
;This needs to be here!
|
|
|
|
.subsections_via_symbols
|
|
|