mirror of https://github.com/bsnes-emu/bsnes.git
libco/aarch64: Optimize co_swap_function
Improves thread-switching performance using two techniques: (1) interleaving the register stores and loads, and (2) restoring the stack pointer and link register as early as possible.
This commit is contained in:
parent
7d9ebc59a0
commit
1cde5dfad8
53
aarch64.c
53
aarch64.c
|
@ -24,32 +24,31 @@ static void (*co_swap)(cothread_t, cothread_t) = 0;
|
|||
section(text)
|
||||
#endif
|
||||
/*
 * Raw AArch64 instruction words for the context-switch routine; the trailing
 * comment on each entry gives the assembly that the word encodes.
 *
 * Every store in the listing writes to the buffer addressed by x1 and every
 * load reads from the buffer addressed by x0, i.e. the outgoing register
 * state is written through x1 and the incoming state is read through x0.
 *
 * NOTE(review): this listing comes from a commit view whose hunk header reads
 * "-24,32 +24,31", and it contains two different buffer layouts (d8 at offset
 * 0 vs. offset 112; sp/lr at offset 160 vs. offset 0). It presumably shows
 * the removed and the added entries together without +/- markers -- confirm
 * against the repository before reading it as a single array.
 */
static const uint32_t co_swap_function[1024] = {
|
||||
/* Layout with d8-d15 at offsets 0-63, x19-x29 at 72-159, sp/lr at 160:
   all stores through x1 come first, then all loads through x0. */
0x6d002428, /* stp d8,d9,[x1] */
|
||||
0x6d012c2a, /* stp d10,d11,[x1,#16] */
|
||||
0x6d02342c, /* stp d12,d13,[x1,#32] */
|
||||
0x6d033c2e, /* stp d14,d15,[x1,#48] */
|
||||
0xf9002433, /* str x19,[x1,#72] */
|
||||
0xa9055434, /* stp x20,x21,[x1,#80] */
|
||||
0xa9065c36, /* stp x22,x23,[x1,#96] */
|
||||
0xa9076438, /* stp x24,x25,[x1,#112] */
|
||||
0xa9086c3a, /* stp x26,x27,[x1,#128] */
|
||||
0xa909743c, /* stp x28,x29,[x1,#144] */
|
||||
/* sp is copied into x16 first so that it can be stored as part of a pair. */
0x910003f0, /* mov x16,sp */
|
||||
0xa90a7830, /* stp x16,x30,[x1,#160] */
|
||||
|
||||
0x6d402408, /* ldp d8,d9,[x0] */
|
||||
0x6d412c0a, /* ldp d10,d11,[x0,#16] */
|
||||
0x6d42340c, /* ldp d12,d13,[x0,#32] */
|
||||
0x6d433c0e, /* ldp d14,d15,[x0,#48] */
|
||||
0xf9402413, /* ldr x19,[x0,#72] */
|
||||
0xa9455414, /* ldp x20,x21,[x0,#80] */
|
||||
0xa9465c16, /* ldp x22,x23,[x0,#96] */
|
||||
0xa9476418, /* ldp x24,x25,[x0,#112] */
|
||||
0xa9486c1a, /* ldp x26,x27,[x0,#128] */
|
||||
0xa949741c, /* ldp x28,x29,[x0,#144] */
|
||||
/* Loads the saved sp into x16 and the saved x30 value into x17
   (the target of the "br x17" entry further down). */
0xa94a4410, /* ldp x16,x17,[x0,#160] */
|
||||
/* Layout with sp/lr at offsets 0/8: the outgoing pair is stored and the
   incoming pair is loaded before any other register is touched. */
0xa9007830, /* stp x16,x30,[x1] */
|
||||
0xa9407810, /* ldp x16,x30,[x0] */
|
||||
0x9100021f, /* mov sp,x16 */
|
||||
0xd61f0220, /* br x17 */
|
||||
/* From here on each register pair is stored through x1 and immediately
   reloaded through x0 at the same offset (x19-x28 at 16-95, x29 at 96,
   d8-d15 at 112-175). */
0xa9015033, /* stp x19,x20,[x1, 16] */
|
||||
0xa9415013, /* ldp x19,x20,[x0, 16] */
|
||||
0xa9025835, /* stp x21,x22,[x1, 32] */
|
||||
0xa9425815, /* ldp x21,x22,[x0, 32] */
|
||||
0xa9036037, /* stp x23,x24,[x1, 48] */
|
||||
0xa9436017, /* ldp x23,x24,[x0, 48] */
|
||||
0xa9046839, /* stp x25,x26,[x1, 64] */
|
||||
0xa9446819, /* ldp x25,x26,[x0, 64] */
|
||||
0xa905703b, /* stp x27,x28,[x1, 80] */
|
||||
0xa945701b, /* ldp x27,x28,[x0, 80] */
|
||||
0xf900303d, /* str x29, [x1, 96] */
|
||||
0xf940301d, /* ldr x29, [x0, 96] */
|
||||
0x6d072428, /* stp d8, d9, [x1,112] */
|
||||
0x6d472408, /* ldp d8, d9, [x0,112] */
|
||||
0x6d082c2a, /* stp d10,d11,[x1,128] */
|
||||
0x6d482c0a, /* ldp d10,d11,[x0,128] */
|
||||
0x6d09342c, /* stp d12,d13,[x1,144] */
|
||||
0x6d49340c, /* ldp d12,d13,[x0,144] */
|
||||
0x6d0a3c2e, /* stp d14,d15,[x1,160] */
|
||||
0x6d4a3c0e, /* ldp d14,d15,[x0,160] */
|
||||
/* Branches to the x30 value loaded by "ldp x16,x30,[x0]" above. */
0xd61f03c0, /* br x30 */
|
||||
};
|
||||
|
||||
static void co_init() {
|
||||
|
@ -77,9 +76,9 @@ cothread_t co_derive(void* memory, unsigned int size, void (*entrypoint)(void))
|
|||
if(handle = (unsigned long*)memory) {
|
||||
unsigned int offset = (size & ~15);
|
||||
unsigned long* p = (unsigned long*)((unsigned char*)handle + offset);
|
||||
handle[19] = (unsigned long)p; /* x29 (frame pointer) */
|
||||
handle[20] = (unsigned long)p; /* x30 (stack pointer) */
|
||||
handle[21] = (unsigned long)entrypoint; /* x31 (link register) */
|
||||
handle[0] = (unsigned long)p; /* x16 (stack pointer) */
|
||||
handle[1] = (unsigned long)entrypoint; /* x30 (link register) */
|
||||
handle[12] = (unsigned long)p; /* x29 (frame pointer) */
|
||||
}
|
||||
|
||||
return handle;
|
||||
|
|
Loading…
Reference in New Issue