From 1cde5dfad891e05a4b22762a03348253900cf031 Mon Sep 17 00:00:00 2001 From: Sintendo Date: Wed, 26 Feb 2020 23:57:32 +0100 Subject: [PATCH] libco/aarch64: Optimize co_swap_function Improves thread-switching performance using the following techniques: - Interleave stores/loads - Restore the stack pointer and link register as early as possible --- aarch64.c | 57 +++++++++++++++++++++++++++---------------------------- 1 file changed, 28 insertions(+), 29 deletions(-) diff --git a/aarch64.c b/aarch64.c index 93ecc49d..8505b675 100644 --- a/aarch64.c +++ b/aarch64.c @@ -24,32 +24,31 @@ static void (*co_swap)(cothread_t, cothread_t) = 0; section(text) #endif static const uint32_t co_swap_function[1024] = { - 0x6d002428, /* stp d8,d9,[x1] */ - 0x6d012c2a, /* stp d10,d11,[x1,#16] */ - 0x6d02342c, /* stp d12,d13,[x1,#32] */ - 0x6d033c2e, /* stp d14,d15,[x1,#48] */ - 0xf9002433, /* str x19,[x1,#72] */ - 0xa9055434, /* stp x20,x21,[x1,#80] */ - 0xa9065c36, /* stp x22,x23,[x1,#96] */ - 0xa9076438, /* stp x24,x25,[x1,#112] */ - 0xa9086c3a, /* stp x26,x27,[x1,#128] */ - 0xa909743c, /* stp x28,x29,[x1,#144] */ - 0x910003f0, /* mov x16,sp */ - 0xa90a7830, /* stp x16,x30,[x1,#160] */ - - 0x6d402408, /* ldp d8,d9,[x0] */ - 0x6d412c0a, /* ldp d10,d11,[x0,#16] */ - 0x6d42340c, /* ldp d12,d13,[x0,#32] */ - 0x6d433c0e, /* ldp d14,d15,[x0,#48] */ - 0xf9402413, /* ldr x19,[x0,#72] */ - 0xa9455414, /* ldp x20,x21,[x0,#80] */ - 0xa9465c16, /* ldp x22,x23,[x0,#96] */ - 0xa9476418, /* ldp x24,x25,[x0,#112] */ - 0xa9486c1a, /* ldp x26,x27,[x0,#128] */ - 0xa949741c, /* ldp x28,x29,[x0,#144] */ - 0xa94a4410, /* ldp x16,x17,[x0,#160] */ - 0x9100021f, /* mov sp,x16 */ - 0xd61f0220, /* br x17 */ + 0x910003f0, /* mov x16,sp (copy sp into x16 so the next stp can store it) */ + 0xa9007830, /* stp x16,x30,[x1] (save sp and link register at [x1]) */ + 0xa9407810, /* ldp x16,x30,[x0] (load sp value and link register from [x0]) */ + 0x9100021f, /* mov sp,x16 (install the loaded stack pointer) */ + 0xa9015033, /* stp x19,x20,[x1,#16] */ + 0xa9415013, /* ldp x19,x20,[x0,#16] */ + 0xa9025835, /* stp x21,x22,[x1,#32] */ + 0xa9425815, /* ldp x21,x22,[x0,#32] */ + 
0xa9036037, /* stp x23,x24,[x1,#48] */ + 0xa9436017, /* ldp x23,x24,[x0,#48] */ + 0xa9046839, /* stp x25,x26,[x1,#64] */ + 0xa9446819, /* ldp x25,x26,[x0,#64] */ + 0xa905703b, /* stp x27,x28,[x1,#80] */ + 0xa945701b, /* ldp x27,x28,[x0,#80] */ + 0xf900303d, /* str x29,[x1,#96] (frame pointer) */ + 0xf940301d, /* ldr x29,[x0,#96] (frame pointer) */ + 0x6d072428, /* stp d8,d9,[x1,#112] */ + 0x6d472408, /* ldp d8,d9,[x0,#112] */ + 0x6d082c2a, /* stp d10,d11,[x1,#128] */ + 0x6d482c0a, /* ldp d10,d11,[x0,#128] */ + 0x6d09342c, /* stp d12,d13,[x1,#144] */ + 0x6d49340c, /* ldp d12,d13,[x0,#144] */ + 0x6d0a3c2e, /* stp d14,d15,[x1,#160] */ + 0x6d4a3c0e, /* ldp d14,d15,[x0,#160] */ + 0xd61f03c0, /* br x30 (jump to the link register loaded from [x0]) */ }; static void co_init() { @@ -77,9 +76,9 @@ cothread_t co_derive(void* memory, unsigned int size, void (*entrypoint)(void)) if(handle = (unsigned long*)memory) { unsigned int offset = (size & ~15); unsigned long* p = (unsigned long*)((unsigned char*)handle + offset); - handle[19] = (unsigned long)p; /* x29 (frame pointer) */ - handle[20] = (unsigned long)p; /* x30 (stack pointer) */ - handle[21] = (unsigned long)entrypoint; /* x31 (link register) */ + handle[0] = (unsigned long)p; /* sp (stack pointer; co_swap_function loads this slot via x16) */ + handle[1] = (unsigned long)entrypoint; /* x30 (link register; target of the final br x30) */ + handle[12] = (unsigned long)p; /* x29 (frame pointer; byte offset 96 in the context) */ } return handle;