2013-12-19 17:10:14 +00:00
|
|
|
@@
|
|
|
|
|
2015-05-16 05:12:19 +00:00
|
|
|
#include "build.h"
|
|
|
|
|
2013-12-19 17:10:14 +00:00
|
|
|
.arm
|
|
|
|
.align 8
|
|
|
|
|
|
|
|
.equ SH4_TIMESLICE, 448
|
|
|
|
.equ BM_BLOCKLIST_MASK, 65532 @FFFC
|
|
|
|
.equ CPU_RATIO, 5
|
|
|
|
|
2015-05-16 05:12:19 +00:00
|
|
|
#if HOST_OS == OS_DARWIN
|
2014-12-17 18:13:05 +00:00
|
|
|
#define CSYM(n) _##n
|
2015-05-16 08:04:30 +00:00
|
|
|
#define HIDDEN(n)
|
2015-05-16 05:12:19 +00:00
|
|
|
#else
|
|
|
|
#define CSYM(n) n
|
2015-05-16 08:04:30 +00:00
|
|
|
#define HIDDEN(n) .hidden CSYM(n)
|
2015-05-16 05:12:19 +00:00
|
|
|
#endif
|
2013-12-19 17:10:14 +00:00
|
|
|
|
|
|
|
@@@@@@@@@@ some helpers @@@@@@@@@@
|
|
|
|
|
2014-12-17 18:13:05 +00:00
|
|
|
@ do_sqw_nommu_area_3
@   Copies one 32-byte store queue to ram.
@ In:    r0 = addr
@        r1 = sq_both
@ Clobb: r0-r3, d0-d3
.global CSYM(do_sqw_nommu_area_3)
HIDDEN(do_sqw_nommu_area_3)
CSYM(do_sqw_nommu_area_3):
	and	r2, r0, #0x20			@ bit 5 of addr selects the SQ
	ubfx	r0, r0, #5, #19			@ ram offset, in 32-byte units
	add	r3, r1, #0x0C000000		@ ram ptr derived from r1, part 1
	add	r1, r1, r2			@ source = SQ ptr + SQ selection
	add	r3, r3, #512			@ ram ptr derived from r1, part 2
	add	r3, r3, r0, lsl #5		@ destination = ram + offset * 32
	vldm	r1, {d0-d3}			@ read 32 bytes from the SQ
	vstm	r3, {d0-d3}			@ write 32 bytes to ram
	bx	lr
|
|
|
|
|
2014-12-17 18:13:05 +00:00
|
|
|
@ TAWriteSQ
@   Routes a store queue write by target address:
@     (addr & 0x01FFFFFF) <  0x0800000  -> tail call ta_vtx_data32(r0 = SQ ptr)
@     (addr & 0x01FFFFFF) <  0x1000000  -> tail call YUV_data(r0 = SQ ptr, r1 = 1)
@     otherwise                         -> TAWriteSQ_vram (direct 32-byte copy)
@ In:    r0 = addr
@        r1 = sq_both
@ Clobb: r0-r3, flags, d0-d3 (plus whatever the tail-called routines clobber)
.global CSYM(TAWriteSQ)
HIDDEN(TAWriteSQ)
CSYM(TAWriteSQ):
	BIC	R3, R0, #0xFE000000		@ clear unused bits
	AND	R0, R0, #0x20			@ SQ#, isolate
	CMP	R3, #0x800000			@ TA write?
	ADD	R0, R1, R0			@ SQ#, add to SQ ptr (r0 = selected SQ)
	BCC	CSYM(_Z13ta_vtx_data32Pv)	@ yes: tail call with r0 = selected SQ

CSYM(TAWriteSQ_yuv):
	CMP	R3, #0x1000000			@ YUV write?
	BCS	CSYM(TAWriteSQ_vram)
	MOV	R1, #1
	B	CSYM(_Z8YUV_dataPjj)

CSYM(TAWriteSQ_vram):				@ vram write ..
	@ The breakpoint below is kept from the original; this path traps first.
#ifdef TARGET_IPHONE
	bkpt	#0
#else
	bkpt
#endif
	@ The vram offset is kept in r2 so that r0 still holds the selected SQ
	@ pointer. Previously r0 was overwritten and the copy read from r1 (the
	@ unselected base), unlike do_sqw_nommu_area_3 which reads the selected SQ.
	ubfx	r2, r3, #5, #18			@ get vram offset, in 32-byte units
	add	r3, r1, #0x04000000		@ get vram ptr from r1, part 1
	add	r3, #512			@ get vram ptr from r1, part 2
	add	r3, r2, lsl #5			@ vram + offset * 32
	vldm	r0, {d0-d3}			@ read 32 bytes from the selected SQ
	vstm	r3, {d0-d3}			@ write 32 bytes to vram
	bx	lr
|
|
|
|
|
2014-12-17 18:13:05 +00:00
|
|
|
|
2015-07-25 06:39:35 +00:00
|
|
|
#if FEAT_SHREC != DYNAREC_NONE
|
2014-12-17 18:13:05 +00:00
|
|
|
|
2013-12-19 17:10:14 +00:00
|
|
|
@@@@@@@@@@ ngen_LinkBlock_*****_stub @@@@@@@@@@
|
|
|
|
|
2014-12-17 18:13:05 +00:00
|
|
|
@ ngen_LinkBlock_*_stub
@   Three thin entry points that set r1 and then branch to the shared stub.
@   None of them touch lr, so the shared stub still sees the lr of whoever
@   reached the entry point.

@ Generic: r1 = r4 (djump/pc, in case it is needed)
.global CSYM(ngen_LinkBlock_Generic_stub)
HIDDEN(ngen_LinkBlock_Generic_stub)
CSYM(ngen_LinkBlock_Generic_stub):
	mov	r1, r4
	b	CSYM(ngen_LinkBlock_Shared_stub)

@ Conditional, branch side: r1 = 1
.global CSYM(ngen_LinkBlock_cond_Branch_stub)
HIDDEN(ngen_LinkBlock_cond_Branch_stub)
CSYM(ngen_LinkBlock_cond_Branch_stub):
	mov	r1, #1
	b	CSYM(ngen_LinkBlock_Shared_stub)

@ Conditional, next side: r1 = 0
.global CSYM(ngen_LinkBlock_cond_Next_stub)
HIDDEN(ngen_LinkBlock_cond_Next_stub)
CSYM(ngen_LinkBlock_cond_Next_stub):
	mov	r1, #0
	b	CSYM(ngen_LinkBlock_Shared_stub)

@ Shared tail.
@ In:  lr = return address left by the call that reached the stub
@      r1 = value chosen by the entry point above
@ Calls rdv_LinkBlock(r0 = lr - 4, r1) and jumps to the address it returns in r0.
.global CSYM(ngen_LinkBlock_Shared_stub)
HIDDEN(ngen_LinkBlock_Shared_stub)
CSYM(ngen_LinkBlock_Shared_stub):
	sub	r0, lr, #4			@ go before the call
	bl	CSYM(rdv_LinkBlock)
	bx	r0
|
|
|
|
|
|
|
|
@@@@@@@@@@ ngen_FailedToFindBlock_ @@@@@@@@@@
|
|
|
|
|
|
|
|
|
2014-12-17 18:13:05 +00:00
|
|
|
@ ngen_FailedToFindBlock_
@   Passes r4 to rdv_FailedToFindBlock and jumps to the address returned in r0.
@ In:  r4 = argument forwarded in r0
@ The bl overwrites lr; control leaves through bx r0, never through lr.
.global CSYM(ngen_FailedToFindBlock_)
HIDDEN(ngen_FailedToFindBlock_)
CSYM(ngen_FailedToFindBlock_):
	mov	r0, r4
	bl	CSYM(rdv_FailedToFindBlock)
	bx	r0
|
|
|
|
|
|
|
|
@@@@@@@@@@ ngen_blockcheckfail @@@@@@@@@@
|
|
|
|
|
2014-12-17 18:13:05 +00:00
|
|
|
@ ngen_blockcheckfail
@   Calls rdv_BlockCheckFail with the argument registers exactly as they are
@   on entry, then jumps to the address returned in r0.
@ The bl overwrites lr; control leaves through bx r0, never through lr.
.global CSYM(ngen_blockcheckfail)
HIDDEN(ngen_blockcheckfail)
CSYM(ngen_blockcheckfail):
	bl	CSYM(rdv_BlockCheckFail)
	bx	r0
|
|
|
|
|
|
|
|
|
|
|
|
@@@@@@@@@@ ngen_mainloop @@@@@@@@@@
|
|
|
|
|
|
|
|
@ you can load the address of the sh4 reg struct on the mainloop init
|
|
|
|
@ using (u8*)regptr-(u8*)Sh4cntx
|
|
|
|
@ all registers are < 1024 bytes from that
|
|
|
|
@ so you can use reg+imm forms for it
|
|
|
|
|
|
|
|
|
|
|
|
|
2014-12-17 18:13:05 +00:00
|
|
|
@ ngen_mainloop
@   Entry of the dynarec main loop. Saves r4-r12 and lr, seeds the cycle
@   counter with SH4_TIMESLICE, loads the context pointer and the pc, then
@   enters the dispatcher at no_update.
@ In:   r0 = context pointer (kept in r8 for the whole loop)
@ Regs: r4 = next pc, r8 = context, r9 = cycle counter
@       (on Darwin the counter is r11 instead: r9 is not clean on iOS)
@ Exit: through cleanup, once ngen_required reads as zero.
.global CSYM(ngen_mainloop)
HIDDEN(ngen_mainloop)
CSYM(ngen_mainloop):
	push	{ r4-r12, lr }			@ 10 words, sp keeps its 8-byte alignment

#if HOST_OS == OS_DARWIN
	mov	r11, #SH4_TIMESLICE		@ load cycle counter
#else
	mov	r9, #SH4_TIMESLICE		@ load cycle counter
#endif

	mov	r8, r0				@ Load context
	ldr	r4, [r8, #-184]			@ load pc

	b	CSYM(no_update)			@ Go to mainloop !

@ intc_sched
@   Adds another SH4_TIMESLICE to the cycle counter and calls UpdateSystem.
@   If it returns 0, returns to the caller through lr; otherwise falls into
@   do_iter. This code is placed here for the fall-through into do_iter.
@   The incoming lr is parked in r4 across the call, so the incoming r4 is lost.
.global CSYM(intc_sched)
HIDDEN(intc_sched)
CSYM(intc_sched):				@ next_pc _MUST_ be on ram
#if HOST_OS == OS_DARWIN
	add	r11, r11, #SH4_TIMESLICE
#else
	add	r9, r9, #SH4_TIMESLICE
#endif
	mov	r4, lr
	bl	CSYM(UpdateSystem)
	mov	lr, r4
	cmp	r0, #0
	bxeq	lr				@ faster than bxeq r4 (as it should, call stack cache)

@ do_iter: passes r4 to rdv_DoInterrupts and takes the result as the new r4,
@ then falls through into no_update.
do_iter:
	mov	r0, r4
	bl	CSYM(rdv_DoInterrupts)
	mov	r4, r0

@ no_update
@   Dispatcher. Leaves the loop when ngen_required is zero, otherwise jumps
@   through a table of code pointers indexed by bits of r4.
.global CSYM(no_update)
HIDDEN(no_update)
CSYM(no_update):				@ next_pc _MUST_ be on r4 *R4 NOT R0 anymore*
	@ Exit check.
	@ NOTE(review): reads ngen_required as a 32-bit word - confirm the C
	@ declaration is 32 bits wide.
	ldr	r3, =CSYM(ngen_required)	@ r3 = address of the C variable
	ldr	r0, [r3]			@ r0 = its current value
	cmp	r0, #0
	beq	CSYM(cleanup)			@ zero: leave the main loop

	@ Table base is a fixed distance below the context pointer; the index is
	@ taken from r4 starting at bit 1 and scaled by 4 (one word per entry).
#if DC_PLATFORM == DC_PLATFORM_NAOMI
	sub	r2, r8, #0x4100000
	ubfx	r1, r4, #1, #24
#else
	sub	r2, r8, #0x2100000
	ubfx	r1, r4, #1, #23
#endif
	ldr	pc, [r2, r1, lsl #2]

	@bic r1,r4,#0xFF000000
	@ldr pc,[r2,r1,lsl #1]

@ cleanup: undo the push done in ngen_mainloop and return to its caller.
@ The saved lr is popped straight into pc, the same form arm_exit uses,
@ replacing the former pop-into-lr followed by bx lr.
HIDDEN(cleanup)
CSYM(cleanup):
	pop	{ r4-r12, pc }
|
|
|
|
|
|
|
|
end_ngen_mainloop:
|
|
|
|
@@@@@@@@@@ ngen_mainloop @@@@@@@@@@
|
|
|
|
|
2014-12-17 18:13:05 +00:00
|
|
|
@ arm_compilecode
@   Calls CompileCode, then re-enters the dispatcher at arm_dispatch.
@   Does not return to its caller: the bl overwrites lr and the routine
@   ends with a branch.
.global CSYM(arm_compilecode)
HIDDEN(arm_compilecode)
CSYM(arm_compilecode):
	bl	CSYM(CompileCode)
	b	CSYM(arm_dispatch)
|
2013-12-19 17:10:14 +00:00
|
|
|
|
2014-05-20 22:58:41 +00:00
|
|
|
#ifdef TARGET_IPHONE
@ Literal words holding the addresses of arm_Reg and EntryPoints,
@ loaded pc-relative by arm_mainloop below.
Xarm_Reg:	.word CSYM(arm_Reg)
XEntryPoints:	.word CSYM(EntryPoints)
#endif

@ arm_mainloop
@   Entry of the arm dispatcher loop.
@ In:   r0 = cycles to add for this timeslice
@       (the context and the lookup base are loaded here, not taken as arguments)
@ Regs: r4 = lookup base (EntryPoints), r5 = cycle count, r8 = context (arm_Reg)
@ Exit: through arm_exit, which must pop exactly what is pushed here.
@
@ Six registers are pushed so that sp stays 8-byte aligned for the calls
@ made from this loop (CPUFiq, CompileCode). The earlier five-register
@ push left sp at 4 modulo 8. r10 is saved only to make the count even.
.global CSYM(arm_mainloop)
HIDDEN(arm_mainloop)
CSYM(arm_mainloop):
#if HOST_OS == OS_DARWIN
	push	{r4, r5, r8, r10, r11, lr}	@ r11 in place of r9: r9 is not clean on iOS
#else
	push	{r4, r5, r8, r9, r10, lr}
#endif

#ifdef TARGET_IPHONE
	ldr	r8, Xarm_Reg			@ load cntx
	ldr	r4, XEntryPoints		@ load lookup base
#else
	ldr	r8, =CSYM(arm_Reg)		@ load cntx
	ldr	r4, =CSYM(EntryPoints)		@ load lookup base
#endif

	ldr	r5, [r8, #192]			@ load cycle count
	add	r5, r0				@ add cycles for this timeslice

	b	CSYM(arm_dispatch)

@ arm_dispatch
@   Loads the next pc and the interrupt word from the context. If the
@   interrupt word is non-zero, goes to arm_dofiq; otherwise jumps through
@   the lookup table indexed by bits 2..20 of the next pc.
.global CSYM(arm_dispatch)
HIDDEN(arm_dispatch)
CSYM(arm_dispatch):
#ifdef TARGET_IPHONE
	ldrd	r0, r1, [r8, #184]		@ load: Next PC, interrupt
#else
	ldrd	r0, [r8, #184]			@ load: Next PC, interrupt
#endif

	ubfx	r2, r0, #2, #19			@ table index from the next pc
	cmp	r1, #0
	bne	arm_dofiq

	ldr	pc, [r4, r2, lsl #2]		@ jump to table entry (one word each)

@ arm_dofiq: call CPUFiq, then dispatch again.
arm_dofiq:
	bl	CSYM(CPUFiq)
	b	CSYM(arm_dispatch)

@ arm_exit
@   Stores the remaining cycle count back into the context and returns to
@   the caller of arm_mainloop. The register list mirrors the push there.
.global CSYM(arm_exit)
HIDDEN(arm_exit)
CSYM(arm_exit):
	str	r5, [r8, #192]			@ if timeslice is over, save remaining cycles
#if HOST_OS == OS_DARWIN
	pop	{r4, r5, r8, r10, r11, pc}
#else
	pop	{r4, r5, r8, r9, r10, pc}
#endif
|
2013-12-19 17:10:14 +00:00
|
|
|
|
|
|
|
@@@@@@
|
|
|
|
@matrix mul
|
|
|
|
#ifndef _ANDROID
|
2014-12-17 18:13:05 +00:00
|
|
|
@ ftrv_asm
@   dst = sum over i of mtx_row[i] * vec[i], four floats wide:
@   each 4-float group of mtx (Q8..Q11) is scaled by one element of vec
@   and accumulated into Q2.
@ In:    r0 = dst (4 floats written)
@        r1 = vec (4 floats read)
@        r2 = mtx (16 floats read)
@ Clobb: d0-d1, d4-d5, d16-d23
.global CSYM(ftrv_asm)
HIDDEN(ftrv_asm)
CSYM(ftrv_asm):

	@r0=dst,r1=vec,r2=mtx

	@3x vld1.32 might be faster
	@ Only Q8-Q11 (d16-d23, 64 bytes) are consumed below. The former list
	@ d16-d24 loaded a ninth register, reading 8 bytes past the matrix.
	vldm	r2, {d16-d23}
	vldm	r1, {d0-d1}

	VMUL.F32 Q2, Q8, d0[0]
	VMLA.F32 Q2, Q9, d0[1]
	VMLA.F32 Q2, Q10, d1[0]
	VMLA.F32 Q2, Q11, d1[1]

	vstm	r0, {d4, d5}

	bx	lr
|
|
|
|
|
2014-12-17 18:13:05 +00:00
|
|
|
@ fipr_asm
@   Four-element single precision dot product:
@     idp  = fr[n+0]*fr[m+0];
@     idp += fr[n+1]*fr[m+1];
@     idp += fr[n+2]*fr[m+2];
@     idp += fr[n+3]*fr[m+3];
@ In:    r0 = first vector (4 floats), r1 = second vector (4 floats)
@ Out:   r0 = bit pattern of the result (the same value is left in s0)
@ Clobb: d0-d3
.global CSYM(fipr_asm)
HIDDEN(fipr_asm)
CSYM(fipr_asm):
	vldm	r0, {d0-d1}			@ q0 = first vector
	vldm	r1, {d2-d3}			@ q1 = second vector

	vmul.f32 q0, q0, q1			@ element-wise products

	@ Two pairwise adds fold the four products into one sum.
	vpadd.f32 d0, d0, d1			@ d0={d0[0]+d0[1], d1[0]+d1[1]}
	vpadd.f32 d0, d0, d0			@ both lanes of d0 = sum of all four products

	vmov	r0, s0				@ store to ret ..
	bx	lr
|
|
|
|
|
|
|
|
#endif
|
2014-12-17 18:13:05 +00:00
|
|
|
|
|
|
|
#endif
|