From 84a14b6c00c5e099d35ecb4556ee84d5906d370a Mon Sep 17 00:00:00 2001
From: Jeffrey Pfau
Date: Sat, 14 Mar 2015 00:22:06 -0700
Subject: [PATCH] Util: NEONize 16-bit color packing

---
 src/gba/renderers/video-software.c |  2 +-
 src/util/arm-algo.S                | 21 ++++++++-------------
 2 files changed, 9 insertions(+), 14 deletions(-)

diff --git a/src/gba/renderers/video-software.c b/src/gba/renderers/video-software.c
index e989366f5..ac9059143 100644
--- a/src/gba/renderers/video-software.c
+++ b/src/gba/renderers/video-software.c
@@ -560,7 +560,7 @@ static void GBAVideoSoftwareRendererDrawScanline(struct GBAVideoRenderer* render
 	}
 
 #ifdef COLOR_16_BIT
-#ifdef __arm__
+#ifdef __ARM_NEON
 	_to16Bit(row, softwareRenderer->row, VIDEO_HORIZONTAL_PIXELS);
 #else
 	for (x = 0; x < VIDEO_HORIZONTAL_PIXELS; ++x) {
diff --git a/src/util/arm-algo.S b/src/util/arm-algo.S
index 2c80e337c..cb344e4c1 100644
--- a/src/util/arm-algo.S
+++ b/src/util/arm-algo.S
@@ -1,8 +1,9 @@
-# Copyright (c) 2013-2014 Jeffrey Pfau
+# Copyright (c) 2013-2015 Jeffrey Pfau
 #
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#ifdef __ARM_NEON
 # r0: Destination
 # r1: Source
 # r2: Number of words to copy as halfwords
@@ -13,28 +14,22 @@ mov r8, r0
 mov r9, r1
 mov r10, r2
 .L0:
-tst r10, #7
+tst r10, #15
 beq .L1
 ldr r0, [r9], #4
 strh r0, [r8], #2
 sub r10, #1
 b .L0
 .L1:
-ldmia r9!, {r0-r7}
-strh r0, [r8], #2
-strh r1, [r8], #2
-strh r2, [r8], #2
-strh r3, [r8], #2
-strh r4, [r8], #2
-strh r5, [r8], #2
-strh r6, [r8], #2
-strh r7, [r8], #2
-subs r10, #8
+vld4.16 {d0, d1, d2, d3}, [r9]!
+vld4.16 {d4, d5, d6, d7}, [r9]!
+vst2.16 {d0, d2}, [r8]!
+vst2.16 {d4, d6}, [r8]!
+subs r10, #16
 bne .L1
 pop {r4-r10}
 bx lr
 
-#ifdef __ARM_NEON
 # r0: Destination
 # r1: Source
 # r2: Width