From 6ca25e44aa55b8930d14ad313fa1ad5c94c4f8bf Mon Sep 17 00:00:00 2001 From: Jeffrey Pfau Date: Sat, 26 Apr 2014 03:31:53 -0700 Subject: [PATCH] Add some NEON resizing code --- src/platform/sdl/sw-main.c | 49 ++++++++++++++++++++++++----- src/util/arm-algo.S | 63 +++++++++++++++++++++++++++++++++++++- 2 files changed, 103 insertions(+), 9 deletions(-) diff --git a/src/platform/sdl/sw-main.c b/src/platform/sdl/sw-main.c index bda2e25cf..d0b9239e4 100644 --- a/src/platform/sdl/sw-main.c +++ b/src/platform/sdl/sw-main.c @@ -18,6 +18,11 @@ #include #include +#ifdef __ARM_NEON +void _neon2x(void* dest, void* src, int width, int height); +void _neon4x(void* dest, void* src, int width, int height); +#endif + struct SoftwareRenderer { struct GBAVideoSoftwareRenderer d; struct GBASDLAudio audio; @@ -26,9 +31,11 @@ struct SoftwareRenderer { SDL_Window* window; SDL_Texture* tex; SDL_Renderer* sdlRenderer; +#else + int ratio; +#endif int viewportWidth; int viewportHeight; -#endif }; static int _GBASDLInit(struct SoftwareRenderer* renderer); @@ -47,6 +54,9 @@ int main(int argc, char** argv) { return 1; } + renderer.viewportWidth = opts.width; + renderer.viewportHeight = opts.height; + if (!_GBASDLInit(&renderer)) { return 1; } @@ -65,8 +75,6 @@ int main(int argc, char** argv) { GBAMapOptionsToContext(&opts, &context); #if SDL_VERSION_ATLEAST(2, 0, 0) - renderer.viewportWidth = opts.width; - renderer.viewportHeight = opts.height; renderer.events.fullscreen = opts.fullscreen; renderer.window = SDL_CreateWindow("GBAc", SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, renderer.viewportWidth, renderer.viewportHeight, SDL_WINDOW_OPENGL | (SDL_WINDOW_FULLSCREEN_DESKTOP * renderer.events.fullscreen)); SDL_GetWindowSize(renderer.window, &renderer.viewportWidth, &renderer.viewportHeight); @@ -91,12 +99,23 @@ int main(int argc, char** argv) { #else SDL_Surface* surface = SDL_GetVideoSurface(); SDL_LockSurface(surface); - renderer.d.outputBuffer = surface->pixels; + + renderer.ratio = renderer.viewportWidth / VIDEO_HORIZONTAL_PIXELS; + if (renderer.ratio == 1) { + renderer.d.outputBuffer = surface->pixels; #ifdef COLOR_16_BIT - renderer.d.outputBufferStride = surface->pitch / 2; + renderer.d.outputBufferStride = surface->pitch / 2; #else - renderer.d.outputBufferStride = surface->pitch / 4; + renderer.d.outputBufferStride = surface->pitch / 4; #endif + } else { +#ifdef COLOR_16_BIT + renderer.d.outputBuffer = malloc(240 * 160 * 2); +#else + renderer.d.outputBuffer = malloc(240 * 160 * 4); +#endif + renderer.d.outputBufferStride = 240; + } #endif GBAThreadStart(&context); @@ -128,9 +147,9 @@ static int _GBASDLInit(struct SoftwareRenderer* renderer) { #if !SDL_VERSION_ATLEAST(2, 0, 0) #ifdef COLOR_16_BIT - SDL_SetVideoMode(240, 160, 16, SDL_DOUBLEBUF | SDL_HWSURFACE); + SDL_SetVideoMode(renderer->viewportWidth, renderer->viewportHeight, 16, SDL_DOUBLEBUF | SDL_HWSURFACE); #else - SDL_SetVideoMode(240, 160, 32, SDL_DOUBLEBUF | SDL_HWSURFACE); + SDL_SetVideoMode(renderer->viewportWidth, renderer->viewportHeight, 32, SDL_DOUBLEBUF | SDL_HWSURFACE); #endif #endif @@ -156,6 +175,20 @@ static void _GBASDLRunloop(struct GBAThread* context, struct SoftwareRenderer* r renderer->d.outputBufferStride /= 4; #endif #else + switch (renderer->ratio) { +#if defined(__ARM_NEON) && COLOR_16_BIT + case 2: + _neon2x(surface->pixels, renderer->d.outputBuffer, 240, 160); + break; + case 4: + _neon4x(surface->pixels, renderer->d.outputBuffer, 240, 160); + break; +#endif + case 1: + break; + default: + abort(); + } SDL_UnlockSurface(surface); SDL_Flip(surface); SDL_LockSurface(surface); diff --git a/src/util/arm-algo.S b/src/util/arm-algo.S index 48c3903b1..eb07ae384 100644 --- a/src/util/arm-algo.S +++ b/src/util/arm-algo.S @@ -26,6 +26,67 @@ strh r6, [r8], #2 strh r7, [r8], #2 subs r10, #8 bne .L1 -.L9: pop {r4-r10} bx lr + +# r0: Destination +# r1: Source +# r2: Width +# r3: Height +.global _neon2x +_neon2x: +push {r4-r5} +lsl r4, r2, #2 +.n20: +mov r2, r4, lsr #4 +add r5, r0, r4 +.n21: +vld2.32 {d0[], d1[]}, [r1]! +vmov d2, d0 +vmov d3, d1 +vzip.16 d0, d2 +vzip.16 d1, d3 +vst1.32 {q0}, [r0]! +vst1.32 {q0}, [r5]! +subs r2, #1 +bne .n21 +subs r3, #1 +mov r0, r5 +bne .n20 +pop {r4-r5} +bx lr + +.global _neon4x +_neon4x: +push {r4-r7} +lsl r4, r2, #3 +.n40: +mov r2, r4, lsr #5 +add r5, r0, r4 +add r6, r5, r4 +add r7, r6, r4 +.n41: +vld4.16 {d0[], d1[], d2[], d3[]}, [r1]! +vst1.16 {d0}, [r0]! +vst1.16 {d0}, [r5]! +vst1.16 {d0}, [r6]! +vst1.16 {d0}, [r7]! +vst1.16 {d1}, [r0]! +vst1.16 {d1}, [r5]! +vst1.16 {d1}, [r6]! +vst1.16 {d1}, [r7]! +vst1.16 {d2}, [r0]! +vst1.16 {d2}, [r5]! +vst1.16 {d2}, [r6]! +vst1.16 {d2}, [r7]! +vst1.16 {d3}, [r0]! +vst1.16 {d3}, [r5]! +vst1.16 {d3}, [r6]! +vst1.16 {d3}, [r7]! +subs r2, #1 +bne .n41 +subs r3, #1 +mov r0, r7 +bne .n40 +pop {r4-r7} +bx lr