Add some NEON resizing code

This commit is contained in:
Jeffrey Pfau 2014-04-26 03:31:53 -07:00
parent 6e32ade69e
commit 6ca25e44aa
2 changed files with 103 additions and 9 deletions

View File

@ -18,6 +18,11 @@
#include <signal.h>
#include <sys/time.h>
#ifdef __ARM_NEON
/*
 * Image upscalers implemented in NEON assembly.
 * Each copies a width x height image from src into dest, enlarged by 2 or 4
 * in both directions. Only declared when the compiler targets ARM NEON.
 * NOTE(review): the assembly handles 16-bit elements and computes output row
 * addresses from width alone, so dest presumably has to be tightly packed;
 * confirm against the surface pitch at the call sites.
 */
void _neon2x(void* dest, void* src, int width, int height);
void _neon4x(void* dest, void* src, int width, int height);
#endif
struct SoftwareRenderer {
struct GBAVideoSoftwareRenderer d;
struct GBASDLAudio audio;
@ -26,9 +31,11 @@ struct SoftwareRenderer {
SDL_Window* window;
SDL_Texture* tex;
SDL_Renderer* sdlRenderer;
#else
int ratio;
#endif
int viewportWidth;
int viewportHeight;
#endif
};
static int _GBASDLInit(struct SoftwareRenderer* renderer);
@ -47,6 +54,9 @@ int main(int argc, char** argv) {
return 1;
}
renderer.viewportWidth = opts.width;
renderer.viewportHeight = opts.height;
if (!_GBASDLInit(&renderer)) {
return 1;
}
@ -65,8 +75,6 @@ int main(int argc, char** argv) {
GBAMapOptionsToContext(&opts, &context);
#if SDL_VERSION_ATLEAST(2, 0, 0)
renderer.viewportWidth = opts.width;
renderer.viewportHeight = opts.height;
renderer.events.fullscreen = opts.fullscreen;
renderer.window = SDL_CreateWindow("GBAc", SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, renderer.viewportWidth, renderer.viewportHeight, SDL_WINDOW_OPENGL | (SDL_WINDOW_FULLSCREEN_DESKTOP * renderer.events.fullscreen));
SDL_GetWindowSize(renderer.window, &renderer.viewportWidth, &renderer.viewportHeight);
@ -91,12 +99,23 @@ int main(int argc, char** argv) {
#else
SDL_Surface* surface = SDL_GetVideoSurface();
SDL_LockSurface(surface);
renderer.d.outputBuffer = surface->pixels;
renderer.ratio = renderer.viewportWidth / VIDEO_HORIZONTAL_PIXELS;
if (renderer.ratio == 1) {
renderer.d.outputBuffer = surface->pixels;
#ifdef COLOR_16_BIT
renderer.d.outputBufferStride = surface->pitch / 2;
renderer.d.outputBufferStride = surface->pitch / 2;
#else
renderer.d.outputBufferStride = surface->pitch / 4;
renderer.d.outputBufferStride = surface->pitch / 4;
#endif
} else {
#ifdef COLOR_16_BIT
renderer.d.outputBuffer = malloc(240 * 160 * 2);
#else
renderer.d.outputBuffer = malloc(240 * 160 * 4);
#endif
renderer.d.outputBufferStride = 240;
}
#endif
GBAThreadStart(&context);
@ -128,9 +147,9 @@ static int _GBASDLInit(struct SoftwareRenderer* renderer) {
#if !SDL_VERSION_ATLEAST(2, 0, 0)
#ifdef COLOR_16_BIT
SDL_SetVideoMode(240, 160, 16, SDL_DOUBLEBUF | SDL_HWSURFACE);
SDL_SetVideoMode(renderer->viewportWidth, renderer->viewportHeight, 16, SDL_DOUBLEBUF | SDL_HWSURFACE);
#else
SDL_SetVideoMode(240, 160, 32, SDL_DOUBLEBUF | SDL_HWSURFACE);
SDL_SetVideoMode(renderer->viewportWidth, renderer->viewportHeight, 32, SDL_DOUBLEBUF | SDL_HWSURFACE);
#endif
#endif
@ -156,6 +175,20 @@ static void _GBASDLRunloop(struct GBAThread* context, struct SoftwareRenderer* r
renderer->d.outputBufferStride /= 4;
#endif
#else
/*
 * Copy the rendered frame into the SDL surface, enlarging it by the integer
 * ratio. A ratio of 1 needs no copy here. Any ratio without a scaler compiled
 * in is treated as fatal.
 */
switch (renderer->ratio) {
/*
 * COLOR_16_BIT is tested with defined(), like the #ifdef checks elsewhere in
 * this file, so the condition is also valid when the macro has no value.
 */
#if defined(__ARM_NEON) && defined(COLOR_16_BIT)
case 2:
_neon2x(surface->pixels, renderer->d.outputBuffer, 240, 160);
break;
case 4:
_neon4x(surface->pixels, renderer->d.outputBuffer, 240, 160);
break;
#endif
case 1:
break;
default:
/* No scaler is available for this ratio. */
abort();
}
SDL_UnlockSurface(surface);
SDL_Flip(surface);
SDL_LockSurface(surface);

View File

@ -26,6 +26,67 @@ strh r6, [r8], #2
strh r7, [r8], #2
subs r10, #8
bne .L1
.L9:
pop {r4-r10}
bx lr
# _neon2x: enlarge an image by two in each direction. Every 16-bit source
# element is written twice horizontally, and every source row is written to
# two consecutive output rows.
# r0: Destination
# r1: Source
# r2: Width (source elements per row)
# r3: Height (source rows)
# Output rows are addressed as contiguous blocks of Width * 4 bytes.
# Width is expected to be a nonzero multiple of 4 and Height nonzero: each
# inner iteration consumes four source elements, and both counters are
# decremented before they are tested.
# Modifies r0-r3 and d0-d3; r4-r5 are saved and restored.
.global _neon2x
_neon2x:
push {r4-r5}
# r4 = Width * 4 = length in bytes of one output row
lsl r4, r2, #2
.n20:
# r2 = r4 / 16 = inner iterations per row (each writes one q register per row)
# r5 = address of the second output row for this source row
mov r2, r4, lsr #4
add r5, r0, r4
.n21:
# Load two 32-bit words (four 16-bit elements); the first word is replicated
# across d0 and the second across d1. r1 advances past the loaded data.
vld2.32 {d0[], d1[]}, [r1]!
vmov d2, d0
vmov d3, d1
# Interleaving a register with its own copy leaves each 16-bit element
# doubled in place: d0 = e0 e0 e1 e1, d1 = e2 e2 e3 e3.
vzip.16 d0, d2
vzip.16 d1, d3
# Write the same eight output elements to both output rows.
vst1.32 {q0}, [r0]!
vst1.32 {q0}, [r5]!
subs r2, #1
bne .n21
# r5 now points one row past the second output row, which is where the next
# pair begins. The mov does not update the flags, so bne tests the subs result.
subs r3, #1
mov r0, r5
bne .n20
pop {r4-r5}
bx lr
# _neon4x: enlarge an image by four in each direction. Every 16-bit source
# element is written four times horizontally, and every source row is written
# to four consecutive output rows.
# Arguments as above: r0 destination, r1 source, r2 width, r3 height.
# Output rows are addressed as contiguous blocks of Width * 8 bytes.
# Width is expected to be a nonzero multiple of 4 and Height nonzero: each
# inner iteration consumes four source elements, and both counters are
# decremented before they are tested.
# Modifies r0-r3 and d0-d3; r4-r7 are saved and restored.
.global _neon4x
_neon4x:
push {r4-r7}
# r4 = Width * 8 = length in bytes of one output row
lsl r4, r2, #3
.n40:
# r2 = r4 / 32 = inner iterations per row
# r5, r6, r7 = addresses of the second, third and fourth output rows
mov r2, r4, lsr #5
add r5, r0, r4
add r6, r5, r4
add r7, r6, r4
.n41:
# Load four consecutive 16-bit elements; element k fills every lane of dk.
vld4.16 {d0[], d1[], d2[], d3[]}, [r1]!
# Each d register now holds one element four times over. Store them in order
# to all four output rows, advancing every row pointer after each store.
vst1.16 {d0}, [r0]!
vst1.16 {d0}, [r5]!
vst1.16 {d0}, [r6]!
vst1.16 {d0}, [r7]!
vst1.16 {d1}, [r0]!
vst1.16 {d1}, [r5]!
vst1.16 {d1}, [r6]!
vst1.16 {d1}, [r7]!
vst1.16 {d2}, [r0]!
vst1.16 {d2}, [r5]!
vst1.16 {d2}, [r6]!
vst1.16 {d2}, [r7]!
vst1.16 {d3}, [r0]!
vst1.16 {d3}, [r5]!
vst1.16 {d3}, [r6]!
vst1.16 {d3}, [r7]!
subs r2, #1
bne .n41
# r7 now points one row past the fourth output row, which is where the next
# group begins. The mov does not update the flags, so bne tests the subs result.
subs r3, #1
mov r0, r7
bne .n40
pop {r4-r7}
bx lr