win32: start converting filters to 32bit

This commit is contained in:
p989 2009-08-10 02:30:32 +00:00
parent c4be07bc87
commit 88f0549e85
8 changed files with 4184 additions and 1158 deletions

File diff suppressed because it is too large Load Diff

View File

@ -6,13 +6,15 @@
#include "types.h"
int systemRedShift = 10;
int systemGreenShift = 0;
int systemBlueShift = 5;
int systemRedShift = 24;
int systemGreenShift = 16;
int systemBlueShift = 8;
/*
#define RGB1(r,g,b) ((r)>>3) << systemRedShift |\
((g) >> 3) << systemGreenShift |\
((b) >> 3) << systemBlueShift\
*/
// Packs 8-bit r, g and b components into one 32-bit pixel at the bit positions
// given by systemRedShift / systemGreenShift / systemBlueShift, with 0xFF in
// the low byte. Each component is widened to u32 before shifting so that a
// value >= 0x80 shifted by 24 cannot overflow a signed int, and the whole
// expansion is parenthesized so the macro is safe inside larger expressions.
#define RGB1(r,g,b) ((((u32)(r)) << systemRedShift) | (((u32)(g)) << systemGreenShift) | (((u32)(b)) << systemBlueShift) | 0xFFu)
static void fill_rgb_row_16(u16 *from, int src_width, u8 *row, int width)
{
@ -37,6 +39,29 @@ static void fill_rgb_row_16(u16 *from, int src_width, u8 *row, int width)
}
}
// Unpacks one row of 32-bit pixels into consecutive R, G, B bytes.
//   from      - source pixels; src_width of them are read
//   src_width - number of source pixels to unpack
//   row       - destination; receives width*3 bytes
//   width     - number of pixels to produce; entries past src_width repeat
//               the last unpacked pixel
// Each component is the full 8 bits found at systemRedShift /
// systemGreenShift / systemBlueShift, i.e. the inverse of what RGB1 packs
// (the previous 5-bit mask plus <<3 belonged to the 16-bit layout).
static void fill_rgb_row_32(u32 *from, int src_width, u8 *row, int width)
{
  u8 *copy_start = row + src_width*3;
  u8 *all_stop = row + width*3;

  while (row < copy_start) {
    u32 color = *from++;
    *row++ = (u8)((color >> systemRedShift) & 0xff);
    *row++ = (u8)((color >> systemGreenShift) & 0xff);
    *row++ = (u8)((color >> systemBlueShift) & 0xff);
  }

  if (src_width <= 0) {
    // no pixel was unpacked, so there is nothing to replicate; zero the
    // padding instead of reading in front of the buffer
    while (row < all_stop)
      *row++ = 0;
    return;
  }

  // any remaining elements to be written to 'row' are a replica of the
  // preceding pixel
  u8 *p = row-3;
  while (row < all_stop) {
    // three elements per pixel
    *row++ = *p++;
    *row++ = *p++;
    *row++ = *p++;
  }
}
void Bilinear(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */,
u8 *dstPtr, u32 dstPitch, int width, int height)
{
@ -221,6 +246,188 @@ void BilinearPlus(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */,
}
}
// 2x bilinear upscale of a 32-bit image.
//   srcPtr, srcPitch - source pixels and the byte distance between source rows
//   (third argument)  - unused
//   dstPtr, dstPitch - destination and the byte distance between destination rows
//   width, height    - source size in pixels; every source pixel produces a
//                      2x2 quad, so 2*width by 2*height pixels are written
// Rows wider than the scratch buffers (321 pixels plus one padding pixel) and
// empty images are ignored.
void Bilinear32(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */,
                u8 *dstPtr, u32 dstPitch, int width, int height)
{
  enum { MAX_ROW_PIXELS = 322 };

  if (width <= 0 || height <= 0 || width + 1 > MAX_ROW_PIXELS)
    return;

  u8 row_cur[3*MAX_ROW_PIXELS];
  u8 row_next[3*MAX_ROW_PIXELS];
  u8 *rgb_row_cur = row_cur;
  u8 *rgb_row_next = row_next;

  u32 *to = (u32 *)dstPtr;
  u32 *to_odd = (u32 *)(dstPtr + dstPitch);

  int from_width = width;
  u32 *from = (u32 *)srcPtr;
  fill_rgb_row_32(from, from_width, rgb_row_cur, width+1);

  for(int y = 0; y < height; y++) {
    u32 *from_orig = from;
    u32 *to_orig = to;

    // The row below is located with srcPitch, the same stride used to advance
    // 'from' at the bottom of this loop, so both always refer to the same
    // source line. On the last line the current row stands in for it.
    if (y+1 < height)
      fill_rgb_row_32((u32 *)((u8 *)from + srcPitch), from_width,
                      rgb_row_next, width+1);
    else
      fill_rgb_row_32(from, from_width, rgb_row_next, width+1);

    // every pixel in the src region, is extended to 4 pixels in the
    // destination, arranged in a square 'quad'; if the current src
    // pixel is 'a', then in what follows 'b' is the src pixel to the
    // right, 'c' is the src pixel below, and 'd' is the src pixel to
    // the right and down
    u8 *cur_row = rgb_row_cur;
    u8 *next_row = rgb_row_next;
    u8 *ar = cur_row++;
    u8 *ag = cur_row++;
    u8 *ab = cur_row++;
    u8 *cr = next_row++;
    u8 *cg = next_row++;
    u8 *cb = next_row++;

    for(int x=0; x < width; x++) {
      u8 *br = cur_row++;
      u8 *bg = cur_row++;
      u8 *bb = cur_row++;
      u8 *dr = next_row++;
      u8 *dg = next_row++;
      u8 *db = next_row++;

      // upper left pixel in quad: just copy it in
      *to++ = RGB1(*ar, *ag, *ab);

      // upper right
      *to++ = RGB1((*ar+*br)>>1, (*ag+*bg)>>1, (*ab+*bb)>>1);

      // lower left
      *to_odd++ = RGB1((*ar+*cr)>>1, (*ag+*cg)>>1, (*ab+*cb)>>1);

      // lower right
      *to_odd++ = RGB1((*ar+*br+*cr+*dr)>>2,
                       (*ag+*bg+*cg+*dg)>>2,
                       (*ab+*bb+*cb+*db)>>2);

      // 'b' becomes 'a', 'd' becomes 'c'
      ar = br;
      ag = bg;
      ab = bb;
      cr = dr;
      cg = dg;
      cb = db;
    }

    // the "next" rgb row becomes the current; the old current rgb row is
    // recycled and serves as the new "next" row
    u8 *temp;
    temp = rgb_row_cur;
    rgb_row_cur = rgb_row_next;
    rgb_row_next = temp;

    // update the pointers for start of next pair of lines
    from = (u32 *)((u8 *)from_orig + srcPitch);
    to = (u32 *)((u8 *)to_orig + (dstPitch << 1));
    to_odd = (u32 *)((u8 *)to + dstPitch);
  }
}
// 2x "bilinear plus" upscale of a 32-bit image. Identical to Bilinear32 except
// for the upper-left pixel of each quad, which is a weighted mix of the source
// pixel with its right and lower neighbours instead of a plain copy.
//   srcPtr, srcPitch - source pixels and the byte distance between source rows
//   (third argument)  - unused
//   dstPtr, dstPitch - destination and the byte distance between destination rows
//   width, height    - source size in pixels; 2*width by 2*height pixels are written
// Rows wider than the scratch buffers (321 pixels plus one padding pixel) and
// empty images are ignored.
void BilinearPlus32(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */,
                    u8 *dstPtr, u32 dstPitch, int width, int height)
{
  enum { MAX_ROW_PIXELS = 322 };

  if (width <= 0 || height <= 0 || width + 1 > MAX_ROW_PIXELS)
    return;

  u8 row_cur[3*MAX_ROW_PIXELS];
  u8 row_next[3*MAX_ROW_PIXELS];
  u8 *rgb_row_cur = row_cur;
  u8 *rgb_row_next = row_next;

  u32 *to = (u32 *)dstPtr;
  u32 *to_odd = (u32 *)(dstPtr + dstPitch);

  int from_width = width;
  u32 *from = (u32 *)srcPtr;
  fill_rgb_row_32(from, from_width, rgb_row_cur, width+1);

  for(int y = 0; y < height; y++) {
    u32 *from_orig = from;
    u32 *to_orig = to;

    // The row below is located with srcPitch, the same stride used to advance
    // 'from' at the bottom of this loop, so both always refer to the same
    // source line. On the last line the current row stands in for it.
    if (y+1 < height)
      fill_rgb_row_32((u32 *)((u8 *)from + srcPitch), from_width,
                      rgb_row_next, width+1);
    else
      fill_rgb_row_32(from, from_width, rgb_row_next, width+1);

    // every pixel in the src region, is extended to 4 pixels in the
    // destination, arranged in a square 'quad'; if the current src
    // pixel is 'a', then in what follows 'b' is the src pixel to the
    // right, 'c' is the src pixel below, and 'd' is the src pixel to
    // the right and down
    u8 *cur_row = rgb_row_cur;
    u8 *next_row = rgb_row_next;
    u8 *ar = cur_row++;
    u8 *ag = cur_row++;
    u8 *ab = cur_row++;
    u8 *cr = next_row++;
    u8 *cg = next_row++;
    u8 *cb = next_row++;

    for(int x=0; x < width; x++) {
      u8 *br = cur_row++;
      u8 *bg = cur_row++;
      u8 *bb = cur_row++;
      u8 *dr = next_row++;
      u8 *dg = next_row++;
      u8 *db = next_row++;

      // upper left pixel in quad: weighted towards 'a', mixed with 'b' and 'c'.
      // Both variants pack with RGB1, like every other pixel written here.
#ifdef USE_ORIGINAL_BILINEAR_PLUS
      *to++ = RGB1(
        (((*ar)<<2) +((*ar)) + (*cr+*br+*br) )>> 3,
        (((*ag)<<2) +((*ag)) + (*cg+*bg+*bg) )>> 3,
        (((*ab)<<2) +((*ab)) + (*cb+*bb+*bb) )>> 3);
#else
      *to++ = RGB1(
        (((*ar)<<3) +((*ar)<<1) + (*cr+*br+*br+*cr) )>> 4,
        (((*ag)<<3) +((*ag)<<1) + (*cg+*bg+*bg+*cg) )>> 4,
        (((*ab)<<3) +((*ab)<<1) + (*cb+*bb+*bb+*cb) )>> 4);
#endif

      // upper right
      *to++ = RGB1((*ar+*br)>>1, (*ag+*bg)>>1, (*ab+*bb)>>1);

      // lower left
      *to_odd++ = RGB1((*ar+*cr)>>1, (*ag+*cg)>>1, (*ab+*cb)>>1);

      // lower right
      *to_odd++ = RGB1((*ar+*br+*cr+*dr)>>2,
                       (*ag+*bg+*cg+*dg)>>2,
                       (*ab+*bb+*cb+*db)>>2);

      // 'b' becomes 'a', 'd' becomes 'c'
      ar = br;
      ag = bg;
      ab = bb;
      cr = dr;
      cg = dg;
      cb = db;
    }

    // the "next" rgb row becomes the current; the old current rgb row is
    // recycled and serves as the new "next" row
    u8 *temp;
    temp = rgb_row_cur;
    rgb_row_cur = rgb_row_next;
    rgb_row_next = temp;

    // update the pointers for start of next pair of lines
    from = (u32 *)((u8 *)from_orig + srcPitch);
    to = (u32 *)((u8 *)to_orig + (dstPitch << 1));
    to_odd = (u32 *)((u8 *)to + dstPitch);
  }
}
void RenderBilinear (SSurface Src, SSurface Dst)
{
@ -229,8 +436,8 @@ void RenderBilinear (SSurface Src, SSurface Dst)
lpSrc = Src.Surface;
lpDst = Dst.Surface;
Bilinear (lpSrc, Src.Pitch,
Bilinear32 (lpSrc, Src.Pitch*2,
lpSrc,
lpDst, Dst.Pitch, Src.Width, Src.Height);
lpDst, Dst.Pitch*2, Src.Width, Src.Height);
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,355 @@
/*
* This file is part of the Advance project.
*
* Copyright (C) 2003 Andrea Mazzoleni
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* In addition, as a special exception, Andrea Mazzoleni
* gives permission to link the code of this program with
* the MAME library (or with modified versions of MAME that use the
* same license as MAME), and distribute linked combinations including
* the two. You must obey the GNU General Public License in all
* respects for all of the code used other than MAME. If you modify
* this file, you may extend this exception to your version of the
* file, but you are not obligated to do so. If you do not wish to
* do so, delete this exception statement from your version.
*/
#ifndef __INTERP_H
#define __INTERP_H
typedef unsigned char u8;
typedef unsigned short u16;
typedef unsigned int u32;
/***************************************************************************/
/* Basic types */
/***************************************************************************/
/* interpolation */
extern unsigned interp_mask[2];
extern unsigned interp_bits_per_pixel;
/* Select the bits of v covered by interp_mask[0] / interp_mask[1]. The
 * argument is parenthesized so that expressions such as `a | b` are masked as
 * a whole. */
#define INTERP_16_MASK_1(v) ((v) & interp_mask[0])
#define INTERP_16_MASK_2(v) ((v) & interp_mask[1])
static inline u16 interp_16_521(u16 p1, u16 p2, u16 p3)
{
	/* 5:2:1 weighted average (divisor 8), computed independently on the bits
	 * selected by interp_mask[0] and by interp_mask[1]. */
	const unsigned sum_a = INTERP_16_MASK_1(p1)*5 + INTERP_16_MASK_1(p2)*2 + INTERP_16_MASK_1(p3);
	const unsigned sum_b = INTERP_16_MASK_2(p1)*5 + INTERP_16_MASK_2(p2)*2 + INTERP_16_MASK_2(p3);
	const unsigned avg_a = sum_a / 8;
	const unsigned avg_b = sum_b / 8;
	return INTERP_16_MASK_1(avg_a) | INTERP_16_MASK_2(avg_b);
}
static inline u16 interp_16_332(u16 p1, u16 p2, u16 p3)
{
	/* 3:3:2 weighted average (divisor 8), computed independently on the bits
	 * selected by interp_mask[0] and by interp_mask[1]. */
	const unsigned sum_a = INTERP_16_MASK_1(p1)*3 + INTERP_16_MASK_1(p2)*3 + INTERP_16_MASK_1(p3)*2;
	const unsigned sum_b = INTERP_16_MASK_2(p1)*3 + INTERP_16_MASK_2(p2)*3 + INTERP_16_MASK_2(p3)*2;
	const unsigned avg_a = sum_a / 8;
	const unsigned avg_b = sum_b / 8;
	return INTERP_16_MASK_1(avg_a) | INTERP_16_MASK_2(avg_b);
}
static inline u16 interp_16_611(u16 p1, u16 p2, u16 p3)
{
	/* 6:1:1 weighted average (divisor 8), computed independently on the bits
	 * selected by interp_mask[0] and by interp_mask[1]. */
	const unsigned sum_a = INTERP_16_MASK_1(p1)*6 + INTERP_16_MASK_1(p2) + INTERP_16_MASK_1(p3);
	const unsigned sum_b = INTERP_16_MASK_2(p1)*6 + INTERP_16_MASK_2(p2) + INTERP_16_MASK_2(p3);
	const unsigned avg_a = sum_a / 8;
	const unsigned avg_b = sum_b / 8;
	return INTERP_16_MASK_1(avg_a) | INTERP_16_MASK_2(avg_b);
}
static inline u16 interp_16_71(u16 p1, u16 p2)
{
	/* 7:1 weighted average (divisor 8), computed independently on the bits
	 * selected by interp_mask[0] and by interp_mask[1]. */
	const unsigned sum_a = INTERP_16_MASK_1(p1)*7 + INTERP_16_MASK_1(p2);
	const unsigned sum_b = INTERP_16_MASK_2(p1)*7 + INTERP_16_MASK_2(p2);
	const unsigned avg_a = sum_a / 8;
	const unsigned avg_b = sum_b / 8;
	return INTERP_16_MASK_1(avg_a) | INTERP_16_MASK_2(avg_b);
}
static inline u16 interp_16_211(u16 p1, u16 p2, u16 p3)
{
	/* 2:1:1 weighted average (divisor 4), computed independently on the bits
	 * selected by interp_mask[0] and by interp_mask[1]. */
	const unsigned sum_a = INTERP_16_MASK_1(p1)*2 + INTERP_16_MASK_1(p2) + INTERP_16_MASK_1(p3);
	const unsigned sum_b = INTERP_16_MASK_2(p1)*2 + INTERP_16_MASK_2(p2) + INTERP_16_MASK_2(p3);
	const unsigned avg_a = sum_a / 4;
	const unsigned avg_b = sum_b / 4;
	return INTERP_16_MASK_1(avg_a) | INTERP_16_MASK_2(avg_b);
}
static inline u16 interp_16_772(u16 p1, u16 p2, u16 p3)
{
	/* 7:7:2 weighted average (divisor 16), computed independently on the bits
	 * selected by interp_mask[0] and by interp_mask[1]. */
	const unsigned sum_a = (INTERP_16_MASK_1(p1) + INTERP_16_MASK_1(p2))*7 + INTERP_16_MASK_1(p3)*2;
	const unsigned sum_b = (INTERP_16_MASK_2(p1) + INTERP_16_MASK_2(p2))*7 + INTERP_16_MASK_2(p3)*2;
	const unsigned avg_a = sum_a / 16;
	const unsigned avg_b = sum_b / 16;
	return INTERP_16_MASK_1(avg_a) | INTERP_16_MASK_2(avg_b);
}
static inline u16 interp_16_11(u16 p1, u16 p2)
{
	/* 1:1 average (divisor 2), computed independently on the bits selected by
	 * interp_mask[0] and by interp_mask[1]. */
	const unsigned sum_a = INTERP_16_MASK_1(p1) + INTERP_16_MASK_1(p2);
	const unsigned sum_b = INTERP_16_MASK_2(p1) + INTERP_16_MASK_2(p2);
	const unsigned avg_a = sum_a / 2;
	const unsigned avg_b = sum_b / 2;
	return INTERP_16_MASK_1(avg_a) | INTERP_16_MASK_2(avg_b);
}
static inline u16 interp_16_31(u16 p1, u16 p2)
{
	/* 3:1 weighted average (divisor 4), computed independently on the bits
	 * selected by interp_mask[0] and by interp_mask[1]. */
	const unsigned sum_a = INTERP_16_MASK_1(p1)*3 + INTERP_16_MASK_1(p2);
	const unsigned sum_b = INTERP_16_MASK_2(p1)*3 + INTERP_16_MASK_2(p2);
	const unsigned avg_a = sum_a / 4;
	const unsigned avg_b = sum_b / 4;
	return INTERP_16_MASK_1(avg_a) | INTERP_16_MASK_2(avg_b);
}
static inline u16 interp_16_1411(u16 p1, u16 p2, u16 p3)
{
	/* 14:1:1 weighted average (divisor 16), computed independently on the bits
	 * selected by interp_mask[0] and by interp_mask[1]. */
	const unsigned sum_a = INTERP_16_MASK_1(p1)*14 + INTERP_16_MASK_1(p2) + INTERP_16_MASK_1(p3);
	const unsigned sum_b = INTERP_16_MASK_2(p1)*14 + INTERP_16_MASK_2(p2) + INTERP_16_MASK_2(p3);
	const unsigned avg_a = sum_a / 16;
	const unsigned avg_b = sum_b / 16;
	return INTERP_16_MASK_1(avg_a) | INTERP_16_MASK_2(avg_b);
}
static inline u16 interp_16_431(u16 p1, u16 p2, u16 p3)
{
	/* 4:3:1 weighted average (divisor 8), computed independently on the bits
	 * selected by interp_mask[0] and by interp_mask[1]. */
	const unsigned sum_a = INTERP_16_MASK_1(p1)*4 + INTERP_16_MASK_1(p2)*3 + INTERP_16_MASK_1(p3);
	const unsigned sum_b = INTERP_16_MASK_2(p1)*4 + INTERP_16_MASK_2(p2)*3 + INTERP_16_MASK_2(p3);
	const unsigned avg_a = sum_a / 8;
	const unsigned avg_b = sum_b / 8;
	return INTERP_16_MASK_1(avg_a) | INTERP_16_MASK_2(avg_b);
}
static inline u16 interp_16_53(u16 p1, u16 p2)
{
	/* 5:3 weighted average (divisor 8), computed independently on the bits
	 * selected by interp_mask[0] and by interp_mask[1]. */
	const unsigned sum_a = INTERP_16_MASK_1(p1)*5 + INTERP_16_MASK_1(p2)*3;
	const unsigned sum_b = INTERP_16_MASK_2(p1)*5 + INTERP_16_MASK_2(p2)*3;
	const unsigned avg_a = sum_a / 8;
	const unsigned avg_b = sum_b / 8;
	return INTERP_16_MASK_1(avg_a) | INTERP_16_MASK_2(avg_b);
}
static inline u16 interp_16_151(u16 p1, u16 p2)
{
	/* 15:1 weighted average (divisor 16), computed independently on the bits
	 * selected by interp_mask[0] and by interp_mask[1]. */
	const unsigned sum_a = INTERP_16_MASK_1(p1)*15 + INTERP_16_MASK_1(p2);
	const unsigned sum_b = INTERP_16_MASK_2(p1)*15 + INTERP_16_MASK_2(p2);
	const unsigned avg_a = sum_a / 16;
	const unsigned avg_b = sum_b / 16;
	return INTERP_16_MASK_1(avg_a) | INTERP_16_MASK_2(avg_b);
}
static inline u16 interp_16_97(u16 p1, u16 p2)
{
	/* 9:7 weighted average (divisor 16), computed independently on the bits
	 * selected by interp_mask[0] and by interp_mask[1]. */
	const unsigned sum_a = INTERP_16_MASK_1(p1)*9 + INTERP_16_MASK_1(p2)*7;
	const unsigned sum_b = INTERP_16_MASK_2(p1)*9 + INTERP_16_MASK_2(p2)*7;
	const unsigned avg_a = sum_a / 16;
	const unsigned avg_b = sum_b / 16;
	return INTERP_16_MASK_1(avg_a) | INTERP_16_MASK_2(avg_b);
}
/* Select bits 0-7 and 16-23 (MASK_1) or bits 8-15 (MASK_2) of v. The argument
 * is parenthesized so that expressions such as `a | b` are masked as a whole. */
#define INTERP_32_MASK_1(v) ((v) & 0xFF00FF)
#define INTERP_32_MASK_2(v) ((v) & 0x00FF00)
static inline u32 interp_32_521(u32 p1, u32 p2, u32 p3)
{
	/* 5:2:1 weighted average (divisor 8), computed independently on the
	 * 0xFF00FF bits and on the 0x00FF00 bits. */
	const u32 sum_a = INTERP_32_MASK_1(p1)*5 + INTERP_32_MASK_1(p2)*2 + INTERP_32_MASK_1(p3);
	const u32 sum_b = INTERP_32_MASK_2(p1)*5 + INTERP_32_MASK_2(p2)*2 + INTERP_32_MASK_2(p3);
	const u32 avg_a = sum_a / 8;
	const u32 avg_b = sum_b / 8;
	return INTERP_32_MASK_1(avg_a) | INTERP_32_MASK_2(avg_b);
}
static inline u32 interp_32_332(u32 p1, u32 p2, u32 p3)
{
	/* 3:3:2 weighted average (divisor 8), computed independently on the
	 * 0xFF00FF bits and on the 0x00FF00 bits. */
	const u32 sum_a = INTERP_32_MASK_1(p1)*3 + INTERP_32_MASK_1(p2)*3 + INTERP_32_MASK_1(p3)*2;
	const u32 sum_b = INTERP_32_MASK_2(p1)*3 + INTERP_32_MASK_2(p2)*3 + INTERP_32_MASK_2(p3)*2;
	const u32 avg_a = sum_a / 8;
	const u32 avg_b = sum_b / 8;
	return INTERP_32_MASK_1(avg_a) | INTERP_32_MASK_2(avg_b);
}
static inline u32 interp_32_211(u32 p1, u32 p2, u32 p3)
{
	/* 2:1:1 weighted average (divisor 4), computed independently on the
	 * 0xFF00FF bits and on the 0x00FF00 bits. */
	const u32 sum_a = INTERP_32_MASK_1(p1)*2 + INTERP_32_MASK_1(p2) + INTERP_32_MASK_1(p3);
	const u32 sum_b = INTERP_32_MASK_2(p1)*2 + INTERP_32_MASK_2(p2) + INTERP_32_MASK_2(p3);
	const u32 avg_a = sum_a / 4;
	const u32 avg_b = sum_b / 4;
	return INTERP_32_MASK_1(avg_a) | INTERP_32_MASK_2(avg_b);
}
static inline u32 interp_32_611(u32 p1, u32 p2, u32 p3)
{
	/* 6:1:1 weighted average (divisor 8), computed independently on the
	 * 0xFF00FF bits and on the 0x00FF00 bits. */
	const u32 sum_a = INTERP_32_MASK_1(p1)*6 + INTERP_32_MASK_1(p2) + INTERP_32_MASK_1(p3);
	const u32 sum_b = INTERP_32_MASK_2(p1)*6 + INTERP_32_MASK_2(p2) + INTERP_32_MASK_2(p3);
	const u32 avg_a = sum_a / 8;
	const u32 avg_b = sum_b / 8;
	return INTERP_32_MASK_1(avg_a) | INTERP_32_MASK_2(avg_b);
}
static inline u32 interp_32_71(u32 p1, u32 p2)
{
	/* 7:1 weighted average (divisor 8), computed independently on the
	 * 0xFF00FF bits and on the 0x00FF00 bits. */
	const u32 sum_a = INTERP_32_MASK_1(p1)*7 + INTERP_32_MASK_1(p2);
	const u32 sum_b = INTERP_32_MASK_2(p1)*7 + INTERP_32_MASK_2(p2);
	const u32 avg_a = sum_a / 8;
	const u32 avg_b = sum_b / 8;
	return INTERP_32_MASK_1(avg_a) | INTERP_32_MASK_2(avg_b);
}
static inline u32 interp_32_772(u32 p1, u32 p2, u32 p3)
{
	/* 7:7:2 weighted average (divisor 16), computed independently on the
	 * 0xFF00FF bits and on the 0x00FF00 bits. */
	const u32 sum_a = (INTERP_32_MASK_1(p1) + INTERP_32_MASK_1(p2))*7 + INTERP_32_MASK_1(p3)*2;
	const u32 sum_b = (INTERP_32_MASK_2(p1) + INTERP_32_MASK_2(p2))*7 + INTERP_32_MASK_2(p3)*2;
	const u32 avg_a = sum_a / 16;
	const u32 avg_b = sum_b / 16;
	return INTERP_32_MASK_1(avg_a) | INTERP_32_MASK_2(avg_b);
}
static inline u32 interp_32_11(u32 p1, u32 p2)
{
	/* 1:1 average (divisor 2), computed independently on the 0xFF00FF bits
	 * and on the 0x00FF00 bits. */
	const u32 sum_a = INTERP_32_MASK_1(p1) + INTERP_32_MASK_1(p2);
	const u32 sum_b = INTERP_32_MASK_2(p1) + INTERP_32_MASK_2(p2);
	const u32 avg_a = sum_a / 2;
	const u32 avg_b = sum_b / 2;
	return INTERP_32_MASK_1(avg_a) | INTERP_32_MASK_2(avg_b);
}
static inline u32 interp_32_31(u32 p1, u32 p2)
{
	/* 3:1 weighted average (divisor 4), computed independently on the
	 * 0xFF00FF bits and on the 0x00FF00 bits. */
	const u32 sum_a = INTERP_32_MASK_1(p1)*3 + INTERP_32_MASK_1(p2);
	const u32 sum_b = INTERP_32_MASK_2(p1)*3 + INTERP_32_MASK_2(p2);
	const u32 avg_a = sum_a / 4;
	const u32 avg_b = sum_b / 4;
	return INTERP_32_MASK_1(avg_a) | INTERP_32_MASK_2(avg_b);
}
static inline u32 interp_32_1411(u32 p1, u32 p2, u32 p3)
{
	/* 14:1:1 weighted average (divisor 16), computed independently on the
	 * 0xFF00FF bits and on the 0x00FF00 bits. */
	const u32 sum_a = INTERP_32_MASK_1(p1)*14 + INTERP_32_MASK_1(p2) + INTERP_32_MASK_1(p3);
	const u32 sum_b = INTERP_32_MASK_2(p1)*14 + INTERP_32_MASK_2(p2) + INTERP_32_MASK_2(p3);
	const u32 avg_a = sum_a / 16;
	const u32 avg_b = sum_b / 16;
	return INTERP_32_MASK_1(avg_a) | INTERP_32_MASK_2(avg_b);
}
static inline u32 interp_32_431(u32 p1, u32 p2, u32 p3)
{
	/* 4:3:1 weighted average (divisor 8), computed independently on the
	 * 0xFF00FF bits and on the 0x00FF00 bits. */
	const u32 sum_a = INTERP_32_MASK_1(p1)*4 + INTERP_32_MASK_1(p2)*3 + INTERP_32_MASK_1(p3);
	const u32 sum_b = INTERP_32_MASK_2(p1)*4 + INTERP_32_MASK_2(p2)*3 + INTERP_32_MASK_2(p3);
	const u32 avg_a = sum_a / 8;
	const u32 avg_b = sum_b / 8;
	return INTERP_32_MASK_1(avg_a) | INTERP_32_MASK_2(avg_b);
}
static inline u32 interp_32_53(u32 p1, u32 p2)
{
	/* 5:3 weighted average (divisor 8), computed independently on the
	 * 0xFF00FF bits and on the 0x00FF00 bits. */
	const u32 sum_a = INTERP_32_MASK_1(p1)*5 + INTERP_32_MASK_1(p2)*3;
	const u32 sum_b = INTERP_32_MASK_2(p1)*5 + INTERP_32_MASK_2(p2)*3;
	const u32 avg_a = sum_a / 8;
	const u32 avg_b = sum_b / 8;
	return INTERP_32_MASK_1(avg_a) | INTERP_32_MASK_2(avg_b);
}
static inline u32 interp_32_151(u32 p1, u32 p2)
{
	/* 15:1 weighted average (divisor 16), computed independently on the
	 * 0xFF00FF bits and on the 0x00FF00 bits. */
	const u32 sum_a = INTERP_32_MASK_1(p1)*15 + INTERP_32_MASK_1(p2);
	const u32 sum_b = INTERP_32_MASK_2(p1)*15 + INTERP_32_MASK_2(p2);
	const u32 avg_a = sum_a / 16;
	const u32 avg_b = sum_b / 16;
	return INTERP_32_MASK_1(avg_a) | INTERP_32_MASK_2(avg_b);
}
static inline u32 interp_32_97(u32 p1, u32 p2)
{
	/* 9:7 weighted average (divisor 16), computed independently on the
	 * 0xFF00FF bits and on the 0x00FF00 bits. */
	const u32 sum_a = INTERP_32_MASK_1(p1)*9 + INTERP_32_MASK_1(p2)*7;
	const u32 sum_b = INTERP_32_MASK_2(p1)*9 + INTERP_32_MASK_2(p2)*7;
	const u32 avg_a = sum_a / 16;
	const u32 avg_b = sum_b / 16;
	return INTERP_32_MASK_1(avg_a) | INTERP_32_MASK_2(avg_b);
}
/***************************************************************************/
/* diff */
/* Thresholds used by interp_16_diff() and interp_32_diff(): two pixels are
 * reported as different when the magnitude of their y, u or v difference
 * exceeds the matching limit. */
#define INTERP_Y_LIMIT (0x30*4)
#define INTERP_U_LIMIT (0x07*4)
#define INTERP_V_LIMIT (0x06*8)
/* Compares two 16-bit pixels.
 * Returns 0 when they are identical or when their y, u and v differences all
 * lie within INTERP_Y_LIMIT / INTERP_U_LIMIT / INTERP_V_LIMIT, and 1
 * otherwise. Components are extracted with 5-6-5 masks when
 * interp_bits_per_pixel is 16 and with 5-5-5 masks for any other value. */
static int interp_16_diff(u16 p1, u16 p2)
{
	int r, g, b;
	int y, u, v;

	if (p1 == p2)
		return 0;

	/* The blue difference may be negative; it is scaled with a multiply
	 * because left-shifting a negative int is undefined behaviour. The right
	 * shifts on g and r are kept as they were. */
	if (interp_bits_per_pixel == 16) {
		b = (int)((p1 & 0x1F) - (p2 & 0x1F)) * 8;
		g = (int)((p1 & 0x7E0) - (p2 & 0x7E0)) >> 3;
		r = (int)((p1 & 0xF800) - (p2 & 0xF800)) >> 8;
	} else {
		b = (int)((p1 & 0x1F) - (p2 & 0x1F)) * 8;
		g = (int)((p1 & 0x3E0) - (p2 & 0x3E0)) >> 2;
		r = (int)((p1 & 0x7C00) - (p2 & 0x7C00)) >> 7;
	}

	y = r + g + b;
	u = r - b;
	v = -r + 2*g - b;

	if (y < -INTERP_Y_LIMIT || y > INTERP_Y_LIMIT)
		return 1;
	if (u < -INTERP_U_LIMIT || u > INTERP_U_LIMIT)
		return 1;
	if (v < -INTERP_V_LIMIT || v > INTERP_V_LIMIT)
		return 1;

	return 0;
}
/* Compares two 32-bit pixels (components in bits 0-7, 8-15 and 16-23).
 * Returns 0 when the top five bits of every component match, or when the
 * y, u and v differences all lie within their INTERP_*_LIMIT; 1 otherwise. */
static int interp_32_diff(u32 p1, u32 p2)
{
	/* same top five bits in all three components: treated as equal */
	if (((p1 ^ p2) & 0xF8F8F8) == 0)
		return 0;

	const int db = (int)((p1 & 0xFF) - (p2 & 0xFF));
	const int dg = (int)((p1 & 0xFF00) - (p2 & 0xFF00)) >> 8;
	const int dr = (int)((p1 & 0xFF0000) - (p2 & 0xFF0000)) >> 16;

	const int dy = dr + dg + db;
	const int du = dr - db;
	const int dv = -dr + 2*dg - db;

	const int y_out = dy < -INTERP_Y_LIMIT || dy > INTERP_Y_LIMIT;
	const int u_out = du < -INTERP_U_LIMIT || du > INTERP_U_LIMIT;
	const int v_out = dv < -INTERP_V_LIMIT || dv > INTERP_V_LIMIT;

	return (y_out || u_out || v_out) ? 1 : 0;
}
/* Threshold compared against 11*|y| + 8*|u| + 6*|v| in interp_16_diff2() and
 * interp_32_diff2(). */
#define INTERP_LIMIT2 (96000)
/* Small arithmetic helpers. Each may evaluate an argument more than once, so
 * do not pass expressions with side effects. */
#define ABS(x) ((x) < 0 ? -(x) : (x))
#define MAX(x,y) ((x) > (y) ? (x) : (y))
#define MIN(x,y) ((x) < (y) ? (x) : (y))
/* Compares two 16-bit pixels using a weighted sum of y/u/v differences.
 * Returns 0 when the pixels agree under the 0xF79E mask, or when
 * 11*|y| + 8*|u| + 6*|v| does not exceed INTERP_LIMIT2; 1 otherwise.
 * Components are extracted with 5-6-5 masks when interp_bits_per_pixel is 16
 * and with 5-5-5 masks for any other value. */
static int interp_16_diff2(u16 p1, u16 p2)
{
	int r, g, b;
	int y, u, v;

	if ((p1 & 0xF79E) == (p2 & 0xF79E))
		return 0;

	/* The blue difference may be negative; it is scaled with a multiply
	 * because left-shifting a negative int is undefined behaviour. The right
	 * shifts on g and r are kept as they were. */
	if (interp_bits_per_pixel == 16) {
		b = (int)((p1 & 0x1F) - (p2 & 0x1F)) * 8;
		g = (int)((p1 & 0x7E0) - (p2 & 0x7E0)) >> 3;
		r = (int)((p1 & 0xF800) - (p2 & 0xF800)) >> 8;
	} else {
		b = (int)((p1 & 0x1F) - (p2 & 0x1F)) * 8;
		g = (int)((p1 & 0x3E0) - (p2 & 0x3E0)) >> 2;
		r = (int)((p1 & 0x7C00) - (p2 & 0x7C00)) >> 7;
	}

	// yb = 30*r + 58*g + 12*b;
	y = 33*r + 36*g + 31*b;
	u = -14*r - 29*g + 44*b;
	v = 62*r - 51*g - 10*b;

	if (11*ABS(y) + 8*ABS(u) + 6*ABS(v) > INTERP_LIMIT2)
		return 1;

	return 0;
}
/* Compares two 32-bit pixels using a weighted sum of y/u/v differences.
 * Returns 0 when the top four bits of every component match, or when
 * 11*|y| + 8*|u| + 6*|v| does not exceed INTERP_LIMIT2; 1 otherwise. */
static int interp_32_diff2(u32 p1, u32 p2)
{
	/* same top four bits in all three components: treated as equal */
	if (((p1 ^ p2) & 0xF0F0F0) == 0)
		return 0;

	/* component differences, taken on the top five bits of each component */
	const int db = (int)((p1 & 0xF8) - (p2 & 0xF8));
	const int dg = (int)((p1 & 0xF800) - (p2 & 0xF800)) >> 8;
	const int dr = (int)((p1 & 0xF80000) - (p2 & 0xF80000)) >> 16;

	// y = 30*r + 58*g + 12*b;
	const int dy = 33*dr + 36*dg + 31*db;
	const int du = -14*dr - 29*dg + 44*db;
	const int dv = 62*dr - 51*dg - 10*db;

	const int score = 11*ABS(dy) + 8*ABS(du) + 6*ABS(dv);
	return (score > INTERP_LIMIT2) ? 1 : 0;
}
/* Records the pixel depth in interp_bits_per_pixel and loads interp_mask with
 * the matching pair of masks for 15, 16 or 32 bits per pixel. For any other
 * depth the masks are left as they were. */
static void interp_set(unsigned bits_per_pixel)
{
	interp_bits_per_pixel = bits_per_pixel;

	if (bits_per_pixel == 15) {
		interp_mask[0] = 0x7C1F;
		interp_mask[1] = 0x03E0;
	} else if (bits_per_pixel == 16) {
		interp_mask[0] = 0xF81F;
		interp_mask[1] = 0x07E0;
	} else if (bits_per_pixel == 32) {
		interp_mask[0] = 0xFF00FF;
		interp_mask[1] = 0x00FF00;
	}
}
#endif

View File

@ -42,7 +42,7 @@ FORCEINLINE void ScanLine16_2( uint16 *lpDst, uint16 *lpSrc, unsigned int Width)
}
}
FORCEINLINE void DoubleLine16( uint16 *lpDst, uint16 *lpSrc, unsigned int Width){
FORCEINLINE void DoubleLine32( uint32 *lpDst, uint32 *lpSrc, unsigned int Width){
while(Width--){
*lpDst++ = *lpSrc;
*lpDst++ = *lpSrc++;
@ -69,17 +69,17 @@ void RenderScanline( SSurface Src, SSurface Dst)
void RenderNearest2X (SSurface Src, SSurface Dst)
{
uint16 *lpSrc;
uint32 *lpSrc;
unsigned int H;
const uint32 srcHeight = Src.Height;
const unsigned int srcPitch = Src.Pitch >> 1;
lpSrc = reinterpret_cast<uint16 *>(Src.Surface);
lpSrc = reinterpret_cast<uint32 *>(Src.Surface);
const unsigned int dstPitch = Dst.Pitch >> 1;
uint16 *lpDst = (uint16*)Dst.Surface;
uint32 *lpDst = (uint32*)Dst.Surface;
for (H = 0; H < srcHeight; H++, lpSrc += srcPitch)
DoubleLine16 (lpDst, lpSrc, Src.Width), lpDst += dstPitch,
DoubleLine16 (lpDst, lpSrc, Src.Width), lpDst += dstPitch;
DoubleLine32 (lpDst, lpSrc, Src.Width), lpDst += dstPitch,
DoubleLine32 (lpDst, lpSrc, Src.Width), lpDst += dstPitch;
}

View File

@ -720,7 +720,7 @@ template<typename T, int bpp> static void doRotate(void* dst)
{
u8* buffer = (u8*)dst;
int size = video.size();
u32* src = video.filteredbuffer32bpp;
u32* src = (u32*)video.finalBuffer();
switch(video.rotation)
{
case 0:
@ -881,24 +881,24 @@ static void DoDisplay(bool firstTime)
aggDraw.hud->attach(video.srcBuffer, 256, 384, 512);
DoDisplay_DrawHud();
}
//apply user's filter
video.filter();
}
//convert pixel format to 32bpp for compositing
//why do we do this over and over? well, we are compositing to
//filteredbuffer32bpp, and it needs to get refreshed each frame..
const int size = video.size();
u16* src = video.finalBuffer();
u16* src = (u16*)video.srcBuffer;
for(int i=0;i<size;i++)
video.filteredbuffer32bpp[i] = RGB15TO24_REVERSE(src[i]);
video.buffer[i] = RGB15TO24_REVERSE(src[i]);
//apply user's filter
video.filter();
if(!CommonSettings.single_core)
{
//draw and composite the OSD (but not if we are drawing osd straight to screen)
DoDisplay_DrawHud();
T_AGG_RGBA target((u8*)video.filteredbuffer32bpp, video.width,video.height,video.width*4);
T_AGG_RGBA target((u8*)video.finalBuffer(), video.width,video.height,video.width*4);
target.transformImage(aggDraw.hud->image<T_AGG_PF_RGBA>(), 0,0,video.width-1,video.height-1);
aggDraw.hud->clear();
}

View File

@ -13,8 +13,8 @@ public:
int currentfilter;
u8* srcBuffer;
CACHE_ALIGN u8 filteredbuffer[4*256*192*4];
CACHE_ALIGN u32 filteredbuffer32bpp[4*256*192*2];
CACHE_ALIGN u32 buffer[4*256*192*2];
CACHE_ALIGN u32 filteredbuffer[4*256*192*2];
enum {
NONE,
@ -56,7 +56,7 @@ public:
u16* finalBuffer() const
{
if(currentfilter == NONE)
return (u16*)srcBuffer;
return (u16*)buffer;
else return (u16*)filteredbuffer;
}
@ -65,7 +65,7 @@ public:
src.Height = 384;
src.Width = 256;
src.Pitch = 512;
src.Surface = (u8*)srcBuffer;
src.Surface = (u8*)buffer;
dst.Height = 768;
dst.Width = 512;