Video Filters:

- Update XBRZ filters to v1.1 (thanks Zenju!) Addresses feature request #160 - https://sourceforge.net/p/desmume/feature-requests/160/
This commit is contained in:
rogerman 2014-11-19 23:00:19 +00:00
parent 16f9140960
commit 588744d323
2 changed files with 173 additions and 110 deletions

View File

@ -13,6 +13,8 @@
// * do so, delete this exception statement from your version. *
// ****************************************************************************
// 2014-11-18 (rogerman): Update to XBRZ 1.1.
//
// 2014-02-06 (rogerman): Modified for use in DeSmuME by removing C++11 code.
// Also add render functions compatible with filter.h.
@ -32,40 +34,37 @@ inline unsigned char getRed (uint32_t val) { return getByte<2>(val); }
inline unsigned char getGreen(uint32_t val) { return getByte<1>(val); }
inline unsigned char getBlue (uint32_t val) { return getByte<0>(val); }
template <class T> inline
T abs(T value)
{
//static_assert(std::is_signed<T>::value, "");
//static_assert(std::is_signed<T>::value, "abs() requires signed types");
return value < 0 ? -value : value;
}
static const uint32_t alphaMask = 0xFF000000;
static const uint32_t redMask = 0x00FF0000;
static const uint32_t greenMask = 0x0000FF00;
static const uint32_t blueMask = 0x000000FF;
template <unsigned int N, unsigned int M> inline
void alphaBlend(uint32_t& dst, uint32_t col) //blend color over destination with opacity N / M
template <unsigned int M, unsigned int N> inline
void alphaBlend(uint32_t& dst, uint32_t col) //blend color over destination with opacity M / N
{
//static_assert(N < 256, "possible overflow of (col & redMask) * N");
//static_assert(M < 256, "possible overflow of (col & redMask ) * N + (dst & redMask ) * (M - N)");
//static_assert(0 < N && N < M, "");
//dst = (redMask & ((col & redMask ) * N + (dst & redMask ) * (M - N)) / M) | //this works because 8 upper bits are free
// (greenMask & ((col & greenMask) * N + (dst & greenMask) * (M - N)) / M) |
// (blueMask & ((col & blueMask ) * N + (dst & blueMask ) * (M - N)) / M);
//static_assert(0 < M && M < N && N <= 256, "possible overflow of (col & byte1Mask) * M + (dst & byte1Mask) * (N - M)");
// 2014-02-06 (rogerman): Modified to take the alpha channel into account.
dst = ((((col >> 24) * N + (dst >> 24) * (M - N) ) / M) << 24) |
(redMask & (((col & redMask ) * N + (dst & redMask ) * (M - N)) / M)) |
(greenMask & (((col & greenMask) * N + (dst & greenMask) * (M - N)) / M)) |
(blueMask & (((col & blueMask ) * N + (dst & blueMask ) * (M - N)) / M));
const uint32_t byte1Mask = 0x000000ff;
const uint32_t byte2Mask = 0x0000ff00;
const uint32_t byte3Mask = 0x00ff0000;
const uint32_t byte4Mask = 0xff000000;
dst = (byte1Mask & (((col & byte1Mask) * M + (dst & byte1Mask) * (N - M)) / N)) | //
(byte2Mask & (((col & byte2Mask) * M + (dst & byte2Mask) * (N - M)) / N)) | //this works because next higher 8 bits are free
(byte3Mask & (((col & byte3Mask) * M + (dst & byte3Mask) * (N - M)) / N)) | //
(byte4Mask & (((((col & byte4Mask) >> 8) * M + ((dst & byte4Mask) >> 8) * (N - M)) / N) << 8)); //next 8 bits are not free, so shift
//the last row operating on a potential alpha channel costs only ~1% perf => negligible!
}
//inline
//double fastSqrt(double n)
//{
// __asm //speeds up xBRZ by about 9% compared to std::sqrt
// __asm //speeds up xBRZ by about 9% compared to std::sqrt which internally uses the same assembler instructions but adds some "fluff"
// {
// fld n
// fsqrt
@ -74,17 +73,17 @@ void alphaBlend(uint32_t& dst, uint32_t col) //blend color over destination with
//
inline
uint32_t alphaBlend2(uint32_t pix1, uint32_t pix2, double alpha)
{
return (redMask & static_cast<uint32_t>((pix1 & redMask ) * alpha + (pix2 & redMask ) * (1 - alpha))) |
(greenMask & static_cast<uint32_t>((pix1 & greenMask) * alpha + (pix2 & greenMask) * (1 - alpha))) |
(blueMask & static_cast<uint32_t>((pix1 & blueMask ) * alpha + (pix2 & blueMask ) * (1 - alpha)));
}
//inline
//uint32_t alphaBlend2(uint32_t pix1, uint32_t pix2, double alpha)
//{
// return (redMask & static_cast<uint32_t>((pix1 & redMask ) * alpha + (pix2 & redMask ) * (1 - alpha))) |
// (greenMask & static_cast<uint32_t>((pix1 & greenMask) * alpha + (pix2 & greenMask) * (1 - alpha))) |
// (blueMask & static_cast<uint32_t>((pix1 & blueMask ) * alpha + (pix2 & blueMask ) * (1 - alpha)));
//}
uint32_t* byteAdvance( uint32_t* ptr, int bytes) { return reinterpret_cast< uint32_t*>(reinterpret_cast< char*>(ptr) + bytes); }
const uint32_t* byteAdvance(const uint32_t* ptr, int bytes) { return reinterpret_cast<const uint32_t*>(reinterpret_cast<const char*>(ptr) + bytes); }
uint32_t* byteAdvance( uint32_t* ptr, int bytes) { return reinterpret_cast< uint32_t*>(reinterpret_cast< char*>(ptr) + bytes); }
const uint32_t* byteAdvance(const uint32_t* ptr, int bytes) { return reinterpret_cast<const uint32_t*>(reinterpret_cast<const char*>(ptr) + bytes); }
//fill block with the given color
@ -202,7 +201,7 @@ void rgbtoLuv(uint32_t c, double& L, double& u, double& v)
if ( var_Y > 0.008856 ) var_Y = std::pow(var_Y , 1.0/3 );
else var_Y = 7.787 * var_Y + 16.0 / 116;
const double ref_X = 95.047; //Observer= 2°, Illuminant= D65
const double ref_X = 95.047; //Observer= 2°, Illuminant= D65
const double ref_Y = 100.000;
const double ref_Z = 108.883;
@ -238,7 +237,7 @@ void rgbtoLab(uint32_t c, unsigned char& L, signed char& A, signed char& B)
double z = 0.0193339 * r + 0.1191920 * g + 0.9503041 * b;
//------XYZ to Lab------
const double refX = 95.047; //
const double refY = 100.000; //Observer= 2°, Illuminant= D65
const double refY = 100.000; //Observer= 2°, Illuminant= D65
const double refZ = 108.883; //
double var_X = x / refX;
double var_Y = y / refY;
@ -393,8 +392,10 @@ double distYCbCr(uint32_t pix1, uint32_t pix2, double lumaWeight)
const int g_diff = static_cast<int>(getGreen(pix1)) - getGreen(pix2); //
const int b_diff = static_cast<int>(getBlue (pix1)) - getBlue (pix2); //substraction for int is noticeable faster than for double!
const double k_b = 0.0722; //ITU-R BT.709 conversion
const double k_r = 0.2126; //
//const double k_b = 0.0722; //ITU-R BT.709 conversion
//const double k_r = 0.2126; //
const double k_b = 0.0593; //ITU-R BT.2020 conversion
const double k_r = 0.2627; //
const double k_g = 1 - k_b - k_r;
const double scale_b = 0.5 / (1 - k_b);
@ -405,8 +406,24 @@ double distYCbCr(uint32_t pix1, uint32_t pix2, double lumaWeight)
const double c_r = scale_r * (r_diff - y);
//we skip division by 255 to have similar range like other distance functions
//return std::sqrt(square(lumaWeight * y) + square(c_b) + square(c_r));
return std::sqrt(square(lumaWeight * y) + square(c_b) + square(c_r)+ square(static_cast<int>(getAlpha(pix1)) - getAlpha(pix2)));
return std::sqrt(square(lumaWeight * y) + square(c_b) + square(c_r));
}
inline
double distYCbCrAlpha(uint32_t pix1, uint32_t pix2, double lumaWeight)
{
const double a1 = getAlpha(pix1) / 255.0 ;
const double a2 = getAlpha(pix2) / 255.0 ;
/*
Requirements for a color distance handling alpha channel: with a1, a2 in [0, 1]
1. if a1 = a2, distance should be: a1 * distYCbCr()
2. if a1 = 0, distance should be: a2 * distYCbCr(black, white) = a2 * 255
3. if a1 = 1, distance should be: 255 * (1 - a2) + a2 * distYCbCr()
*/
return std::min(a1, a2) * distYCbCr(pix1, pix2, lumaWeight) + 255 * abs(a1 - a2);
}
@ -437,30 +454,14 @@ double distYUV(uint32_t pix1, uint32_t pix2, double luminanceWeight)
#ifndef NDEBUG
const double eps = 0.5;
#endif
assert(std::abs(y) <= 255 + eps);
assert(std::abs(u) <= 255 * 2 * u_max + eps);
assert(std::abs(v) <= 255 * 2 * v_max + eps);
assert(abs(y) <= 255 + eps);
assert(abs(u) <= 255 * 2 * u_max + eps);
assert(abs(v) <= 255 * 2 * v_max + eps);
return std::sqrt(square(luminanceWeight * y) + square(u) + square(v));
}
inline
double colorDist(uint32_t pix1, uint32_t pix2, double luminanceWeight)
{
if (pix1 == pix2) //about 8% perf boost
return 0;
//return distHSL(pix1, pix2, luminanceWeight);
//return distRGB(pix1, pix2);
//return distLAB(pix1, pix2);
//return distNonLinearRGB(pix1, pix2);
//return distYUV(pix1, pix2, luminanceWeight);
return distYCbCr(pix1, pix2, luminanceWeight);
}
enum BlendType
{
BLEND_NONE = 0,
@ -486,10 +487,11 @@ struct Kernel_4x4 //kernel for preprocessing step
/**/m, n, o, p;
};
template <class ColorDistance>
FORCE_INLINE
double ppCornerDist(uint32_t col1, uint32_t col2, const xbrz::ScalerCfg& cfg)
double ppCornerDist(uint32_t col1, uint32_t col2, const double lumWeight)
{
return colorDist(col1, col2, cfg.luminanceWeight_);
return ColorDistance::dist(col1, col2, lumWeight);
}
/*
@ -504,6 +506,7 @@ input kernel area naming convention:
| M | N | O | P |
-----------------
*/
template <class ColorDistance>
FORCE_INLINE //detect blend direction
BlendResult preProcessCorners(const Kernel_4x4& ker, const xbrz::ScalerCfg& cfg) //result: F, G, J, K corners of "GradientType"
{
@ -515,11 +518,11 @@ BlendResult preProcessCorners(const Kernel_4x4& ker, const xbrz::ScalerCfg& cfg)
ker.g == ker.k))
return result;
//auto dist = [&](uint32_t col1, uint32_t col2) { return colorDist(col1, col2, cfg.luminanceWeight_); };
//auto dist = [&](uint32_t pix1, uint32_t pix2) { return ColorDistance::dist(pix1, pix2, cfg.luminanceWeight_); };
const int weight = 4;
double jg = ppCornerDist(ker.i, ker.f, cfg) + ppCornerDist(ker.f, ker.c, cfg) + ppCornerDist(ker.n, ker.k, cfg) + ppCornerDist(ker.k, ker.h, cfg) + weight * ppCornerDist(ker.j, ker.g, cfg);
double fk = ppCornerDist(ker.e, ker.j, cfg) + ppCornerDist(ker.j, ker.o, cfg) + ppCornerDist(ker.b, ker.g, cfg) + ppCornerDist(ker.g, ker.l, cfg) + weight * ppCornerDist(ker.f, ker.k, cfg);
double jg = ppCornerDist<ColorDistance>(ker.i, ker.f, cfg.luminanceWeight_) + ppCornerDist<ColorDistance>(ker.f, ker.c, cfg.luminanceWeight_) + ppCornerDist<ColorDistance>(ker.n, ker.k, cfg.luminanceWeight_) + ppCornerDist<ColorDistance>(ker.k, ker.h, cfg.luminanceWeight_) + weight * ppCornerDist<ColorDistance>(ker.j, ker.g, cfg.luminanceWeight_);
double fk = ppCornerDist<ColorDistance>(ker.e, ker.j, cfg.luminanceWeight_) + ppCornerDist<ColorDistance>(ker.j, ker.o, cfg.luminanceWeight_) + ppCornerDist<ColorDistance>(ker.b, ker.g, cfg.luminanceWeight_) + ppCornerDist<ColorDistance>(ker.g, ker.l, cfg.luminanceWeight_) + weight * ppCornerDist<ColorDistance>(ker.f, ker.k, cfg.luminanceWeight_);
if (jg < fk) //test sample: 70% of values max(jg, fk) / min(jg, fk) are between 1.1 and 3.7 with median being 1.8
{
@ -602,19 +605,21 @@ int debugPixelY = 84;
bool breakIntoDebugger = false;
#endif
template <class ColorDistance>
FORCE_INLINE
double sPixEQ(uint32_t col1, uint32_t col2, const xbrz::ScalerCfg& cfg)
{
return colorDist(col1, col2, cfg.luminanceWeight_) < cfg.equalColorTolerance_;
return ColorDistance::dist(col1, col2, cfg.luminanceWeight_) < cfg.equalColorTolerance_;
}
template <class ColorDistance>
FORCE_INLINE
double sPixDist(uint32_t col1, uint32_t col2, const xbrz::ScalerCfg& cfg)
double sPixDist(uint32_t col1, uint32_t col2, const double lumWeight)
{
return colorDist(col1, col2, cfg.luminanceWeight_);
return ColorDistance::dist(col1, col2, lumWeight);
}
template <RotationDegree rotDeg>
template <RotationDegree rotDeg, class ColorDistance>
FORCE_INLINE
const bool sPixDoLineBlend(const Kernel_3x3& ker, const char blend, const xbrz::ScalerCfg& cfg)
{
@ -632,15 +637,17 @@ const bool sPixDoLineBlend(const Kernel_3x3& ker, const char blend, const xbrz::
return true;
//make sure there is no second blending in an adjacent rotation for this pixel: handles insular pixels, mario eyes
if (getTopR(blend) != BLEND_NONE && !sPixEQ(e, g, cfg)) //but support double-blending for 90° corners
if (getTopR(blend) != BLEND_NONE && !sPixEQ<ColorDistance>(e, g, cfg)) //but support double-blending for 90° corners
return false;
if (getBottomL(blend) != BLEND_NONE && !sPixEQ(e, c, cfg))
if (getBottomL(blend) != BLEND_NONE && !sPixEQ<ColorDistance>(e, c, cfg))
return false;
//no full blending for L-shapes; blend corner only (handles "mario mushroom eyes")
if (sPixEQ(g, h, cfg) && sPixEQ(h , i, cfg) && sPixEQ(i, f, cfg) && sPixEQ(f, c, cfg) && !sPixEQ(e, i, cfg))
if (sPixEQ<ColorDistance>(g, h, cfg) && sPixEQ<ColorDistance>(h , i, cfg) && sPixEQ<ColorDistance>(i, f, cfg) && sPixEQ<ColorDistance>(f, c, cfg) && !sPixEQ<ColorDistance>(e, i, cfg))
return false;
return true;
#undef a
#undef b
#undef c
@ -664,7 +671,7 @@ input kernel area naming convention:
| G | H | I |
-------------
*/
template <class Scaler, RotationDegree rotDeg>
template <class Scaler, class ColorDistance, RotationDegree rotDeg>
FORCE_INLINE //perf: quite worth it!
void scalePixel(const Kernel_3x3& ker,
uint32_t* target, int trgWidth,
@ -690,8 +697,8 @@ void scalePixel(const Kernel_3x3& ker,
if (getBottomR(blend) >= BLEND_NORMAL)
{/*
auto eq = [&](uint32_t col1, uint32_t col2) { return colorDist(col1, col2, cfg.luminanceWeight_) < cfg.equalColorTolerance_; };
auto dist = [&](uint32_t col1, uint32_t col2) { return colorDist(col1, col2, cfg.luminanceWeight_); };
auto eq = [&](uint32_t pix1, uint32_t pix2) { return ColorDistance::dist(pix1, pix2, cfg.luminanceWeight_) < cfg.equalColorTolerance_; };
auto dist = [&](uint32_t pix1, uint32_t pix2) { return ColorDistance::dist(pix1, pix2, cfg.luminanceWeight_); };
const bool doLineBlend = [&]() -> bool
{
@ -699,7 +706,7 @@ void scalePixel(const Kernel_3x3& ker,
return true;
//make sure there is no second blending in an adjacent rotation for this pixel: handles insular pixels, mario eyes
if (getTopR(blend) != BLEND_NONE && !eq(e, g)) //but support double-blending for 90° corners
if (getTopR(blend) != BLEND_NONE && !eq(e, g)) //but support double-blending for 90° corners
return false;
if (getBottomL(blend) != BLEND_NONE && !eq(e, c))
return false;
@ -711,14 +718,14 @@ void scalePixel(const Kernel_3x3& ker,
return true;
}();
*/
const uint32_t px = sPixDist(e, f, cfg) <= sPixDist(e, h, cfg) ? f : h; //choose most similar color
const uint32_t px = sPixDist<ColorDistance>(e, f, cfg.luminanceWeight_) <= sPixDist<ColorDistance>(e, h, cfg.luminanceWeight_) ? f : h; //choose most similar color
OutputMatrix<Scaler::scale, rotDeg> out(target, trgWidth);
if (sPixDoLineBlend<rotDeg>(ker, blend, cfg))
if (sPixDoLineBlend<rotDeg, ColorDistance>(ker, blend, cfg))
{
const double fg = sPixDist(f, g, cfg); //test sample: 70% of values max(fg, hc) / min(fg, hc) are between 1.1 and 3.7 with median being 1.9
const double hc = sPixDist(h, c, cfg); //
const double fg = sPixDist<ColorDistance>(f, g, cfg.luminanceWeight_); //test sample: 70% of values max(fg, hc) / min(fg, hc) are between 1.1 and 3.7 with median being 1.9
const double hc = sPixDist<ColorDistance>(h, c, cfg.luminanceWeight_); //
const bool haveShallowLine = cfg.steepDirectionThreshold * fg <= hc && e != g && d != g;
const bool haveSteepLine = cfg.steepDirectionThreshold * hc <= fg && e != c && b != c;
@ -753,7 +760,8 @@ void scalePixel(const Kernel_3x3& ker,
#undef i
}
template <class Scaler> //scaler policy: see "Scaler2x" reference implementation
template <class Scaler, class ColorDistance> //scaler policy: see "Scaler2x" reference implementation
void scaleImage(const uint32_t* src, uint32_t* trg, int srcWidth, int srcHeight, const xbrz::ScalerCfg& cfg, int yFirst, int yLast)
{
yFirst = std::max(yFirst, 0);
@ -787,7 +795,7 @@ void scaleImage(const uint32_t* src, uint32_t* trg, int srcWidth, int srcHeight,
const int x_p1 = std::min(x + 1, srcWidth - 1);
const int x_p2 = std::min(x + 2, srcWidth - 1);
Kernel_4x4 ker = {}; //perf: initialization is negligable
Kernel_4x4 ker = {}; //perf: initialization is negligible
ker.a = s_m1[x_m1]; //read sequentially from memory as far as possible
ker.b = s_m1[x];
ker.c = s_m1[x_p1];
@ -808,7 +816,7 @@ void scaleImage(const uint32_t* src, uint32_t* trg, int srcWidth, int srcHeight,
ker.o = s_p2[x_p1];
ker.p = s_p2[x_p2];
const BlendResult res = preProcessCorners(ker, cfg);
const BlendResult res = preProcessCorners<ColorDistance>(ker, cfg);
/*
preprocessing blend result:
---------
@ -849,7 +857,7 @@ void scaleImage(const uint32_t* src, uint32_t* trg, int srcWidth, int srcHeight,
//evaluate the four corners on bottom-right of current pixel
unsigned char blend_xy = 0; //for current (x, y) position
{
Kernel_4x4 ker = {}; //perf: initialization is negligable
Kernel_4x4 ker = {}; //perf: initialization is negligible
ker.a = s_m1[x_m1]; //read sequentially from memory as far as possible
ker.b = s_m1[x];
ker.c = s_m1[x_p1];
@ -870,7 +878,7 @@ void scaleImage(const uint32_t* src, uint32_t* trg, int srcWidth, int srcHeight,
ker.o = s_p2[x_p1];
ker.p = s_p2[x_p2];
const BlendResult res = preProcessCorners(ker, cfg);
const BlendResult res = preProcessCorners<ColorDistance>(ker, cfg);
/*
preprocessing blend result:
---------
@ -898,7 +906,7 @@ void scaleImage(const uint32_t* src, uint32_t* trg, int srcWidth, int srcHeight,
//blend four corners of current pixel
if (blendingNeeded(blend_xy)) //good 20% perf-improvement
{
Kernel_3x3 ker = {}; //perf: initialization is negligable
Kernel_3x3 ker = {}; //perf: initialization is negligible
ker.a = s_m1[x_m1]; //read sequentially from memory as far as possible
ker.b = s_m1[x];
@ -912,15 +920,16 @@ void scaleImage(const uint32_t* src, uint32_t* trg, int srcWidth, int srcHeight,
ker.h = s_p1[x];
ker.i = s_p1[x_p1];
scalePixel<Scaler, ROT_0 >(ker, out, trgWidth, blend_xy, cfg);
scalePixel<Scaler, ROT_90 >(ker, out, trgWidth, blend_xy, cfg);
scalePixel<Scaler, ROT_180>(ker, out, trgWidth, blend_xy, cfg);
scalePixel<Scaler, ROT_270>(ker, out, trgWidth, blend_xy, cfg);
scalePixel<Scaler, ColorDistance, ROT_0 >(ker, out, trgWidth, blend_xy, cfg);
scalePixel<Scaler, ColorDistance, ROT_90 >(ker, out, trgWidth, blend_xy, cfg);
scalePixel<Scaler, ColorDistance, ROT_180>(ker, out, trgWidth, blend_xy, cfg);
scalePixel<Scaler, ColorDistance, ROT_270>(ker, out, trgWidth, blend_xy, cfg);
}
}
}
}
//------------------------------------------------------------------------------------
struct Scaler2x
{
@ -1010,7 +1019,7 @@ struct Scaler3x
{
//model a round corner
alphaBlend<45, 100>(out.template ref<2, 2>(), col); //exact: 0.4545939598
//alphaBlend<14, 1000>(out.template ref<2, 1>(), col); //0.01413008627 -> negligable
//alphaBlend<14, 1000>(out.template ref<2, 1>(), col); //0.01413008627 -> negligible
//alphaBlend<14, 1000>(out.template ref<1, 2>(), col); //0.01413008627
}
};
@ -1154,33 +1163,80 @@ struct Scaler5x
alphaBlend<86, 100>(out.template ref<4, 4>(), col); //exact: 0.8631434088
alphaBlend<23, 100>(out.template ref<4, 3>(), col); //0.2306749731
alphaBlend<23, 100>(out.template ref<3, 4>(), col); //0.2306749731
//alphaBlend<8, 1000>(out.template ref<4, 2>(), col); //0.008384061834 -> negligable
//alphaBlend<8, 1000>(out.template ref<4, 2>(), col); //0.008384061834 -> negligible
//alphaBlend<8, 1000>(out.template ref<2, 4>(), col); //0.008384061834
}
};
//------------------------------------------------------------------------------------
struct ColorDistanceRGB
{
static double dist(uint32_t pix1, uint32_t pix2, double luminanceWeight)
{
if (pix1 == pix2) //about 8% perf boost
return 0;
return distYCbCr(pix1, pix2, luminanceWeight);
}
};
struct ColorDistanceARGB
{
static double dist(uint32_t pix1, uint32_t pix2, double luminanceWeight)
{
if (pix1 == pix2)
return 0;
return distYCbCrAlpha(pix1, pix2, luminanceWeight);
}
};
}
void xbrz::scale(size_t factor, const uint32_t* src, uint32_t* trg, int srcWidth, int srcHeight, const xbrz::ScalerCfg& cfg, int yFirst, int yLast)
void xbrz::scale(size_t factor, const uint32_t* src, uint32_t* trg, int srcWidth, int srcHeight, ColorFormat colFmt, const xbrz::ScalerCfg& cfg, int yFirst, int yLast)
{
switch (factor)
switch (colFmt)
{
case 2:
return scaleImage<Scaler2x>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
case 3:
return scaleImage<Scaler3x>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
case 4:
return scaleImage<Scaler4x>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
case 5:
return scaleImage<Scaler5x>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
case ColorFormatARGB:
switch (factor)
{
case 2:
return scaleImage<Scaler2x, ColorDistanceARGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
case 3:
return scaleImage<Scaler3x, ColorDistanceARGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
case 4:
return scaleImage<Scaler4x, ColorDistanceARGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
case 5:
return scaleImage<Scaler5x, ColorDistanceARGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
}
case ColorFormatRGB:
switch (factor)
{
case 2:
return scaleImage<Scaler2x, ColorDistanceRGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
case 3:
return scaleImage<Scaler3x, ColorDistanceRGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
case 4:
return scaleImage<Scaler4x, ColorDistanceRGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
case 5:
return scaleImage<Scaler5x, ColorDistanceRGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
}
}
assert(false);
}
bool xbrz::equalColor(uint32_t col1, uint32_t col2, double luminanceWeight, double equalColorTolerance)
bool xbrz::equalColorTest(uint32_t col1, uint32_t col2, ColorFormat colFmt, double luminanceWeight, double equalColorTolerance)
{
return colorDist(col1, col2, luminanceWeight) < equalColorTolerance;
switch (colFmt)
{
case ColorFormatARGB:
return ColorDistanceARGB::dist(col1, col2, luminanceWeight) < equalColorTolerance;
case ColorFormatRGB:
return ColorDistanceRGB::dist(col1, col2, luminanceWeight) < equalColorTolerance;
}
assert(false);
return false;
}
@ -1257,20 +1313,20 @@ void xbrz::nearestNeighborScale(const uint32_t* src, int srcWidth, int srcHeight
void Render2xBRZ(SSurface Src, SSurface Dst)
{
xbrz::scale(2, (const uint32_t *)Src.Surface, (uint32_t *)Dst.Surface, Src.Width, Src.Height);
xbrz::scale(2, (const uint32_t *)Src.Surface, (uint32_t *)Dst.Surface, Src.Width, Src.Height, xbrz::ColorFormatRGB);
}
void Render3xBRZ(SSurface Src, SSurface Dst)
{
xbrz::scale(3, (const uint32_t *)Src.Surface, (uint32_t *)Dst.Surface, Src.Width, Src.Height);
xbrz::scale(3, (const uint32_t *)Src.Surface, (uint32_t *)Dst.Surface, Src.Width, Src.Height, xbrz::ColorFormatRGB);
}
void Render4xBRZ(SSurface Src, SSurface Dst)
{
xbrz::scale(4, (const uint32_t *)Src.Surface, (uint32_t *)Dst.Surface, Src.Width, Src.Height);
xbrz::scale(4, (const uint32_t *)Src.Surface, (uint32_t *)Dst.Surface, Src.Width, Src.Height, xbrz::ColorFormatRGB);
}
void Render5xBRZ(SSurface Src, SSurface Dst)
{
xbrz::scale(5, (const uint32_t *)Src.Surface, (uint32_t *)Dst.Surface, Src.Width, Src.Height);
xbrz::scale(5, (const uint32_t *)Src.Surface, (uint32_t *)Dst.Surface, Src.Width, Src.Height, xbrz::ColorFormatRGB);
}

View File

@ -13,6 +13,8 @@
// * do so, delete this exception statement from your version. *
// ****************************************************************************
// 2014-11-18 (rogerman): Update to XBRZ 1.1.
//
// 2014-02-06 (rogerman): Modified for use in DeSmuME by removing C++11 code.
// Also integrate xbrz's config.h file into this one.
@ -38,24 +40,28 @@ namespace xbrz
using a modified approach of xBR:
http://board.byuu.org/viewtopic.php?f=10&t=2248
- new rule set preserving small image features
- support alpha channel
- support multithreading
- support 64 bit architectures
- support 64-bit architectures
- support processing image slices
*/
enum ColorFormat //from high bits -> low bits, 8 bit per channel
{
ColorFormatARGB, //including alpha channel, BGRA byte order on little-endian machines
ColorFormatRGB, //8 bit for each red, green, blue, upper 8 bits unused
};
/*
-> map source (srcWidth * srcHeight) to target (scale * width x scale * height) image, optionally processing a half-open slice of rows [yFirst, yLast) only
-> color format: ARGB (BGRA byte order), alpha channel unused
-> support for source/target pitch in bytes!
-> if your emulator changes only a few image slices during each cycle (e.g. Dosbox) then there's no need to run xBRZ on the complete image:
-> if your emulator changes only a few image slices during each cycle (e.g. DOSBox) then there's no need to run xBRZ on the complete image:
Just make sure you enlarge the source image slice by 2 rows on top and 2 on bottom (this is the additional range the xBRZ algorithm is using during analysis)
Caveat: If there are multiple changed slices, make sure they do not overlap after adding these additional rows in order to avoid a memory race condition
if you are using multiple threads for processing each enlarged slice!
in the target image data if you are using multiple threads for processing each enlarged slice!
THREAD-SAFETY: - parts of the same image may be scaled by multiple threads as long as the [yFirst, yLast) ranges do not overlap!
- there is a minor inefficiency for the first row of a slice, so avoid processing single rows only
*/
struct ScalerCfg
{
@ -75,6 +81,7 @@ struct ScalerCfg
void scale(size_t factor, //valid range: 2 - 5
const uint32_t* src, uint32_t* trg, int srcWidth, int srcHeight,
ColorFormat colFmt,
const ScalerCfg& cfg = ScalerCfg(),
int yFirst = 0, int yLast = std::numeric_limits<int>::max()); //slice of source image
@ -91,7 +98,7 @@ void nearestNeighborScale(const uint32_t* src, int srcWidth, int srcHeight, int
SliceType st, int yFirst, int yLast);
//parameter tuning
bool equalColor(uint32_t col1, uint32_t col2, double luminanceWeight, double equalColorTolerance);
bool equalColorTest(uint32_t col1, uint32_t col2, ColorFormat colFmt, double luminanceWeight, double equalColorTolerance);