snes9x/filter/sharpbilinear_flexible.cpp

164 lines
6.1 KiB
C++
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*****************************************************************************\
Snes9x - Portable Super Nintendo Entertainment System (TM) emulator.
This file is licensed under the Snes9x License.
For further information, consult the LICENSE file in the root directory.
\*****************************************************************************/
#include <cstdint>
#include <cmath>
#include <algorithm>
// Clamps x to the inclusive range [lo, hi].
// This is a function-like macro, so each argument may be expanded (and
// therefore evaluated) more than once: pass only side-effect-free, cheap
// expressions.
#define CLAMP_U8(x, lo, hi) ((x) < (lo) ? (lo) : ((x) > (hi) ? (hi) : (x)))
// ---- Gamma tables (unchanged behavior) --------------------------------------
// All three tables are filled at run time by init_gamma_tables(); until then
// they hold zeros (static storage).
// Indexed by a 5-bit channel value (red, and also reused for blue).
static uint8_t gamma_r_encode[32];
// Indexed by a 6-bit channel value (green).
static uint8_t gamma_g_encode[64];
// Indexed by an 8-bit blended value; applies the inverse exponent of the
// encode tables.
static uint8_t gamma_decode[256];
// Populates gamma_r_encode, gamma_g_encode and gamma_decode.
//
// The encode tables expand a 5-bit or 6-bit channel value to 8 bits by a left
// shift (so the maximum input maps to 248 or 252, not 255) and then raise it
// to the power 1.6 on a 0..1 scale. The decode table applies the inverse
// exponent (1/1.6) to an 8-bit value. Every entry is rounded to nearest and
// clamped to 0..255.
static void init_gamma_tables()
{
    constexpr float gamma = 1.6f;
    constexpr float inv_gamma = 1.0f / gamma;

    // Shared conversion for all three tables. The power is computed exactly
    // once per entry; routing the expression through the CLAMP_U8 macro would
    // expand it (and the std::pow call) up to three times.
    const auto remap = [](int level, float exponent) -> uint8_t {
        const float curved = std::pow(level / 255.0f, exponent) * 255.0f + 0.5f;
        const int rounded = static_cast<int>(curved);
        return static_cast<uint8_t>(std::min(std::max(rounded, 0), 255));
    };

    for (int i = 0; i < 32; ++i)
        gamma_r_encode[i] = remap(i << 3, gamma);      // 5-bit -> 8-bit scale
    for (int i = 0; i < 64; ++i)
        gamma_g_encode[i] = remap(i << 2, gamma);      // 6-bit -> 8-bit scale
    for (int i = 0; i < 256; ++i)
        gamma_decode[i] = remap(i, inv_gamma);
}
// ---- RGB565 helpers ---------------------------------------------------------
// Packs 8-bit r, g, b channel values into a single RGB565 word by keeping the
// top 5 / 6 / 5 bits of each channel (truncation, no rounding).
// Note: b is shifted but not masked, so callers are expected to pass 0..255.
static inline uint16_t build_rgb565_fast(int r, int g, int b)
{
    const int red_field = (r & 0xF8) << 8;   // bits 15..11
    const int green_field = (g & 0xFC) << 3; // bits 10..5
    const int blue_field = b >> 3;           // bits 4..0
    return static_cast<uint16_t>(red_field | green_field | blue_field);
}
// Loads the RGB565 pixel at column x, row y of an image whose rows are `pitch`
// bytes apart, and returns its three channels through r, g, b after passing
// them through the gamma encode tables (8-bit results).
// The pixel is read byte-by-byte, low byte first, so no alignment is required.
// No bounds checking is performed on x or y.
static inline void unpack_rgb565_gamma(const uint8_t* src, int pitch, int x, int y, int& r, int& g, int& b)
{
    const uint8_t* const texel = src + y * pitch + x * 2;

    // Assemble the 16-bit word from its two bytes.
    const unsigned low_byte = texel[0];
    const unsigned high_byte = texel[1];
    const unsigned packed = low_byte | (high_byte << 8);

    // Split into 5-bit red, 6-bit green and 5-bit blue fields and look each
    // one up. Blue shares the 5-bit table with red.
    r = gamma_r_encode[(packed >> 11) & 0x1F];
    g = gamma_g_encode[(packed >> 5) & 0x3F];
    b = gamma_r_encode[packed & 0x1F];
}
// ---- Fixed-point smoothstep weights at 4x sample locations ------------------
// smoothstep(0,1,x) = 3x^2 - 2x^3 at x in {0, 1/4, 1/2, 3/4} = {0, 5/32, 1/2, 27/32}.
// Each weight is scaled by 256 (8 fractional bits). W[i] + IW[i] == 256 for
// every i, so a blend a*IW[i] + b*W[i] is normalized with a right shift by 8.
static constexpr uint16_t W[4] = { 0, 40, 128, 216 }; // w = smoothstep
static constexpr uint16_t IW[4] = { 256, 216, 128, 40 }; // 256 - w
// Blends two channel values using integer weights that sum to 256 and rounds
// to nearest (+128 is half of the 256 removed by the shift).
static inline int sb4x_blend(int a, int b, int wa, int wb)
{
    return (a * wa + b * wb + 128) >> 8;
}

// Upscales an RGB565 image by a factor of 4 in both dimensions.
//
// Every source texel (sx, sy) produces one 4x4 destination block. Inside the
// block the texel is blended with its right, lower and lower-right neighbours
// using the smoothstep weights in W / IW. Blending is done on gamma-encoded
// 8-bit channels, and the result is gamma-decoded before being packed back to
// RGB565.
//
// Parameters:
//   dst        - destination buffer; must provide 4 * src_height rows of
//                dst_pitch bytes, each holding at least 8 * src_width bytes.
//   dst_pitch  - distance in bytes between destination rows.
//   src        - source pixels, 2 bytes each, low byte first.
//   src_width  - source width in pixels.
//   src_height - source height in pixels.
//   src_pitch  - distance in bytes between source rows.
//
// Nothing is written when src_width or src_height is <= 0.
// dst and src are declared __restrict and therefore must not overlap.
void ApplySharpBilinear4x(uint8_t* __restrict dst, int dst_pitch,
                          const uint8_t* __restrict src,
                          int src_width, int src_height, int src_pitch)
{
    // Build the gamma tables on first use. The flag is a plain bool, so the
    // very first call must not run concurrently with another call.
    static bool gamma_ready = false;
    if (!gamma_ready)
    {
        init_gamma_tables();
        gamma_ready = true;
    }

    for (int sy = 0; sy < src_height; ++sy)
    {
        // The bottom row has no row beneath it, so only the neighbour index is
        // clamped and the block replicates the edge row. Moving the base row
        // up instead would repeat the previous block and would index row -1
        // when src_height == 1.
        const int sy0 = sy;
        const int sy1 = std::min(sy + 1, src_height - 1);

        // First destination row of this block row.
        const int dy_base = sy << 2; // sy * 4

        for (int sx = 0; sx < src_width; ++sx)
        {
            // Same edge rule horizontally: clamp the right-hand neighbour only.
            const int sx0 = sx;
            const int sx1 = std::min(sx + 1, src_width - 1);

            // Unpack the 2x2 neighbourhood once per 4x4 block.
            int r00, g00, b00;
            int r10, g10, b10;
            int r01, g01, b01;
            int r11, g11, b11;
            unpack_rgb565_gamma(src, src_pitch, sx0, sy0, r00, g00, b00);
            unpack_rgb565_gamma(src, src_pitch, sx1, sy0, r10, g10, b10);
            unpack_rgb565_gamma(src, src_pitch, sx0, sy1, r01, g01, b01);
            unpack_rgb565_gamma(src, src_pitch, sx1, sy1, r11, g11, b11);

            // First destination column of this block.
            const int dx_base = sx << 2; // sx * 4

            // Separable interpolation: horizontal mixes of the top and bottom
            // texel pairs are computed once per sub-column and reused for all
            // four sub-rows.
            int rtop[4], gtop[4], btop[4];
            int rbot[4], gbot[4], bbot[4];
            for (int dx = 0; dx < 4; ++dx)
            {
                const int wx = W[dx];
                const int iwx = IW[dx];

                rtop[dx] = sb4x_blend(r00, r10, iwx, wx);
                gtop[dx] = sb4x_blend(g00, g10, iwx, wx);
                btop[dx] = sb4x_blend(b00, b10, iwx, wx);

                rbot[dx] = sb4x_blend(r01, r11, iwx, wx);
                gbot[dx] = sb4x_blend(g01, g11, iwx, wx);
                bbot[dx] = sb4x_blend(b01, b11, iwx, wx);
            }

            for (int dy = 0; dy < 4; ++dy)
            {
                const int wy = W[dy];
                const int iwy = IW[dy];

                uint8_t* dst_row = dst + (dy_base + dy) * dst_pitch;

                for (int dx = 0; dx < 4; ++dx)
                {
                    // Vertical mix of the two horizontal results.
                    const int r = sb4x_blend(rtop[dx], rbot[dx], iwy, wy);
                    const int g = sb4x_blend(gtop[dx], gbot[dx], iwy, wy);
                    const int b = sb4x_blend(btop[dx], bbot[dx], iwy, wy);

                    // Gamma-decode and pack. The clamp guards the table index.
                    const uint16_t out = build_rgb565_fast(
                        gamma_decode[CLAMP_U8(r, 0, 255)],
                        gamma_decode[CLAMP_U8(g, 0, 255)],
                        gamma_decode[CLAMP_U8(b, 0, 255)]);

                    // Store low byte first, matching how the source is read.
                    uint8_t* dst_px = dst_row + ((dx_base + dx) << 1); // x * 2
                    dst_px[0] = uint8_t(out & 0xFF);
                    dst_px[1] = uint8_t((out >> 8) & 0xFF);
                }
            }
        }
    }
}
// Entry point taking source-first arguments. It forwards to
// ApplySharpBilinear4x, which takes the destination first, so the arguments
// are reordered here.
//   srcPtr / srcPitch - source pixels and the byte distance between rows
//   dstPtr / dstPitch - destination pixels and the byte distance between rows
//   width / height    - source dimensions in pixels
void sharpbilinear_4x(uint8_t* srcPtr, int srcPitch,
                      uint8_t* dstPtr, int dstPitch,
                      int width, int height)
{
    const uint8_t* const source = srcPtr; // the filter only reads the source
    ApplySharpBilinear4x(dstPtr, dstPitch,
                         source,
                         width, height, srcPitch);
}