Blargg and phosphor threading by Thomas Jentzsch.

This commit is contained in:
Christian Speckner 2017-08-10 15:26:05 +02:00 committed by Stephen Anthony
parent 7a9cde3e4d
commit 6d1a306b52
6 changed files with 232 additions and 69 deletions

View File

@ -27,7 +27,7 @@
srcdir ?= .
DEFINES := -D_GLIBCXX_USE_CXX11_ABI=1
LDFLAGS :=
LDFLAGS := -pthread
INCLUDES :=
LIBS :=
OBJS :=

View File

@ -15,6 +15,7 @@
// this file, and for a DISCLAIMER OF ALL WARRANTIES.
//============================================================================
#include <thread>
#include "AtariNTSC.hxx"
// blitter related
@ -34,6 +35,9 @@ void AtariNTSC::initialize(const Setup& setup, const uInt8* palette)
{
  // Run the filter setup and palette initialisation first
  init(myImpl, setup);
  initializePalette(palette);

  // std::thread::hardware_concurrency() is only a hint and is allowed to
  // return 0 when the value cannot be determined.  With 0 workers the
  // render() methods would spawn nothing and leave the output untouched,
  // so always keep at least one worker (and at most four).
  myNumThreads = std::max(1u, std::min(4u, std::thread::hardware_concurrency()));

  // NOTE(review): a previously allocated myThreads array is not released
  // here; if initialize() can be called more than once this leaks --
  // confirm where (or whether) the array is deleted.
  myThreads = new std::thread[myNumThreads];
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
@ -66,52 +70,189 @@ void AtariNTSC::initializePalette(const uInt8* palette)
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
void AtariNTSC::render(const uInt8* atari_in, uInt32 in_width,
uInt32 in_height, void* rgb_out, uInt32 out_pitch)
void AtariNTSC::render(const uInt8* atari_in, const uInt32 in_width,
const uInt32 in_height, void* rgb_out, const uInt32 out_pitch)
{
// Spawn the threads...
for(uInt8 i = 0; i < myNumThreads; i++)
myThreads[i] = std::thread([=] {
renderThread(atari_in, in_width, in_height, myNumThreads, i, rgb_out, out_pitch);
});
// ...and make them join again
for(uInt8 i = 0; i < myNumThreads; i++)
myThreads[i].join();
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
void AtariNTSC::render(const uInt8* atari_in, const uInt32 in_width, const uInt32 in_height,
void* rgb_out, const uInt32 out_pitch, uInt32* rgb_in)
{
// Spawn the threads...
for(uInt8 i = 0; i < myNumThreads; i++)
myThreads[i] = std::thread([=] {
renderWithPhosphorThread(atari_in, in_width, in_height, myNumThreads, i,
rgb_in, rgb_out, out_pitch);
});
// ...and make them join again
for(uInt8 i = 0; i < myNumThreads; i++)
myThreads[i].join();
// Copy phosphor values into out buffer
memcpy(rgb_out, rgb_in, in_height * out_pitch);
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
void AtariNTSC::renderThread(const uInt8* atari_in, const uInt32 in_width,
const uInt32 in_height, const uInt32 numThreads, const uInt32 threadNum,
void* rgb_out, const uInt32 out_pitch)
{
// Adapt parameters to thread number
const uInt32 yStart = in_height * threadNum / numThreads;
const uInt32 yEnd = in_height * (threadNum + 1) / numThreads;
atari_in += in_width * yStart;
rgb_out = static_cast<char*>(rgb_out) + out_pitch * yStart;
uInt32 const chunk_count = (in_width - 1) / PIXEL_in_chunk;
while ( in_height-- )
for(uInt32 y = yStart; y < yEnd; ++y)
{
const uInt8* line_in = atari_in;
ATARI_NTSC_BEGIN_ROW( NTSC_black, line_in[0] );
ATARI_NTSC_BEGIN_ROW(NTSC_black, line_in[0]);
uInt32* restrict line_out = static_cast<uInt32*>(rgb_out);
++line_in;
for ( uInt32 n = chunk_count; n; --n )
for(uInt32 n = chunk_count; n; --n)
{
/* order of input and output pixels must not be altered */
ATARI_NTSC_COLOR_IN( 0, line_in[0] );
ATARI_NTSC_RGB_OUT_8888( 0, line_out[0] );
ATARI_NTSC_RGB_OUT_8888( 1, line_out[1] );
ATARI_NTSC_RGB_OUT_8888( 2, line_out[2] );
ATARI_NTSC_RGB_OUT_8888( 3, line_out[3] );
// order of input and output pixels must not be altered
ATARI_NTSC_COLOR_IN(0, line_in[0]);
ATARI_NTSC_RGB_OUT_8888(0, line_out[0]);
ATARI_NTSC_RGB_OUT_8888(1, line_out[1]);
ATARI_NTSC_RGB_OUT_8888(2, line_out[2]);
ATARI_NTSC_RGB_OUT_8888(3, line_out[3]);
ATARI_NTSC_COLOR_IN( 1, line_in[1] );
ATARI_NTSC_RGB_OUT_8888( 4, line_out[4] );
ATARI_NTSC_RGB_OUT_8888( 5, line_out[5] );
ATARI_NTSC_RGB_OUT_8888( 6, line_out[6] );
ATARI_NTSC_COLOR_IN(1, line_in[1]);
ATARI_NTSC_RGB_OUT_8888(4, line_out[4]);
ATARI_NTSC_RGB_OUT_8888(5, line_out[5]);
ATARI_NTSC_RGB_OUT_8888(6, line_out[6]);
line_in += 2;
line_out += 7;
}
/* finish final pixels */
ATARI_NTSC_COLOR_IN( 0, NTSC_black );
ATARI_NTSC_RGB_OUT_8888( 0, line_out[0] );
ATARI_NTSC_RGB_OUT_8888( 1, line_out[1] );
ATARI_NTSC_RGB_OUT_8888( 2, line_out[2] );
ATARI_NTSC_RGB_OUT_8888( 3, line_out[3] );
// finish final pixels
ATARI_NTSC_COLOR_IN(0, NTSC_black);
ATARI_NTSC_RGB_OUT_8888(0, line_out[0]);
ATARI_NTSC_RGB_OUT_8888(1, line_out[1]);
ATARI_NTSC_RGB_OUT_8888(2, line_out[2]);
ATARI_NTSC_RGB_OUT_8888(3, line_out[3]);
ATARI_NTSC_COLOR_IN( 1, NTSC_black );
ATARI_NTSC_RGB_OUT_8888( 4, line_out[4] );
ATARI_NTSC_RGB_OUT_8888( 5, line_out[5] );
ATARI_NTSC_RGB_OUT_8888( 6, line_out[6] );
ATARI_NTSC_COLOR_IN(1, NTSC_black);
ATARI_NTSC_RGB_OUT_8888(4, line_out[4]);
ATARI_NTSC_RGB_OUT_8888(5, line_out[5]);
ATARI_NTSC_RGB_OUT_8888(6, line_out[6]);
atari_in += in_width;
rgb_out = static_cast<char*>(rgb_out) + out_pitch;
}
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Worker body for Blargg + phosphor rendering.
// Filters the input rows [yStart, yEnd) assigned to this worker into rgb_out,
// then blends every filtered pixel of that row with the value stored in
// rgb_in (via getRGBPhosphor) and writes the blend back into rgb_in.
// The blended value is NOT written to rgb_out by this function.
//
//   atari_in    8-bit input pixels, in_width pixels per row
//   in_width    input pixels per row
//   in_height   total number of input rows (shared by all workers)
//   numThreads  number of workers the rows are divided between
//   threadNum   index of this worker; selects the rows it processes
//   rgb_in      32-bit buffer read and updated by the phosphor blend
//   rgb_out     output buffer for the filtered pixels
//   out_pitch   distance in bytes between two output rows
void AtariNTSC::renderWithPhosphorThread(const uInt8* atari_in, const uInt32 in_width,
const uInt32 in_height, const uInt32 numThreads, const uInt32 threadNum,
uInt32* rgb_in, void* rgb_out, const uInt32 out_pitch)
{
// Adapt parameters to thread number: this worker handles rows yStart..yEnd-1
const uInt32 yStart = in_height * threadNum / numThreads;
const uInt32 yEnd = in_height * (threadNum + 1) / numThreads;
// Pixel offset of the first row of this worker; it is used for both 'out'
// and 'rgb_in' and keeps running across rows (it is never reset per row)
uInt32 bufofs = AtariNTSC::outWidth(in_width) * yStart;
// Keep the unshifted start of the output buffer, since rgb_out itself is
// advanced below
uInt32* out = static_cast<uInt32*>(rgb_out);
atari_in += in_width * yStart;
rgb_out = static_cast<char*>(rgb_out) + out_pitch * yStart;
// Number of full chunks per row; the first input pixel is consumed by
// ATARI_NTSC_BEGIN_ROW, hence the '- 1'
uInt32 const chunk_count = (in_width - 1) / PIXEL_in_chunk;
for(uInt32 y = yStart; y < yEnd; ++y)
{
const uInt8* line_in = atari_in;
ATARI_NTSC_BEGIN_ROW(NTSC_black, line_in[0]);
uInt32* restrict line_out = static_cast<uInt32*>(rgb_out);
++line_in;
// Each iteration reads 2 input pixels and writes 7 output pixels
for(uInt32 n = chunk_count; n; --n)
{
// order of input and output pixels must not be altered
ATARI_NTSC_COLOR_IN(0, line_in[0]);
ATARI_NTSC_RGB_OUT_8888(0, line_out[0]);
ATARI_NTSC_RGB_OUT_8888(1, line_out[1]);
ATARI_NTSC_RGB_OUT_8888(2, line_out[2]);
ATARI_NTSC_RGB_OUT_8888(3, line_out[3]);
ATARI_NTSC_COLOR_IN(1, line_in[1]);
ATARI_NTSC_RGB_OUT_8888(4, line_out[4]);
ATARI_NTSC_RGB_OUT_8888(5, line_out[5]);
ATARI_NTSC_RGB_OUT_8888(6, line_out[6]);
line_in += 2;
line_out += 7;
}
// finish final pixels: one more chunk, fed with black instead of input
ATARI_NTSC_COLOR_IN(0, NTSC_black);
ATARI_NTSC_RGB_OUT_8888(0, line_out[0]);
ATARI_NTSC_RGB_OUT_8888(1, line_out[1]);
ATARI_NTSC_RGB_OUT_8888(2, line_out[2]);
ATARI_NTSC_RGB_OUT_8888(3, line_out[3]);
ATARI_NTSC_COLOR_IN(1, NTSC_black);
ATARI_NTSC_RGB_OUT_8888(4, line_out[4]);
ATARI_NTSC_RGB_OUT_8888(5, line_out[5]);
ATARI_NTSC_RGB_OUT_8888(6, line_out[6]);
// Do phosphor mode (blend the resulting frames)
// Note: The code assumes that AtariNTSC::outWidth(kTIAW) == outPitch == 560,
// i.e. output rows are contiguous, so one running offset addresses both
// buffers.  The loop is unrolled 8 times, which also requires the output
// width to be a multiple of 8 (560 is).
for (uInt32 x = AtariNTSC::outWidth(in_width) / 8; x; --x)
{
// Store back into displayed frame buffer (for next frame)
rgb_in[bufofs] = getRGBPhosphor(out[bufofs], rgb_in[bufofs]);
bufofs++;
rgb_in[bufofs] = getRGBPhosphor(out[bufofs], rgb_in[bufofs]);
bufofs++;
rgb_in[bufofs] = getRGBPhosphor(out[bufofs], rgb_in[bufofs]);
bufofs++;
rgb_in[bufofs] = getRGBPhosphor(out[bufofs], rgb_in[bufofs]);
bufofs++;
rgb_in[bufofs] = getRGBPhosphor(out[bufofs], rgb_in[bufofs]);
bufofs++;
rgb_in[bufofs] = getRGBPhosphor(out[bufofs], rgb_in[bufofs]);
bufofs++;
rgb_in[bufofs] = getRGBPhosphor(out[bufofs], rgb_in[bufofs]);
bufofs++;
rgb_in[bufofs] = getRGBPhosphor(out[bufofs], rgb_in[bufofs]);
bufofs++;
}
// Advance to the next input row and the next output row
atari_in += in_width;
rgb_out = static_cast<char*>(rgb_out) + out_pitch;
}
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
inline uInt32 AtariNTSC::getRGBPhosphor(const uInt32 c, const uInt32 p) const
{
#define TO_RGB(color, red, green, blue) \
const uInt8 red = color >> 16; const uInt8 green = color >> 8; const uInt8 blue = color;
  // Split the current (c) and previous (p) colour into 8-bit channels
  TO_RGB(c, rc, gc, bc);
  TO_RGB(p, rp, gp, bp);

  // Look up the blend of each channel pair in the phosphor table and pack
  // the three results as red (bits 16-23), green (bits 8-15), blue (bits 0-7)
  uInt32 blended = myPhosphorPalette[rc][rp];
  blended = (blended << 8) | myPhosphorPalette[gc][gp];
  blended = (blended << 8) | myPhosphorPalette[bc][bp];
  return blended;
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
void AtariNTSC::init(init_t& impl, const Setup& setup)
{

View File

@ -80,12 +80,19 @@ class AtariNTSC
void initialize(const Setup& setup, const uInt8* palette);
void initializePalette(const uInt8* palette);
// Set phosphor palette, for use in Blargg + phosphor mode
void setPhosphorPalette(uInt8 palette[256][256]) {
  // Take a private copy of the whole 256x256 byte table, so the caller's
  // array does not need to outlive this call
  memcpy(myPhosphorPalette, palette, sizeof(myPhosphorPalette));
}
// Filters one or more rows of pixels. Input pixels are 8-bit Atari
// palette colors.
// in_width is the number of pixels per input row (and to the next input row).
// out_pitch is the number of *bytes* to get to the next output row.
void render(const uInt8* atari_in, uInt32 in_width, uInt32 in_height,
void* rgb_out, uInt32 out_pitch);
void render(const uInt8* atari_in, const uInt32 in_width, const uInt32 in_height,
void* rgb_out, const uInt32 out_pitch);
void render(const uInt8* atari_in, const uInt32 in_width, const uInt32 in_height,
void* rgb_out, const uInt32 out_pitch, uInt32* rgb_in);
// Number of input pixels that will fit within given output width.
// Might be rounded down slightly; use outWidth() on result to find
@ -101,6 +108,23 @@ class AtariNTSC
return ((((in_width) - 1) / PIXEL_in_chunk + 1)* PIXEL_out_chunk);
}
private:
// Threaded rendering
// Both methods are the per-worker bodies used by the render() overloads.
// The in_height input rows are divided between numThreads workers; the
// worker with index threadNum processes rows
//   in_height * threadNum / numThreads  ..  in_height * (threadNum + 1) / numThreads - 1
// out_pitch is the distance in bytes between two output rows.
void renderThread(const uInt8* atari_in, const uInt32 in_width,
const uInt32 in_height, const uInt32 numThreads, const uInt32 threadNum, void* rgb_out, const uInt32 out_pitch);
// Same as renderThread(), and additionally blends each filtered row with
// rgb_in (via getRGBPhosphor), storing the blended pixels back into rgb_in.
void renderWithPhosphorThread(const uInt8* atari_in, const uInt32 in_width,
const uInt32 in_height, const uInt32 numThreads, const uInt32 threadNum, uInt32* rgb_in, void* rgb_out, const uInt32 out_pitch);
/**
Used to calculate an averaged color for the 'phosphor' effect.
Each 8-bit channel pair is blended by a lookup in myPhosphorPalette.
@param c RGB Color 1 (current frame)
@param cp RGB Color 2 (previous frame)
@return Averaged value of the two RGB colors
*/
uInt32 getRGBPhosphor(const uInt32 c, const uInt32 cp) const;
private:
enum {
PIXEL_in_chunk = 2, // number of input pixels read per chunk
@ -138,6 +162,12 @@ class AtariNTSC
#define LUMA_CUTOFF 0.20
uInt32 myColorTable[palette_size][entry_size];
uInt8 myPhosphorPalette[256][256];
// Rendering threads
std::thread* myThreads;
// Number of rendering threads
uInt8 myNumThreads;
struct init_t
{

View File

@ -72,6 +72,10 @@ class NTSCFilter
myNTSC.initializePalette(myTIAPalette);
}
// Forward the 256x256 phosphor blend table to the underlying AtariNTSC
// object, which uses it in its Blargg + phosphor rendering
inline void setPhosphorPalette(uInt8 palette[256][256]) {
myNTSC.setPhosphorPalette(palette);
}
// The following are meant to be used strictly for toggling from the GUI
string setPreset(Preset preset);
@ -110,6 +114,11 @@ class NTSCFilter
{
myNTSC.render(src_buf, src_width, src_height, dest_buf, dest_pitch);
}
// Blargg + phosphor variant of render(): forwards all arguments unchanged
// to the AtariNTSC::render() overload that takes an additional buffer;
// prev_buf is passed as that overload's 'rgb_in' argument.
inline void render(uInt8* src_buf, uInt32 src_width, uInt32 src_height,
uInt32* dest_buf, uInt32 dest_pitch, uInt32* prev_buf)
{
myNTSC.render(src_buf, src_width, src_height, dest_buf, dest_pitch, prev_buf);
}
private:
// Convert from atari_ntsc_setup_t values to equivalent adjustables
@ -135,6 +144,7 @@ class NTSCFilter
// and 128 black&white colours (PAL colour loss)
// Each colour is represented by 3 bytes, in R,G,B order
uInt8 myTIAPalette[AtariNTSC::palette_size * 3];
uInt8* myPhosphorPalette;
struct AdjustableTag {
const char* const type;

View File

@ -16,6 +16,7 @@
//============================================================================
#include <cmath>
#include <algorithm>
#include "FrameBuffer.hxx"
#include "Settings.hxx"
@ -241,11 +242,13 @@ void TIASurface::enablePhosphor(bool enable, int blend)
for(Int16 c = 255; c >= 0; c--)
for(Int16 p = 255; p >= 0; p--)
myPhosphorPalette[c][p] = getPhosphor(c, p);
myNTSCFilter.setPhosphorPalette(myPhosphorPalette);
}
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
inline uInt32 TIASurface::getRGBPhosphor(uInt32 c, uInt32 p) const
inline uInt32 TIASurface::getRGBPhosphor(const uInt32 c, const uInt32 p) const
{
#define TO_RGB(color, red, green, blue) \
const uInt8 red = color >> 16; const uInt8 green = color >> 8; const uInt8 blue = color;
@ -325,16 +328,17 @@ void TIASurface::render()
{
case Filter::Normal:
{
uInt8* in = myTIA->frameBuffer();
uInt8* tiaIn = myTIA->frameBuffer();
uInt32 bufofsY = 0, screenofsY = 0, pos = 0;
uInt32 bufofs = 0, screenofsY = 0, pos;
for(uInt32 y = 0; y < height; ++y)
{
pos = screenofsY;
for(uInt32 x = 0; x < width; ++x)
out[pos++] = myPalette[in[bufofsY + x]];
bufofsY += width;
for (uInt32 x = width / 2; x; --x)
{
out[pos++] = myPalette[tiaIn[bufofs++]];
out[pos++] = myPalette[tiaIn[bufofs++]];
}
screenofsY += outPitch;
}
break;
@ -345,21 +349,18 @@ void TIASurface::render()
uInt8* tiaIn = myTIA->frameBuffer();
uInt32* rgbIn = myRGBFramebuffer;
uInt32 bufofsY = 0, screenofsY = 0, pos = 0;
for(uInt32 y = 0; y < height; ++y)
uInt32 bufofs = 0, screenofsY = 0, pos;
for(uInt32 y = height; y ; --y)
{
pos = screenofsY;
for(uInt32 x = 0; x < width; ++x)
for(uInt32 x = width / 2; x ; --x)
{
const uInt32 bufofs = bufofsY + x;
const uInt8 c = tiaIn[bufofs];
const uInt32 retVal = getRGBPhosphor(myPalette[c], rgbIn[bufofs]);
// Store back into displayed frame buffer (for next frame)
rgbIn[bufofs] = retVal;
out[pos++] = retVal;
rgbIn[bufofs] = out[pos++] = getRGBPhosphor(myPalette[tiaIn[bufofs]], rgbIn[bufofs]);
bufofs++;
rgbIn[bufofs] = out[pos++] = getRGBPhosphor(myPalette[tiaIn[bufofs]], rgbIn[bufofs]);
bufofs++;
}
bufofsY += width;
screenofsY += outPitch;
}
break;
@ -373,28 +374,7 @@ void TIASurface::render()
case Filter::BlarggPhosphor:
{
// First do Blargg filtering
myNTSCFilter.render(myTIA->frameBuffer(), width, height, out, outPitch << 2);
// Then do phosphor mode (blend the resulting frames)
uInt32* rgbIn = myRGBFramebuffer;
uInt32 bufofsY = 0, screenofsY = 0, pos = 0;
for(uInt32 y = 0; y < height; ++y)
{
pos = screenofsY;
for(uInt32 x = 0; x < AtariNTSC::outWidth(kTIAW); ++x)
{
const uInt32 bufofs = bufofsY + x;
const uInt32 retVal = getRGBPhosphor(out[bufofs], rgbIn[bufofs]);
// Store back into displayed frame buffer (for next frame)
rgbIn[bufofs] = retVal;
out[pos++] = retVal;
}
bufofsY += AtariNTSC::outWidth(kTIAW);
screenofsY += outPitch;
}
myNTSCFilter.render(myTIA->frameBuffer(), width, height, out, outPitch << 2, myRGBFramebuffer);
break;
}
}

View File

@ -25,6 +25,8 @@ class FrameBuffer;
class FBSurface;
class VideoMode;
#include <thread>
#include "FrameManager.hxx"
#include "Rect.hxx"
#include "NTSCFilter.hxx"
@ -134,7 +136,7 @@ class TIASurface
@return Averaged value of the two RGB colors
*/
uInt32 getRGBPhosphor(uInt32 c, uInt32 cp) const;
uInt32 getRGBPhosphor(const uInt32 c, const uInt32 cp) const;
/**
Enable/disable/query NTSC filtering effects.