Blargg and phosphor threading by Thomas Jentzsch.

This commit is contained in:
Christian Speckner 2017-08-10 15:26:05 +02:00 committed by Stephen Anthony
parent 7a9cde3e4d
commit 6d1a306b52
6 changed files with 232 additions and 69 deletions

View File

@ -27,7 +27,7 @@
srcdir ?= . srcdir ?= .
DEFINES := -D_GLIBCXX_USE_CXX11_ABI=1 DEFINES := -D_GLIBCXX_USE_CXX11_ABI=1
LDFLAGS := LDFLAGS := -pthread
INCLUDES := INCLUDES :=
LIBS := LIBS :=
OBJS := OBJS :=

View File

@ -15,6 +15,7 @@
// this file, and for a DISCLAIMER OF ALL WARRANTIES. // this file, and for a DISCLAIMER OF ALL WARRANTIES.
//============================================================================ //============================================================================
#include <thread>
#include "AtariNTSC.hxx" #include "AtariNTSC.hxx"
// blitter related // blitter related
@ -34,6 +35,9 @@ void AtariNTSC::initialize(const Setup& setup, const uInt8* palette)
{ {
init(myImpl, setup); init(myImpl, setup);
initializePalette(palette); initializePalette(palette);
myNumThreads = std::min(4u, std::thread::hardware_concurrency());
myThreads = new std::thread[myNumThreads];
} }
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
@ -66,52 +70,189 @@ void AtariNTSC::initializePalette(const uInt8* palette)
} }
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
void AtariNTSC::render(const uInt8* atari_in, uInt32 in_width, void AtariNTSC::render(const uInt8* atari_in, const uInt32 in_width,
uInt32 in_height, void* rgb_out, uInt32 out_pitch) const uInt32 in_height, void* rgb_out, const uInt32 out_pitch)
{ {
// Spawn the threads...
for(uInt8 i = 0; i < myNumThreads; i++)
myThreads[i] = std::thread([=] {
renderThread(atari_in, in_width, in_height, myNumThreads, i, rgb_out, out_pitch);
});
// ...and make them join again
for(uInt8 i = 0; i < myNumThreads; i++)
myThreads[i].join();
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Filters one frame of Atari pixels into RGB and blends it with the previous
// frame (Blargg + phosphor mode).
//
// The frame is split into horizontal slices; each slice is filtered and
// blended by renderWithPhosphorThread() on its own worker thread. The blended
// pixels are stored in rgb_in (so they are available for the next frame) and
// are copied to rgb_out once all workers have been joined.
//
//   atari_in   input frame, one 8-bit Atari palette index per pixel
//   in_width   number of input pixels per row
//   in_height  number of rows to filter
//   rgb_out    output buffer
//   out_pitch  number of *bytes* to get to the next output row
//   rgb_in     previous frame on entry, blended frame on return
void AtariNTSC::render(const uInt8* atari_in, const uInt32 in_width, const uInt32 in_height,
                       void* rgb_out, const uInt32 out_pitch, uInt32* rgb_in)
{
  if(myNumThreads == 0)
  {
    // myNumThreads is derived from std::thread::hardware_concurrency(), which
    // is allowed to return 0. Without workers nothing would be rendered and
    // the copy below would publish a stale buffer, so process the whole frame
    // on the calling thread as a single slice.
    renderWithPhosphorThread(atari_in, in_width, in_height, 1, 0,
                             rgb_in, rgb_out, out_pitch);
  }
  else
  {
    // Spawn the threads...
    for(uInt8 i = 0; i < myNumThreads; i++)
      myThreads[i] = std::thread([=] {
        renderWithPhosphorThread(atari_in, in_width, in_height, myNumThreads, i,
                                 rgb_in, rgb_out, out_pitch);
      });

    // ...and make them join again
    for(uInt8 i = 0; i < myNumThreads; i++)
      myThreads[i].join();
  }

  // Copy phosphor values into out buffer
  memcpy(rgb_out, rgb_in, in_height * out_pitch);
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
void AtariNTSC::renderThread(const uInt8* atari_in, const uInt32 in_width,
const uInt32 in_height, const uInt32 numThreads, const uInt32 threadNum,
void* rgb_out, const uInt32 out_pitch)
{
// Adapt parameters to thread number
const uInt32 yStart = in_height * threadNum / numThreads;
const uInt32 yEnd = in_height * (threadNum + 1) / numThreads;
atari_in += in_width * yStart;
rgb_out = static_cast<char*>(rgb_out) + out_pitch * yStart;
uInt32 const chunk_count = (in_width - 1) / PIXEL_in_chunk; uInt32 const chunk_count = (in_width - 1) / PIXEL_in_chunk;
while ( in_height-- )
for(uInt32 y = yStart; y < yEnd; ++y)
{ {
const uInt8* line_in = atari_in; const uInt8* line_in = atari_in;
ATARI_NTSC_BEGIN_ROW( NTSC_black, line_in[0] ); ATARI_NTSC_BEGIN_ROW(NTSC_black, line_in[0]);
uInt32* restrict line_out = static_cast<uInt32*>(rgb_out); uInt32* restrict line_out = static_cast<uInt32*>(rgb_out);
++line_in; ++line_in;
for ( uInt32 n = chunk_count; n; --n ) for(uInt32 n = chunk_count; n; --n)
{ {
/* order of input and output pixels must not be altered */ // order of input and output pixels must not be altered
ATARI_NTSC_COLOR_IN( 0, line_in[0] ); ATARI_NTSC_COLOR_IN(0, line_in[0]);
ATARI_NTSC_RGB_OUT_8888( 0, line_out[0] ); ATARI_NTSC_RGB_OUT_8888(0, line_out[0]);
ATARI_NTSC_RGB_OUT_8888( 1, line_out[1] ); ATARI_NTSC_RGB_OUT_8888(1, line_out[1]);
ATARI_NTSC_RGB_OUT_8888( 2, line_out[2] ); ATARI_NTSC_RGB_OUT_8888(2, line_out[2]);
ATARI_NTSC_RGB_OUT_8888( 3, line_out[3] ); ATARI_NTSC_RGB_OUT_8888(3, line_out[3]);
ATARI_NTSC_COLOR_IN( 1, line_in[1] ); ATARI_NTSC_COLOR_IN(1, line_in[1]);
ATARI_NTSC_RGB_OUT_8888( 4, line_out[4] ); ATARI_NTSC_RGB_OUT_8888(4, line_out[4]);
ATARI_NTSC_RGB_OUT_8888( 5, line_out[5] ); ATARI_NTSC_RGB_OUT_8888(5, line_out[5]);
ATARI_NTSC_RGB_OUT_8888( 6, line_out[6] ); ATARI_NTSC_RGB_OUT_8888(6, line_out[6]);
line_in += 2; line_in += 2;
line_out += 7; line_out += 7;
} }
/* finish final pixels */ // finish final pixels
ATARI_NTSC_COLOR_IN( 0, NTSC_black ); ATARI_NTSC_COLOR_IN(0, NTSC_black);
ATARI_NTSC_RGB_OUT_8888( 0, line_out[0] ); ATARI_NTSC_RGB_OUT_8888(0, line_out[0]);
ATARI_NTSC_RGB_OUT_8888( 1, line_out[1] ); ATARI_NTSC_RGB_OUT_8888(1, line_out[1]);
ATARI_NTSC_RGB_OUT_8888( 2, line_out[2] ); ATARI_NTSC_RGB_OUT_8888(2, line_out[2]);
ATARI_NTSC_RGB_OUT_8888( 3, line_out[3] ); ATARI_NTSC_RGB_OUT_8888(3, line_out[3]);
ATARI_NTSC_COLOR_IN( 1, NTSC_black ); ATARI_NTSC_COLOR_IN(1, NTSC_black);
ATARI_NTSC_RGB_OUT_8888( 4, line_out[4] ); ATARI_NTSC_RGB_OUT_8888(4, line_out[4]);
ATARI_NTSC_RGB_OUT_8888( 5, line_out[5] ); ATARI_NTSC_RGB_OUT_8888(5, line_out[5]);
ATARI_NTSC_RGB_OUT_8888( 6, line_out[6] ); ATARI_NTSC_RGB_OUT_8888(6, line_out[6]);
atari_in += in_width; atari_in += in_width;
rgb_out = static_cast<char*>(rgb_out) + out_pitch; rgb_out = static_cast<char*>(rgb_out) + out_pitch;
} }
} }
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Worker for the Blargg + phosphor render. Processes the slice of rows
// [yStart, yEnd) that belongs to thread 'threadNum' out of 'numThreads':
// each row is first run through the ATARI_NTSC_* macros into rgb_out, then
// every rendered pixel is blended with the matching pixel of rgb_in via
// getRGBPhosphor() and the result is stored back into rgb_in.
// rgb_out itself is not overwritten with the blended values here; the
// phosphor render() overload copies rgb_in to rgb_out after joining.
//
//   atari_in    input frame (start of the whole frame, not of the slice)
//   in_width    number of input pixels per row
//   in_height   total number of rows in the frame
//   numThreads  number of slices the frame is divided into (must be non-zero,
//               it is used as a divisor)
//   threadNum   index of the slice handled by this call
//   rgb_in      previous frame on entry, blended frame on return
//   rgb_out     output buffer (start of the whole frame)
//   out_pitch   number of bytes to get to the next output row
void AtariNTSC::renderWithPhosphorThread(const uInt8* atari_in, const uInt32 in_width,
const uInt32 in_height, const uInt32 numThreads, const uInt32 threadNum,
uInt32* rgb_in, void* rgb_out, const uInt32 out_pitch)
{
// Adapt parameters to thread number
const uInt32 yStart = in_height * threadNum / numThreads;
const uInt32 yEnd = in_height * (threadNum + 1) / numThreads;
// Pixel index of the first pixel of this slice; used for both 'out' and
// 'rgb_in' in the blend loop and never reset per row
uInt32 bufofs = AtariNTSC::outWidth(in_width) * yStart;
// Base of the whole output frame, kept for indexing with bufofs
// (rgb_out is advanced row by row below)
uInt32* out = static_cast<uInt32*>(rgb_out);
// Move input and output to the first row of this slice
atari_in += in_width * yStart;
rgb_out = static_cast<char*>(rgb_out) + out_pitch * yStart;
uInt32 const chunk_count = (in_width - 1) / PIXEL_in_chunk;
for(uInt32 y = yStart; y < yEnd; ++y)
{
const uInt8* line_in = atari_in;
ATARI_NTSC_BEGIN_ROW(NTSC_black, line_in[0]);
uInt32* restrict line_out = static_cast<uInt32*>(rgb_out);
++line_in;
// Each chunk consumes 2 input pixels and produces 7 output pixels
for(uInt32 n = chunk_count; n; --n)
{
// order of input and output pixels must not be altered
ATARI_NTSC_COLOR_IN(0, line_in[0]);
ATARI_NTSC_RGB_OUT_8888(0, line_out[0]);
ATARI_NTSC_RGB_OUT_8888(1, line_out[1]);
ATARI_NTSC_RGB_OUT_8888(2, line_out[2]);
ATARI_NTSC_RGB_OUT_8888(3, line_out[3]);
ATARI_NTSC_COLOR_IN(1, line_in[1]);
ATARI_NTSC_RGB_OUT_8888(4, line_out[4]);
ATARI_NTSC_RGB_OUT_8888(5, line_out[5]);
ATARI_NTSC_RGB_OUT_8888(6, line_out[6]);
line_in += 2;
line_out += 7;
}
// finish final pixels
ATARI_NTSC_COLOR_IN(0, NTSC_black);
ATARI_NTSC_RGB_OUT_8888(0, line_out[0]);
ATARI_NTSC_RGB_OUT_8888(1, line_out[1]);
ATARI_NTSC_RGB_OUT_8888(2, line_out[2]);
ATARI_NTSC_RGB_OUT_8888(3, line_out[3]);
ATARI_NTSC_COLOR_IN(1, NTSC_black);
ATARI_NTSC_RGB_OUT_8888(4, line_out[4]);
ATARI_NTSC_RGB_OUT_8888(5, line_out[5]);
ATARI_NTSC_RGB_OUT_8888(6, line_out[6]);
// Do phosphor mode (blend the resulting frames)
// Note: The code assumes that AtariNTSC::outWidth(kTIAW) == outPitch == 560
// The loop is unrolled 8 times and bufofs runs on continuously from row to
// row, so out[bufofs] only addresses the pixels written above when a row of
// rgb_out is exactly outWidth(in_width) pixels wide
// (out_pitch == outWidth(in_width) * sizeof(uInt32)) and that width is a
// multiple of 8; otherwise trailing pixels are skipped and rows drift.
for (uInt32 x = AtariNTSC::outWidth(in_width) / 8; x; --x)
{
// Store back into displayed frame buffer (for next frame)
rgb_in[bufofs] = getRGBPhosphor(out[bufofs], rgb_in[bufofs]);
bufofs++;
rgb_in[bufofs] = getRGBPhosphor(out[bufofs], rgb_in[bufofs]);
bufofs++;
rgb_in[bufofs] = getRGBPhosphor(out[bufofs], rgb_in[bufofs]);
bufofs++;
rgb_in[bufofs] = getRGBPhosphor(out[bufofs], rgb_in[bufofs]);
bufofs++;
rgb_in[bufofs] = getRGBPhosphor(out[bufofs], rgb_in[bufofs]);
bufofs++;
rgb_in[bufofs] = getRGBPhosphor(out[bufofs], rgb_in[bufofs]);
bufofs++;
rgb_in[bufofs] = getRGBPhosphor(out[bufofs], rgb_in[bufofs]);
bufofs++;
rgb_in[bufofs] = getRGBPhosphor(out[bufofs], rgb_in[bufofs]);
bufofs++;
}
// Advance to the next input and output row
atari_in += in_width;
rgb_out = static_cast<char*>(rgb_out) + out_pitch;
}
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Blends one pixel of the current frame with the same pixel of the previous
// frame, channel by channel, using the phosphor lookup table.
//
//   c  colour of the current frame; red is taken from bits 16-23, green from
//      bits 8-15 and blue from bits 0-7
//   p  colour of the previous (displayed) frame, same layout
//
// Returns the blended colour in the same layout; bits 24-31 are always zero.
inline uInt32 AtariNTSC::getRGBPhosphor(const uInt32 c, const uInt32 p) const
{
  // Split both colours into their 8-bit channels (the cast keeps the low byte)
  const uInt8 rc = static_cast<uInt8>(c >> 16),
              gc = static_cast<uInt8>(c >> 8),
              bc = static_cast<uInt8>(c);
  const uInt8 rp = static_cast<uInt8>(p >> 16),
              gp = static_cast<uInt8>(p >> 8),
              bp = static_cast<uInt8>(p);

  // Mix current calculated frame with previous displayed frame
  const uInt32 rn = myPhosphorPalette[rc][rp];
  const uInt32 gn = myPhosphorPalette[gc][gp];
  const uInt32 bn = myPhosphorPalette[bc][bp];

  return (rn << 16) | (gn << 8) | bn;
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
void AtariNTSC::init(init_t& impl, const Setup& setup) void AtariNTSC::init(init_t& impl, const Setup& setup)
{ {

View File

@ -80,12 +80,19 @@ class AtariNTSC
void initialize(const Setup& setup, const uInt8* palette); void initialize(const Setup& setup, const uInt8* palette);
void initializePalette(const uInt8* palette); void initializePalette(const uInt8* palette);
// Install the 256x256 blend table used in Blargg + phosphor mode.
// The table is copied into this object's own myPhosphorPalette.
void setPhosphorPalette(uInt8 palette[256][256])
{
  const uInt32 tableBytes = 256 * 256;
  memcpy(myPhosphorPalette, palette, tableBytes);
}
// Filters one or more rows of pixels. Input pixels are 8-bit Atari // Filters one or more rows of pixels. Input pixels are 8-bit Atari
// palette colors. // palette colors.
// In_row_width is the number of pixels to get to the next input row. // In_row_width is the number of pixels to get to the next input row.
// Out_pitch is the number of *bytes* to get to the next output row. // Out_pitch is the number of *bytes* to get to the next output row.
void render(const uInt8* atari_in, uInt32 in_width, uInt32 in_height, void render(const uInt8* atari_in, const uInt32 in_width, const uInt32 in_height,
void* rgb_out, uInt32 out_pitch); void* rgb_out, const uInt32 out_pitch);
void render(const uInt8* atari_in, const uInt32 in_width, const uInt32 in_height,
void* rgb_out, const uInt32 out_pitch, uInt32* rgb_in);
// Number of input pixels that will fit within given output width. // Number of input pixels that will fit within given output width.
// Might be rounded down slightly; use outWidth() on result to find // Might be rounded down slightly; use outWidth() on result to find
@ -101,6 +108,23 @@ class AtariNTSC
return ((((in_width) - 1) / PIXEL_in_chunk + 1)* PIXEL_out_chunk); return ((((in_width) - 1) / PIXEL_in_chunk + 1)* PIXEL_out_chunk);
} }
private:
// Threaded rendering
void renderThread(const uInt8* atari_in, const uInt32 in_width,
const uInt32 in_height, const uInt32 numThreads, const uInt32 threadNum, void* rgb_out, const uInt32 out_pitch);
void renderWithPhosphorThread(const uInt8* atari_in, const uInt32 in_width,
const uInt32 in_height, const uInt32 numThreads, const uInt32 threadNum, uInt32* rgb_in, void* rgb_out, const uInt32 out_pitch);
/**
Used to calculate an averaged color for the 'phosphor' effect.
@param c RGB Color 1 (current frame)
@param cp RGB Color 2 (previous frame)
@return Averaged value of the two RGB colors
*/
uInt32 getRGBPhosphor(const uInt32 c, const uInt32 cp) const;
private: private:
enum { enum {
PIXEL_in_chunk = 2, // number of input pixels read per chunk PIXEL_in_chunk = 2, // number of input pixels read per chunk
@ -138,6 +162,12 @@ class AtariNTSC
#define LUMA_CUTOFF 0.20 #define LUMA_CUTOFF 0.20
uInt32 myColorTable[palette_size][entry_size]; uInt32 myColorTable[palette_size][entry_size];
uInt8 myPhosphorPalette[256][256];
// Rendering threads
std::thread* myThreads;
// Number of rendering threads
uInt8 myNumThreads;
struct init_t struct init_t
{ {

View File

@ -72,6 +72,10 @@ class NTSCFilter
myNTSC.initializePalette(myTIAPalette); myNTSC.initializePalette(myTIAPalette);
} }
// Pass the 256x256 phosphor blend table on to the wrapped AtariNTSC object
inline void setPhosphorPalette(uInt8 palette[256][256])
{
  myNTSC.setPhosphorPalette(palette);
}
// The following are meant to be used strictly for toggling from the GUI // The following are meant to be used strictly for toggling from the GUI
string setPreset(Preset preset); string setPreset(Preset preset);
@ -110,6 +114,11 @@ class NTSCFilter
{ {
myNTSC.render(src_buf, src_width, src_height, dest_buf, dest_pitch); myNTSC.render(src_buf, src_width, src_height, dest_buf, dest_pitch);
} }
// Blargg + phosphor variant of render(): forwards all arguments unchanged to
// the AtariNTSC overload that additionally takes the previous-frame buffer.
inline void render(uInt8* src_buf, uInt32 src_width, uInt32 src_height,
                   uInt32* dest_buf, uInt32 dest_pitch, uInt32* prev_buf)
{
  myNTSC.render(src_buf, src_width, src_height,
                dest_buf, dest_pitch,
                prev_buf);
}
private: private:
// Convert from atari_ntsc_setup_t values to equivalent adjustables // Convert from atari_ntsc_setup_t values to equivalent adjustables
@ -135,6 +144,7 @@ class NTSCFilter
// and 128 black&white colours (PAL colour loss) // and 128 black&white colours (PAL colour loss)
// Each colour is represented by 3 bytes, in R,G,B order // Each colour is represented by 3 bytes, in R,G,B order
uInt8 myTIAPalette[AtariNTSC::palette_size * 3]; uInt8 myTIAPalette[AtariNTSC::palette_size * 3];
uInt8* myPhosphorPalette;
struct AdjustableTag { struct AdjustableTag {
const char* const type; const char* const type;

View File

@ -16,6 +16,7 @@
//============================================================================ //============================================================================
#include <cmath> #include <cmath>
#include <algorithm>
#include "FrameBuffer.hxx" #include "FrameBuffer.hxx"
#include "Settings.hxx" #include "Settings.hxx"
@ -241,11 +242,13 @@ void TIASurface::enablePhosphor(bool enable, int blend)
for(Int16 c = 255; c >= 0; c--) for(Int16 c = 255; c >= 0; c--)
for(Int16 p = 255; p >= 0; p--) for(Int16 p = 255; p >= 0; p--)
myPhosphorPalette[c][p] = getPhosphor(c, p); myPhosphorPalette[c][p] = getPhosphor(c, p);
myNTSCFilter.setPhosphorPalette(myPhosphorPalette);
} }
} }
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
inline uInt32 TIASurface::getRGBPhosphor(uInt32 c, uInt32 p) const inline uInt32 TIASurface::getRGBPhosphor(const uInt32 c, const uInt32 p) const
{ {
#define TO_RGB(color, red, green, blue) \ #define TO_RGB(color, red, green, blue) \
const uInt8 red = color >> 16; const uInt8 green = color >> 8; const uInt8 blue = color; const uInt8 red = color >> 16; const uInt8 green = color >> 8; const uInt8 blue = color;
@ -325,16 +328,17 @@ void TIASurface::render()
{ {
case Filter::Normal: case Filter::Normal:
{ {
uInt8* in = myTIA->frameBuffer(); uInt8* tiaIn = myTIA->frameBuffer();
uInt32 bufofsY = 0, screenofsY = 0, pos = 0; uInt32 bufofs = 0, screenofsY = 0, pos;
for(uInt32 y = 0; y < height; ++y) for(uInt32 y = 0; y < height; ++y)
{ {
pos = screenofsY; pos = screenofsY;
for(uInt32 x = 0; x < width; ++x) for (uInt32 x = width / 2; x; --x)
out[pos++] = myPalette[in[bufofsY + x]]; {
out[pos++] = myPalette[tiaIn[bufofs++]];
bufofsY += width; out[pos++] = myPalette[tiaIn[bufofs++]];
}
screenofsY += outPitch; screenofsY += outPitch;
} }
break; break;
@ -345,21 +349,18 @@ void TIASurface::render()
uInt8* tiaIn = myTIA->frameBuffer(); uInt8* tiaIn = myTIA->frameBuffer();
uInt32* rgbIn = myRGBFramebuffer; uInt32* rgbIn = myRGBFramebuffer;
uInt32 bufofsY = 0, screenofsY = 0, pos = 0; uInt32 bufofs = 0, screenofsY = 0, pos;
for(uInt32 y = 0; y < height; ++y) for(uInt32 y = height; y ; --y)
{ {
pos = screenofsY; pos = screenofsY;
for(uInt32 x = 0; x < width; ++x) for(uInt32 x = width / 2; x ; --x)
{ {
const uInt32 bufofs = bufofsY + x;
const uInt8 c = tiaIn[bufofs];
const uInt32 retVal = getRGBPhosphor(myPalette[c], rgbIn[bufofs]);
// Store back into displayed frame buffer (for next frame) // Store back into displayed frame buffer (for next frame)
rgbIn[bufofs] = retVal; rgbIn[bufofs] = out[pos++] = getRGBPhosphor(myPalette[tiaIn[bufofs]], rgbIn[bufofs]);
out[pos++] = retVal; bufofs++;
rgbIn[bufofs] = out[pos++] = getRGBPhosphor(myPalette[tiaIn[bufofs]], rgbIn[bufofs]);
bufofs++;
} }
bufofsY += width;
screenofsY += outPitch; screenofsY += outPitch;
} }
break; break;
@ -373,28 +374,7 @@ void TIASurface::render()
case Filter::BlarggPhosphor: case Filter::BlarggPhosphor:
{ {
// First do Blargg filtering myNTSCFilter.render(myTIA->frameBuffer(), width, height, out, outPitch << 2, myRGBFramebuffer);
myNTSCFilter.render(myTIA->frameBuffer(), width, height, out, outPitch << 2);
// Then do phosphor mode (blend the resulting frames)
uInt32* rgbIn = myRGBFramebuffer;
uInt32 bufofsY = 0, screenofsY = 0, pos = 0;
for(uInt32 y = 0; y < height; ++y)
{
pos = screenofsY;
for(uInt32 x = 0; x < AtariNTSC::outWidth(kTIAW); ++x)
{
const uInt32 bufofs = bufofsY + x;
const uInt32 retVal = getRGBPhosphor(out[bufofs], rgbIn[bufofs]);
// Store back into displayed frame buffer (for next frame)
rgbIn[bufofs] = retVal;
out[pos++] = retVal;
}
bufofsY += AtariNTSC::outWidth(kTIAW);
screenofsY += outPitch;
}
break; break;
} }
} }

View File

@ -25,6 +25,8 @@ class FrameBuffer;
class FBSurface; class FBSurface;
class VideoMode; class VideoMode;
#include <thread>
#include "FrameManager.hxx" #include "FrameManager.hxx"
#include "Rect.hxx" #include "Rect.hxx"
#include "NTSCFilter.hxx" #include "NTSCFilter.hxx"
@ -134,7 +136,7 @@ class TIASurface
@return Averaged value of the two RGB colors @return Averaged value of the two RGB colors
*/ */
uInt32 getRGBPhosphor(uInt32 c, uInt32 cp) const; uInt32 getRGBPhosphor(const uInt32 c, const uInt32 cp) const;
/** /**
Enable/disable/query NTSC filtering effects. Enable/disable/query NTSC filtering effects.