diff --git a/Makefile b/Makefile index 346640e53..3bf0fc162 100644 --- a/Makefile +++ b/Makefile @@ -27,7 +27,7 @@ srcdir ?= . DEFINES := -D_GLIBCXX_USE_CXX11_ABI=1 -LDFLAGS := +LDFLAGS := -pthread INCLUDES := LIBS := OBJS := diff --git a/src/common/tv_filters/AtariNTSC.cxx b/src/common/tv_filters/AtariNTSC.cxx index 155a3b7bc..0df5a6513 100644 --- a/src/common/tv_filters/AtariNTSC.cxx +++ b/src/common/tv_filters/AtariNTSC.cxx @@ -15,6 +15,7 @@ // this file, and for a DISCLAIMER OF ALL WARRANTIES. //============================================================================ +#include #include "AtariNTSC.hxx" // blitter related @@ -34,6 +35,9 @@ void AtariNTSC::initialize(const Setup& setup, const uInt8* palette) { init(myImpl, setup); initializePalette(palette); + + myNumThreads = std::min(4u, std::thread::hardware_concurrency()); + myThreads = new std::thread[myNumThreads]; } // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -66,52 +70,189 @@ void AtariNTSC::initializePalette(const uInt8* palette) } // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -void AtariNTSC::render(const uInt8* atari_in, uInt32 in_width, - uInt32 in_height, void* rgb_out, uInt32 out_pitch) +void AtariNTSC::render(const uInt8* atari_in, const uInt32 in_width, + const uInt32 in_height, void* rgb_out, const uInt32 out_pitch) { + // Spawn the threads... + for(uInt8 i = 0; i < myNumThreads; i++) + myThreads[i] = std::thread([=] { + renderThread(atari_in, in_width, in_height, myNumThreads, i, rgb_out, out_pitch); + }); + // ...and make them join again + for(uInt8 i = 0; i < myNumThreads; i++) + myThreads[i].join(); +} + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +void AtariNTSC::render(const uInt8* atari_in, const uInt32 in_width, const uInt32 in_height, + void* rgb_out, const uInt32 out_pitch, uInt32* rgb_in) +{ + // Spawn the threads... 
+ for(uInt8 i = 0; i < myNumThreads; i++) + myThreads[i] = std::thread([=] { + renderWithPhosphorThread(atari_in, in_width, in_height, myNumThreads, i, + rgb_in, rgb_out, out_pitch); + }); + // ...and make them join again + for(uInt8 i = 0; i < myNumThreads; i++) + myThreads[i].join(); + + // Copy phosphor values into out buffer + memcpy(rgb_out, rgb_in, in_height * out_pitch); +} + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +void AtariNTSC::renderThread(const uInt8* atari_in, const uInt32 in_width, + const uInt32 in_height, const uInt32 numThreads, const uInt32 threadNum, + void* rgb_out, const uInt32 out_pitch) +{ + // Adapt parameters to thread number + const uInt32 yStart = in_height * threadNum / numThreads; + const uInt32 yEnd = in_height * (threadNum + 1) / numThreads; + atari_in += in_width * yStart; + rgb_out = static_cast(rgb_out) + out_pitch * yStart; + uInt32 const chunk_count = (in_width - 1) / PIXEL_in_chunk; - while ( in_height-- ) + + for(uInt32 y = yStart; y < yEnd; ++y) { const uInt8* line_in = atari_in; - ATARI_NTSC_BEGIN_ROW( NTSC_black, line_in[0] ); + ATARI_NTSC_BEGIN_ROW(NTSC_black, line_in[0]); uInt32* restrict line_out = static_cast(rgb_out); ++line_in; - for ( uInt32 n = chunk_count; n; --n ) + for(uInt32 n = chunk_count; n; --n) { - /* order of input and output pixels must not be altered */ - ATARI_NTSC_COLOR_IN( 0, line_in[0] ); - ATARI_NTSC_RGB_OUT_8888( 0, line_out[0] ); - ATARI_NTSC_RGB_OUT_8888( 1, line_out[1] ); - ATARI_NTSC_RGB_OUT_8888( 2, line_out[2] ); - ATARI_NTSC_RGB_OUT_8888( 3, line_out[3] ); + // order of input and output pixels must not be altered + ATARI_NTSC_COLOR_IN(0, line_in[0]); + ATARI_NTSC_RGB_OUT_8888(0, line_out[0]); + ATARI_NTSC_RGB_OUT_8888(1, line_out[1]); + ATARI_NTSC_RGB_OUT_8888(2, line_out[2]); + ATARI_NTSC_RGB_OUT_8888(3, line_out[3]); - ATARI_NTSC_COLOR_IN( 1, line_in[1] ); - ATARI_NTSC_RGB_OUT_8888( 4, line_out[4] ); - ATARI_NTSC_RGB_OUT_8888( 5, line_out[5] ); - 
ATARI_NTSC_RGB_OUT_8888( 6, line_out[6] ); + ATARI_NTSC_COLOR_IN(1, line_in[1]); + ATARI_NTSC_RGB_OUT_8888(4, line_out[4]); + ATARI_NTSC_RGB_OUT_8888(5, line_out[5]); + ATARI_NTSC_RGB_OUT_8888(6, line_out[6]); - line_in += 2; + line_in += 2; line_out += 7; } - /* finish final pixels */ - ATARI_NTSC_COLOR_IN( 0, NTSC_black ); - ATARI_NTSC_RGB_OUT_8888( 0, line_out[0] ); - ATARI_NTSC_RGB_OUT_8888( 1, line_out[1] ); - ATARI_NTSC_RGB_OUT_8888( 2, line_out[2] ); - ATARI_NTSC_RGB_OUT_8888( 3, line_out[3] ); + // finish final pixels + ATARI_NTSC_COLOR_IN(0, NTSC_black); + ATARI_NTSC_RGB_OUT_8888(0, line_out[0]); + ATARI_NTSC_RGB_OUT_8888(1, line_out[1]); + ATARI_NTSC_RGB_OUT_8888(2, line_out[2]); + ATARI_NTSC_RGB_OUT_8888(3, line_out[3]); - ATARI_NTSC_COLOR_IN( 1, NTSC_black ); - ATARI_NTSC_RGB_OUT_8888( 4, line_out[4] ); - ATARI_NTSC_RGB_OUT_8888( 5, line_out[5] ); - ATARI_NTSC_RGB_OUT_8888( 6, line_out[6] ); + ATARI_NTSC_COLOR_IN(1, NTSC_black); + ATARI_NTSC_RGB_OUT_8888(4, line_out[4]); + ATARI_NTSC_RGB_OUT_8888(5, line_out[5]); + ATARI_NTSC_RGB_OUT_8888(6, line_out[6]); atari_in += in_width; rgb_out = static_cast(rgb_out) + out_pitch; } } +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +void AtariNTSC::renderWithPhosphorThread(const uInt8* atari_in, const uInt32 in_width, + const uInt32 in_height, const uInt32 numThreads, const uInt32 threadNum, + uInt32* rgb_in, void* rgb_out, const uInt32 out_pitch) +{ + // Adapt parameters to thread number + const uInt32 yStart = in_height * threadNum / numThreads; + const uInt32 yEnd = in_height * (threadNum + 1) / numThreads; + uInt32 bufofs = AtariNTSC::outWidth(in_width) * yStart; + uInt32* out = static_cast(rgb_out); + atari_in += in_width * yStart; + rgb_out = static_cast(rgb_out) + out_pitch * yStart; + + uInt32 const chunk_count = (in_width - 1) / PIXEL_in_chunk; + + for(uInt32 y = yStart; y < yEnd; ++y) + { + const uInt8* line_in = atari_in; + ATARI_NTSC_BEGIN_ROW(NTSC_black, line_in[0]); + 
uInt32* restrict line_out = static_cast(rgb_out); + ++line_in; + + for(uInt32 n = chunk_count; n; --n) + { + // order of input and output pixels must not be altered + ATARI_NTSC_COLOR_IN(0, line_in[0]); + ATARI_NTSC_RGB_OUT_8888(0, line_out[0]); + ATARI_NTSC_RGB_OUT_8888(1, line_out[1]); + ATARI_NTSC_RGB_OUT_8888(2, line_out[2]); + ATARI_NTSC_RGB_OUT_8888(3, line_out[3]); + + ATARI_NTSC_COLOR_IN(1, line_in[1]); + ATARI_NTSC_RGB_OUT_8888(4, line_out[4]); + ATARI_NTSC_RGB_OUT_8888(5, line_out[5]); + ATARI_NTSC_RGB_OUT_8888(6, line_out[6]); + + line_in += 2; + line_out += 7; + } + + // finish final pixels + ATARI_NTSC_COLOR_IN(0, NTSC_black); + ATARI_NTSC_RGB_OUT_8888(0, line_out[0]); + ATARI_NTSC_RGB_OUT_8888(1, line_out[1]); + ATARI_NTSC_RGB_OUT_8888(2, line_out[2]); + ATARI_NTSC_RGB_OUT_8888(3, line_out[3]); + + ATARI_NTSC_COLOR_IN(1, NTSC_black); + ATARI_NTSC_RGB_OUT_8888(4, line_out[4]); + ATARI_NTSC_RGB_OUT_8888(5, line_out[5]); + ATARI_NTSC_RGB_OUT_8888(6, line_out[6]); + + // Do phosphor mode (blend the resulting frames) + // Note: The code assumes that AtariNTSC::outWidth(kTIAW) == outPitch == 560 + for (uInt32 x = AtariNTSC::outWidth(in_width) / 8; x; --x) + { + // Store back into displayed frame buffer (for next frame) + rgb_in[bufofs] = getRGBPhosphor(out[bufofs], rgb_in[bufofs]); + bufofs++; + rgb_in[bufofs] = getRGBPhosphor(out[bufofs], rgb_in[bufofs]); + bufofs++; + rgb_in[bufofs] = getRGBPhosphor(out[bufofs], rgb_in[bufofs]); + bufofs++; + rgb_in[bufofs] = getRGBPhosphor(out[bufofs], rgb_in[bufofs]); + bufofs++; + rgb_in[bufofs] = getRGBPhosphor(out[bufofs], rgb_in[bufofs]); + bufofs++; + rgb_in[bufofs] = getRGBPhosphor(out[bufofs], rgb_in[bufofs]); + bufofs++; + rgb_in[bufofs] = getRGBPhosphor(out[bufofs], rgb_in[bufofs]); + bufofs++; + rgb_in[bufofs] = getRGBPhosphor(out[bufofs], rgb_in[bufofs]); + bufofs++; + } + + atari_in += in_width; + rgb_out = static_cast(rgb_out) + out_pitch; + } +} + +// - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - +inline uInt32 AtariNTSC::getRGBPhosphor(const uInt32 c, const uInt32 p) const +{ +#define TO_RGB(color, red, green, blue) \ + const uInt8 red = color >> 16; const uInt8 green = color >> 8; const uInt8 blue = color; + + TO_RGB(c, rc, gc, bc); + TO_RGB(p, rp, gp, bp); + + // Mix current calculated frame with previous displayed frame + const uInt8 rn = myPhosphorPalette[rc][rp]; + const uInt8 gn = myPhosphorPalette[gc][gp]; + const uInt8 bn = myPhosphorPalette[bc][bp]; + + return (rn << 16) | (gn << 8) | bn; +} + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - void AtariNTSC::init(init_t& impl, const Setup& setup) { diff --git a/src/common/tv_filters/AtariNTSC.hxx b/src/common/tv_filters/AtariNTSC.hxx index f9c42696f..8bbea345e 100644 --- a/src/common/tv_filters/AtariNTSC.hxx +++ b/src/common/tv_filters/AtariNTSC.hxx @@ -80,12 +80,19 @@ class AtariNTSC void initialize(const Setup& setup, const uInt8* palette); void initializePalette(const uInt8* palette); + // Set phosphor palette, for use in Blargg + phosphor mode + void setPhosphorPalette(uInt8 palette[256][256]) { + memcpy(myPhosphorPalette, palette, 256 * 256); + } + // Filters one or more rows of pixels. Input pixels are 8-bit Atari // palette colors. // In_row_width is the number of pixels to get to the next input row. // Out_pitch is the number of *bytes* to get to the next output row. - void render(const uInt8* atari_in, uInt32 in_width, uInt32 in_height, - void* rgb_out, uInt32 out_pitch); + void render(const uInt8* atari_in, const uInt32 in_width, const uInt32 in_height, + void* rgb_out, const uInt32 out_pitch); + void render(const uInt8* atari_in, const uInt32 in_width, const uInt32 in_height, + void* rgb_out, const uInt32 out_pitch, uInt32* rgb_in); // Number of input pixels that will fit within given output width. 
// Might be rounded down slightly; use outWidth() on result to find @@ -101,6 +108,23 @@ class AtariNTSC return ((((in_width) - 1) / PIXEL_in_chunk + 1)* PIXEL_out_chunk); } + private: + // Threaded rendering + void renderThread(const uInt8* atari_in, const uInt32 in_width, + const uInt32 in_height, const uInt32 numThreads, const uInt32 threadNum, void* rgb_out, const uInt32 out_pitch); + void renderWithPhosphorThread(const uInt8* atari_in, const uInt32 in_width, + const uInt32 in_height, const uInt32 numThreads, const uInt32 threadNum, uInt32* rgb_in, void* rgb_out, const uInt32 out_pitch); + + /** + Used to calculate an averaged color for the 'phosphor' effect. + + @param c RGB Color 1 (current frame) + @param p RGB Color 2 (previous frame) + + @return Averaged value of the two RGB colors + */ + uInt32 getRGBPhosphor(const uInt32 c, const uInt32 cp) const; + private: enum { PIXEL_in_chunk = 2, // number of input pixels read per chunk @@ -138,6 +162,12 @@ class AtariNTSC #define LUMA_CUTOFF 0.20 uInt32 myColorTable[palette_size][entry_size]; + uInt8 myPhosphorPalette[256][256]; + + // Rendering threads + std::thread* myThreads; + // Number of rendering threads + uInt8 myNumThreads; struct init_t { diff --git a/src/common/tv_filters/NTSCFilter.hxx b/src/common/tv_filters/NTSCFilter.hxx index cb184337c..79bc34488 100644 --- a/src/common/tv_filters/NTSCFilter.hxx +++ b/src/common/tv_filters/NTSCFilter.hxx @@ -72,6 +72,10 @@ class NTSCFilter myNTSC.initializePalette(myTIAPalette); } + inline void setPhosphorPalette(uInt8 palette[256][256]) { + myNTSC.setPhosphorPalette(palette); + } + // The following are meant to be used strictly for toggling from the GUI string setPreset(Preset preset); @@ -110,6 +114,11 @@ class NTSCFilter { myNTSC.render(src_buf, src_width, src_height, dest_buf, dest_pitch); } + inline void render(uInt8* src_buf, uInt32 src_width, uInt32 src_height, + uInt32* dest_buf, uInt32 dest_pitch, uInt32* prev_buf) + { + myNTSC.render(src_buf, src_width, 
src_height, dest_buf, dest_pitch, prev_buf); + } private: // Convert from atari_ntsc_setup_t values to equivalent adjustables @@ -135,6 +144,7 @@ class NTSCFilter // and 128 black&white colours (PAL colour loss) // Each colour is represented by 3 bytes, in R,G,B order uInt8 myTIAPalette[AtariNTSC::palette_size * 3]; + uInt8* myPhosphorPalette; struct AdjustableTag { const char* const type; diff --git a/src/emucore/TIASurface.cxx b/src/emucore/TIASurface.cxx index 558ac5095..98ccac443 100644 --- a/src/emucore/TIASurface.cxx +++ b/src/emucore/TIASurface.cxx @@ -16,6 +16,7 @@ //============================================================================ #include +#include #include "FrameBuffer.hxx" #include "Settings.hxx" @@ -241,11 +242,13 @@ void TIASurface::enablePhosphor(bool enable, int blend) for(Int16 c = 255; c >= 0; c--) for(Int16 p = 255; p >= 0; p--) myPhosphorPalette[c][p] = getPhosphor(c, p); + + myNTSCFilter.setPhosphorPalette(myPhosphorPalette); } } // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -inline uInt32 TIASurface::getRGBPhosphor(uInt32 c, uInt32 p) const +inline uInt32 TIASurface::getRGBPhosphor(const uInt32 c, const uInt32 p) const { #define TO_RGB(color, red, green, blue) \ const uInt8 red = color >> 16; const uInt8 green = color >> 8; const uInt8 blue = color; @@ -325,16 +328,17 @@ void TIASurface::render() { case Filter::Normal: { - uInt8* in = myTIA->frameBuffer(); + uInt8* tiaIn = myTIA->frameBuffer(); - uInt32 bufofsY = 0, screenofsY = 0, pos = 0; + uInt32 bufofs = 0, screenofsY = 0, pos; for(uInt32 y = 0; y < height; ++y) { pos = screenofsY; - for(uInt32 x = 0; x < width; ++x) - out[pos++] = myPalette[in[bufofsY + x]]; - - bufofsY += width; + for (uInt32 x = width / 2; x; --x) + { + out[pos++] = myPalette[tiaIn[bufofs++]]; + out[pos++] = myPalette[tiaIn[bufofs++]]; + } screenofsY += outPitch; } break; @@ -345,21 +349,18 @@ void TIASurface::render() uInt8* tiaIn = myTIA->frameBuffer(); uInt32* rgbIn = 
myRGBFramebuffer; - uInt32 bufofsY = 0, screenofsY = 0, pos = 0; - for(uInt32 y = 0; y < height; ++y) + uInt32 bufofs = 0, screenofsY = 0, pos; + for(uInt32 y = height; y ; --y) { pos = screenofsY; - for(uInt32 x = 0; x < width; ++x) + for(uInt32 x = width / 2; x ; --x) { - const uInt32 bufofs = bufofsY + x; - const uInt8 c = tiaIn[bufofs]; - const uInt32 retVal = getRGBPhosphor(myPalette[c], rgbIn[bufofs]); - // Store back into displayed frame buffer (for next frame) - rgbIn[bufofs] = retVal; - out[pos++] = retVal; + rgbIn[bufofs] = out[pos++] = getRGBPhosphor(myPalette[tiaIn[bufofs]], rgbIn[bufofs]); + bufofs++; + rgbIn[bufofs] = out[pos++] = getRGBPhosphor(myPalette[tiaIn[bufofs]], rgbIn[bufofs]); + bufofs++; } - bufofsY += width; screenofsY += outPitch; } break; @@ -373,28 +374,7 @@ void TIASurface::render() case Filter::BlarggPhosphor: { - // First do Blargg filtering - myNTSCFilter.render(myTIA->frameBuffer(), width, height, out, outPitch << 2); - - // Then do phosphor mode (blend the resulting frames) - uInt32* rgbIn = myRGBFramebuffer; - - uInt32 bufofsY = 0, screenofsY = 0, pos = 0; - for(uInt32 y = 0; y < height; ++y) - { - pos = screenofsY; - for(uInt32 x = 0; x < AtariNTSC::outWidth(kTIAW); ++x) - { - const uInt32 bufofs = bufofsY + x; - const uInt32 retVal = getRGBPhosphor(out[bufofs], rgbIn[bufofs]); - - // Store back into displayed frame buffer (for next frame) - rgbIn[bufofs] = retVal; - out[pos++] = retVal; - } - bufofsY += AtariNTSC::outWidth(kTIAW); - screenofsY += outPitch; - } + myNTSCFilter.render(myTIA->frameBuffer(), width, height, out, outPitch << 2, myRGBFramebuffer); break; } } diff --git a/src/emucore/TIASurface.hxx b/src/emucore/TIASurface.hxx index 8f2182fe9..4ebe0b87a 100644 --- a/src/emucore/TIASurface.hxx +++ b/src/emucore/TIASurface.hxx @@ -25,6 +25,8 @@ class FrameBuffer; class FBSurface; class VideoMode; +#include + #include "FrameManager.hxx" #include "Rect.hxx" #include "NTSCFilter.hxx" @@ -134,7 +136,7 @@ class TIASurface 
@return Averaged value of the two RGB colors */ - uInt32 getRGBPhosphor(uInt32 c, uInt32 cp) const; + uInt32 getRGBPhosphor(const uInt32 c, const uInt32 cp) const; /** Enable/disable/query NTSC filtering effects.