From 560b23103cc52b92739d3c7d0b772c1a7c4a7679 Mon Sep 17 00:00:00 2001 From: rogerman Date: Wed, 5 Mar 2014 18:20:00 +0000 Subject: [PATCH] Video Filters: - When using multiple threads, ensure that all lines are accounted for when the line count isn't evenly divisible by the thread count. - Add static method VideoFilter::GetAttributesByID(). - Reallocating the destination buffers now uses its own method. Reverts the changes from r5000. - Prepare the code for the use of multi-pass filters. --- desmume/src/filter/filter.h | 4 + desmume/src/filter/videofilter.cpp | 220 +++++++++++++++++++++++------ desmume/src/filter/videofilter.h | 53 +++---- 3 files changed, 206 insertions(+), 71 deletions(-) diff --git a/desmume/src/filter/filter.h b/desmume/src/filter/filter.h index 6e0cb14d0..23ba64a6c 100644 --- a/desmume/src/filter/filter.h +++ b/desmume/src/filter/filter.h @@ -15,12 +15,16 @@ You should have received a copy of the GNU General Public License along with the this software. If not, see <http://www.gnu.org/licenses/>. 
*/ +#define FILTER_MAX_WORKING_SURFACE_COUNT 8 typedef struct { unsigned char *Surface; unsigned int Pitch; unsigned int Width, Height; + + unsigned char *workingSurface[FILTER_MAX_WORKING_SURFACE_COUNT]; + void *userData; } SSurface; void RenderNearest2X (SSurface Src, SSurface Dst); diff --git a/desmume/src/filter/videofilter.cpp b/desmume/src/filter/videofilter.cpp index b452c8ec2..bf5d18cd6 100644 --- a/desmume/src/filter/videofilter.cpp +++ b/desmume/src/filter/videofilter.cpp @@ -52,6 +52,12 @@ VideoFilter::VideoFilter(size_t srcWidth, newSurface.Pitch = srcWidth*2; newSurface.Width = srcWidth; newSurface.Height = srcHeight; + newSurface.userData = NULL; + + for (size_t i = 0; i < FILTER_MAX_WORKING_SURFACE_COUNT; i++) + { + newSurface.workingSurface[i] = NULL; + } _vfSrcSurface = newSurface; _vfDstSurface = newSurface; @@ -86,6 +92,7 @@ VideoFilter::VideoFilter(size_t srcWidth, _vfThread[i].task->start(false); } + _vfFunc = _vfAttributes.filterFunction; SetSourceSize(srcWidth, srcHeight); } @@ -117,6 +124,12 @@ VideoFilter::~VideoFilter() free(_vfDstSurface.Surface); _vfDstSurface.Surface = NULL; + for (size_t i = 0; i < _vfAttributes.workingSurfaceCount; i++) + { + free(_vfDstSurface.workingSurface[i]); + _vfDstSurface.workingSurface[i] = NULL; + } + ThreadLockUnlock(&_lockDst); free(_vfSrcSurfacePixBuffer); @@ -131,6 +144,79 @@ VideoFilter::~VideoFilter() ThreadCondDestroy(&_condRunning); } +bool VideoFilter::AllocateDstBuffer(const size_t dstWidth, const size_t dstHeight, const size_t workingSurfaceCount) +{ + bool result = false; + + // Allocate all buffers. 
+ uint32_t *newSurfaceBuffer = (uint32_t *)calloc(dstWidth * dstHeight, sizeof(uint32_t)); + if (newSurfaceBuffer == NULL) + { + return result; + } + + ThreadLockLock(&this->_lockDst); + + for (size_t i = 0; i < FILTER_MAX_WORKING_SURFACE_COUNT; i++) + { + if (i < workingSurfaceCount) + { + free(this->_vfDstSurface.workingSurface[i]); + this->_vfDstSurface.workingSurface[i] = (unsigned char *)calloc(dstWidth * dstHeight, sizeof(uint32_t)); + } + else + { + free(this->_vfDstSurface.workingSurface[i]); + this->_vfDstSurface.workingSurface[i] = NULL; + } + } + + // Set up SSurface structure. + this->_vfDstSurface.Width = dstWidth; + this->_vfDstSurface.Height = dstHeight; + this->_vfDstSurface.Pitch = dstWidth * 2; + + free(this->_vfDstSurface.Surface); + this->_vfDstSurface.Surface = (unsigned char *)newSurfaceBuffer; + + // Update the surfaces on threads. + const size_t threadCount = this->_vfThread.size(); + const unsigned int linesPerThread = (threadCount > 1) ? dstHeight/threadCount : dstHeight; + unsigned int remainingLines = dstHeight; + + for (size_t i = 0; i < threadCount; i++) + { + SSurface &threadDstSurface = this->_vfThread[i].param.dstSurface; + threadDstSurface = this->_vfDstSurface; + threadDstSurface.Height = (linesPerThread < remainingLines) ? linesPerThread : remainingLines; + remainingLines -= threadDstSurface.Height; + + // Add any remaining lines to the last thread. 
+ if (i == threadCount-1) + { + threadDstSurface.Height += remainingLines; + } + + if (i > 0) + { + SSurface &prevThreadDstSurface = this->_vfThread[i - 1].param.dstSurface; + threadDstSurface.Surface = (unsigned char *)((uint32_t *)prevThreadDstSurface.Surface + (prevThreadDstSurface.Width * prevThreadDstSurface.Height)); + + for (size_t j = 0; j < workingSurfaceCount; j++) + { + threadDstSurface.workingSurface[j] = (unsigned char *)((uint32_t *)prevThreadDstSurface.workingSurface[j] + (prevThreadDstSurface.Width * prevThreadDstSurface.Height)); + } + } + + this->_vfThread[i].param.filterFunction = this->_vfFunc; + } + + ThreadLockUnlock(&this->_lockDst); + + result = true; + return result; +} + /******************************************************************************************** SetSourceSize() @@ -161,10 +247,11 @@ bool VideoFilter::SetSourceSize(const size_t width, const size_t height) return result; } - if (this->_vfSrcSurface.Width != width || this->_vfSrcSurface.Height != height) + if (this->_vfSrcSurface.Surface == NULL || this->_vfSrcSurface.Width != width || this->_vfSrcSurface.Height != height) { sizeChanged = true; } + this->_vfSrcSurface.Width = width; this->_vfSrcSurface.Height = height; this->_vfSrcSurface.Pitch = width * 2; @@ -177,12 +264,21 @@ bool VideoFilter::SetSourceSize(const size_t width, const size_t height) // Update the surfaces on threads. size_t threadCount = this->_vfThread.size(); + const unsigned int linesPerThread = (threadCount > 1) ? this->_vfSrcSurface.Height/threadCount : this->_vfSrcSurface.Height; + unsigned int remainingLines = this->_vfSrcSurface.Height; for (size_t i = 0; i < threadCount; i++) { SSurface &threadSrcSurface = this->_vfThread[i].param.srcSurface; threadSrcSurface = this->_vfSrcSurface; - threadSrcSurface.Height /= threadCount; + threadSrcSurface.Height = (linesPerThread < remainingLines) ? 
linesPerThread : remainingLines; + remainingLines -= threadSrcSurface.Height; + + // Add any remaining lines to the last thread. + if (i == threadCount-1) + { + threadSrcSurface.Height += remainingLines; + } if (i > 0) { @@ -193,9 +289,20 @@ bool VideoFilter::SetSourceSize(const size_t width, const size_t height) ThreadLockUnlock(&this->_lockSrc); - const VideoFilterAttributes vfAttr = this->GetAttributes(); - result = this->ChangeFilterByAttributes(vfAttr, sizeChanged); + if (sizeChanged) + { + const VideoFilterAttributes vfAttr = this->GetAttributes(); + const size_t dstWidth = width * vfAttr.scaleMultiply / vfAttr.scaleDivide; + const size_t dstHeight = height * vfAttr.scaleMultiply / vfAttr.scaleDivide; + + result = this->AllocateDstBuffer(dstWidth, dstHeight, vfAttr.workingSurfaceCount); + if (!result) + { + return result; + } + } + result = true; return result; } @@ -221,7 +328,7 @@ bool VideoFilter::ChangeFilterByID(const VideoFilterTypeID typeID) return result; } - result = this->ChangeFilterByAttributes(VideoFilterAttributesList[typeID], false); + result = this->ChangeFilterByAttributes(VideoFilterAttributesList[typeID]); return result; } @@ -238,7 +345,7 @@ bool VideoFilter::ChangeFilterByID(const VideoFilterTypeID typeID) A bool that reports if the filter change was successful. A value of true means success, while a value of false means failure. 
********************************************************************************************/ -bool VideoFilter::ChangeFilterByAttributes(const VideoFilterAttributes &vfAttr, const bool forceRealloc) +bool VideoFilter::ChangeFilterByAttributes(const VideoFilterAttributes &vfAttr) { bool result = false; @@ -247,13 +354,30 @@ bool VideoFilter::ChangeFilterByAttributes(const VideoFilterAttributes &vfAttr, return result; } - if (!forceRealloc && this->_vfDstSurface.Surface != NULL && this->_vfAttributes.scaleMultiply == vfAttr.scaleMultiply && this->_vfAttributes.scaleDivide == vfAttr.scaleDivide) + ThreadLockLock(&this->_lockDst); + unsigned char *dstSurface = this->_vfDstSurface.Surface; + ThreadLockUnlock(&this->_lockDst); + + const VideoFilterAttributes currentAttr = this->GetAttributes(); + + if (dstSurface != NULL && + currentAttr.scaleMultiply == vfAttr.scaleMultiply && + currentAttr.scaleDivide == vfAttr.scaleDivide && + currentAttr.workingSurfaceCount == vfAttr.workingSurfaceCount) { - // If we have an existing buffer and the new size is identical to the old size, - // we can skip the costly construction of the buffer and simply clear it instead. + // If we have existing buffers and the new size is identical to the old size, we + // can skip the costly construction of the buffers and simply clear them instead. 
+ ThreadLockLock(&this->_lockDst); - memset(this->_vfDstSurface.Surface, 0, this->_vfDstSurface.Width * _vfDstSurface.Height * sizeof(uint32_t)); + const size_t bufferSizeBytes = this->_vfDstSurface.Width * this->_vfDstSurface.Height * sizeof(uint32_t); + + memset(this->_vfDstSurface.Surface, 0, bufferSizeBytes); + for (size_t i = 0; i < currentAttr.workingSurfaceCount; i++) + { + memset(this->_vfDstSurface.workingSurface[i], 0, bufferSizeBytes); + } + this->_vfFunc = vfAttr.filterFunction; const size_t threadCount = this->_vfThread.size(); @@ -268,48 +392,19 @@ bool VideoFilter::ChangeFilterByAttributes(const VideoFilterAttributes &vfAttr, { // Construct a new destination buffer per filter attributes. ThreadLockLock(&this->_lockSrc); - const size_t srcWidth = this->_vfSrcSurface.Width; - const size_t srcHeight = this->_vfSrcSurface.Height; + const size_t dstWidth = this->_vfSrcSurface.Width * vfAttr.scaleMultiply / vfAttr.scaleDivide; + const size_t dstHeight = this->_vfSrcSurface.Height * vfAttr.scaleMultiply / vfAttr.scaleDivide; ThreadLockUnlock(&this->_lockSrc); - const size_t dstWidth = srcWidth * vfAttr.scaleMultiply / vfAttr.scaleDivide; - const size_t dstHeight = srcHeight * vfAttr.scaleMultiply / vfAttr.scaleDivide; - const VideoFilterFunc filterFunction = vfAttr.filterFunction; - ThreadLockLock(&this->_lockDst); + this->_vfFunc = vfAttr.filterFunction; + ThreadLockUnlock(&this->_lockDst); - uint32_t *newSurfaceBuffer = (uint32_t *)calloc(dstWidth * dstHeight, sizeof(uint32_t)); - if (newSurfaceBuffer == NULL) + result = this->AllocateDstBuffer(dstWidth, dstHeight, vfAttr.workingSurfaceCount); + if (!result) { return result; } - - this->_vfFunc = filterFunction; - this->_vfDstSurface.Width = dstWidth; - this->_vfDstSurface.Height = dstHeight; - this->_vfDstSurface.Pitch = dstWidth * 2; - - free(this->_vfDstSurface.Surface); - this->_vfDstSurface.Surface = (unsigned char *)newSurfaceBuffer; - - // Update the surfaces on threads. 
- const size_t threadCount = this->_vfThread.size(); - for (size_t i = 0; i < threadCount; i++) - { - SSurface &threadDstSurface = this->_vfThread[i].param.dstSurface; - threadDstSurface = this->_vfDstSurface; - threadDstSurface.Height /= threadCount; - - if (i > 0) - { - SSurface &prevThreadDstSurface = this->_vfThread[i - 1].param.dstSurface; - threadDstSurface.Surface = (unsigned char *)((uint32_t *)prevThreadDstSurface.Surface + (prevThreadDstSurface.Width * prevThreadDstSurface.Height)); - } - - this->_vfThread[i].param.filterFunction = this->_vfFunc; - } - - ThreadLockUnlock(&this->_lockDst); } this->SetAttributes(vfAttr); @@ -477,10 +572,43 @@ void VideoFilter::RunFilterCustomByAttributes(const uint32_t *__restrict srcBuff } else { + for (size_t i = 0; i < vfAttr.workingSurfaceCount; i++) + { + dstSurface.workingSurface[i] = (unsigned char *)calloc(dstWidth * dstHeight, sizeof(uint32_t)); + } + filterFunction(srcSurface, dstSurface); + + for (size_t i = 0; i < vfAttr.workingSurfaceCount; i++) + { + free(dstSurface.workingSurface[i]); + } } } +/******************************************************************************************** + GetAttributesByID() - STATIC + + Returns the filter attributes associated with the given type ID. + + Takes: + typeID - The type ID of the video filter. See the VideoFilterTypeID + enumeration for possible values. + + Returns: + A copy of the filter attributes of the given type ID. If typeID is + invalid, this method returns the attributes of VideoFilterTypeID_None. 
+ ********************************************************************************************/ +VideoFilterAttributes VideoFilter::GetAttributesByID(const VideoFilterTypeID typeID) +{ + if (typeID >= VideoFilterTypeIDCount) + { + return VideoFilterAttributesList[VideoFilterTypeID_None]; + } + + return VideoFilterAttributesList[typeID]; +} + /******************************************************************************************** GetTypeStringByID() - STATIC @@ -595,7 +723,7 @@ size_t VideoFilter::GetDstHeight() return height; } -VideoFilterParamType VideoFilter::GetFilterParameterType(VideoFilterParamID paramID) +VideoFilterParamType VideoFilter::GetFilterParameterType(VideoFilterParamID paramID) const { return _VideoFilterParamAttributesList[paramID].type; } diff --git a/desmume/src/filter/videofilter.h b/desmume/src/filter/videofilter.h index 442234a0d..c8be4f510 100644 --- a/desmume/src/filter/videofilter.h +++ b/desmume/src/filter/videofilter.h @@ -79,33 +79,34 @@ typedef struct VideoFilterFunc filterFunction; size_t scaleMultiply; size_t scaleDivide; + size_t workingSurfaceCount; } VideoFilterAttributes; // Attributes list of known video filters, indexed using VideoFilterTypeID. 
const VideoFilterAttributes VideoFilterAttributesList[] = { - {VideoFilterTypeID_None, "None", NULL, 1, 1}, - {VideoFilterTypeID_LQ2X, "LQ2x", &RenderLQ2X, 2, 1}, - {VideoFilterTypeID_LQ2XS, "LQ2xS", &RenderLQ2XS, 2, 1}, - {VideoFilterTypeID_HQ2X, "HQ2x", &RenderHQ2X, 2, 1}, - {VideoFilterTypeID_HQ2XS, "HQ2xS", &RenderHQ2XS, 2, 1}, - {VideoFilterTypeID_HQ4X, "HQ4x", &RenderHQ4X, 4, 1}, - {VideoFilterTypeID_2xSaI, "2xSaI", &Render2xSaI, 2, 1}, - {VideoFilterTypeID_Super2xSaI, "Super 2xSaI", &RenderSuper2xSaI, 2, 1}, - {VideoFilterTypeID_SuperEagle, "Super Eagle", &RenderSuperEagle, 2, 1}, - {VideoFilterTypeID_Scanline, "Scanline", &RenderScanline, 2, 1}, - {VideoFilterTypeID_Bilinear, "Bilinear", &RenderBilinear, 2, 1}, - {VideoFilterTypeID_Nearest2X, "Nearest 2x", &RenderNearest2X, 2, 1}, - {VideoFilterTypeID_Nearest1_5X, "Nearest 1.5x", &RenderNearest_1Point5x, 3, 2}, - {VideoFilterTypeID_NearestPlus1_5X, "Nearest+ 1.5x", &RenderNearestPlus_1Point5x, 3, 2}, - {VideoFilterTypeID_EPX, "EPX", &RenderEPX, 2, 1}, - {VideoFilterTypeID_EPXPlus, "EPX+", &RenderEPXPlus, 2, 1}, - {VideoFilterTypeID_EPX1_5X, "EPX 1.5x", &RenderEPX_1Point5x, 3, 2}, - {VideoFilterTypeID_EPXPlus1_5X, "EPX+ 1.5x", &RenderEPXPlus_1Point5x, 3, 2}, - {VideoFilterTypeID_HQ4XS, "HQ4xS", &RenderHQ4XS, 4, 1}, - {VideoFilterTypeID_2xBRZ, "2xBRZ", &Render2xBRZ, 2, 1}, - {VideoFilterTypeID_3xBRZ, "3xBRZ", &Render3xBRZ, 3, 1}, - {VideoFilterTypeID_4xBRZ, "4xBRZ", &Render4xBRZ, 4, 1}, - {VideoFilterTypeID_5xBRZ, "5xBRZ", &Render5xBRZ, 5, 1} }; + {VideoFilterTypeID_None, "None", NULL, 1, 1, 0}, + {VideoFilterTypeID_LQ2X, "LQ2x", &RenderLQ2X, 2, 1, 0}, + {VideoFilterTypeID_LQ2XS, "LQ2xS", &RenderLQ2XS, 2, 1, 0}, + {VideoFilterTypeID_HQ2X, "HQ2x", &RenderHQ2X, 2, 1, 0}, + {VideoFilterTypeID_HQ2XS, "HQ2xS", &RenderHQ2XS, 2, 1, 0}, + {VideoFilterTypeID_HQ4X, "HQ4x", &RenderHQ4X, 4, 1, 0}, + {VideoFilterTypeID_2xSaI, "2xSaI", &Render2xSaI, 2, 1, 0}, + {VideoFilterTypeID_Super2xSaI, "Super 2xSaI", 
&RenderSuper2xSaI, 2, 1, 0}, + {VideoFilterTypeID_SuperEagle, "Super Eagle", &RenderSuperEagle, 2, 1, 0}, + {VideoFilterTypeID_Scanline, "Scanline", &RenderScanline, 2, 1, 0}, + {VideoFilterTypeID_Bilinear, "Bilinear", &RenderBilinear, 2, 1, 0}, + {VideoFilterTypeID_Nearest2X, "Nearest 2x", &RenderNearest2X, 2, 1, 0}, + {VideoFilterTypeID_Nearest1_5X, "Nearest 1.5x", &RenderNearest_1Point5x, 3, 2, 0}, + {VideoFilterTypeID_NearestPlus1_5X, "Nearest+ 1.5x", &RenderNearestPlus_1Point5x, 3, 2, 0}, + {VideoFilterTypeID_EPX, "EPX", &RenderEPX, 2, 1, 0}, + {VideoFilterTypeID_EPXPlus, "EPX+", &RenderEPXPlus, 2, 1, 0}, + {VideoFilterTypeID_EPX1_5X, "EPX 1.5x", &RenderEPX_1Point5x, 3, 2, 0}, + {VideoFilterTypeID_EPXPlus1_5X, "EPX+ 1.5x", &RenderEPXPlus_1Point5x, 3, 2, 0}, + {VideoFilterTypeID_HQ4XS, "HQ4xS", &RenderHQ4XS, 4, 1, 0}, + {VideoFilterTypeID_2xBRZ, "2xBRZ", &Render2xBRZ, 2, 1, 0}, + {VideoFilterTypeID_3xBRZ, "3xBRZ", &Render3xBRZ, 3, 1, 0}, + {VideoFilterTypeID_4xBRZ, "4xBRZ", &Render4xBRZ, 4, 1, 0}, + {VideoFilterTypeID_5xBRZ, "5xBRZ", &Render5xBRZ, 5, 1, 0} }; // VIDEO FILTER PARAMETER DATA TYPES enum VideoFilterParamType @@ -180,6 +181,7 @@ private: ThreadLock _lockAttributes; ThreadCond _condRunning; + bool AllocateDstBuffer(const size_t dstWidth, const size_t dstHeight, const size_t workingSurfaceCount); void SetAttributes(const VideoFilterAttributes &vfAttr); public: @@ -188,11 +190,12 @@ public: bool SetSourceSize(const size_t width, const size_t height); bool ChangeFilterByID(const VideoFilterTypeID typeID); - bool ChangeFilterByAttributes(const VideoFilterAttributes &vfAttr, const bool forceRealloc); + bool ChangeFilterByAttributes(const VideoFilterAttributes &vfAttr); uint32_t* RunFilter(); static void RunFilterCustomByID(const uint32_t *__restrict srcBuffer, uint32_t *__restrict dstBuffer, const size_t srcWidth, const size_t srcHeight, const VideoFilterTypeID typeID); static void RunFilterCustomByAttributes(const uint32_t *__restrict srcBuffer, uint32_t 
*__restrict dstBuffer, const size_t srcWidth, const size_t srcHeight, const VideoFilterAttributes &vfAttr); + static VideoFilterAttributes GetAttributesByID(const VideoFilterTypeID typeID); static const char* GetTypeStringByID(const VideoFilterTypeID typeID); VideoFilterAttributes GetAttributes(); @@ -204,7 +207,7 @@ public: size_t GetSrcHeight(); size_t GetDstWidth(); size_t GetDstHeight(); - VideoFilterParamType GetFilterParameterType(VideoFilterParamID paramID); + VideoFilterParamType GetFilterParameterType(VideoFilterParamID paramID) const; int GetFilterParameteri(VideoFilterParamID paramID); unsigned int GetFilterParameterui(VideoFilterParamID paramID); float GetFilterParameterf(VideoFilterParamID paramID);