From a5c102540f4271d1ba317305ff42c98a8223006e Mon Sep 17 00:00:00 2001 From: rogerman Date: Sun, 7 Feb 2016 01:34:52 +0000 Subject: [PATCH] Cocoa Port: - In the OpenGL blitter, use DMA texture uploads for all possible video source cases. Doing this removes a longstanding MAJOR performance bottleneck. - Native-sized video sees up to a 15% performance improvement, while higher-resolution video can see up to a 100% performance improvement!!!!! --- desmume/src/cocoa/OGLDisplayOutput.cpp | 294 +++++++++++++----- desmume/src/cocoa/OGLDisplayOutput.h | 25 +- desmume/src/cocoa/cocoa_GPU.mm | 59 +++- desmume/src/cocoa/cocoa_output.h | 29 +- desmume/src/cocoa/cocoa_output.mm | 81 ++++- .../userinterface/DisplayWindowController.mm | 56 +++- 6 files changed, 429 insertions(+), 115 deletions(-) diff --git a/desmume/src/cocoa/OGLDisplayOutput.cpp b/desmume/src/cocoa/OGLDisplayOutput.cpp index 98b6d7cf0..fc729c96d 100644 --- a/desmume/src/cocoa/OGLDisplayOutput.cpp +++ b/desmume/src/cocoa/OGLDisplayOutput.cpp @@ -1,5 +1,5 @@ /* - Copyright (C) 2014-2015 DeSmuME team + Copyright (C) 2014-2016 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -4589,6 +4589,19 @@ void OGLVideoOutput::RenderOGL() } } +void OGLVideoOutput::FinishOGL() +{ + for (size_t i = 0; i < _layerList->size(); i++) + { + OGLVideoLayer *theLayer = (*_layerList)[i]; + + if (theLayer->IsVisible()) + { + theLayer->FinishOGL(); + } + } +} + #pragma mark - OGLFilter::OGLFilter() @@ -4771,10 +4784,10 @@ GLuint OGLFilter::RunFilterOGL(GLuint srcTexID) glUseProgram(this->_program->GetProgramID()); glViewport(0, 0, this->_dstWidth, this->_dstHeight); - glClear(GL_COLOR_BUFFER_BIT); glBindTexture(GL_TEXTURE_RECTANGLE_ARB, srcTexID); glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glClear(GL_COLOR_BUFFER_BIT); glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_BYTE, 0); glBindVertexArrayDESMUME(0); @@ -4855,15 +4868,16 @@ GLuint OGLFilterDeposterize::RunFilterOGL(GLuint srcTexID) glUseProgram(this->_program->GetProgramID()); glViewport(0, 0, this->_dstWidth, this->_dstHeight); - glClear(GL_COLOR_BUFFER_BIT); - glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_TEXTURE_RECTANGLE_ARB, this->_texIntermediateID, 0); glBindTexture(GL_TEXTURE_RECTANGLE_ARB, srcTexID); glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_TEXTURE_RECTANGLE_ARB, this->_texIntermediateID, 0); + glClear(GL_COLOR_BUFFER_BIT); glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_BYTE, 0); - glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_TEXTURE_RECTANGLE_ARB, this->_texDstID, 0); glBindTexture(GL_TEXTURE_RECTANGLE_ARB, this->_texIntermediateID); + glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_TEXTURE_RECTANGLE_ARB, this->_texDstID, 0); + glClear(GL_COLOR_BUFFER_BIT); glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_BYTE, 0); glBindVertexArrayDESMUME(0); @@ -4910,8 +4924,6 @@ OGLImage::OGLImage(OGLInfo *oglInfo, GLsizei imageWidth, GLsizei imageHeight, GL glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_STORAGE_HINT_APPLE, GL_STORAGE_CACHED_APPLE); - glTextureRangeAPPLE(GL_TEXTURE_RECTANGLE_ARB, _vf->GetDstWidth() * _vf->GetDstHeight() * sizeof(uint32_t), _vfMasterDstBuffer); glBindTexture(GL_TEXTURE_RECTANGLE_ARB, _texVideoInputDataID); glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MIN_FILTER, GL_NEAREST); @@ -5566,13 +5578,7 @@ void OGLImage::SetCPUPixelScalerOGL(const VideoFilterTypeID filterID) this->_vf->SetDstBufferPtr(newMasterBuffer); glBindTexture(GL_TEXTURE_RECTANGLE_ARB, this->_texCPUFilterDstID); - glTextureRangeAPPLE(GL_TEXTURE_RECTANGLE_ARB, newDstBufferWidth * newDstBufferHeight * sizeof(uint32_t), newMasterBuffer); - glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_STORAGE_HINT_APPLE, GL_STORAGE_CACHED_APPLE); - - glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE); glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA, newDstBufferWidth, newDstBufferHeight, 0, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, newMasterBuffer); - glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE); - glBindTexture(GL_TEXTURE_RECTANGLE_ARB, 0); _vfMasterDstBuffer = newMasterBuffer; @@ -5593,7 +5599,7 @@ void OGLImage::LoadFrameOGL(const uint32_t *frameData, GLint x, GLint y, GLsizei void OGLImage::ProcessOGL() { VideoFilter *currentFilter = this->_vf; - const bool isUsingCPUPixelScaler = this->_pixelScaler != VideoFilterTypeID_None && !this->_useShaderBasedPixelScaler; + const bool isUsingCPUPixelScaler = (this->_pixelScaler != VideoFilterTypeID_None) && !this->_useShaderBasedPixelScaler; // Source if (this->_useDeposterize) @@ -6230,7 +6236,8 @@ OGLDisplayLayer::OGLDisplayLayer(OGLVideoOutput *oglVO) _vf[0] = new VideoFilter(GPU_DISPLAY_WIDTH, GPU_DISPLAY_HEIGHT, VideoFilterTypeID_None, 0); _vf[1] = new VideoFilter(GPU_DISPLAY_WIDTH, GPU_DISPLAY_HEIGHT, VideoFilterTypeID_None, 0); - _vfMasterDstBuffer = (uint32_t *)calloc(_vf[0]->GetSrcWidth() * (_vf[0]->GetSrcHeight() + _vf[1]->GetSrcHeight()), sizeof(uint32_t)); + _vfMasterDstBuffer = (uint32_t *)calloc(_vf[0]->GetDstWidth() * (_vf[0]->GetDstHeight() + _vf[1]->GetDstHeight()), sizeof(uint32_t)); + _vfMasterDstBufferSize = _vf[0]->GetDstWidth() * (_vf[0]->GetDstHeight() + _vf[1]->GetDstHeight()) * sizeof(uint32_t); _vf[0]->SetDstBufferPtr(_vfMasterDstBuffer); _vf[1]->SetDstBufferPtr(_vfMasterDstBuffer + (_vf[0]->GetDstWidth() * _vf[0]->GetDstHeight())); @@ -6248,6 +6255,15 @@ OGLDisplayLayer::OGLDisplayLayer(OGLVideoOutput *oglVO) _texLoadedHeight[0] = (GLfloat)GPU_DISPLAY_HEIGHT; _texLoadedHeight[1] = (GLfloat)GPU_DISPLAY_HEIGHT; + _videoSrcNativeBuffer[0] = NULL; + _videoSrcNativeBuffer[1] = NULL; + _videoSrcCustomBuffer[0] = NULL; + _videoSrcCustomBuffer[1] = NULL; + _videoSrcCustomBufferWidth[0] = GPU_DISPLAY_WIDTH; + _videoSrcCustomBufferWidth[1] = GPU_DISPLAY_WIDTH; + _videoSrcCustomBufferHeight[0] = GPU_DISPLAY_HEIGHT; + _videoSrcCustomBufferHeight[1] = GPU_DISPLAY_HEIGHT; + // Set up textures glGenTextures(2, _texCPUFilterDstID); glGenTextures(2, _texVideoInputDataNativeID); @@ -6260,61 +6276,49 @@ OGLDisplayLayer::OGLDisplayLayer(OGLVideoOutput *oglVO) glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_STORAGE_HINT_APPLE, GL_STORAGE_CACHED_APPLE); - glTextureRangeAPPLE(GL_TEXTURE_RECTANGLE_ARB, _vf[0]->GetDstWidth() * _vf[0]->GetDstHeight() * sizeof(uint32_t), _vf[0]->GetDstBufferPtr()); - glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE); glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA, _vf[0]->GetDstWidth(), _vf[0]->GetDstHeight(), 0, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, _vf[0]->GetDstBufferPtr()); - glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE); glBindTexture(GL_TEXTURE_RECTANGLE_ARB, _texCPUFilterDstID[1]); glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_STORAGE_HINT_APPLE, GL_STORAGE_CACHED_APPLE); - glTextureRangeAPPLE(GL_TEXTURE_RECTANGLE_ARB, _vf[1]->GetDstWidth() * _vf[1]->GetDstHeight() * sizeof(uint32_t), _vf[1]->GetDstBufferPtr()); - glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE); glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA, _vf[1]->GetDstWidth(), _vf[1]->GetDstHeight(), 0, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, _vf[1]->GetDstBufferPtr()); - glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE); glBindTexture(GL_TEXTURE_RECTANGLE_ARB, _texVideoInputDataNativeID[0]); glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE); glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA, GPU_DISPLAY_WIDTH, GPU_DISPLAY_HEIGHT, 0, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV, _vf[0]->GetSrcBufferPtr()); - glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE); glBindTexture(GL_TEXTURE_RECTANGLE_ARB, _texVideoInputDataNativeID[1]); glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE); glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA, GPU_DISPLAY_WIDTH, GPU_DISPLAY_HEIGHT, 0, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV, _vf[1]->GetSrcBufferPtr()); - glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE); glBindTexture(GL_TEXTURE_RECTANGLE_ARB, _texVideoInputDataCustomID[0]); glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE); glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA, GPU_DISPLAY_WIDTH, GPU_DISPLAY_HEIGHT, 0, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV, _vf[0]->GetSrcBufferPtr()); - glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE); glBindTexture(GL_TEXTURE_RECTANGLE_ARB, _texVideoInputDataCustomID[1]); glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE); glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA, GPU_DISPLAY_WIDTH, GPU_DISPLAY_HEIGHT, 0, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV, _vf[1]->GetSrcBufferPtr()); - glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE); glBindTexture(GL_TEXTURE_RECTANGLE_ARB, 0); + // Set up fences for DMA texture uploads + glGenFencesAPPLE(2, _fenceTexUploadNativeID); + glGenFencesAPPLE(2, _fenceTexUploadCustomID); + // Set up VBOs glGenBuffersARB(1, &_vboVertexID); glGenBuffersARB(1, &_vboTexCoordID); @@ -6418,6 +6422,9 @@ OGLDisplayLayer::~OGLDisplayLayer() _isVAOPresent = false; } + glDeleteFencesAPPLE(2, _fenceTexUploadNativeID); + glDeleteFencesAPPLE(2, _fenceTexUploadCustomID); + glDeleteBuffersARB(1, &this->_vboVertexID); glDeleteBuffersARB(1, &this->_vboTexCoordID); glDeleteBuffersARB(1, &this->_vboElementID); @@ -6450,6 +6457,7 @@ OGLDisplayLayer::~OGLDisplayLayer() delete this->_vf[0]; delete this->_vf[1]; free(_vfMasterDstBuffer); + _vfMasterDstBufferSize = 0; } void OGLDisplayLayer::UploadHQnxLUTs() @@ -6494,6 +6502,87 @@ void OGLDisplayLayer::UploadHQnxLUTs() glActiveTexture(GL_TEXTURE0); } +void OGLDisplayLayer::DetermineTextureStorageHints(GLint &videoSrcTexStorageHint, GLint &cpuFilterTexStorageHint) +{ + const bool isUsingCPUPixelScaler = (this->_pixelScaler != VideoFilterTypeID_None) && !this->_useShaderBasedPixelScaler; + videoSrcTexStorageHint = GL_STORAGE_PRIVATE_APPLE; + cpuFilterTexStorageHint = GL_STORAGE_PRIVATE_APPLE; + + glFinish(); + + if (this->_videoSrcBufferHead == NULL) + { + glTextureRangeAPPLE(GL_TEXTURE_RECTANGLE_ARB, 0, NULL); + glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE); + } + else + { + if (isUsingCPUPixelScaler && (this->_vfMasterDstBufferSize >= this->_videoSrcBufferSize)) + { + cpuFilterTexStorageHint = GL_STORAGE_SHARED_APPLE; + glTextureRangeAPPLE(GL_TEXTURE_RECTANGLE_ARB, this->_vfMasterDstBufferSize, this->_vfMasterDstBuffer); + } + else + { + videoSrcTexStorageHint = GL_STORAGE_SHARED_APPLE; + glTextureRangeAPPLE(GL_TEXTURE_RECTANGLE_ARB, this->_videoSrcBufferSize, this->_videoSrcBufferHead); + } + + glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE); + } +} + +void OGLDisplayLayer::SetVideoBuffers(const void *videoBufferHead, + const void *nativeBuffer0, + const void *nativeBuffer1, + const void *customBuffer0, const size_t customWidth0, const size_t customHeight0, + const void *customBuffer1, const size_t customWidth1, const size_t customHeight1) +{ + GLint videoSrcTexStorageHint = GL_STORAGE_PRIVATE_APPLE; + GLint cpuFilterTexStorageHint = GL_STORAGE_PRIVATE_APPLE; + + this->_videoSrcBufferHead = (uint16_t *)videoBufferHead; + this->_videoSrcBufferSize = (GPU_DISPLAY_WIDTH * GPU_DISPLAY_HEIGHT * 2 * sizeof(uint16_t)) + (customWidth0 * customHeight0 * sizeof(uint16_t)) + (customWidth1 * customHeight1 * sizeof(uint16_t)); + this->_videoSrcNativeBuffer[0] = (uint16_t *)nativeBuffer0; + this->_videoSrcNativeBuffer[1] = (uint16_t *)nativeBuffer1; + this->_videoSrcCustomBuffer[0] = (uint16_t *)customBuffer0; + this->_videoSrcCustomBuffer[1] = (uint16_t *)customBuffer1; + this->_videoSrcCustomBufferWidth[0] = customWidth0; + this->_videoSrcCustomBufferWidth[1] = customWidth1; + this->_videoSrcCustomBufferHeight[0] = customHeight0; + this->_videoSrcCustomBufferHeight[1] = customHeight1; + + this->DetermineTextureStorageHints(videoSrcTexStorageHint, cpuFilterTexStorageHint); + + glBindTexture(GL_TEXTURE_RECTANGLE_ARB, this->_texCPUFilterDstID[0]); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_STORAGE_HINT_APPLE, cpuFilterTexStorageHint); + glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA, _vf[0]->GetDstWidth(), _vf[0]->GetDstHeight(), 0, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, _vf[0]->GetDstBufferPtr()); + + glBindTexture(GL_TEXTURE_RECTANGLE_ARB, this->_texCPUFilterDstID[1]); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_STORAGE_HINT_APPLE, cpuFilterTexStorageHint); + glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA, _vf[1]->GetDstWidth(), _vf[1]->GetDstHeight(), 0, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, _vf[1]->GetDstBufferPtr()); + + glBindTexture(GL_TEXTURE_RECTANGLE_ARB, this->_texVideoInputDataNativeID[0]); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_STORAGE_HINT_APPLE, videoSrcTexStorageHint); + glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA, GPU_DISPLAY_WIDTH, GPU_DISPLAY_HEIGHT, 0, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV, this->_videoSrcNativeBuffer[0]); + + glBindTexture(GL_TEXTURE_RECTANGLE_ARB, this->_texVideoInputDataNativeID[1]); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_STORAGE_HINT_APPLE, videoSrcTexStorageHint); + glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA, GPU_DISPLAY_WIDTH, GPU_DISPLAY_HEIGHT, 0, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV, this->_videoSrcNativeBuffer[1]); + + glBindTexture(GL_TEXTURE_RECTANGLE_ARB, this->_texVideoInputDataCustomID[0]); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_STORAGE_HINT_APPLE, videoSrcTexStorageHint); + glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA, this->_videoSrcCustomBufferWidth[0], this->_videoSrcCustomBufferHeight[0], 0, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV, this->_videoSrcCustomBuffer[0]); + + glBindTexture(GL_TEXTURE_RECTANGLE_ARB, this->_texVideoInputDataCustomID[1]); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_STORAGE_HINT_APPLE, videoSrcTexStorageHint); + glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA, this->_videoSrcCustomBufferWidth[1], this->_videoSrcCustomBufferHeight[1], 0, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV, this->_videoSrcCustomBuffer[1]); + + glBindTexture(GL_TEXTURE_RECTANGLE_ARB, 0); + + glFinish(); +} + bool OGLDisplayLayer::GetFiltersPreferGPU() { return this->_filtersPreferGPU; @@ -6503,6 +6592,25 @@ void OGLDisplayLayer::SetFiltersPreferGPUOGL(bool preferGPU) { this->_filtersPreferGPU = preferGPU; this->_useShaderBasedPixelScaler = (preferGPU) ? this->SetGPUPixelScalerOGL(this->_pixelScaler) : false; + + GLint videoSrcTexStorageHint = GL_STORAGE_PRIVATE_APPLE; + GLint cpuFilterTexStorageHint = GL_STORAGE_PRIVATE_APPLE; + this->DetermineTextureStorageHints(videoSrcTexStorageHint, cpuFilterTexStorageHint); + + glBindTexture(GL_TEXTURE_RECTANGLE_ARB, this->_texCPUFilterDstID[0]); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_STORAGE_HINT_APPLE, cpuFilterTexStorageHint); + glBindTexture(GL_TEXTURE_RECTANGLE_ARB, this->_texCPUFilterDstID[1]); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_STORAGE_HINT_APPLE, cpuFilterTexStorageHint); + glBindTexture(GL_TEXTURE_RECTANGLE_ARB, this->_texVideoInputDataNativeID[0]); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_STORAGE_HINT_APPLE, videoSrcTexStorageHint); + glBindTexture(GL_TEXTURE_RECTANGLE_ARB, this->_texVideoInputDataNativeID[1]); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_STORAGE_HINT_APPLE, videoSrcTexStorageHint); + glBindTexture(GL_TEXTURE_RECTANGLE_ARB, this->_texVideoInputDataCustomID[0]); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_STORAGE_HINT_APPLE, videoSrcTexStorageHint); + glBindTexture(GL_TEXTURE_RECTANGLE_ARB, this->_texVideoInputDataCustomID[1]); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_STORAGE_HINT_APPLE, videoSrcTexStorageHint); + + glBindTexture(GL_TEXTURE_RECTANGLE_ARB, 0); } uint16_t OGLDisplayLayer::GetDisplayWidth() @@ -6520,23 +6628,6 @@ void OGLDisplayLayer::SetDisplaySize(uint16_t w, uint16_t h) this->_displayWidth = w; this->_displayHeight = h; this->GetNormalSize(this->_normalWidth, this->_normalHeight); - - uint32_t *emptyBuffer = (uint32_t *)calloc(w * h, sizeof(uint32_t)); - - glBindTexture(GL_TEXTURE_RECTANGLE_ARB, this->_texVideoInputDataCustomID[0]); - glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE); - glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA, w, h, 0, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV, emptyBuffer); - glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE); - - glBindTexture(GL_TEXTURE_RECTANGLE_ARB, this->_texVideoInputDataCustomID[1]); - glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE); - glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA, w, h, 0, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV, emptyBuffer); - glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE); - - glBindTexture(GL_TEXTURE_RECTANGLE_ARB, 0); - - free(emptyBuffer); - this->UpdateVertices(); } @@ -6708,6 +6799,8 @@ void OGLDisplayLayer::GetNormalSize(double &w, double &h) void OGLDisplayLayer::ResizeCPUPixelScalerOGL(const size_t srcWidthMain, const size_t srcHeightMain, const size_t srcWidthTouch, const size_t srcHeightTouch, const size_t scaleMultiply, const size_t scaleDivide) { + this->FinishOGL(); + const GLsizei newDstBufferWidth = (srcWidthMain + srcWidthTouch) * scaleMultiply / scaleDivide; const GLsizei newDstBufferHeight = (srcHeightMain + srcHeightTouch) * scaleMultiply / scaleDivide; @@ -6719,23 +6812,31 @@ void OGLDisplayLayer::ResizeCPUPixelScalerOGL(const size_t srcWidthMain, const s const GLsizei newDstBufferSingleWidth = srcWidthMain * scaleMultiply / scaleDivide; const GLsizei newDstBufferSingleHeight = srcHeightMain * scaleMultiply / scaleDivide; + GLint videoSrcTexStorageHint = GL_STORAGE_PRIVATE_APPLE; + GLint cpuFilterTexStorageHint = GL_STORAGE_PRIVATE_APPLE; + this->DetermineTextureStorageHints(videoSrcTexStorageHint, cpuFilterTexStorageHint); + glBindTexture(GL_TEXTURE_RECTANGLE_ARB, this->_texCPUFilterDstID[0]); - glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_STORAGE_HINT_APPLE, GL_STORAGE_CACHED_APPLE); - glTextureRangeAPPLE(GL_TEXTURE_RECTANGLE_ARB, newDstBufferSingleWidth * newDstBufferSingleHeight * sizeof(uint32_t), newMasterBuffer); - glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_STORAGE_HINT_APPLE, cpuFilterTexStorageHint); glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA, newDstBufferSingleWidth, newDstBufferSingleHeight, 0, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, newMasterBuffer); - glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE); glBindTexture(GL_TEXTURE_RECTANGLE_ARB, this->_texCPUFilterDstID[1]); - glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_STORAGE_HINT_APPLE, GL_STORAGE_CACHED_APPLE); - glTextureRangeAPPLE(GL_TEXTURE_RECTANGLE_ARB, newDstBufferSingleWidth * newDstBufferSingleHeight * sizeof(uint32_t), newMasterBuffer + (newDstBufferSingleWidth * newDstBufferSingleHeight)); - glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE); - glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA, newDstBufferSingleWidth, newDstBufferSingleHeight, 0, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, newMasterBuffer + (newDstBufferSingleWidth * newDstBufferSingleHeight)); - glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_STORAGE_HINT_APPLE, cpuFilterTexStorageHint); + glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA, srcWidthTouch * scaleMultiply / scaleDivide, srcHeightTouch * scaleMultiply / scaleDivide, 0, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, newMasterBuffer + (newDstBufferSingleWidth * newDstBufferSingleHeight)); + + glBindTexture(GL_TEXTURE_RECTANGLE_ARB, this->_texVideoInputDataNativeID[0]); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_STORAGE_HINT_APPLE, videoSrcTexStorageHint); + glBindTexture(GL_TEXTURE_RECTANGLE_ARB, this->_texVideoInputDataNativeID[1]); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_STORAGE_HINT_APPLE, videoSrcTexStorageHint); + glBindTexture(GL_TEXTURE_RECTANGLE_ARB, this->_texVideoInputDataCustomID[0]); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_STORAGE_HINT_APPLE, videoSrcTexStorageHint); + glBindTexture(GL_TEXTURE_RECTANGLE_ARB, this->_texVideoInputDataCustomID[1]); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_STORAGE_HINT_APPLE, videoSrcTexStorageHint); glBindTexture(GL_TEXTURE_RECTANGLE_ARB, 0); _vfMasterDstBuffer = newMasterBuffer; + _vfMasterDstBufferSize = newDstBufferWidth * newDstBufferHeight * sizeof(uint32_t); free(oldMasterBuffer); } @@ -7138,7 +7239,9 @@ bool OGLDisplayLayer::SetGPUPixelScalerOGL(const VideoFilterTypeID filterID) if (willUseShaderBasedPixelScaler) { + glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE); this->_shaderFilter[i]->SetScaleOGL(vfScale); + glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE); } } @@ -7162,26 +7265,28 @@ void OGLDisplayLayer::SetCPUPixelScalerOGL(const VideoFilterTypeID filterID) this->_vf[1]->ChangeFilterByID(filterID); } -void OGLDisplayLayer::LoadFrameOGL(const uint16_t *frameData0, const uint16_t *frameData1, GLsizei w0, GLsizei h0, GLsizei w1, GLsizei h1) +void OGLDisplayLayer::LoadFrameOGL(bool isMainSizeNative, bool isTouchSizeNative) { - const bool isUsingCPUPixelScaler = this->_pixelScaler != VideoFilterTypeID_None && !this->_useShaderBasedPixelScaler; - const bool loadMainScreen = (frameData0 != NULL); - const bool loadTouchScreen = (frameData1 != NULL); + const bool isUsingCPUPixelScaler = (this->_pixelScaler != VideoFilterTypeID_None) && !this->_useShaderBasedPixelScaler; + const bool loadMainScreen = (this->_displayMode == DS_DISPLAY_TYPE_MAIN) || (this->_displayMode == DS_DISPLAY_TYPE_DUAL); + const bool loadTouchScreen = (this->_displayMode == DS_DISPLAY_TYPE_TOUCH) || (this->_displayMode == DS_DISPLAY_TYPE_DUAL); - this->_isTexVideoInputDataNative[0] = ( (w0 == GPU_DISPLAY_WIDTH) && (h0 == GPU_DISPLAY_HEIGHT) ); - this->_isTexVideoInputDataNative[1] = ( (w1 == GPU_DISPLAY_WIDTH) && (h1 == GPU_DISPLAY_HEIGHT) ); - this->_texLoadedWidth[0] = (GLfloat)w0; - this->_texLoadedHeight[0] = (GLfloat)h0; - this->_texLoadedWidth[1] = (GLfloat)w1; - this->_texLoadedHeight[1] = (GLfloat)h1; + this->_isTexVideoInputDataNative[0] = isMainSizeNative; + this->_isTexVideoInputDataNative[1] = isTouchSizeNative; + this->_texLoadedWidth[0] = (this->_isTexVideoInputDataNative[0]) ? (GLfloat)GPU_DISPLAY_WIDTH : (GLfloat)this->_videoSrcCustomBufferWidth[0]; + this->_texLoadedHeight[0] = (this->_isTexVideoInputDataNative[0]) ? (GLfloat)GPU_DISPLAY_HEIGHT : (GLfloat)this->_videoSrcCustomBufferHeight[0]; + this->_texLoadedWidth[1] = (this->_isTexVideoInputDataNative[1]) ? (GLfloat)GPU_DISPLAY_WIDTH : (GLfloat)this->_videoSrcCustomBufferWidth[1]; + this->_texLoadedHeight[1] = (this->_isTexVideoInputDataNative[1]) ? (GLfloat)GPU_DISPLAY_HEIGHT : (GLfloat)this->_videoSrcCustomBufferHeight[1]; if (loadMainScreen) { if (this->_useDeposterize && this->_canUseShaderBasedFilters) { - if ( (this->_filterDeposterize[0]->GetSrcWidth() != w0) || (this->_filterDeposterize[0]->GetSrcHeight() != h0) ) + if ( (this->_filterDeposterize[0]->GetSrcWidth() != this->_texLoadedWidth[0]) || (this->_filterDeposterize[0]->GetSrcHeight() != this->_texLoadedHeight[0]) ) { - this->_filterDeposterize[0]->SetSrcSizeOGL(w0, h0); + glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE); + this->_filterDeposterize[0]->SetSrcSizeOGL(this->_texLoadedWidth[0], this->_texLoadedHeight[0]); + glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE); } } @@ -7189,21 +7294,24 @@ void OGLDisplayLayer::LoadFrameOGL(const uint16_t *frameData0, const uint16_t *f { if (!isUsingCPUPixelScaler || this->_useDeposterize) { - glBindTexture(GL_TEXTURE_RECTANGLE_ARB, this->_texVideoInputDataNativeID[0]); - glTexSubImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 0, 0, w0, h0, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, frameData0); + glTexSubImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 0, 0, GPU_DISPLAY_WIDTH, GPU_DISPLAY_HEIGHT, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, this->_videoSrcNativeBuffer[0]); glBindTexture(GL_TEXTURE_RECTANGLE_ARB, 0); + glSetFenceAPPLE(this->_fenceTexUploadNativeID[0]); + glFlush(); } else { - RGB555ToBGRA8888Buffer(frameData0, this->_vf[0]->GetSrcBufferPtr(), w0 * h0); + RGB555ToBGRA8888Buffer(this->_videoSrcNativeBuffer[0], this->_vf[0]->GetSrcBufferPtr(), GPU_DISPLAY_WIDTH * GPU_DISPLAY_HEIGHT); } } else { glBindTexture(GL_TEXTURE_RECTANGLE_ARB, this->_texVideoInputDataCustomID[0]); - glTexSubImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 0, 0, w0, h0, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, frameData0); + glTexSubImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 0, 0, this->_videoSrcCustomBufferWidth[0], this->_videoSrcCustomBufferHeight[0], GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, this->_videoSrcCustomBuffer[0]); glBindTexture(GL_TEXTURE_RECTANGLE_ARB, 0); + glSetFenceAPPLE(this->_fenceTexUploadCustomID[0]); + glFlush(); } } @@ -7211,9 +7319,11 @@ void OGLDisplayLayer::LoadFrameOGL(const uint16_t *frameData0, const uint16_t *f { if (this->_useDeposterize && this->_canUseShaderBasedFilters) { - if ( (this->_filterDeposterize[1]->GetSrcWidth() != w1) || (this->_filterDeposterize[1]->GetSrcHeight() != h1) ) + if ( (this->_filterDeposterize[1]->GetSrcWidth() != this->_texLoadedWidth[1]) || (this->_filterDeposterize[1]->GetSrcHeight() != this->_texLoadedHeight[1]) ) { - this->_filterDeposterize[1]->SetSrcSizeOGL(w1, h1); + glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE); + this->_filterDeposterize[1]->SetSrcSizeOGL(this->_texLoadedWidth[1], this->_texLoadedHeight[1]); + glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE); } } @@ -7221,28 +7331,31 @@ void OGLDisplayLayer::LoadFrameOGL(const uint16_t *frameData0, const uint16_t *f { if (!isUsingCPUPixelScaler || this->_useDeposterize) { - glBindTexture(GL_TEXTURE_RECTANGLE_ARB, this->_texVideoInputDataNativeID[1]); - glTexSubImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 0, 0, w1, h1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, frameData1); + glTexSubImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 0, 0, GPU_DISPLAY_WIDTH, GPU_DISPLAY_HEIGHT, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, this->_videoSrcNativeBuffer[1]); glBindTexture(GL_TEXTURE_RECTANGLE_ARB, 0); + glSetFenceAPPLE(this->_fenceTexUploadNativeID[1]); + glFlush(); } else { - RGB555ToBGRA8888Buffer(frameData1, this->_vf[1]->GetSrcBufferPtr(), w1 * h1); + RGB555ToBGRA8888Buffer(this->_videoSrcNativeBuffer[1], this->_vf[1]->GetSrcBufferPtr(), GPU_DISPLAY_WIDTH * GPU_DISPLAY_HEIGHT); } } else { glBindTexture(GL_TEXTURE_RECTANGLE_ARB, this->_texVideoInputDataCustomID[1]); - glTexSubImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 0, 0, w1, h1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, frameData1); + glTexSubImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 0, 0, this->_videoSrcCustomBufferWidth[1], this->_videoSrcCustomBufferHeight[1], GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, this->_videoSrcCustomBuffer[1]); glBindTexture(GL_TEXTURE_RECTANGLE_ARB, 0); + glSetFenceAPPLE(this->_fenceTexUploadCustomID[1]); + glFlush(); } } } void OGLDisplayLayer::ProcessOGL() { - const bool isUsingCPUPixelScaler = this->_pixelScaler != VideoFilterTypeID_None && !this->_useShaderBasedPixelScaler; + const bool isUsingCPUPixelScaler = (this->_pixelScaler != VideoFilterTypeID_None) && !this->_useShaderBasedPixelScaler; const int displayMode = this->GetMode(); // Source @@ -7251,8 +7364,13 @@ void OGLDisplayLayer::ProcessOGL() if (this->_useDeposterize) { + // For all shader-based filters, we need to temporarily disable GL_UNPACK_CLIENT_STORAGE_APPLE. + // Filtered images are supposed to remain on the GPU for immediate use for further GPU processing, + // so using client-backed buffers for filtered images would simply waste memory here. + glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE); texVideoSourceID[0] = this->_filterDeposterize[0]->RunFilterOGL(texVideoSourceID[0]); texVideoSourceID[1] = this->_filterDeposterize[1]->RunFilterOGL(texVideoSourceID[1]); + glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE); if (isUsingCPUPixelScaler) // Hybrid CPU/GPU-based path (may cause a performance hit on pixel download) { @@ -7281,7 +7399,10 @@ void OGLDisplayLayer::ProcessOGL() { if (this->_useShaderBasedPixelScaler) { + glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE); texVideoPixelScalerID[0] = this->_shaderFilter[0]->RunFilterOGL(texVideoSourceID[0]); + glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE); + w0 = this->_shaderFilter[0]->GetDstWidth(); h0 = this->_shaderFilter[0]->GetDstHeight(); } @@ -7292,6 +7413,8 @@ void OGLDisplayLayer::ProcessOGL() texVideoPixelScalerID[0] = this->_texCPUFilterDstID[0]; glBindTexture(GL_TEXTURE_RECTANGLE_ARB, texVideoPixelScalerID[0]); glTexSubImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 0, 0, this->_vf[0]->GetDstWidth(), this->_vf[0]->GetDstHeight(), GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, texData); + glSetFenceAPPLE(this->_fenceTexUploadNativeID[0]); + glFlush(); w0 = this->_vf[0]->GetDstWidth(); h0 = this->_vf[0]->GetDstHeight(); @@ -7304,7 +7427,10 @@ void OGLDisplayLayer::ProcessOGL() { if (this->_useShaderBasedPixelScaler) { + glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE); texVideoPixelScalerID[1] = this->_shaderFilter[1]->RunFilterOGL(texVideoSourceID[1]); + glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE); + w1 = this->_shaderFilter[1]->GetDstWidth(); h1 = this->_shaderFilter[1]->GetDstHeight(); } @@ -7315,6 +7441,8 @@ void OGLDisplayLayer::ProcessOGL() texVideoPixelScalerID[1] = this->_texCPUFilterDstID[1]; glBindTexture(GL_TEXTURE_RECTANGLE_ARB, texVideoPixelScalerID[1]); glTexSubImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 0, 0, this->_vf[1]->GetDstWidth(), this->_vf[1]->GetDstHeight(), GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, texData); + glSetFenceAPPLE(this->_fenceTexUploadNativeID[1]); + glFlush(); w1 = this->_vf[1]->GetDstWidth(); h1 = this->_vf[1]->GetDstHeight(); @@ -7381,3 +7509,9 @@ void OGLDisplayLayer::RenderOGL() // Disable vertex attributes glBindVertexArrayDESMUME(0); } + +void OGLDisplayLayer::FinishOGL() +{ + glFinishFenceAPPLE( (this->_fenceTexUploadNativeID[0]) ? this->_fenceTexUploadNativeID[0] : this->_fenceTexUploadCustomID[0] ); + glFinishFenceAPPLE( (this->_fenceTexUploadNativeID[1]) ? this->_fenceTexUploadNativeID[1] : this->_fenceTexUploadCustomID[1] ); +} diff --git a/desmume/src/cocoa/OGLDisplayOutput.h b/desmume/src/cocoa/OGLDisplayOutput.h index 1b78fa205..805aa84e2 100644 --- a/desmume/src/cocoa/OGLDisplayOutput.h +++ b/desmume/src/cocoa/OGLDisplayOutput.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2014-2015 DeSmuME team + Copyright (C) 2014-2016 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -279,6 +279,7 @@ public: virtual void ProcessOGL() = 0; virtual void RenderOGL() = 0; + virtual void FinishOGL() {}; }; typedef struct @@ -390,7 +391,18 @@ protected: GLfloat _texLoadedWidth[2]; GLfloat _texLoadedHeight[2]; + uint16_t *_videoSrcBufferHead; + size_t _videoSrcBufferSize; + uint16_t *_videoSrcNativeBuffer[2]; + uint16_t *_videoSrcCustomBuffer[2]; + GLsizei _videoSrcCustomBufferWidth[2]; + GLsizei _videoSrcCustomBufferHeight[2]; + + GLuint _fenceTexUploadNativeID[2]; + GLuint _fenceTexUploadCustomID[2]; + uint32_t *_vfMasterDstBuffer; + size_t _vfMasterDstBufferSize; VideoFilter *_vf[2]; GLuint _texCPUFilterDstID[2]; @@ -423,6 +435,7 @@ protected: GLint _uniformFinalOutputViewSize; void UploadHQnxLUTs(); + void DetermineTextureStorageHints(GLint &videoSrcTexStorageHint, GLint &cpuFilterTexStorageHint); void ResizeCPUPixelScalerOGL(const size_t srcWidthMain, const size_t srcHeightMain, const size_t srcWidthTouch, const size_t srcHeightTouch, const size_t scaleMultiply, const size_t scaleDivide); void UploadVerticesOGL(); @@ -437,6 +450,12 @@ public: OGLDisplayLayer(OGLVideoOutput *oglVO); virtual ~OGLDisplayLayer(); + void SetVideoBuffers(const void *videoBufferHead, + const void *nativeBuffer0, + const void *nativeBuffer1, + const void *customBuffer0, const size_t customWidth0, const size_t customHeight0, + const void *customBuffer1, const size_t customWidth1, const size_t customHeight1); + bool GetFiltersPreferGPU(); void SetFiltersPreferGPUOGL(bool preferGPU); @@ -465,10 +484,11 @@ public: void SetPixelScalerOGL(const int filterID); bool SetGPUPixelScalerOGL(const VideoFilterTypeID filterID); void SetCPUPixelScalerOGL(const VideoFilterTypeID filterID); - void LoadFrameOGL(const uint16_t *frameData0, const uint16_t *frameData1, GLsizei w0, GLsizei h0, GLsizei w1, GLsizei h1); + void LoadFrameOGL(bool isMainSizeNative, bool isTouchSizeNative); virtual void ProcessOGL(); virtual void RenderOGL(); + virtual void FinishOGL(); }; class OGLVideoOutput @@ -493,6 +513,7 @@ public: void ProcessOGL(); void RenderOGL(); void SetViewportSizeOGL(GLsizei w, GLsizei h); + void FinishOGL(); }; OGLInfo* OGLInfoCreate_Legacy(); diff --git a/desmume/src/cocoa/cocoa_GPU.mm b/desmume/src/cocoa/cocoa_GPU.mm index 224920219..85898d98d 100644 --- a/desmume/src/cocoa/cocoa_GPU.mm +++ b/desmume/src/cocoa/cocoa_GPU.mm @@ -1,5 +1,5 @@ /* - Copyright (C) 2013-2015 DeSmuME team + Copyright (C) 2013-2016 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -60,6 +60,9 @@ public: void Render3DLock(); void Render3DUnlock(); + void FrameFinish(); + void SetVideoBuffers(); + pthread_rwlock_t* GetFrameRWLock(); NSMutableArray* GetOutputList(); void SetOutputList(NSMutableArray *outputList, pthread_mutex_t *theMutex); @@ -181,9 +184,13 @@ public: - (void) setGpuDimensions:(NSSize)theDimensions { + gpuEvent->FrameFinish(); gpuEvent->FramebufferLockWrite(); gpuEvent->Render3DLock(); + GPU->SetCustomFramebufferSize(theDimensions.width, theDimensions.height); + + gpuEvent->SetVideoBuffers(); gpuEvent->Render3DUnlock(); gpuEvent->FramebufferUnlock(); } @@ -780,6 +787,56 @@ void GPUEventHandlerOSX::Render3DUnlock() pthread_mutex_unlock(&this->_mutex3DRender); } +void GPUEventHandlerOSX::FrameFinish() +{ +#if !defined(PORT_VERSION_OPENEMU) + if (this->_mutexOutputList != NULL) + { + pthread_mutex_lock(this->_mutexOutputList); + } + + NSMutableArray *outputList = this->_cdsOutputList; + + for (CocoaDSOutput *cdsOutput in outputList) + { + if ([cdsOutput isKindOfClass:[CocoaDSDisplay class]]) + { + [(CocoaDSDisplay *)cdsOutput finishFrame]; + } + } + + if (this->_mutexOutputList != NULL) + { + pthread_mutex_unlock(this->_mutexOutputList); + } +#endif +} + +void GPUEventHandlerOSX::SetVideoBuffers() +{ +#if !defined(PORT_VERSION_OPENEMU) + if (this->_mutexOutputList != NULL) + { + pthread_mutex_lock(this->_mutexOutputList); + } + + NSMutableArray *outputList = this->_cdsOutputList; + + for (CocoaDSOutput *cdsOutput in outputList) + { + if ([cdsOutput isKindOfClass:[CocoaDSDisplayVideo class]]) + { + [(CocoaDSDisplayVideo *)cdsOutput resetVideoBuffers]; + } + } + + if (this->_mutexOutputList != NULL) + { + pthread_mutex_unlock(this->_mutexOutputList); + } +#endif +} + pthread_rwlock_t* GPUEventHandlerOSX::GetFrameRWLock() { return &this->_rwlockFrame; diff --git a/desmume/src/cocoa/cocoa_output.h b/desmume/src/cocoa/cocoa_output.h index 0b8539dc9..1aeb49c59 100644 --- a/desmume/src/cocoa/cocoa_output.h +++ b/desmume/src/cocoa/cocoa_output.h @@ -1,6 +1,6 @@ /* Copyright (C) 2011 Roger Manuel - Copyright (C) 2011-2015 DeSmuME team + Copyright (C) 2011-2016 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -105,6 +105,7 @@ typedef struct @protocol CocoaDSDisplayDelegate @required +- (void) doFinishFrame; - (void) doDisplayModeChanged:(NSInteger)displayModeID; @optional @@ -116,12 +117,19 @@ typedef struct @required - (void) doInitVideoOutput:(NSDictionary *)properties; -- (void) doLoadVideoFrameWithMainBuffer:(const void *)mainBuffer - touchBuffer:(const void *)touchBuffer - mainWidth:(const NSInteger)mainWidth - mainHeight:(const NSInteger)mainHeight - touchWidth:(const NSInteger)touchWidth - touchHeight:(const NSInteger)touchHeight; + +- (void) doSetVideoBuffers:(const uint16_t *)videoBufferHead + nativeBuffer0:(const uint16_t *)nativeBuffer0 + nativeBuffer1:(const uint16_t *)nativeBuffer1 + customBuffer0:(const uint16_t *)customBuffer0 + customWidth0:(const size_t)customWidth0 + customHeight0:(const size_t)customHeight0 + customBuffer1:(const uint16_t *)customBuffer1 + customWidth1:(const size_t)customWidth1 + customHeight1:(const size_t)customHeight1; + +- (void) doLoadVideoFrameWithMainSizeNative:(bool)isMainSizeNative touchSizeNative:(bool)isTouchSizeNative; + - (void) doProcessVideoFrameWithInfo:(const NDSFrameInfo &)frameInfo; @optional @@ -165,6 +173,7 @@ typedef struct - (void) handleRequestScreenshot:(NSData *)fileURLStringData fileTypeData:(NSData *)fileTypeData; - (void) handleCopyToPasteboard; +- (void) finishFrame; - (void) takeFrameCount; - (void) setCPULoadAvgARM9:(uint32_t)loadAvgARM9 ARM7:(uint32_t)loadAvgARM7; - (NSImage *) image; @@ -174,7 +183,9 @@ typedef struct @interface CocoaDSDisplayVideo : CocoaDSDisplay { - + uint16_t *_videoBuffer; + uint16_t *_nativeBuffer[2]; + uint16_t *_customBuffer[2]; } - (void) handleReceiveGPUFrame; @@ -187,4 +198,6 @@ typedef struct - (void) handleChangeDisplayOrder:(NSData *)displayOrderIdData; - (void) handleChangeDisplayGap:(NSData *)displayGapScalarData; +- (void) resetVideoBuffers; + @end diff --git a/desmume/src/cocoa/cocoa_output.mm b/desmume/src/cocoa/cocoa_output.mm index 8d423f04a..9f39c8daf 100644 --- a/desmume/src/cocoa/cocoa_output.mm +++ b/desmume/src/cocoa/cocoa_output.mm @@ -1,6 +1,6 @@ /* Copyright (C) 2011 Roger Manuel - Copyright (C) 2011-2015 DeSmuME team + Copyright (C) 2011-2016 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -24,6 +24,7 @@ #include "sndOSX.h" #include "../NDSSystem.h" +#include "../common.h" #include "../GPU.h" #include "../gfx3d.h" #include "../SPU.h" @@ -675,6 +676,11 @@ [pboard setData:[screenshot TIFFRepresentationUsingCompression:NSTIFFCompressionLZW factor:1.0f] forType:NSTIFFPboardType]; } +- (void) finishFrame +{ + [(id)delegate doFinishFrame]; +} + - (void) takeFrameCount { OSSpinLockLock(&spinlockReceivedFrameIndex); @@ -770,6 +776,13 @@ return self; } + _videoBuffer = NULL; + _nativeBuffer[NDSDisplayID_Main] = NULL; + _nativeBuffer[NDSDisplayID_Touch] = NULL; + _customBuffer[NDSDisplayID_Main] = NULL; + _customBuffer[NDSDisplayID_Touch] = NULL; + [self resetVideoBuffers]; + [property setValue:[NSNumber numberWithInteger:(NSInteger)VideoFilterTypeID_None] forKey:@"videoFilterType"]; [property setValue:[CocoaVideoFilter typeStringByID:VideoFilterTypeID_None] forKey:@"videoFilterTypeString"]; @@ -778,6 +791,12 @@ - (void)dealloc { + free_aligned(_videoBuffer); + _nativeBuffer[NDSDisplayID_Main] = NULL; + _nativeBuffer[NDSDisplayID_Touch] = NULL; + _customBuffer[NDSDisplayID_Main] = NULL; + _customBuffer[NDSDisplayID_Touch] = NULL; + [super dealloc]; } @@ -861,11 +880,11 @@ - (void) handleReceiveGPUFrame { [super handleReceiveGPUFrame]; + [self finishFrame]; pthread_rwlock_rdlock(self.rwlockProducer); const NDSDisplayInfo &dispInfo = GPU->GetDisplayInfo(); - const NSInteger dispMode = [self displayMode]; const uint16_t newGpuWidth = dispInfo.customWidth; const uint16_t newGpuHeight = dispInfo.customHeight; @@ -880,17 +899,34 @@ _gpuCurrentHeight = newGpuHeight; } - void *mainFramebuffer = (dispMode == DS_DISPLAY_TYPE_MAIN || dispMode == DS_DISPLAY_TYPE_DUAL) ? dispInfo.renderedBuffer[NDSDisplayID_Main] : NULL; - void *touchFramebuffer = (dispMode == DS_DISPLAY_TYPE_TOUCH || dispMode == DS_DISPLAY_TYPE_DUAL) ? dispInfo.renderedBuffer[NDSDisplayID_Touch] : NULL; + const bool isMainSizeNative = !dispInfo.didPerformCustomRender[NDSDisplayID_Main]; + const bool isTouchSizeNative = !dispInfo.didPerformCustomRender[NDSDisplayID_Touch]; - [(id)delegate doLoadVideoFrameWithMainBuffer:mainFramebuffer - touchBuffer:touchFramebuffer - mainWidth:dispInfo.renderedWidth[NDSDisplayID_Main] - mainHeight:dispInfo.renderedHeight[NDSDisplayID_Main] - touchWidth:dispInfo.renderedWidth[NDSDisplayID_Touch] - touchHeight:dispInfo.renderedHeight[NDSDisplayID_Touch]]; + if (isMainSizeNative && isTouchSizeNative) + { + memcpy(_nativeBuffer[NDSDisplayID_Main], dispInfo.masterNativeBuffer, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * 2 * sizeof(uint16_t)); + } + else + { + if (!isMainSizeNative && !isTouchSizeNative) + { + memcpy(_customBuffer[NDSDisplayID_Main], dispInfo.masterCustomBuffer, dispInfo.customWidth * dispInfo.customHeight * 2 * sizeof(uint16_t)); + } + else if (isTouchSizeNative) + { + memcpy(_customBuffer[NDSDisplayID_Main], dispInfo.customBuffer[NDSDisplayID_Main], dispInfo.customWidth * dispInfo.customHeight * sizeof(uint16_t)); + memcpy(_nativeBuffer[NDSDisplayID_Touch], dispInfo.nativeBuffer[NDSDisplayID_Touch], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * sizeof(uint16_t)); + } + else + { + memcpy(_nativeBuffer[NDSDisplayID_Main], dispInfo.nativeBuffer[NDSDisplayID_Main], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * sizeof(uint16_t)); + memcpy(_customBuffer[NDSDisplayID_Touch], dispInfo.customBuffer[NDSDisplayID_Touch], dispInfo.customWidth * dispInfo.customHeight * sizeof(uint16_t)); + } + } pthread_rwlock_unlock(self.rwlockProducer); + + [(id)delegate doLoadVideoFrameWithMainSizeNative:isMainSizeNative touchSizeNative:isTouchSizeNative]; } - (void) handleResizeView:(NSData *)rectData @@ -968,4 +1004,29 @@ [(id)delegate doDisplayGapChanged:gapScalar]; } +- (void) resetVideoBuffers +{ + const NDSDisplayInfo &dispInfo = GPU->GetDisplayInfo(); + uint16_t *oldVideoBuffer = _videoBuffer; + uint16_t *newVideoBuffer = (uint16_t *)malloc_alignedCacheLine( ((GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT) + (dispInfo.customWidth * dispInfo.customHeight)) * 2 * sizeof(uint16_t) ); + + [(id)delegate doSetVideoBuffers:newVideoBuffer + nativeBuffer0:newVideoBuffer + nativeBuffer1:newVideoBuffer + (GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT) + customBuffer0:newVideoBuffer + (GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * 2) + customWidth0:dispInfo.customWidth + customHeight0:dispInfo.customHeight + customBuffer1:newVideoBuffer + (GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * 2) + (dispInfo.customWidth * dispInfo.customHeight) + customWidth1:dispInfo.customWidth + customHeight1:dispInfo.customHeight]; + + _videoBuffer = newVideoBuffer; + _nativeBuffer[NDSDisplayID_Main] = newVideoBuffer; + _nativeBuffer[NDSDisplayID_Touch] = newVideoBuffer + (GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); + _customBuffer[NDSDisplayID_Main] = newVideoBuffer + (GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * 2); + _customBuffer[NDSDisplayID_Touch] = newVideoBuffer + (GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * 2) + (dispInfo.customWidth * dispInfo.customHeight); + + free_aligned(oldVideoBuffer); +} + @end diff --git a/desmume/src/cocoa/userinterface/DisplayWindowController.mm b/desmume/src/cocoa/userinterface/DisplayWindowController.mm index 64c1ee3bf..5ee9cd3f8 100644 --- a/desmume/src/cocoa/userinterface/DisplayWindowController.mm +++ b/desmume/src/cocoa/userinterface/DisplayWindowController.mm @@ -1,5 +1,5 @@ /* - Copyright (C) 2013-2015 DeSmuME team + Copyright (C) 2013-2016 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -1209,6 +1209,7 @@ static std::unordered_map _screenMap; // // Set up the video output thread. cdsVideoOutput = [[CocoaDSDisplayVideo alloc] init]; [cdsVideoOutput setDelegate:view]; + [cdsVideoOutput resetVideoBuffers]; // Add the video thread to the output list. [emuControl addOutputToCore:cdsVideoOutput]; @@ -1466,15 +1467,15 @@ static std::unordered_map _screenMap; // NSString *fontPath = [[NSBundle mainBundle] pathForResource:@"SourceSansPro-Semibold" ofType:@"otf"]; oglv->GetHUDLayer()->SetFontUsingPath([fontPath cStringUsingEncoding:NSUTF8StringEncoding]); - oglv->GetDisplayLayer()->SetFiltersPreferGPUOGL(true); - oglv->GetDisplayLayer()->SetSourceDeposterize(false); - oglv->GetDisplayLayer()->SetOutputFilterOGL(OutputFilterTypeID_Bilinear); - oglv->GetDisplayLayer()->SetPixelScalerOGL(VideoFilterTypeID_None); - CGLSetCurrentContext(prevContext); - OGLDisplayLayer *displayLayer = oglv->GetDisplayLayer(); + displayLayer->SetFiltersPreferGPUOGL(true); + displayLayer->SetSourceDeposterize(false); + displayLayer->SetOutputFilterOGL(OutputFilterTypeID_Bilinear); + displayLayer->SetPixelScalerOGL(VideoFilterTypeID_None); canUseShaderBasedFilters = (displayLayer->CanUseShaderBasedFilters()) ? YES : NO; + CGLSetCurrentContext(prevContext); + _useVerticalSync = NO; spinlockIsHUDVisible = OS_SPINLOCK_INIT; @@ -2089,22 +2090,49 @@ static std::unordered_map _screenMap; // // No init needed, so do nothing. } -- (void) doLoadVideoFrameWithMainBuffer:(const void *)mainBuffer - touchBuffer:(const void *)touchBuffer - mainWidth:(const NSInteger)mainWidth - mainHeight:(const NSInteger)mainHeight - touchWidth:(const NSInteger)touchWidth - touchHeight:(const NSInteger)touchHeight +- (void)doLoadVideoFrameWithMainSizeNative:(bool)isMainSizeNative touchSizeNative:(bool)isTouchSizeNative { OGLDisplayLayer *displayLayer = oglv->GetDisplayLayer(); CGLLockContext(cglDisplayContext); CGLSetCurrentContext(cglDisplayContext); - displayLayer->LoadFrameOGL((const uint16_t *)mainBuffer, (const uint16_t *)touchBuffer, mainWidth, mainHeight, touchWidth, touchHeight); + displayLayer->LoadFrameOGL(isMainSizeNative, isTouchSizeNative); displayLayer->ProcessOGL(); CGLUnlockContext(cglDisplayContext); } +- (void)doSetVideoBuffers:(const uint16_t *)videoBufferHead + nativeBuffer0:(const uint16_t *)nativeBuffer0 + nativeBuffer1:(const uint16_t *)nativeBuffer1 + customBuffer0:(const uint16_t *)customBuffer0 + customWidth0:(const size_t)customWidth0 + customHeight0:(const size_t)customHeight0 + customBuffer1:(const uint16_t *)customBuffer1 + customWidth1:(const size_t)customWidth1 + customHeight1:(const size_t)customHeight1 +{ + OGLDisplayLayer *displayLayer = oglv->GetDisplayLayer(); + + CGLLockContext(cglDisplayContext); + CGLSetCurrentContext(cglDisplayContext); + displayLayer->SetVideoBuffers(videoBufferHead, + nativeBuffer0, + nativeBuffer1, + customBuffer0, customWidth0, customHeight0, + customBuffer1, customWidth1, customHeight1); + CGLUnlockContext(cglDisplayContext); +} + +- (void)doFinishFrame +{ + OGLDisplayLayer *displayLayer = oglv->GetDisplayLayer(); + + CGLLockContext(cglDisplayContext); + CGLSetCurrentContext(cglDisplayContext); + displayLayer->FinishOGL(); + CGLUnlockContext(cglDisplayContext); +} + - (void)doProcessVideoFrameWithInfo:(const NDSFrameInfo &)frameInfo { OGLHUDLayer *hudLayer = oglv->GetHUDLayer();