Protect vram pages only when needed. Use lambda for xbrz paralleliz.

Each vram page is now protected only once. Get rid of std:function and std::bind and use lambda expressions.
2019-09-30 15:31:06 +02:00 · 2019-09-30 15:31:06 +02:00 · 6ab8b185c8
parent 0595bae85b
commit 6ab8b185c8
1 changed files with 76 additions and 66 deletions
--- a/core/rend/TexCache.cpp
+++ b/core/rend/TexCache.cpp
@ -1,5 +1,3 @@
-#include <list>
-#include <functional>
 #ifndef TARGET_NO_OPENMP
 #include <omp.h>
 #endif
@ -132,17 +130,17 @@ VArray2 vram;  // vram 32-64b
 //
 void vramlock_list_remove(vram_block* block)
 {
-	u32 base = block->start/PAGE_SIZE;
-	u32 end = block->end/PAGE_SIZE;
+	u32 base = block->start / PAGE_SIZE;
+	u32 end = block->end / PAGE_SIZE;

-	for (u32 i=base;i<=end;i++)
+	for (u32 i = base; i <= end; i++)
 	{
-		vector<vram_block*>* list=&VramLocks[i];
-		for (size_t j=0;j<list->size();j++)
+		vector<vram_block*>& list = VramLocks[i];
+		for (size_t j = 0; j < list.size(); j++)
 		{
-			if ((*list)[j]==block)
+			if (list[j] == block)
 			{
-				(*list)[j]=0;
+				list[j] = nullptr;
 			}
 		}
 	}
@ -150,23 +148,31 @@ void vramlock_list_remove(vram_block* block)
 
 void vramlock_list_add(vram_block* block)
 {
-	u32 base = block->start/PAGE_SIZE;
-	u32 end = block->end/PAGE_SIZE;
+	u32 base = block->start / PAGE_SIZE;
+	u32 end = block->end / PAGE_SIZE;


-	for (u32 i=base;i<=end;i++)
+	for (u32 i = base; i <= end; i++)
 	{
-		vector<vram_block*>* list=&VramLocks[i];
-		for (u32 j=0;j<list->size();j++)
+		vector<vram_block*>& list = VramLocks[i];
+		// If the list is empty then we need to protect vram, otherwise it's already been done
+		if (list.empty())
 		{
-			if ((*list)[j]==0)
+			_vmem_protect_vram(i * PAGE_SIZE, PAGE_SIZE);
+		}
+		else
+		{
+			for (u32 j = 0; j < list.size(); j++)
 			{
-				(*list)[j]=block;
-				goto added_it;
+				if (list[j] == nullptr)
+				{
+					list[j] = block;
+					goto added_it;
+				}
 			}
 		}

-		list->push_back(block);
+		list.push_back(block);
 added_it:
 		i=i;
 	}
@ -200,10 +206,10 @@ vram_block* libCore_vramlock_Lock(u32 start_offset64,u32 end_offset64,void* user

 	{
 		vramlist_lock.Lock();
-	
-		_vmem_protect_vram(block->start, block->len);
+
+		// This also protects vram if needed
 		vramlock_list_add(block);
-		
+
 		vramlist_lock.Unlock();
 	}

@ -212,40 +218,37 @@ vram_block* libCore_vramlock_Lock(u32 start_offset64,u32 end_offset64,void* user

 bool VramLockedWriteOffset(size_t offset)
 {
-	if (offset<VRAM_SIZE)
-	{
-
-		size_t addr_hash = offset/PAGE_SIZE;
-		vector<vram_block*>* list=&VramLocks[addr_hash];
-		
-		{
-			vramlist_lock.Lock();
-
-			for (size_t i=0;i<list->size();i++)
-			{
-				if ((*list)[i])
-				{
-					libPvr_LockedBlockWrite((*list)[i],(u32)offset);
-				
-					if ((*list)[i])
-					{
-						ERROR_LOG(PVR, "Error : pvr is supposed to remove lock");
-						die("Invalid state");
-					}
-
-				}
-			}
-			list->clear();
-
-			_vmem_unprotect_vram((u32)(offset & ~PAGE_MASK), PAGE_SIZE);
-
-			vramlist_lock.Unlock();
-		}
-
-		return true;
-	}
-	else
+	if (offset >= VRAM_SIZE)
 		return false;
+
+	size_t addr_hash = offset / PAGE_SIZE;
+	vector<vram_block *>& list = VramLocks[addr_hash];
+
+	{
+		vramlist_lock.Lock();
+
+		for (size_t i = 0; i < list.size(); i++)
+		{
+			if (list[i] != nullptr)
+			{
+				libPvr_LockedBlockWrite(list[i], (u32)offset);
+
+				if (list[i] != nullptr)
+				{
+					ERROR_LOG(PVR, "Error : pvr is supposed to remove lock");
+					die("Invalid state");
+				}
+
+			}
+		}
+		list.clear();
+
+		_vmem_unprotect_vram((u32)(offset & ~PAGE_MASK), PAGE_SIZE);
+
+		vramlist_lock.Unlock();
+	}
+
+	return true;
 }

 bool VramLockedWrite(u8* address)
@ -270,7 +273,6 @@ void libCore_vramlock_Unlock_block_wb(vram_block* block)
 	if (mmu_enabled())
 		vmem32_unprotect_vram(block->start, block->len);
 	vramlock_list_remove(block);
-	//more work needed
 	free(block);
 }

@ -340,12 +342,18 @@ static void deposterizeV(u32* data, u32* out, int w, int h, int l, int u) {
 }

 #ifndef TARGET_NO_OPENMP
-void parallelize(const std::function<void(int,int)> &func, int start, int end, int width /* = 0 */)
+static inline int getThreadCount()
 {
 	int tcount = omp_get_num_procs() - 1;
 	if (tcount < 1)
 		tcount = 1;
-	tcount = min(tcount, (int)settings.pvr.MaxThreads);
+	return min(tcount, (int)settings.pvr.MaxThreads);
+}
+
+template<typename Func>
+void parallelize(Func func, int start, int end)
+{
+	int tcount = getThreadCount();
 #pragma omp parallel num_threads(tcount)
 	{
 		int num_threads = omp_get_num_threads();
@ -360,22 +368,24 @@ void parallelize(const std::function<void(int,int)> &func, int start, int end, i
 void DePosterize(u32* source, u32* dest, int width, int height) {
 	u32 *tmpbuf = (u32 *)malloc(width * height * sizeof(u32));

-	parallelize(std::bind(&deposterizeH, source, tmpbuf, width, std::placeholders::_1, std::placeholders::_2), 0, height, width);
-	parallelize(std::bind(&deposterizeV, tmpbuf, dest, width, height, std::placeholders::_1, std::placeholders::_2), 0, height, width);
-	parallelize(std::bind(&deposterizeH, dest, tmpbuf, width, std::placeholders::_1, std::placeholders::_2), 0, height, width);
-	parallelize(std::bind(&deposterizeV, tmpbuf, dest, width, height, std::placeholders::_1, std::placeholders::_2), 0, height, width);
+	parallelize([source, tmpbuf, width](int start, int end) { deposterizeH(source, tmpbuf, width, start, end); }, 0, height);
+	parallelize([tmpbuf, dest, width, height](int start, int end) { deposterizeV(tmpbuf, dest, width, height, start, end); }, 0, height);
+	parallelize([dest, tmpbuf, width](int start, int end) { deposterizeH(dest, tmpbuf, width, start, end); }, 0, height);
+	parallelize([tmpbuf, dest, width, height](int start, int end) { deposterizeV(tmpbuf, dest, width, height, start, end); }, 0, height);

 	free(tmpbuf);
 }
 #endif

-struct xbrz::ScalerCfg xbrz_cfg;
+static struct xbrz::ScalerCfg xbrz_cfg;

-void UpscalexBRZ(int factor, u32* source, u32* dest, int width, int height, bool has_alpha) {
+void UpscalexBRZ(int factor, u32* source, u32* dest, int width, int height, bool has_alpha)
+{
 #ifndef TARGET_NO_OPENMP
-	parallelize(
-			std::bind(&xbrz::scale, factor, source, dest, width, height, has_alpha ? xbrz::ColorFormat::ARGB : xbrz::ColorFormat::RGB, xbrz_cfg,
-					std::placeholders::_1, std::placeholders::_2), 0, height, width);
+	parallelize([=](int start, int end) {
+		xbrz::scale(factor, source, dest, width, height, has_alpha ? xbrz::ColorFormat::ARGB : xbrz::ColorFormat::RGB,
+				xbrz_cfg, start, end);
+	}, 0, height);
 #else
 	xbrz::scale(factor, source, dest, width, height, has_alpha ? xbrz::ColorFormat::ARGB : xbrz::ColorFormat::RGB, xbrz_cfg);
 #endif