SPU LLVM: More compilation threads

2024-03-07 18:52:25 +02:00 · 2024-03-07 18:52:25 +02:00 · adc8a360ad
parent 719dafa679
commit adc8a360ad
3 changed files with 48 additions and 3 deletions
--- a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp
+++ b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp
@@ -4501,6 +4501,8 @@ struct spu_llvm_worker
 		// Fake LS
 		std::vector<be_t<u32>> ls(0x10000);

+		bool set_relax_flag = false;
+
 		for (auto slice = registered.pop_all();; [&]
 		{
 			if (slice)
@@ -4513,6 +4515,12 @@ struct spu_llvm_worker
 				return;
 			}

+			if (set_relax_flag)
+			{
+				spu_thread::g_spu_work_count--;
+				set_relax_flag = false;
+			}
+
 			thread_ctrl::wait_on(utils::bless<atomic_t<u32>>(&registered)[1], 0);
 			slice = registered.pop_all();
 		}())
@@ -4534,6 +4542,12 @@ struct spu_llvm_worker
 				break;
 			}

+			if (!set_relax_flag)
+			{
+				spu_thread::g_spu_work_count++;
+				set_relax_flag = true;
+			}
+
 			const auto& func = *prog->second;

 			// Get data start
@@ -4575,12 +4589,18 @@ struct spu_llvm_worker
 			else
 			{
 				spu_log.fatal("[0x%05x] Compilation failed.", func.entry_point);
-				return;
+				break;
 			}

 			// Clear fake LS
 			std::memset(ls.data() + start / 4, 0, 4 * (size0 - 1));
 		}
+
+		if (set_relax_flag)
+		{
+			spu_thread::g_spu_work_count--;
+			set_relax_flag = false;
+		}
 	}
 };

@@ -4654,7 +4674,11 @@ struct spu_llvm

 		if (uint hc = utils::get_thread_count(); hc >= 12)
 		{
-			worker_count = hc - 10;
+			worker_count = hc - 12 + 3;
+		}
+		else if (hc >= 6)
+		{
+			worker_count = 2;
 		}

 		u32 worker_index = 0;
--- a/rpcs3/Emu/Cell/SPUThread.cpp
+++ b/rpcs3/Emu/Cell/SPUThread.cpp
@@ -5253,10 +5253,12 @@ s64 spu_thread::get_ch_value(u32 ch)
 			}
 		}

+		const u32 seed = static_cast<u32>((utils::get_tsc() >> 8) % 100); // Pseudo-random 0..99; must be an integer (bool would collapse it to 0/1 and break the percentage and modulo checks below)
+
 #ifdef __linux__
 		const bool reservation_busy_waiting = false;
 #else
-		const bool reservation_busy_waiting = ((utils::get_tsc() >> 8) % 100 + ((raddr == spurs_addr) ? 50 : 0)) < g_cfg.core.spu_reservation_busy_waiting_percentage;
+		const bool reservation_busy_waiting = (seed + ((raddr == spurs_addr) ? 50 : 0)) < g_cfg.core.spu_reservation_busy_waiting_percentage;
 #endif

 		for (; !events.count; events = get_events(mask1 & ~SPU_EVENT_LR, true, true))
@@ -5281,6 +5283,23 @@ s64 spu_thread::get_ch_value(u32 ch)
 				// Don't busy-wait with TSX - memory is sensitive
 				if (g_use_rtm || !reservation_busy_waiting)
 				{
+					if (u32 work_count = g_spu_work_count)
+					{
+						const u32 true_free = utils::sub_saturate<u32>(utils::get_thread_count(), 10);
+
+						if (work_count > true_free)
+						{
+							// SPU thread count estimation
+							const u32 thread_count = (group ? g_raw_spu_ctr + group->max_num : g_raw_spu_ctr + 3);
+
+							if (thread_count && seed % thread_count < work_count - true_free)
+							{
+								// Make the SPU wait longer for other threads to do the work
+								thread_ctrl::wait_for(200);
+								continue;
+							}
+						}
+					}
 #ifdef __linux__
 					vm::reservation_notifier(raddr).wait(rtime, atomic_wait_timeout{50'000});
 #else
@@ -6795,3 +6814,4 @@ void fmt_class_string<spu_channel_4_t>::format(std::string& out, u64 arg)

 DECLARE(spu_thread::g_raw_spu_ctr){};
 DECLARE(spu_thread::g_raw_spu_id){};
+DECLARE(spu_thread::g_spu_work_count){};
--- a/rpcs3/Emu/Cell/SPUThread.h
+++ b/rpcs3/Emu/Cell/SPUThread.h
@@ -889,6 +889,7 @@ public:

 	static atomic_t<u32> g_raw_spu_ctr;
 	static atomic_t<u32> g_raw_spu_id[5];
+	static atomic_t<u32> g_spu_work_count;

 	static u32 find_raw_spu(u32 id)
 	{