Merge branch 'greg/mtvu-mtgs-lockless'

2017-01-18 19:12:26 +01:00 · 2017-01-18 19:12:26 +01:00 · 21612cafc1
parent 671a575216 24c780c884
commit 21612cafc1
7 changed files with 157 additions and 100 deletions
--- a/common/include/Utilities/Dependencies.h
+++ b/common/include/Utilities/Dependencies.h
@ -225,6 +225,7 @@ public:
 #include <algorithm>
 #include <memory>
 #include <atomic>
+#include <thread>

 #include "Pcsx2Defs.h"

--- a/pcsx2/Gif_Unit.h
+++ b/pcsx2/Gif_Unit.h
@ -17,6 +17,11 @@
 #include <deque>
 #include "System/SysThreads.h"
 #include "Gif.h"
+#include "GS.h"
+
+// FIXME common path ?
+#include "../plugins/GSdx/boost_spsc_queue.hpp"
+
 struct GS_Packet;
 extern void Gif_MTGS_Wait(bool isMTVU);
 extern void Gif_FinishIRQ();
@ -118,11 +123,13 @@ struct Gif_Tag {
 };

 struct GS_Packet {
+	// PERF note: this struct is copied several times in the hot path. Don't add
+	// new fields.
+
 	u32  offset;     // Path buffer offset for start of packet
 	u32  size;	     // Full size of GS-Packet
 	s32  cycles;     // EE Cycles taken to process this GS packet
 	s32  readAmount; // Dummy read-amount data needed for proper buffer calculations
-	bool done;	     // 0 = Incomplete, 1 = Complete
 	GS_Packet()  { Reset(); }
 	void Reset() { memzero(*this); }
 };
@ -147,12 +154,14 @@ static __fi void incTag(u32& offset, u32& size, u32 incAmount) {
 struct Gif_Path_MTVU {
 	u32   fakePackets; // Fake packets pending to be sent to MTGS
 	GS_Packet fakePacket;
-	Mutex gsPackMutex; // Used for atomic access to gsPackQueue
-	std::deque<GS_Packet> gsPackQueue; // VU1 programs' XGkick(s)
+	// Size the queue based on the MTGS ring buffer, but halve it to avoid wasting
+	// too much memory. Note that this struct is instantiated 3 times (once for
+	// each GIF path).
+	ringbuffer_base<GS_Packet, RingBufferSize / 2> gsPackQueue;
 	Gif_Path_MTVU() { Reset(); }
-	void Reset()    { fakePackets = 0; gsPackQueue.clear();
+	void Reset()    { fakePackets = 0;
+		gsPackQueue.reset();
 		fakePacket.Reset();
-		fakePacket.done =  1; // Fake packets don't get processed by pcsx2
 		fakePacket.size =~0u; // Used to indicate that its a fake packet
 	}
 };
@ -200,7 +209,7 @@ struct Gif_Path {
 	}

 	bool isMTVU() const           { return !idx && THREAD_VU1; }
-	s32 getReadAmount()           { return readAmount.load() + gsPack.readAmount; }
+	s32 getReadAmount()           { return readAmount.load(std::memory_order_acquire) + gsPack.readAmount; }
 	bool hasDataRemaining() const { return curOffset < curSize; }
 	bool isDone() const           { return isMTVU() ? !mtvu.fakePackets : (!hasDataRemaining() && (state == GIF_PATH_IDLE || state == GIF_PATH_WAIT)); }

@ -256,11 +265,12 @@ struct Gif_Path {
 		curSize     += size;
 	}

-	// If completed a GS packet (with EOP) then returned GS_Packet.done = 1
+	// If a GS packet was completed (with EOP), 'done' is set to true
 	// MTVU: This function only should be called called on EE thread
-	GS_Packet ExecuteGSPacket() {
+	GS_Packet ExecuteGSPacket(bool &done) {
 		if (mtvu.fakePackets) { // For MTVU mode...
 			mtvu.fakePackets--;
+			done = true;
 			return mtvu.fakePacket;
 		}
 		pxAssert(!isMTVU());
@ -309,9 +319,8 @@ struct Gif_Path {

 			if (gifTag.tag.EOP) {
 				GS_Packet t = gsPack;
-				t.done = 1;
+				done = true;

-				
 				dmaRewind = 0;
 				
 				gsPack.Reset();
@ -380,22 +389,23 @@ struct Gif_Path {

 	// MTVU: Gets called after VU1 execution on MTVU thread
 	void FinishGSPacketMTVU() {
-		if (1) {
-			ScopedLock lock(mtvu.gsPackMutex);
-			readAmount.fetch_add(gsPack.size + gsPack.readAmount);
-			mtvu.gsPackQueue.push_back(gsPack);
-		}
+		// Performance note: the atomic fetch_add below might stall the atomic
+		// operations performed inside mtvu.gsPackQueue.push
+		readAmount.fetch_add(gsPack.size + gsPack.readAmount, std::memory_order_acq_rel);
+		while (!mtvu.gsPackQueue.push(gsPack))
+			;
+
 		gsPack.Reset();
 		gsPack.offset = curOffset;
 	}

 	// MTVU: Gets called by MTGS thread
 	GS_Packet GetGSPacketMTVU() {
-		ScopedLock lock(mtvu.gsPackMutex);
-		if (mtvu.gsPackQueue.size()) {
-			GS_Packet t = mtvu.gsPackQueue[0];
-			return t; // XGkick GS packet(s)
+		// FIXME is the error path useful ?
+		if (!mtvu.gsPackQueue.empty()) {
+			return mtvu.gsPackQueue.front();
 		}
+
 		Console.Error("MTVU: Expected gsPackQueue to have elements!");
 		pxAssert(0);
 		return GS_Packet(); // gsPack.size will be 0
@ -403,18 +413,13 @@ struct Gif_Path {

 	// MTVU: Gets called by MTGS thread
 	void PopGSPacketMTVU() {
-		ScopedLock lock(mtvu.gsPackMutex);
-		if (mtvu.gsPackQueue.size()) {
-			mtvu.gsPackQueue.pop_front();
-		}
+		mtvu.gsPackQueue.pop();
 	}

 	// MTVU: Returns the amount of pending
 	// GS Packets that MTGS hasn't yet processed
 	u32 GetPendingGSPackets() {
-		ScopedLock lock(mtvu.gsPackMutex);
-		u32 t = mtvu.gsPackQueue.size();
-		return t;
+		return mtvu.gsPackQueue.size();
 	}
 };

@ -566,8 +571,9 @@ struct Gif_Unit {
 		for(;;) {
 			if (stat.APATH) { // Some Transfer is happening
 				Gif_Path& path   = gifPath[stat.APATH-1];
-				GS_Packet gsPack = path.ExecuteGSPacket();
-				if(!gsPack.done) {
+				bool done = false;
+				GS_Packet gsPack = path.ExecuteGSPacket(done);
+				if(!done) {
 					if (stat.APATH == 3 && CanDoP3Slice() && !gsSIGNAL.queued) {
 						if(!didPath3 && /*!Path3Masked() &&*/ checkPaths(1,1,0)) { // Path3 slicing
 							didPath3 = true;
--- a/pcsx2/MTGS.cpp
+++ b/pcsx2/MTGS.cpp
@ -407,7 +407,7 @@ void SysMtgsThread::ExecuteTaskInThread()
 					u32       offset = tag.data[0];
 					u32       size   = tag.data[1];
 					if (offset != ~0u) GSgifTransfer((u32*)&path.buffer[offset], size/16);
-					path.readAmount.fetch_sub(size);
+					path.readAmount.fetch_sub(size, std::memory_order_acq_rel);
 					break;
 				}

@ -421,7 +421,7 @@ void SysMtgsThread::ExecuteTaskInThread()
 					Gif_Path& path   = gifUnit.gifPath[GIF_PATH_1];
 					GS_Packet gsPack = path.GetGSPacketMTVU(); // Get vu1 program's xgkick packet(s)
 					if (gsPack.size) GSgifTransfer((u32*)&path.buffer[gsPack.offset], gsPack.size/16);
-					path.readAmount.fetch_sub(gsPack.size + gsPack.readAmount);
+					path.readAmount.fetch_sub(gsPack.size + gsPack.readAmount, std::memory_order_acq_rel);
 					path.PopGSPacketMTVU(); // Should be done last, for proper Gif_MTGS_Wait()
 					break;
 				}
--- a/pcsx2/MTVU.cpp
+++ b/pcsx2/MTVU.cpp
@ -79,11 +79,12 @@ void VU_Thread::Reset()
 {
 	ScopedLock lock(mtxBusy);

-	write_offset = 0;
 	vuCycleIdx   = 0;
-	read_pos     = 0;
 	isBusy       = false;
-	write_pos    = 0;
+	m_ato_write_pos = 0;
+	m_write_pos     = 0;
+	m_ato_read_pos  = 0;
+	m_read_pos      = 0;
 	memzero(vif);
 	memzero(vifRegs);
 	for (size_t i = 0; i < 4; ++i)
@ -102,7 +103,7 @@ void VU_Thread::ExecuteRingBuffer()
 	for(;;) {
 		semaEvent.WaitWithoutYield();
 		ScopedLockBool lock(mtxBusy, isBusy);
-		while (read_pos.load(std::memory_order_relaxed) != GetWritePos()) {
+		while (m_ato_read_pos.load(std::memory_order_relaxed) != GetWritePos()) {
 			u32 tag = Read();
 			switch (tag) {
 				case MTVU_VU_EXECUTE: {
@ -110,11 +111,12 @@ void VU_Thread::ExecuteRingBuffer()
 					s32 addr     = Read();
 					vifRegs.top  = Read();
 					vifRegs.itop = Read();
+
 					if (addr != -1) vuRegs.VI[REG_TPC].UL = addr;
 					vuCPU->Execute(vu1RunCycles);
 					gifUnit.gifPath[GIF_PATH_1].FinishGSPacketMTVU();
 					semaXGkick.Post(); // Tell MTGS a path1 packet is complete
-					vuCycles[vuCycleIdx].store(vuRegs.cycle, std::memory_order_relaxed);
+					vuCycles[vuCycleIdx].store(vuRegs.cycle, std::memory_order_release);
 					vuCycleIdx  = (vuCycleIdx + 1) & 3;
 					break;
 				}
@ -142,15 +144,17 @@ void VU_Thread::ExecuteRingBuffer()
 					Read(&vif.tag, vif_copy_size);
 					ReadRegs(&vifRegs);
 					u32 size = Read();
-					MTVU_Unpack(&buffer[read_pos.load(std::memory_order_relaxed)], vifRegs);
-					incReadPos(size_u32(size));
+					MTVU_Unpack(&buffer[m_read_pos], vifRegs);
+					m_read_pos += size_u32(size);
 					break;
 				}
 				case MTVU_NULL_PACKET:
-					read_pos.store(0, std::memory_order_release);
+					m_read_pos = 0;
 					break;
 				jNO_DEFAULT;
 			}
+
+			CommitReadPos();
 		}
 	}
 }
@ -161,12 +165,16 @@ __ri void VU_Thread::WaitOnSize(s32 size)
 {
 	for(;;) {
 		s32 readPos  = GetReadPos();
-		if (readPos <= write_pos.load(std::memory_order_relaxed)) break; // MTVU is reading in back of write_pos
-		if (readPos >  write_pos.load(std::memory_order_relaxed) + size) break; // Enough free front space
-		if (1) { // Let MTVU run to free up buffer space
+		if (readPos <= m_write_pos) break; // MTVU is reading in back of write_pos
+		if (readPos >  m_write_pos + size) break; // Enough free front space
+		{ // Let MTVU run to free up buffer space
 			KickStart();
 			if (IsDevBuild) DevCon.WriteLn("WaitOnSize()");
-			ScopedLock lock(mtxBusy);
+			// Locking might trigger a full flush of the ring buffer. Yielding
+			// is more aggressive and only flushes the minimal size needed.
+			// Performance will be smoother, but it will consume extra CPU cycles
+			// on the EE thread (not an issue on 4 cores).
+			std::this_thread::yield();
 		}
 	}
 }
@ -175,83 +183,87 @@ __ri void VU_Thread::WaitOnSize(s32 size)
 // to write a continuous 'size * sizeof(u32)' bytes
 void VU_Thread::ReserveSpace(s32 size)
 {
-	pxAssert(write_pos < buffer_size);
+	pxAssert(m_write_pos < buffer_size);
 	pxAssert(size      < buffer_size);
 	pxAssert(size > 0);
-	pxAssert(write_offset == 0);
-	if (write_pos.load(std::memory_order_relaxed) + size > buffer_size) {
-		pxAssert(write_pos > 0);
+
+	if (m_write_pos + size > (buffer_size - 1)) {
 		WaitOnSize(1); // Size of MTVU_NULL_PACKET
 		Write(MTVU_NULL_PACKET);
-		write_offset = 0;
-		write_pos.store(0, std::memory_order_release);
+		// Reset local write pointer/position
+		m_write_pos = 0;
+		CommitWritePos();
 	}
+
 	WaitOnSize(size);
 }

 // Use this when reading read_pos from ee thread
 __fi s32 VU_Thread::GetReadPos()
 {
-	return read_pos.load(std::memory_order_acquire);
+	return m_ato_read_pos.load(std::memory_order_acquire);
 }
+
 // Use this when reading write_pos from vu thread
 __fi s32 VU_Thread::GetWritePos()
 {
-	return write_pos.load(std::memory_order_acquire);
-}
-// Gets the effective write pointer after adding write_offset
-__fi u32* VU_Thread::GetWritePtr()
-{
-	return &buffer[(write_pos.load(std::memory_order_relaxed) + write_offset) & buffer_mask];
+	return m_ato_write_pos.load(std::memory_order_acquire);
 }

-__fi void VU_Thread::incReadPos(s32 offset)
-{ // Offset in u32 sizes
-	read_pos.store((read_pos.load(std::memory_order_relaxed) + offset) & buffer_mask, std::memory_order_release);
+// Gets a pointer to the buffer slot at the local write position (m_write_pos)
+__fi u32* VU_Thread::GetWritePtr()
+{
+	pxAssert(m_write_pos < buffer_size);
+	return &buffer[m_write_pos];
 }
-__fi void VU_Thread::incWritePos()
-{ // Adds write_offset
-	s32 temp = (write_pos.load(std::memory_order_relaxed) + write_offset) & buffer_mask;
-	write_offset = 0;
-	write_pos.store(temp, std::memory_order_release);
+
+__fi void VU_Thread::CommitWritePos()
+{
+	m_ato_write_pos.store(m_write_pos, std::memory_order_release);
+
 	if (MTVU_ALWAYS_KICK) KickStart();
 	if (MTVU_SYNC_MODE)   WaitVU();
 }

+__fi void VU_Thread::CommitReadPos()
+{
+	m_ato_read_pos.store(m_read_pos, std::memory_order_release);
+}
+
 __fi u32 VU_Thread::Read()
 {
-	u32 ret = buffer[read_pos.load(std::memory_order_relaxed)];
-	incReadPos(1);
+	u32 ret = buffer[m_read_pos];
+	m_read_pos++;
 	return ret;
 }

 __fi void VU_Thread::Read(void* dest, u32 size)
 {
-	memcpy(dest, &buffer[read_pos.load(std::memory_order_relaxed)], size);
-	incReadPos(size_u32(size));
+	memcpy(dest, &buffer[m_read_pos], size);
+	m_read_pos += size_u32(size);
 }

 __fi void VU_Thread::ReadRegs(VIFregisters* dest)
 {
-	VIFregistersMTVU* src = (VIFregistersMTVU*)&buffer[read_pos.load(std::memory_order_relaxed)];
+	VIFregistersMTVU* src = (VIFregistersMTVU*)&buffer[m_read_pos];
 	dest->cycle = src->cycle;
 	dest->mode = src->mode;
 	dest->num = src->num;
 	dest->mask = src->mask;
 	dest->itop = src->itop;
 	dest->top = src->top;
-	incReadPos(size_u32(sizeof(VIFregistersMTVU)));
+	m_read_pos += size_u32(sizeof(VIFregistersMTVU));
 }

 __fi void VU_Thread::Write(u32 val)
 {
 	GetWritePtr()[0] = val;
-	write_offset += 1;
+	m_write_pos += 1;
 }
 __fi void VU_Thread::Write(void* src, u32 size)
 {
 	memcpy(GetWritePtr(), src, size);
-	write_offset += size_u32(size);
+	m_write_pos += size_u32(size);
 }

 __fi void VU_Thread::WriteRegs(VIFregisters* src)
@ -263,28 +275,28 @@ __fi void VU_Thread::WriteRegs(VIFregisters* src)
 	dest->mask = src->mask;
 	dest->top = src->top;
 	dest->itop = src->itop;
-	write_offset += size_u32(sizeof(VIFregistersMTVU));
+	m_write_pos += size_u32(sizeof(VIFregistersMTVU));
 }

 // Returns Average number of vu Cycles from last 4 runs
 // Used for vu cycle stealing hack
 u32 VU_Thread::Get_vuCycles()
 {
-	return (vuCycles[0].load(std::memory_order_relaxed) +
-			vuCycles[1].load(std::memory_order_relaxed) +
-			vuCycles[2].load(std::memory_order_relaxed) +
-			vuCycles[3].load(std::memory_order_relaxed)) >> 2;
+	return (vuCycles[0].load(std::memory_order_acquire) +
+			vuCycles[1].load(std::memory_order_acquire) +
+			vuCycles[2].load(std::memory_order_acquire) +
+			vuCycles[3].load(std::memory_order_acquire)) >> 2;
 }

 void VU_Thread::KickStart(bool forceKick)
 {
 	if ((forceKick && !semaEvent.Count())
-	|| (!isBusy.load(std::memory_order_relaxed) && GetReadPos() != write_pos.load(std::memory_order_relaxed))) semaEvent.Post();
+	|| (!isBusy.load(std::memory_order_acquire) && GetReadPos() != m_ato_write_pos.load(std::memory_order_relaxed))) semaEvent.Post();
 }

 bool VU_Thread::IsDone()
 {
-	return !isBusy.load(std::memory_order_relaxed) && GetReadPos() == GetWritePos();
+	return GetReadPos() == GetWritePos();
 }

 void VU_Thread::WaitVU()
@ -295,6 +307,7 @@ void VU_Thread::WaitVU()
 		//DevCon.WriteLn("WaitVU()");
 		pxAssert(THREAD_VU1);
 		KickStart();
+		std::this_thread::yield(); // Give a chance to the MTVU thread to actually start
 		ScopedLock lock(mtxBusy);
 	}
 }
@ -307,7 +320,7 @@ void VU_Thread::ExecuteVU(u32 vu_addr, u32 vif_top, u32 vif_itop)
 	Write(vu_addr);
 	Write(vif_top);
 	Write(vif_itop);
-	incWritePos();
+	CommitWritePos();
 	gifUnit.TransferGSPacketData(GIF_TRANS_MTVU, NULL, 0);
 	KickStart();
 	u32 cycles = std::min(Get_vuCycles(), 3000u);
@ -324,7 +337,7 @@ void VU_Thread::VifUnpack(vifStruct& _vif, VIFregisters& _vifRegs, u8* data, u32
 	WriteRegs(&_vifRegs);
 	Write(size);
 	Write(data, size);
-	incWritePos();
+	CommitWritePos();
 	KickStart();
 }

@ -336,7 +349,7 @@ void VU_Thread::WriteMicroMem(u32 vu_micro_addr, void* data, u32 size)
 	Write(vu_micro_addr);
 	Write(size);
 	Write(data, size);
-	incWritePos();
+	CommitWritePos();
 }

 void VU_Thread::WriteDataMem(u32 vu_data_addr, void* data, u32 size)
@ -347,7 +360,7 @@ void VU_Thread::WriteDataMem(u32 vu_data_addr, void* data, u32 size)
 	Write(vu_data_addr);
 	Write(size);
 	Write(data, size);
-	incWritePos();
+	CommitWritePos();
 }

 void VU_Thread::WriteCol(vifStruct& _vif)
@ -356,7 +369,7 @@ void VU_Thread::WriteCol(vifStruct& _vif)
 	ReserveSpace(1 + size_u32(sizeof(_vif.MaskCol)));
 	Write(MTVU_VIF_WRITE_COL);
 	Write(&_vif.MaskCol, sizeof(_vif.MaskCol));
-	incWritePos();
+	CommitWritePos();
 }

 void VU_Thread::WriteRow(vifStruct& _vif)
@ -365,5 +378,5 @@ void VU_Thread::WriteRow(vifStruct& _vif)
 	ReserveSpace(1 + size_u32(sizeof(_vif.MaskRow)));
 	Write(MTVU_VIF_WRITE_ROW);
 	Write(&_vif.MaskRow, sizeof(_vif.MaskRow));
-	incWritePos();
+	CommitWritePos();
 }
--- a/pcsx2/MTVU.h
+++ b/pcsx2/MTVU.h
@ -28,16 +28,18 @@
 // - ring-buffer has no complete pending packets when read_pos==write_pos
 class VU_Thread : public pxThread {
 	static const s32 buffer_size = (_1mb * 16) / sizeof(s32);
-	static const u32 buffer_mask = buffer_size - 1;
-	__aligned(4) u32 buffer[buffer_size];
-	std::atomic<int> read_pos; // Only modified by VU thread
-	std::atomic<bool> isBusy;   // Is thread processing data?
-	std::atomic<int> write_pos;    // Only modified by EE thread
-	__aligned(4) s32  write_offset; // Only modified by EE thread
-	__aligned(4) Mutex     mtxBusy;
-	__aligned(4) Semaphore semaEvent;
-	__aligned(4) BaseVUmicroCPU*& vuCPU;
-	__aligned(4) VURegs&          vuRegs;
+
+	u32 buffer[buffer_size];
+	// Note: keep each atomic on a separate cache line to avoid cache-line contention between threads
+	__aligned(64) std::atomic<bool> isBusy;   // Is thread processing data?
+	__aligned(64) std::atomic<int> m_ato_read_pos; // Only modified by VU thread
+	__aligned(64) std::atomic<int> m_ato_write_pos;    // Only modified by EE thread
+	__aligned(64) int  m_read_pos; // temporary read pos (local to the VU thread)
+	int  m_write_pos; // temporary write pos (local to the EE thread)
+	Mutex     mtxBusy;
+	Semaphore semaEvent;
+	BaseVUmicroCPU*& vuCPU;
+	VURegs&          vuRegs;

 public:
 	__aligned16  vifStruct        vif;
@ -85,10 +87,11 @@ private:

 	s32 GetReadPos();
 	s32 GetWritePos();
+
 	u32* GetWritePtr();

-	void incReadPos(s32 offset);
-	void incWritePos();
+	void CommitWritePos();
+	void CommitReadPos();

 	u32 Read();
 	void Read(void* dest, u32 size);
--- a/pcsx2/SaveState.h
+++ b/pcsx2/SaveState.h
@ -24,7 +24,7 @@
 //  the lower 16 bit value.  IF the change is breaking of all compatibility with old
 //  states, increment the upper 16 bit value, and clear the lower 16 bits to 0.

-static const u32 g_SaveVersion = (0x9A0C << 16) | 0x0000;
+static const u32 g_SaveVersion = (0x9A0D << 16) | 0x0000;

 // this function is meant to be used in the place of GSfreeze, and provides a safe layer
 // between the GS saving function and the MTGS's needs. :)
--- a/plugins/GSdx/boost_spsc_queue.hpp
+++ b/plugins/GSdx/boost_spsc_queue.hpp
@ -44,6 +44,10 @@
 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 // DEALINGS IN THE SOFTWARE.

+#include <atomic>
+
+// FIXME: I don't like this using-directive in a header; it leaks std into every file that includes it
+using namespace std;

 template <typename T, size_t max_size>
 class ringbuffer_base
@ -53,6 +57,9 @@ class ringbuffer_base
    atomic<size_t> write_index_;
    char padding1[padding_size]; /* force read_index and write_index to different cache lines */
    atomic<size_t> read_index_;
+    char padding2[padding_size]; /* force read_index and pending_pop_read_index to different cache lines */
+
+    size_t pending_pop_read_index;

    T *buffer;

@ -85,11 +92,12 @@ public:
    {
        size_t ret = arg + 1;
 #if 0
+        // Initial boost code
        while (unlikely(ret >= max_size))
-#else
-        while (ret >= max_size)
-#endif
            ret -= max_size;
+#else
+        ret %= max_size;
+#endif
        return ret;
    }

@ -123,6 +131,21 @@ public:
        return true;
    }

+    T& front()
+    {
+        pending_pop_read_index = read_index_.load(memory_order_relaxed); // only written from pop thread
+
+        return buffer[pending_pop_read_index];
+    }
+
+    void pop()
+    {
+        buffer[pending_pop_read_index].~T();
+
+        size_t next = next_index(pending_pop_read_index);
+        read_index_.store(next, memory_order_release);
+    }
+
    template <typename Functor>
    bool consume_one(Functor & f)
    {
@ -169,6 +192,17 @@ public:
        return write_index_.is_lock_free() && read_index_.is_lock_free();
    }

+    size_t size() const
+    {
+        const size_t write_index =  write_index_.load(memory_order_relaxed);
+        const size_t read_index = read_index_.load(memory_order_relaxed);
+        if (read_index > write_index) {
+            return (write_index + max_size) - read_index;
+        } else {
+            return write_index - read_index;
+        }
+    }
+
 private:
    bool empty(size_t write_index, size_t read_index)
    {