diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp
index ffc3ac9efb..ed088e2d69 100644
--- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp
+++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp
@@ -87,7 +87,7 @@ void D3D12GSRender::upload_and_bind_scale_offset_matrix(size_t descriptorIndex)
 	// Scale offset buffer
 	// Separate constant buffer
 	void *mapped_buffer = m_buffer_data.map<void>(CD3DX12_RANGE(heap_offset, heap_offset + 512));
-	fill_scale_offset_data(mapped_buffer, true, false);
+	fill_scale_offset_data(mapped_buffer, true);
 	fill_user_clip_data((char*)mapped_buffer + 64);
 	fill_fragment_state_buffer((char *)mapped_buffer + 128, m_fragment_program);
 	m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + 512));
diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp
index 7924b52d44..ee6bdc6c7e 100644
--- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp
+++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp
@@ -787,7 +787,7 @@ bool GLGSRender::load_program()
 	auto mapping = m_scale_offset_buffer->alloc_from_heap(512, m_uniform_buffer_offset_align);
 	buf = static_cast<u8*>(mapping.first);
 	scale_offset_offset = mapping.second;
-	fill_scale_offset_data(buf, false, true);
+	fill_scale_offset_data(buf, false);
 	fill_user_clip_data((char *)buf + 64);
 
 	if (m_transform_constants_dirty)
diff --git a/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp b/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp
index 24c0aace04..0a12326197 100644
--- a/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp
+++ b/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp
@@ -375,6 +375,20 @@ void GLVertexDecompilerThread::insertMainEnd(std::stringstream & OS)
 			OS << "	front_spec_color = dst_reg2;\n";
 
 	OS << "	gl_Position = gl_Position * scaleOffsetMat;" << std::endl;
+
+	//Since our clip_space is symmetrical [-1, 1] we map it to linear space using the eqn:
+	//ln = (clip * 2) - 1 to fully utilize the 0-1 range of the depth buffer
+	//RSX matrices passed already map to the [0, 1] range but mapping to classic OGL requires that we undo this step
+	//This can be made unnecessary using the call glClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE).
+	//However, glClipControl only made it into OpenGL core in version 4.5, so this is a workaround.
+	
+	//NOTE: It is completely valid for games to use very large w values, causing the post-multiplied z to be in the hundreds
+	//It is therefore critical that this step is done post-transform and the result re-scaled by w
+	//SEE Naruto: UNS
+	
+	OS << "	float ndc_z = gl_Position.z / gl_Position.w;" << std::endl;
+	OS << "	ndc_z = (ndc_z * 2.) - 1.;" << std::endl;
+	OS << "	gl_Position.z = ndc_z * gl_Position.w;" << std::endl;
 	OS << "}" << std::endl;
 }
 
diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp
index c9b295fad4..a2805a07e5 100644
--- a/rpcs3/Emu/RSX/RSXThread.cpp
+++ b/rpcs3/Emu/RSX/RSXThread.cpp
@@ -506,7 +506,7 @@ namespace rsx
 		return "rsx::thread";
 	}
 
-	void thread::fill_scale_offset_data(void *buffer, bool flip_y, bool symmetrical_z) const
+	void thread::fill_scale_offset_data(void *buffer, bool flip_y) const
 	{
 		int clip_w = rsx::method_registers.surface_clip_width();
 		int clip_h = rsx::method_registers.surface_clip_height();
@@ -523,20 +523,6 @@ namespace rsx
 
 		float scale_z = rsx::method_registers.viewport_scale_z();
 		float offset_z = rsx::method_registers.viewport_offset_z();
-		
-		if (symmetrical_z)
-		{
-			//Since our clip_space is symetrical [-1, 1] we map it to linear space using the eqn:
-			//ln = (clip * 2) - 1 to fully utilize the 0-1 range of the depth buffer
-			//RSX matrices passed already map to the [0, 1] range but mapping to classic OGL
-			//Requires that we undo this step
-			//This can be made unnecessary using the call glClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE).
-			//However, ClipControl only made it to opengl core in ver 4.5 though, so this is a workaround.
-
-			offset_z -= 1.f;
-			scale_z *= 2.f;
-		}
-
 		float one = 1.f;
 
 		stream_vector(buffer, (u32&)scale_x, 0, 0, (u32&)offset_x);
diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h
index b91489ff7a..e44fe21faf 100644
--- a/rpcs3/Emu/RSX/RSXThread.h
+++ b/rpcs3/Emu/RSX/RSXThread.h
@@ -244,9 +244,9 @@ namespace rsx
 		/**
 		 * Fill buffer with 4x4 scale offset matrix.
 		 * Vertex shader's position is to be multiplied by this matrix.
-		 * if is_d3d is set, the matrix is modified to use d3d convention.
+		 * if flip_y is set, the matrix is modified to use d3d convention.
 		 */
-		void fill_scale_offset_data(void *buffer, bool flip_y, bool symmetrical_z) const;
+		void fill_scale_offset_data(void *buffer, bool flip_y) const;
 
 		/**
 		 * Fill buffer with user clip information
diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp
index 556f70ef4b..d95f51944e 100644
--- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp
+++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp
@@ -1492,7 +1492,7 @@ bool VKGSRender::load_program()
 	* NOTE: While VK's coord system resembles GLs, the clip volume is no longer symetrical in z
 	* Its like D3D without the flip in y (depending on how you build the spir-v)
 	*/
-	fill_scale_offset_data(buf, false, false);
+	fill_scale_offset_data(buf, false);
 	fill_user_clip_data(buf + 64);
 
 	m_uniform_buffer_ring_info.unmap();