diff --git a/src/core/cpu_pgxp.cpp b/src/core/cpu_pgxp.cpp
index e18f967fd..d1c0d41eb 100644
--- a/src/core/cpu_pgxp.cpp
+++ b/src/core/cpu_pgxp.cpp
@@ -46,6 +46,8 @@ enum : u32
   VALID_X = (1u << 0),
   VALID_Y = (1u << 1),
   VALID_Z = (1u << 2),
+  VALID_TAINTED_Z = (1u << 31),
+
   VALID_XY = (VALID_X | VALID_Y),
   VALID_XYZ = (VALID_X | VALID_Y | VALID_Z),
   VALID_ALL = (VALID_X | VALID_Y | VALID_Z),
@@ -118,7 +120,7 @@ static void LogValueStr(SmallStringBase& str, const char* name, u32 rval, const
 // clang-format on
 
 static constexpr PGXP_value PGXP_value_invalid = {0.f, 0.f, 0.f, 0, 0};
-static constexpr PGXP_value PGXP_value_zero = {0.f, 0.f, 0.f, 0, VALID_ALL};
+static constexpr PGXP_value PGXP_value_zero = {0.f, 0.f, 0.f, 0, VALID_XY};
 
 static PGXP_value* s_mem = nullptr;
 static PGXP_value* s_vertex_cache = nullptr;
@@ -216,7 +218,7 @@ ALWAYS_INLINE_RELEASE void CPU::PGXP::MakeValid(PGXP_value* pV, u32 psxV)
   pV->x = static_cast<float>(static_cast<s16>(Truncate16(psxV)));
   pV->y = static_cast<float>(static_cast<s16>(Truncate16(psxV >> 16)));
   pV->z = 0.0f;
-  pV->flags = VALID_XY;
+  pV->flags = VALID_XY | VALID_TAINTED_Z;
   pV->value = psxV;
 }
 
@@ -371,16 +373,17 @@ ALWAYS_INLINE_RELEASE void CPU::PGXP::WriteMem16(const PGXP_value* src, u32 addr
 
 ALWAYS_INLINE_RELEASE void CPU::PGXP::CopyZIfMissing(PGXP_value& dst, const PGXP_value& src)
 {
-  if (dst.HasValid(COMP_Z))
-    return;
-
-  dst.z = src.z;
+  dst.z = dst.HasValid(COMP_Z) ? dst.z : src.z;
   dst.flags |= (src.flags & VALID_Z);
 }
 
 ALWAYS_INLINE_RELEASE void CPU::PGXP::SelectZ(PGXP_value& dst, const PGXP_value& src1, const PGXP_value& src2)
 {
-  dst.z = src1.HasValid(COMP_Z) ? src1.z : src2.z;
+  // Prefer src2 if src1 is missing Z, or is potentially an imprecise value, when src2 is precise.
+  dst.z = (!(src1.flags & VALID_Z) ||
+           (src1.flags & VALID_TAINTED_Z && (src2.flags & (VALID_Z | VALID_TAINTED_Z)) == VALID_Z)) ?
+            src2.z :
+            src1.z;
   dst.flags |= ((src1.flags | src2.flags) & VALID_Z);
 }
 
@@ -426,7 +429,7 @@ void CPU::PGXP::LogValueStr(SmallStringBase& str, const char* name, u32 rval, co
 
     str.append_format(", {{{},{},{}}}", val->x, val->y, val->z);
 
-    if (val->flags != 0)
+    if (val->flags & VALID_ALL)
     {
       str.append(", valid=");
       if (val->flags & VALID_X)
@@ -437,6 +440,9 @@ void CPU::PGXP::LogValueStr(SmallStringBase& str, const char* name, u32 rval, co
         str.append('Z');
     }
 
+    // if (val->flags & VALID_TAINTED_Z)
+    // str.append(", tainted");
+
     str.append(']');
   }
 }
@@ -759,6 +765,8 @@ void CPU::PGXP::CPU_ADDI(u32 instr, u32 rsVal)
   prtVal.y += (prtVal.y > SHRT_MAX) ? -(USHRT_MAX + 1) : (prtVal.y < SHRT_MIN) ? USHRT_MAX + 1 : 0.f;
 
   prtVal.value = rsVal + tempImm.d;
+
+  prtVal.flags |= VALID_TAINTED_Z;
 }
 
 void CPU::PGXP::CPU_ANDI(u32 instr, u32 rsVal)
@@ -779,13 +787,14 @@ void CPU::PGXP::CPU_ANDI(u32 instr, u32 rsVal)
   prtVal.value = rtVal;
   prtVal.y = 0.f; // remove upper 16-bits
   prtVal.SetValid(COMP_Y);
+  prtVal.flags |= VALID_TAINTED_Z;
 
   switch (imm(instr))
   {
     case 0:
       // if 0 then x == 0
-      // TODO: x should be valid here
-      prtVal.x = 0.f;
+      prtVal.x = 0.0f;
+      prtVal.SetValid(COMP_X);
       break;
     case 0xFFFF:
       // if saturated then x == x
@@ -820,6 +829,7 @@ void CPU::PGXP::CPU_ORI(u32 instr, u32 rsVal)
       // otherwise x is low precision value
       ret.x = vRt.sw.l;
       ret.SetValid(COMP_X);
+      ret.flags |= VALID_TAINTED_Z;
       break;
   }
 
@@ -849,6 +859,7 @@ void CPU::PGXP::CPU_XORI(u32 instr, u32 rsVal)
       // otherwise x is low precision value
       ret.x = vRt.sw.l;
       ret.SetValid(COMP_X);
+      ret.flags |= VALID_TAINTED_Z;
       break;
   }
 
@@ -869,6 +880,7 @@ void CPU::PGXP::CPU_SLTI(u32 instr, u32 rsVal)
   ret.y = 0.f;
   ret.x = (g_state.pgxp_gpr[rs(instr)].x < tempImm.sw.h) ? 1.f : 0.f;
   ret.SetValid(COMP_Y);
+  ret.flags |= VALID_TAINTED_Z;
   ret.value = BoolToUInt32(static_cast<s32>(rsVal) < imm_sext(instr));
 
   g_state.pgxp_gpr[rt(instr)] = ret;
@@ -887,6 +899,7 @@ void CPU::PGXP::CPU_SLTIU(u32 instr, u32 rsVal)
   ret.y = 0.f;
   ret.x = (f16Unsign(g_state.pgxp_gpr[rs(instr)].x) < tempImm.w.h) ? 1.f : 0.f;
   ret.SetValid(COMP_Y);
+  ret.flags |= VALID_TAINTED_Z;
   ret.value = BoolToUInt32(rsVal < imm(instr));
 
   g_state.pgxp_gpr[rt(instr)] = ret;
@@ -922,10 +935,12 @@ void CPU::PGXP::CPU_ADD(u32 instr, u32 rsVal, u32 rtVal)
   if (rtVal == 0)
   {
     ret = g_state.pgxp_gpr[rs(instr)];
+    CopyZIfMissing(ret, g_state.pgxp_gpr[rt(instr)]);
   }
   else if (rsVal == 0)
   {
     ret = g_state.pgxp_gpr[rt(instr)];
+    CopyZIfMissing(ret, g_state.pgxp_gpr[rs(instr)]);
   }
   else
   {
@@ -951,15 +966,8 @@ void CPU::PGXP::CPU_ADD(u32 instr, u32 rsVal, u32 rtVal)
     // truncate on overflow/underflow
     ret.y += (ret.y > SHRT_MAX) ? -(USHRT_MAX + 1) : (ret.y < SHRT_MIN) ? USHRT_MAX + 1 : 0.f;
 
-    // TODO: decide which "z/w" component to use
-
-    ret.flags &= (g_state.pgxp_gpr[rt(instr)].flags & VALID_XY) | ~VALID_XY;
-  }
-
-  if (!(ret.flags & VALID_Z) && (g_state.pgxp_gpr[rt(instr)].flags & VALID_Z))
-  {
-    ret.z = g_state.pgxp_gpr[rt(instr)].z;
-    ret.SetValid(COMP_Z);
+    SelectZ(ret, ret, g_state.pgxp_gpr[rt(instr)]);
+    ret.flags |= VALID_TAINTED_Z;
   }
 
   ret.value = rsVal + rtVal;
@@ -979,6 +987,7 @@ void CPU::PGXP::CPU_SUB(u32 instr, u32 rsVal, u32 rtVal)
   if (rtVal == 0)
   {
     ret = g_state.pgxp_gpr[rs(instr)];
+    CopyZIfMissing(ret, g_state.pgxp_gpr[rs(instr)]);
   }
   else
   {
@@ -1003,16 +1012,11 @@ void CPU::PGXP::CPU_SUB(u32 instr, u32 rsVal, u32 rtVal)
     // truncate on overflow/underflow
     ret.y += (ret.y > SHRT_MAX) ? -(USHRT_MAX + 1) : (ret.y < SHRT_MIN) ? USHRT_MAX + 1 : 0.f;
 
-    ret.flags &= (g_state.pgxp_gpr[rt(instr)].flags & VALID_XY) | ~VALID_XY;
-
-    ret.value = rsVal - rtVal;
+    SelectZ(ret, ret, g_state.pgxp_gpr[rt(instr)]);
+    ret.flags |= VALID_TAINTED_Z;
   }
 
-  if (!(ret.flags & VALID_Z) && (g_state.pgxp_gpr[rt(instr)].flags & VALID_Z))
-  {
-    ret.z = g_state.pgxp_gpr[rt(instr)].z;
-    ret.SetValid(COMP_Z);
-  }
+  ret.value = rsVal - rtVal;
 
   g_state.pgxp_gpr[rd(instr)] = ret;
 }
@@ -1040,7 +1044,7 @@ ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_BITWISE(u32 instr, u32 rdVal, u32 rsVa
   valt.d = rtVal;
 
   PGXP_value ret;
-  ret.flags = VALID_XY;
+  ret.flags = VALID_XY | VALID_TAINTED_Z;
 
   if (vald.w.l == 0)
   {
@@ -1163,6 +1167,7 @@ void CPU::PGXP::CPU_SLT(u32 instr, u32 rsVal, u32 rtVal)
   PGXP_value ret = g_state.pgxp_gpr[rs(instr)];
   ret.y = 0.f;
   ret.SetValid(COMP_Y);
+  ret.flags |= VALID_TAINTED_Z;
 
   ret.x = (g_state.pgxp_gpr[rs(instr)].y < g_state.pgxp_gpr[rt(instr)].y)                       ? 1.f :
           (f16Unsign(g_state.pgxp_gpr[rs(instr)].x) < f16Unsign(g_state.pgxp_gpr[rt(instr)].x)) ? 1.f :
@@ -1191,6 +1196,7 @@ void CPU::PGXP::CPU_SLTU(u32 instr, u32 rsVal, u32 rtVal)
   PGXP_value ret = g_state.pgxp_gpr[rs(instr)];
   ret.y = 0.f;
   ret.SetValid(COMP_Y);
+  ret.flags |= VALID_TAINTED_Z;
 
   ret.x = (f16Unsign(g_state.pgxp_gpr[rs(instr)].y) < f16Unsign(g_state.pgxp_gpr[rt(instr)].y)) ? 1.f :
           (f16Unsign(g_state.pgxp_gpr[rs(instr)].x) < f16Unsign(g_state.pgxp_gpr[rt(instr)].x)) ? 1.f :
@@ -1248,8 +1254,10 @@ void CPU::PGXP::CPU_MULT(u32 instr, u32 rsVal, u32 rtVal)
 
   g_state.pgxp_gpr[static_cast<u8>(Reg::lo)].x = (float)f16Sign(lx);
   g_state.pgxp_gpr[static_cast<u8>(Reg::lo)].y = (float)f16Sign(ly);
+  g_state.pgxp_gpr[static_cast<u8>(Reg::lo)].flags |= VALID_TAINTED_Z;
   g_state.pgxp_gpr[static_cast<u8>(Reg::hi)].x = (float)f16Sign(hx);
   g_state.pgxp_gpr[static_cast<u8>(Reg::hi)].y = (float)f16Sign(hy);
+  g_state.pgxp_gpr[static_cast<u8>(Reg::hi)].flags |= VALID_TAINTED_Z;
 
   // compute PSX value
   const u64 result = static_cast<u64>(static_cast<s64>(SignExtend64(rsVal)) * static_cast<s64>(SignExtend64(rtVal)));
@@ -1301,8 +1309,10 @@ void CPU::PGXP::CPU_MULTU(u32 instr, u32 rsVal, u32 rtVal)
 
   g_state.pgxp_gpr[static_cast<u8>(Reg::lo)].x = (float)f16Sign(lx);
   g_state.pgxp_gpr[static_cast<u8>(Reg::lo)].y = (float)f16Sign(ly);
+  g_state.pgxp_gpr[static_cast<u8>(Reg::lo)].flags |= VALID_TAINTED_Z;
   g_state.pgxp_gpr[static_cast<u8>(Reg::hi)].x = (float)f16Sign(hx);
   g_state.pgxp_gpr[static_cast<u8>(Reg::hi)].y = (float)f16Sign(hy);
+  g_state.pgxp_gpr[static_cast<u8>(Reg::hi)].flags |= VALID_TAINTED_Z;
 
   // compute PSX value
   const u64 result = ZeroExtend64(rsVal) * ZeroExtend64(rtVal);
@@ -1339,10 +1349,12 @@ void CPU::PGXP::CPU_DIV(u32 instr, u32 rsVal, u32 rtVal)
   double lo = vs / vt;
   g_state.pgxp_gpr[static_cast<u8>(Reg::lo)].y = (float)f16Sign(f16Overflow(lo));
   g_state.pgxp_gpr[static_cast<u8>(Reg::lo)].x = (float)f16Sign(lo);
+  g_state.pgxp_gpr[static_cast<u8>(Reg::lo)].flags |= VALID_TAINTED_Z;
 
   double hi = fmod(vs, vt);
   g_state.pgxp_gpr[static_cast<u8>(Reg::hi)].y = (float)f16Sign(f16Overflow(hi));
   g_state.pgxp_gpr[static_cast<u8>(Reg::hi)].x = (float)f16Sign(hi);
+  g_state.pgxp_gpr[static_cast<u8>(Reg::hi)].flags |= VALID_TAINTED_Z;
 
   // compute PSX value
   if (static_cast<s32>(rtVal) == 0)
@@ -1396,10 +1408,12 @@ void CPU::PGXP::CPU_DIVU(u32 instr, u32 rsVal, u32 rtVal)
   double lo = vs / vt;
   g_state.pgxp_gpr[static_cast<u8>(Reg::lo)].y = (float)f16Sign(f16Overflow(lo));
   g_state.pgxp_gpr[static_cast<u8>(Reg::lo)].x = (float)f16Sign(lo);
+  g_state.pgxp_gpr[static_cast<u8>(Reg::lo)].flags |= VALID_TAINTED_Z;
 
   double hi = fmod(vs, vt);
   g_state.pgxp_gpr[static_cast<u8>(Reg::hi)].y = (float)f16Sign(f16Overflow(hi));
   g_state.pgxp_gpr[static_cast<u8>(Reg::hi)].x = (float)f16Sign(hi);
+  g_state.pgxp_gpr[static_cast<u8>(Reg::hi)].flags |= VALID_TAINTED_Z;
 
   if (rtVal == 0)
   {
@@ -1460,6 +1474,7 @@ void CPU::PGXP::CPU_SLL(u32 instr, u32 rtVal)
   prdVal.x = static_cast<float>(x);
   prdVal.y = static_cast<float>(y);
   prdVal.value = rdVal;
+  prdVal.flags |= VALID_TAINTED_Z;
 }
 
 void CPU::PGXP::CPU_SRL(u32 instr, u32 rtVal)
@@ -1525,6 +1540,7 @@ void CPU::PGXP::CPU_SRL(u32 instr, u32 rtVal)
   prdVal.x = static_cast<float>(x);
   prdVal.y = static_cast<float>(y);
   prdVal.value = rdVal;
+  prdVal.flags |= VALID_TAINTED_Z;
 }
 
 void CPU::PGXP::CPU_SRA(u32 instr, u32 rtVal)
@@ -1590,6 +1606,7 @@ void CPU::PGXP::CPU_SRA(u32 instr, u32 rtVal)
   prdVal.x = static_cast<float>(x);
   prdVal.y = static_cast<float>(y);
   prdVal.value = rdVal;
+  prdVal.flags |= VALID_TAINTED_Z;
 
   // Use low precision/rounded values when we're not shifting an entire component,
   // and it's not originally from a 3D value. Too many false positives in P2/etc.
@@ -1649,6 +1666,7 @@ void CPU::PGXP::CPU_SLLV(u32 instr, u32 rtVal, u32 rsVal)
   prdVal.x = static_cast<float>(x);
   prdVal.y = static_cast<float>(y);
   prdVal.value = rdVal;
+  prdVal.flags |= VALID_TAINTED_Z;
 }
 
 void CPU::PGXP::CPU_SRLV(u32 instr, u32 rtVal, u32 rsVal)
@@ -1708,12 +1726,12 @@ void CPU::PGXP::CPU_SRLV(u32 instr, u32 rtVal, u32 rsVal)
   else
     y = y / (1 << sh);
 
-
   PGXP_value& prdVal = g_state.pgxp_gpr[rd(instr)];
   prdVal = prtVal;
   prdVal.x = static_cast<float>(f16Sign(x));
   prdVal.y = static_cast<float>(f16Sign(y));
   prdVal.value = rdVal;
+  prdVal.flags |= VALID_TAINTED_Z;
 }
 
 void CPU::PGXP::CPU_SRAV(u32 instr, u32 rtVal, u32 rsVal)
@@ -1778,6 +1796,7 @@ void CPU::PGXP::CPU_SRAV(u32 instr, u32 rtVal, u32 rsVal)
   prdVal.x = static_cast<float>(f16Sign(x));
   prdVal.y = static_cast<float>(f16Sign(y));
   prdVal.value = rdVal;
+  prdVal.flags |= VALID_TAINTED_Z;
 }
 
 void CPU::PGXP::CPU_MFC0(u32 instr, u32 rdVal)