From d9b08f569f29907e9c635fb3c0b4c76c2d90a92e Mon Sep 17 00:00:00 2001
From: Ben Vanik <ben.vanik@gmail.com>
Date: Sat, 26 Jan 2013 21:51:31 -0800
Subject: [PATCH] Adding a bunch of instructions.

---
 TODO.md                                       |  28 +-
 .../xenia/cpu/codegen/function_generator.h    |   7 +
 include/xenia/cpu/ppc/instr.h                 |  11 +
 private/runtest.sh                            |   5 +
 src/cpu/codegen/emit_alu.cc                   | 433 +++++++++++++-----
 src/cpu/codegen/emit_control.cc               | 151 +++++-
 src/cpu/codegen/emit_memory.cc                |  10 +-
 src/cpu/codegen/function_generator.cc         | 123 ++++-
 src/cpu/codegen/module_generator.cc           |   1 +
 src/cpu/exec_module.cc                        |  15 +
 10 files changed, 616 insertions(+), 168 deletions(-)

diff --git a/TODO.md b/TODO.md
index 59e896005..5e0feec96 100644
--- a/TODO.md
+++ b/TODO.md
@@ -1,33 +1,19 @@
 ## Instructions
 
-```
-need cr0:
-andix
-orx
-mullwx
-divwux
+### XER CA bit (carry)
 
-addx
-addix
-addic
-addzex
-subfx
-subfex
-subficx
+Not sure the way I'm doing this is right. addic/subficx/etc set it to the value
+of the overflow bit from the LLVM *_with_overflow intrinsic.
+
+```
+MISDECODING: andix
 
 rlwinmx
 rlwimix
 rldiclx
-extsbx
+
 slwx
 srawix
-
-# can be no-op, or @llvm.prefetch
-dcbt
-dcbtst
-
-twi # @llvm.debugtrap ?
-
 ```
 
 Overflow bits can be set via the intrinsics:
diff --git a/include/xenia/cpu/codegen/function_generator.h b/include/xenia/cpu/codegen/function_generator.h
index d81d598e4..8889c0d99 100644
--- a/include/xenia/cpu/codegen/function_generator.h
+++ b/include/xenia/cpu/codegen/function_generator.h
@@ -66,13 +66,20 @@ public:
 
   llvm::Value* xer_value();
   void update_xer_value(llvm::Value* value);
+  void update_xer_with_overflow(llvm::Value* value);
+  void update_xer_with_carry(llvm::Value* value);
+  void update_xer_with_overflow_and_carry(llvm::Value* value);
+
   llvm::Value* lr_value();
   void update_lr_value(llvm::Value* value);
+
   llvm::Value* ctr_value();
   void update_ctr_value(llvm::Value* value);
 
   llvm::Value* cr_value(uint32_t n);
   void update_cr_value(uint32_t n, llvm::Value* value);
+  void update_cr_with_cond(uint32_t n, llvm::Value* lhs, llvm::Value* rhs,
+                           bool is_signed);
 
   llvm::Value* gpr_value(uint32_t n);
   void update_gpr_value(uint32_t n, llvm::Value* value);
diff --git a/include/xenia/cpu/ppc/instr.h b/include/xenia/cpu/ppc/instr.h
index fab692e2c..c4b48d0d4 100644
--- a/include/xenia/cpu/ppc/instr.h
+++ b/include/xenia/cpu/ppc/instr.h
@@ -166,6 +166,17 @@ typedef struct {
       uint32_t                : 6;
     } M;
     // kXEPPCInstrFormatMD
+    struct {
+      uint32_t        Rc      : 1;
+      uint32_t        SH5     : 1;
+      uint32_t                : 3;
+      uint32_t        MB5     : 1;
+      uint32_t        MB      : 5;
+      uint32_t        SH      : 5;
+      uint32_t        RA      : 5;
+      uint32_t        RS      : 5;
+      uint32_t                : 6;
+    } MD;
     // kXEPPCInstrFormatMDS
     // kXEPPCInstrFormatVA
     // kXEPPCInstrFormatVX
diff --git a/private/runtest.sh b/private/runtest.sh
index 95b9dbaea..b05bbb88e 100755
--- a/private/runtest.sh
+++ b/private/runtest.sh
@@ -1,6 +1,11 @@
 python xenia-build.py xethunk
 python xenia-build.py build
 
+if [ "$?" -ne 0 ]; then
+  echo "Build failed!"
+  exit $?
+fi
+
 ./build/xenia/release/xenia-run \
     private/$1 \
     --optimize_ir_modules=true \
diff --git a/src/cpu/codegen/emit_alu.cc b/src/cpu/codegen/emit_alu.cc
index b17f43f01..eb26e9237 100644
--- a/src/cpu/codegen/emit_alu.cc
+++ b/src/cpu/codegen/emit_alu.cc
@@ -29,21 +29,35 @@ namespace codegen {
 XEEMITTER(addx,         0x7C000214, XO )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) {
   // RD <- (RA) + (RB)
 
-  if (i.XO.Rc) {
-    // With cr0 update.
-    XEINSTRNOTIMPLEMENTED();
-    return 1;
-  }
   if (i.XO.OE) {
     // With XER update.
-    XEINSTRNOTIMPLEMENTED();
-    return 1;
+    // This is a different codepath as we need to use llvm.sadd.with.overflow.
+
+    Function* sadd_with_overflow = Intrinsic::getDeclaration(
+        g.gen_module(), Intrinsic::sadd_with_overflow, b.getInt64Ty());
+    Value* v = b.CreateCall2(sadd_with_overflow,
+                             g.gpr_value(i.XO.RA), g.gpr_value(i.XO.RB));
+    g.update_gpr_value(i.XO.RT, b.CreateExtractValue(v, 0));
+    g.update_xer_with_overflow(b.CreateExtractValue(v, 1));
+
+    if (i.XO.Rc) {
+      // With cr0 update.
+      g.update_cr_with_cond(0, v, b.getInt64(0), true);
+    }
+
+    return 0;
+  } else {
+    // No OE bit setting.
+    Value* v = b.CreateAdd(g.gpr_value(i.XO.RA), g.gpr_value(i.XO.RB));
+    g.update_gpr_value(i.XO.RT, v);
+
+    if (i.XO.Rc) {
+      // With cr0 update.
+      g.update_cr_with_cond(0, v, b.getInt64(0), true);
+    }
+
+    return 0;
   }
-
-  Value* v = b.CreateAdd(g.gpr_value(i.XO.RA), g.gpr_value(i.XO.RB));
-  g.update_gpr_value(i.XO.RT, v);
-
-  return 0;
 }
 
 XEEMITTER(addcx,        0X7C000014, XO )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) {
@@ -57,8 +71,10 @@ XEEMITTER(addex,        0x7C000114, XO )(FunctionGenerator& g, IRBuilder<>& b, I
 }
 
 XEEMITTER(addi,         0x38000000, D  )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) {
-  // if RA = 0 then RT <- EXTS(SI)
-  // else RT <- (RA) + EXTS(SI)
+  // if RA = 0 then
+  //   RT <- EXTS(SI)
+  // else
+  //   RT <- (RA) + EXTS(SI)
 
   Value* v = b.getInt64(XEEXTS16(i.D.DS));
   if (i.D.RA) {
@@ -70,8 +86,16 @@ XEEMITTER(addi,         0x38000000, D  )(FunctionGenerator& g, IRBuilder<>& b, I
 }
 
 XEEMITTER(addic,        0x30000000, D  )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  // RT <- (RA) + EXTS(SI)
+
+  Function* sadd_with_overflow = Intrinsic::getDeclaration(
+      g.gen_module(), Intrinsic::sadd_with_overflow, b.getInt64Ty());
+  Value* v = b.CreateCall2(sadd_with_overflow,
+                           g.gpr_value(i.D.RA), b.getInt64(XEEXTS16(i.D.DS)));
+  g.update_gpr_value(i.D.RT, b.CreateExtractValue(v, 0));
+  g.update_xer_with_carry(b.CreateExtractValue(v, 1));
+
+  return 0;
 }
 
 XEEMITTER(addicx,       0x34000000, D  )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) {
@@ -80,8 +104,10 @@ XEEMITTER(addicx,       0x34000000, D  )(FunctionGenerator& g, IRBuilder<>& b, I
 }
 
 XEEMITTER(addis,        0x3C000000, D  )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) {
-  // if RA = 0 then RT <- EXTS(SI) || i16.0
-  // else RT <- (RA) + EXTS(SI) || i16.0
+  // if RA = 0 then
+  //   RT <- EXTS(SI) || i16.0
+  // else
+  //   RT <- (RA) + EXTS(SI) || i16.0
 
   Value* v = b.getInt64(XEEXTS16(i.D.DS) << 16);
   if (i.D.RA) {
@@ -98,8 +124,29 @@ XEEMITTER(addmex,       0x7C0001D4, XO )(FunctionGenerator& g, IRBuilder<>& b, I
 }
 
 XEEMITTER(addzex,       0x7C000194, XO )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  // RT <- (RA) + CA
+
+  Function* sadd_with_overflow = Intrinsic::getDeclaration(
+      g.gen_module(), Intrinsic::sadd_with_overflow, b.getInt64Ty());
+  Value* ca = b.CreateAnd(b.CreateLShr(g.xer_value(), 29), 0x1);
+  Value* v = b.CreateCall2(sadd_with_overflow,
+                           g.gpr_value(i.XO.RA), ca);
+  Value* add_value = b.CreateExtractValue(v, 0);
+  g.update_gpr_value(i.XO.RT, add_value);
+  if (i.XO.OE) {
+    // With XER[SO] update too.
+    g.update_xer_with_overflow_and_carry(b.CreateExtractValue(v, 1));
+  } else {
+    // Just CA update.
+    g.update_xer_with_carry(b.CreateExtractValue(v, 1));
+  }
+
+  if (i.XO.Rc) {
+    // With cr0 update.
+    g.update_cr_with_cond(0, add_value, b.getInt64(0), true);
+  }
+
+  return 0;
 }
 
 XEEMITTER(divdx,        0x7C0003D2, XO )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) {
@@ -120,24 +167,50 @@ XEEMITTER(divwx,        0x7C0003D6, XO )(FunctionGenerator& g, IRBuilder<>& b, I
 XEEMITTER(divwux,       0x7C000396, XO )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) {
   // dividend[0:31] <- (RA)[32:63]
   // divisor[0:31] <- (RB)[32:63]
+  // if divisor = 0 then
+  //   if OE = 1 then
+  //     XER[OV] <- 1
+  //   return
   // RT[32:63] <- dividend ÷ divisor
   // RT[0:31] <- undefined
 
-  if (i.XO.Rc) {
-    // With cr0 update.
-    XEINSTRNOTIMPLEMENTED();
-    return 1;
-  }
-  if (i.XO.OE) {
-    // With XER update.
-    XEINSTRNOTIMPLEMENTED();
-    return 1;
-  }
-
   Value* dividend = b.CreateTrunc(g.gpr_value(i.XO.RA), b.getInt32Ty());
   Value* divisor = b.CreateTrunc(g.gpr_value(i.XO.RB), b.getInt32Ty());
+
+  // Note that we skip the zero handling block and just avoid the divide if
+  // we are OE=0.
+  BasicBlock* zero_bb = i.XO.OE ?
+      BasicBlock::Create(*g.context(), "", g.gen_fn()) : NULL;
+  BasicBlock* nonzero_bb = BasicBlock::Create(*g.context(), "", g.gen_fn());
+  BasicBlock* after_bb = BasicBlock::Create(*g.context(), "", g.gen_fn());
+  b.CreateCondBr(b.CreateICmpEQ(divisor, b.getInt32(0)),
+                 i.XO.OE ? zero_bb : after_bb, nonzero_bb);
+
+  if (zero_bb) {
+    // Divisor was zero - do XER update.
+    b.SetInsertPoint(zero_bb);
+    g.update_xer_with_overflow(b.getInt1(1));
+    b.CreateBr(after_bb);
+  }
+
+  // Divide.
+  b.SetInsertPoint(nonzero_bb);
   Value* v = b.CreateUDiv(dividend, divisor);
   v = b.CreateZExt(v, b.getInt64Ty());
+  g.update_gpr_value(i.XO.RT, v);
+
+  // If we are OE=1 we need to clear the overflow bit.
+  g.update_xer_with_overflow(b.getInt1(0));
+
+  if (i.XO.Rc) {
+    // With cr0 update.
+    g.update_cr_with_cond(0, v, b.getInt64(0), true);
+  }
+
+  b.CreateBr(after_bb);
+
+  // Resume.
+  b.SetInsertPoint(after_bb);
 
   return 0;
 }
@@ -184,11 +257,6 @@ XEEMITTER(mulli,        0x1C000000, D  )(FunctionGenerator& g, IRBuilder<>& b, I
 XEEMITTER(mullwx,       0x7C0001D6, XO )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) {
   // RT <- (RA)[32:63] × (RB)[32:63]
 
-  if (i.XO.Rc) {
-    // With cr0 update.
-    XEINSTRNOTIMPLEMENTED();
-    return 1;
-  }
   if (i.XO.OE) {
     // With XER update.
     XEINSTRNOTIMPLEMENTED();
@@ -199,6 +267,11 @@ XEEMITTER(mullwx,       0x7C0001D6, XO )(FunctionGenerator& g, IRBuilder<>& b, I
                          b.CreateSExt(g.gpr_value(i.XO.RB), b.getInt64Ty()));
   g.update_gpr_value(i.XO.RT, v);
 
+  if (i.XO.Rc) {
+    // With cr0 update.
+    g.update_cr_with_cond(0, v, b.getInt64(0), true);
+  }
+
   return 0;
 }
 
@@ -208,8 +281,37 @@ XEEMITTER(negx,         0x7C0000D0, XO )(FunctionGenerator& g, IRBuilder<>& b, I
 }
 
 XEEMITTER(subfx,        0x7C000050, XO )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  // RT <- ¬(RA) + (RB) + 1
+
+  if (i.XO.OE) {
+    // With XER update.
+    // This is a different codepath as we need to use llvm.ssub.with.overflow.
+
+    Function* ssub_with_overflow = Intrinsic::getDeclaration(
+        g.gen_module(), Intrinsic::ssub_with_overflow, b.getInt64Ty());
+    Value* v = b.CreateCall2(ssub_with_overflow,
+                             g.gpr_value(i.XO.RB), g.gpr_value(i.XO.RA));
+    g.update_gpr_value(i.XO.RT, b.CreateExtractValue(v, 0));
+    g.update_xer_with_overflow(b.CreateExtractValue(v, 1));
+
+    if (i.XO.Rc) {
+      // With cr0 update.
+      g.update_cr_with_cond(0, v, b.getInt64(0), true);
+    }
+
+    return 0;
+  } else {
+    // No OE bit setting.
+    Value* v = b.CreateSub(g.gpr_value(i.XO.RB), g.gpr_value(i.XO.RA));
+    g.update_gpr_value(i.XO.RT, v);
+
+    if (i.XO.Rc) {
+      // With cr0 update.
+      g.update_cr_with_cond(0, v, b.getInt64(0), true);
+    }
+
+    return 0;
+  }
 }
 
 XEEMITTER(subfcx,       0x7C000010, XO )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) {
@@ -218,13 +320,45 @@ XEEMITTER(subfcx,       0x7C000010, XO )(FunctionGenerator& g, IRBuilder<>& b, I
 }
 
 XEEMITTER(subficx,      0x20000000, D  )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  // RT <- ¬(RA) + EXTS(SI) + 1
+
+  Function* ssub_with_overflow = Intrinsic::getDeclaration(
+      g.gen_module(), Intrinsic::ssub_with_overflow, b.getInt64Ty());
+  Value* v = b.CreateCall2(ssub_with_overflow,
+                           b.getInt64(XEEXTS16(i.D.DS)), g.gpr_value(i.D.RA));
+  g.update_gpr_value(i.D.RT, b.CreateExtractValue(v, 0));
+  g.update_xer_with_carry(b.CreateExtractValue(v, 1));
+
+  return 0;
 }
 
 XEEMITTER(subfex,       0x7C000110, XO )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  // RT <- ¬(RA) + (RB) + CA
+
+  // TODO(benvanik): possible that the add of rb+ca needs to also check for
+  //     overflow!
+
+  Value* ca = b.CreateAnd(b.CreateLShr(g.xer_value(), 29), 0x1);
+  Function* uadd_with_overflow = Intrinsic::getDeclaration(
+      g.gen_module(), Intrinsic::uadd_with_overflow, b.getInt64Ty());
+  Value* v = b.CreateCall2(uadd_with_overflow,
+                           b.CreateNot(g.gpr_value(i.XO.RA)),
+                           b.CreateAdd(g.gpr_value(i.XO.RB), ca));
+  g.update_gpr_value(i.XO.RT, b.CreateExtractValue(v, 0));
+
+  if (i.XO.OE) {
+    // With XER update.
+    g.update_xer_with_overflow_and_carry(b.CreateExtractValue(v, 1));
+  } else {
+    g.update_xer_with_carry(b.CreateExtractValue(v, 1));
+  }
+
+  if (i.XO.Rc) {
+    // With cr0 update.
+    g.update_cr_with_cond(0, v, b.getInt64(0), true);
+  }
+
+  return 0;
 }
 
 XEEMITTER(subfmex,      0x7C0001D0, XO )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) {
@@ -240,29 +374,6 @@ XEEMITTER(subfzex,      0x7C000190, XO )(FunctionGenerator& g, IRBuilder<>& b, I
 
 // Integer compare (A-4)
 
-void XeEmitCompareCore(FunctionGenerator& g, IRBuilder<>& b,
-                       Value* lhs, Value* rhs, uint32_t BF, bool is_signed) {
-  // bit0 = RA < RB
-  // bit1 = RA > RB
-  // bit2 = RA = RB
-  // bit3 = XER[SO]
-  // Bits are reversed:
-  // 0123
-  // 3210
-
-  Value* is_lt = is_signed ?
-      b.CreateICmpSLT(lhs, rhs) : b.CreateICmpULT(lhs, rhs);
-  Value* is_gt = is_signed ?
-      b.CreateICmpSGT(lhs, rhs) : b.CreateICmpUGT(lhs, rhs);
-  Value* cp = b.CreateSelect(is_gt, b.getInt8(1 << 2), b.getInt8(1 << 1));
-  Value* c = b.CreateSelect(is_lt, b.getInt8(1 << 3), cp);
-
-  // TODO(benvanik): set bit 4 to XER[SO]
-
-  // Insert the 4 bits into their location in the CR.
-  g.update_cr_value(BF, c);
-}
-
 XEEMITTER(cmp,          0x7C000000, X  )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) {
   // if L = 0 then
   //   a <- EXTS((RA)[32:63])
@@ -291,7 +402,7 @@ XEEMITTER(cmp,          0x7C000000, X  )(FunctionGenerator& g, IRBuilder<>& b, I
     rhs = b.CreateSExt(rhs, b.getInt64Ty());
   }
 
-  XeEmitCompareCore(g, b, lhs, rhs, BF, true);
+  g.update_cr_with_cond(BF, lhs, rhs, true);
 
   return 0;
 }
@@ -320,7 +431,7 @@ XEEMITTER(cmpi,         0x2C000000, D  )(FunctionGenerator& g, IRBuilder<>& b, I
   }
 
   Value* rhs = b.getInt64(XEEXTS16(i.D.DS));
-  XeEmitCompareCore(g, b, lhs, rhs, BF, true);
+  g.update_cr_with_cond(BF, lhs, rhs, true);
 
   return 0;
 }
@@ -353,7 +464,7 @@ XEEMITTER(cmpl,         0x7C000040, X  )(FunctionGenerator& g, IRBuilder<>& b, I
     rhs = b.CreateZExt(rhs, b.getInt64Ty());
   }
 
-  XeEmitCompareCore(g, b, lhs, rhs, BF, false);
+  g.update_cr_with_cond(BF, lhs, rhs, false);
 
   return 0;
 }
@@ -382,7 +493,7 @@ XEEMITTER(cmpli,        0x28000000, D  )(FunctionGenerator& g, IRBuilder<>& b, I
   }
 
   Value* rhs = b.getInt64(i.D.DS);
-  XeEmitCompareCore(g, b, lhs, rhs, BF, false);
+  g.update_cr_with_cond(BF, lhs, rhs, false);
 
   return 0;
 }
@@ -393,31 +504,29 @@ XEEMITTER(cmpli,        0x28000000, D  )(FunctionGenerator& g, IRBuilder<>& b, I
 XEEMITTER(andx,         0x7C000038, X  )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) {
   // RA <- (RS) & (RB)
 
-  if (i.X.Rc) {
-    // With cr0 update.
-    XEINSTRNOTIMPLEMENTED();
-    return 1;
-  }
-
   Value* v = b.CreateAnd(g.gpr_value(i.X.RT), g.gpr_value(i.X.RB));
   g.update_gpr_value(i.X.RA, v);
 
+  if (i.X.Rc) {
+    // With cr0 update.
+    g.update_cr_with_cond(0, v, b.getInt64(0), true);
+  }
+
   return 0;
 }
 
 XEEMITTER(andcx,        0x7C000078, X  )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) {
   // RA <- (RS) & ¬(RB)
 
-  if (i.X.Rc) {
-    // With cr0 update.
-    XEINSTRNOTIMPLEMENTED();
-    return 1;
-  }
-
   Value* v = b.CreateXor(g.gpr_value(i.X.RB), -1);
   v = b.CreateAnd(g.gpr_value(i.X.RT), v);
   g.update_gpr_value(i.X.RA, v);
 
+  if (i.X.Rc) {
+    // With cr0 update.
+    g.update_cr_with_cond(0, v, b.getInt64(0), true);
+  }
+
   return 0;
 }
 
@@ -427,8 +536,8 @@ XEEMITTER(andix,        0x70000000, D  )(FunctionGenerator& g, IRBuilder<>& b, I
   Value* v = b.CreateAnd(g.gpr_value(i.D.RT), (uint64_t)i.D.DS);
   g.update_gpr_value(i.D.RA, v);
 
-  // TODO(benvanik): update cr0
-  XEINSTRNOTIMPLEMENTED();
+  // With cr0 update.
+  g.update_cr_with_cond(0, v, b.getInt64(0), true);
 
   return 1;
 }
@@ -439,8 +548,8 @@ XEEMITTER(andisx,       0x74000000, D  )(FunctionGenerator& g, IRBuilder<>& b, I
   Value* v = b.CreateAnd(g.gpr_value(i.D.RT), ((uint64_t)i.D.DS) << 16);
   g.update_gpr_value(i.D.RA, v);
 
-  // TODO(benvanik): update cr0
-  XEINSTRNOTIMPLEMENTED();
+  // With cr0 update.
+  g.update_cr_with_cond(0, v, b.getInt64(0), true);
 
   return 1;
 }
@@ -457,11 +566,6 @@ XEEMITTER(cntlzwx,      0x7C000034, X  )(FunctionGenerator& g, IRBuilder<>& b, I
   //   n <- n + 1
   // RA <- n - 32
 
-  if (i.X.Rc) {
-    XEINSTRNOTIMPLEMENTED();
-    return 1;
-  }
-
   Value* v = g.gpr_value(i.X.RT);
   v = b.CreateTrunc(v, b.getInt32Ty());
 
@@ -474,6 +578,11 @@ XEEMITTER(cntlzwx,      0x7C000034, X  )(FunctionGenerator& g, IRBuilder<>& b, I
   count = b.CreateZExt(count, b.getInt64Ty());
   g.update_gpr_value(i.X.RA, count);
 
+  if (i.X.Rc) {
+    // With cr0 update.
+    g.update_cr_with_cond(0, count, b.getInt64(0), true);
+  }
+
   return 0;
 }
 
@@ -487,16 +596,16 @@ XEEMITTER(extsbx,       0x7C000774, X  )(FunctionGenerator& g, IRBuilder<>& b, I
   // RA[56:63] <- (RS)[56:63]
   // RA[0:55] <- i56.s
 
-  if (i.X.Rc) {
-    XEINSTRNOTIMPLEMENTED();
-    return 1;
-  }
-
   Value* v = g.gpr_value(i.X.RT);
   v = b.CreateTrunc(v, b.getInt8Ty());
   v = b.CreateSExt(v, b.getInt64Ty());
   g.update_gpr_value(i.X.RA, v);
 
+  if (i.X.Rc) {
+    // Update cr0.
+    g.update_cr_with_cond(0, v, b.getInt64(0), true);
+  }
+
   return 0;
 }
 
@@ -518,31 +627,29 @@ XEEMITTER(nandx,        0x7C0003B8, X  )(FunctionGenerator& g, IRBuilder<>& b, I
 XEEMITTER(norx,         0x7C0000F8, X  )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) {
   // RA <- ¬((RS) | (RB))
 
-  if (i.X.Rc) {
-    // With cr0 update.
-    XEINSTRNOTIMPLEMENTED();
-    return 1;
-  }
-
   Value* v = b.CreateOr(g.gpr_value(i.X.RT), g.gpr_value(i.X.RB));
   v = b.CreateXor(v, -1);
   g.update_gpr_value(i.X.RA, v);
 
+  if (i.X.Rc) {
+    // With cr0 update.
+    g.update_cr_with_cond(0, v, b.getInt64(0), true);
+  }
+
   return 0;
 }
 
 XEEMITTER(orx,          0x7C000378, X  )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) {
   // RA <- (RS) | (RB)
 
-  if (i.X.Rc) {
-    // With cr0 update.
-    XEINSTRNOTIMPLEMENTED();
-    return 1;
-  }
-
   Value* v = b.CreateOr(g.gpr_value(i.X.RT), g.gpr_value(i.X.RB));
   g.update_gpr_value(i.X.RA, v);
 
+  if (i.X.Rc) {
+    // With cr0 update.
+    g.update_cr_with_cond(0, v, b.getInt64(0), true);
+  }
+
   return 0;
 }
 
@@ -572,15 +679,14 @@ XEEMITTER(oris,         0x64000000, D  )(FunctionGenerator& g, IRBuilder<>& b, I
 XEEMITTER(xorx,         0x7C000278, X  )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) {
   // RA <- (RS) XOR (RB)
 
-  if (i.X.Rc) {
-    // With cr0 update.
-    XEINSTRNOTIMPLEMENTED();
-    return 1;
-  }
-
   Value* v = b.CreateXor(g.gpr_value(i.X.RT), g.gpr_value(i.X.RB));
   g.update_gpr_value(i.X.RA, v);
 
+  if (i.X.Rc) {
+    // With cr0 update.
+    g.update_cr_with_cond(0, v, b.getInt64(0), true);
+  }
+
   return 0;
 }
 
@@ -621,6 +727,29 @@ XEEMITTER(rldicx,       0x78000008, MD )(FunctionGenerator& g, IRBuilder<>& b, I
 }
 
 XEEMITTER(rldiclx,      0x78000000, MD )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) {
+  // n <- sh[5] || sh[0:4]
+  // r <- ROTL64((RS), n)
+  // b <- mb[5] || mb[0:4]
+  // m <- MASK(b, 63)
+  // RA <- r & m
+
+  // uint32_t sh = (i.MD.SH5 << 5) | i.MD.SH;
+  // uint32_t mb = (i.MD.MB5 << 5) | i.MD.MB;
+
+  // Value* v = g.gpr_value(i.MD.RS);
+  // if (sh) {
+  //   v = // rotate by sh
+  // }
+  // if (mb) {
+  //   v = // mask b mb->63
+  // }
+  // g.update_gpr_value(i.MD.RA, v);
+
+  // if (i.MD.Rc) {
+  //   // With cr0 update.
+  //   g.update_cr_with_cond(0, v, b.getInt64(0), true);
+  // }
+
   XEINSTRNOTIMPLEMENTED();
   return 1;
 }
@@ -636,6 +765,10 @@ XEEMITTER(rldimix,      0x7800000C, MD )(FunctionGenerator& g, IRBuilder<>& b, I
 }
 
 XEEMITTER(rlwimix,      0x50000000, M  )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) {
+  // n <- SH
+  // r <- ROTL32((RS)[32:63], n)
+  // m <- MASK(MB+32, ME+32)
+  // RA <- r&m | (RA)&¬m
   XEINSTRNOTIMPLEMENTED();
   return 1;
 }
@@ -646,19 +779,17 @@ XEEMITTER(rlwinmx,      0x54000000, M  )(FunctionGenerator& g, IRBuilder<>& b, I
   // m <- MASK(MB+32, ME+32)
   // RA <- r & m
 
-  if (i.M.Rc) {
-    // With cr0 update.
-    XEINSTRNOTIMPLEMENTED();
-    return 1;
-  }
-
   // The compiler will generate a bunch of these for the special case of
   // SH=0, MB=ME
   // Which seems to just select a single bit and set cr0 for use with a branch.
   // We can detect this and do less work.
   if (!i.M.SH && i.M.MB == i.M.ME) {
     Value* v = b.CreateAnd(g.gpr_value(i.M.RS), 1 << i.M.MB);
-    g.update_gpr_value(i.M.RS, v);
+    g.update_gpr_value(i.M.RA, v);
+    if (i.M.Rc) {
+      // With cr0 update.
+      g.update_cr_with_cond(0, v, b.getInt64(0), true);
+    }
     return 0;
   }
 
@@ -669,6 +800,12 @@ XEEMITTER(rlwinmx,      0x54000000, M  )(FunctionGenerator& g, IRBuilder<>& b, I
   // v = b.CreateOr(b.CreateShl(v, i.M.SH), b.CreateLShr(v, 32 - i.M.SH));
   // v = b.CreateAnd(v, XEMASK(i.M.MB + 32, i.M.ME + 32));
 
+  // if (i.M.Rc) {
+  //   // With cr0 update.
+  //   g.update_cr_with_cond(0, v, b.getInt64(0), true);
+  // }
+
+  printf("rlwinmx %d %d %d\n", i.M.SH, i.M.MB, i.M.ME);
   XEINSTRNOTIMPLEMENTED();
   return 1;
 }
@@ -687,8 +824,24 @@ XEEMITTER(sldx,         0x7C000036, X  )(FunctionGenerator& g, IRBuilder<>& b, I
 }
 
 XEEMITTER(slwx,         0x7C000030, X  )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  // n <- (RB)[59:63]
+  // r <- ROTL32((RS)[32:63], n)
+  // if (RB)[58] = 0 then
+  //   m <- MASK(32, 63-n)
+  // else
+  //   m <- i64.0
+  // RA <- r & m
+
+  Value* v = b.CreateShl(g.gpr_value(i.X.RT), g.gpr_value(i.X.RB));
+  v = b.CreateAnd(v, UINT32_MAX);
+  g.update_gpr_value(i.X.RA, v);
+
+  if (i.X.Rc) {
+    // With cr0 update.
+    g.update_cr_with_cond(0, v, b.getInt64(0), true);
+  }
+
+  return 0;
 }
 
 XEEMITTER(sradx,        0x7C000634, X  )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) {
@@ -707,8 +860,40 @@ XEEMITTER(srawx,        0x7C000630, X  )(FunctionGenerator& g, IRBuilder<>& b, I
 }
 
 XEEMITTER(srawix,       0x7C000670, X  )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  // n <- SH
+  // r <- ROTL32((RS)[32:63], 64-n)
+  // m <- MASK(n+32, 63)
+  // s <- (RS)[32]
+  // RA <- r&m | (i64.s)&¬m
+  // CA <- s & ((r&¬m)[32:63]≠0)
+
+  Value* rs64 = g.gpr_value(i.X.RT);
+  Value* rs32 = b.CreateTrunc(rs64, b.getInt32Ty());
+
+  Value* v;
+  Value* ca;
+  if (!i.X.RB) {
+    // No shift, just a fancy sign extend and CA clearer.
+    v = rs32;
+    ca = b.getInt64(0);
+  } else {
+    v = b.CreateAShr(rs32, i.X.RB);
+
+    // CA is set to 1 if the low-order 32 bits of (RS) contain a negative number
+    // and any 1-bits are shifted out of position 63; otherwise CA is set to 0.
+    ca = b.CreateAnd(b.CreateICmpSLT(v, b.getInt32(0)),
+                     b.CreateICmpSLT(rs64, b.getInt64(0)));
+  }
+  v = b.CreateSExt(v, b.getInt64Ty());
+  g.update_gpr_value(i.X.RA, v);
+  g.update_xer_with_carry(ca);
+
+  if (i.X.Rc) {
+    // With cr0 update.
+    g.update_cr_with_cond(0, v, b.getInt64(0), true);
+  }
+
+  return 0;
 }
 
 XEEMITTER(srdx,         0x7C000436, X  )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) {
diff --git a/src/cpu/codegen/emit_control.cc b/src/cpu/codegen/emit_control.cc
index 669273f86..1bfdbd10c 100644
--- a/src/cpu/codegen/emit_control.cc
+++ b/src/cpu/codegen/emit_control.cc
@@ -436,24 +436,159 @@ XEEMITTER(sc,           0x44000002, SC )(FunctionGenerator& g, IRBuilder<>& b, I
 
 // Trap (A-25)
 
+int XeEmitTrap(FunctionGenerator& g, IRBuilder<>& b, InstrData& i,
+                Value* va, Value* vb, uint32_t TO) {
+  // if (a < b) & TO[0] then TRAP
+  // if (a > b) & TO[1] then TRAP
+  // if (a = b) & TO[2] then TRAP
+  // if (a <u b) & TO[3] then TRAP
+  // if (a >u b) & TO[4] then TRAP
+  // Bits swapped:
+  // 01234
+  // 43210
+
+  if (!TO) {
+    return 0;
+  }
+
+  BasicBlock* after_bb = BasicBlock::Create(*g.context(), "", g.gen_fn(),
+                                            g.GetNextBasicBlock());
+  BasicBlock* trap_bb = BasicBlock::Create(*g.context(), "", g.gen_fn(),
+                                           after_bb);
+
+  // Create the basic blocks (so we can chain).
+  std::vector<BasicBlock*> bbs;
+  if (TO & (1 << 4)) {
+    bbs.push_back(BasicBlock::Create(*g.context(), "", g.gen_fn(), trap_bb));
+  }
+  if (TO & (1 << 3)) {
+    bbs.push_back(BasicBlock::Create(*g.context(), "", g.gen_fn(), trap_bb));
+  }
+  if (TO & (1 << 2)) {
+    bbs.push_back(BasicBlock::Create(*g.context(), "", g.gen_fn(), trap_bb));
+  }
+  if (TO & (1 << 1)) {
+    bbs.push_back(BasicBlock::Create(*g.context(), "", g.gen_fn(), trap_bb));
+  }
+  if (TO & (1 << 0)) {
+    bbs.push_back(BasicBlock::Create(*g.context(), "", g.gen_fn(), trap_bb));
+  }
+  bbs.push_back(after_bb);
+
+  // Jump to the first bb.
+  b.CreateBr(bbs.front());
+
+  // Setup each basic block.
+  std::vector<BasicBlock*>::iterator it = bbs.begin();
+  if (TO & (1 << 4)) {
+    // a < b
+    BasicBlock* bb = *(it++);
+    b.SetInsertPoint(bb);
+    Value* cmp = b.CreateICmpSLT(va, vb);
+    b.CreateCondBr(cmp, trap_bb, *it);
+  }
+  if (TO & (1 << 3)) {
+    // a > b
+    BasicBlock* bb = *(it++);
+    b.SetInsertPoint(bb);
+    Value* cmp = b.CreateICmpSGT(va, vb);
+    b.CreateCondBr(cmp, trap_bb, *it);
+  }
+  if (TO & (1 << 2)) {
+    // a = b
+    BasicBlock* bb = *(it++);
+    b.SetInsertPoint(bb);
+    Value* cmp = b.CreateICmpEQ(va, vb);
+    b.CreateCondBr(cmp, trap_bb, *it);
+  }
+  if (TO & (1 << 1)) {
+    // a <u b
+    BasicBlock* bb = *(it++);
+    b.SetInsertPoint(bb);
+    Value* cmp = b.CreateICmpULT(va, vb);
+    b.CreateCondBr(cmp, trap_bb, *it);
+  }
+  if (TO & (1 << 0)) {
+    // a >u b
+    BasicBlock* bb = *(it++);
+    b.SetInsertPoint(bb);
+    Value* cmp = b.CreateICmpUGT(va, vb);
+    b.CreateCondBr(cmp, trap_bb, *it);
+  }
+
+  // Create trap BB.
+  b.SetInsertPoint(trap_bb);
+  g.SpillRegisters();
+  // TODO(benvanik): use @llvm.debugtrap? could make debugging better
+  b.CreateCall2(g.gen_module()->getGlobalVariable("XeTrap"),
+                g.gen_fn()->arg_begin(),
+                b.getInt32(i.address));
+  b.CreateBr(after_bb);
+
+  // Resume.
+  b.SetInsertPoint(after_bb);
+
+  return 0;
+}
+
 XEEMITTER(td,           0x7C000088, X  )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  // a <- (RA)
+  // b <- (RB)
+  // if (a < b) & TO[0] then TRAP
+  // if (a > b) & TO[1] then TRAP
+  // if (a = b) & TO[2] then TRAP
+  // if (a <u b) & TO[3] then TRAP
+  // if (a >u b) & TO[4] then TRAP
+  return XeEmitTrap(g, b, i,
+                    g.gpr_value(i.X.RA),
+                    g.gpr_value(i.X.RB),
+                    i.X.RT);
 }
 
 XEEMITTER(tdi,          0x08000000, D  )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  // a <- (RA)
+  // if (a < EXTS(SI)) & TO[0] then TRAP
+  // if (a > EXTS(SI)) & TO[1] then TRAP
+  // if (a = EXTS(SI)) & TO[2] then TRAP
+  // if (a <u EXTS(SI)) & TO[3] then TRAP
+  // if (a >u EXTS(SI)) & TO[4] then TRAP
+  return XeEmitTrap(g, b, i,
+                    g.gpr_value(i.D.RA),
+                    b.getInt64(XEEXTS16(i.D.DS)),
+                    i.D.RT);
 }
 
 XEEMITTER(tw,           0x7C000008, X  )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  // a <- EXTS((RA)[32:63])
+  // b <- EXTS((RB)[32:63])
+  // if (a < b) & TO[0] then TRAP
+  // if (a > b) & TO[1] then TRAP
+  // if (a = b) & TO[2] then TRAP
+  // if (a <u b) & TO[3] then TRAP
+  // if (a >u b) & TO[4] then TRAP
+  return XeEmitTrap(g, b, i,
+                    b.CreateSExt(b.CreateTrunc(g.gpr_value(i.X.RA),
+                                               b.getInt32Ty()),
+                                 b.getInt64Ty()),
+                    b.CreateSExt(b.CreateTrunc(g.gpr_value(i.X.RB),
+                                               b.getInt32Ty()),
+                                 b.getInt64Ty()),
+                    i.X.RT);
 }
 
 XEEMITTER(twi,          0x0C000000, D  )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  // a <- EXTS((RA)[32:63])
+  // if (a < EXTS(SI)) & TO[0] then TRAP
+  // if (a > EXTS(SI)) & TO[1] then TRAP
+  // if (a = EXTS(SI)) & TO[2] then TRAP
+  // if (a <u EXTS(SI)) & TO[3] then TRAP
+  // if (a >u EXTS(SI)) & TO[4] then TRAP
+  return XeEmitTrap(g, b, i,
+                    b.CreateSExt(b.CreateTrunc(g.gpr_value(i.D.RA),
+                                               b.getInt32Ty()),
+                                 b.getInt64Ty()),
+                    b.getInt64(XEEXTS16(i.D.DS)),
+                    i.D.RT);
 }
 
 
diff --git a/src/cpu/codegen/emit_memory.cc b/src/cpu/codegen/emit_memory.cc
index adba42036..d329ae42e 100644
--- a/src/cpu/codegen/emit_memory.cc
+++ b/src/cpu/codegen/emit_memory.cc
@@ -658,13 +658,15 @@ XEEMITTER(dcbst,        0x7C00006C, X  )(FunctionGenerator& g, IRBuilder<>& b, I
 }
 
 XEEMITTER(dcbt,         0x7C00022C, X  )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  // No-op for now.
+  // TODO(benvanik): use @llvm.prefetch
+  return 0;
 }
 
 XEEMITTER(dcbtst,       0x7C0001EC, X  )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  // No-op for now.
+  // TODO(benvanik): use @llvm.prefetch
+  return 0;
 }
 
 XEEMITTER(dcbz,         0x7C0007EC, X  )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) {
diff --git a/src/cpu/codegen/function_generator.cc b/src/cpu/codegen/function_generator.cc
index 40e286e83..4863c3a26 100644
--- a/src/cpu/codegen/function_generator.cc
+++ b/src/cpu/codegen/function_generator.cc
@@ -489,7 +489,7 @@ void FunctionGenerator::FillRegisters() {
   }
 
   // Note that we skip zero.
-  for (uint32_t n = 1; n < XECOUNT(locals_.gpr); n++) {
+  for (size_t n = 0; n < XECOUNT(locals_.gpr); n++) {
     if (locals_.gpr[n]) {
       b.CreateStore(LoadStateValue(
           offsetof(xe_ppc_state_t, r) + 8 * n,
@@ -550,7 +550,7 @@ void FunctionGenerator::SpillRegisters() {
   }
 
   // Note that we skip zero.
-  for (uint32_t n = 1; n < XECOUNT(locals_.gpr); n++) {
+  for (uint32_t n = 0; n < XECOUNT(locals_.gpr); n++) {
     Value* v = locals_.gpr[n];
     if (v) {
       StoreStateValue(
@@ -591,6 +591,62 @@ void FunctionGenerator::update_xer_value(Value* value) {
   b.CreateStore(value, locals_.xer);
 }
 
+void FunctionGenerator::update_xer_with_overflow(Value* value) {
+  IRBuilder<>& b = *builder_;
+
+  setup_xer();
+
+  // Expects a i1 indicating overflow.
+  // Trust the caller that if it's larger than that it's already truncated.
+  if (!value->getType()->isIntegerTy(64)) {
+    value = b.CreateZExt(value, b.getInt64Ty());
+  }
+
+  Value* xer = xer_value();
+  xer = b.CreateAnd(xer, 0xFFFFFFFFBFFFFFFF); // clear bit 30
+  xer = b.CreateOr(xer, b.CreateShl(value, 31));
+  xer = b.CreateOr(xer, b.CreateShl(value, 30));
+  b.CreateStore(xer, locals_.xer);
+}
+
+void FunctionGenerator::update_xer_with_carry(Value* value) {
+  IRBuilder<>& b = *builder_;
+
+  setup_xer();
+
+  // Expects a i1 indicating carry.
+  // Trust the caller that if it's larger than that it's already truncated.
+  if (!value->getType()->isIntegerTy(64)) {
+    value = b.CreateZExt(value, b.getInt64Ty());
+  }
+
+  Value* xer = xer_value();
+  xer = b.CreateAnd(xer, 0xFFFFFFFFDFFFFFFF); // clear bit 29
+  xer = b.CreateOr(xer, b.CreateShl(value, 29));
+  b.CreateStore(xer, locals_.xer);
+}
+
+void FunctionGenerator::update_xer_with_overflow_and_carry(Value* value) {
+  IRBuilder<>& b = *builder_;
+
+  setup_xer();
+
+  // Expects a i1 indicating overflow.
+  // Trust the caller that if it's larger than that it's already truncated.
+  if (!value->getType()->isIntegerTy(64)) {
+    value = b.CreateZExt(value, b.getInt64Ty());
+  }
+
+  // This is effectively an update_xer_with_overflow followed by an
+  // update_xer_with_carry, but since the logic is largely the same share it.
+  Value* xer = xer_value();
+  xer = b.CreateAnd(xer, 0xFFFFFFFF9FFFFFFF); // clear bit 30 & 29
+  xer = b.CreateOr(xer, b.CreateShl(value, 31));
+  xer = b.CreateOr(xer, b.CreateShl(value, 30));
+  xer = b.CreateOr(xer, b.CreateShl(value, 29));
+  b.CreateStore(xer, locals_.xer);
+}
+
 void FunctionGenerator::setup_lr() {
   IRBuilder<>& b = *builder_;
 
@@ -679,10 +735,50 @@ void FunctionGenerator::update_cr_value(uint32_t n, Value* value) {
 
   setup_cr(n);
 
-  value = b.CreateTrunc(value, b.getInt8Ty());
+  // Truncate to 8 bits if needed.
+  // TODO(benvanik): also widen?
+  if (!value->getType()->isIntegerTy(8)) {
+    value = b.CreateTrunc(value, b.getInt8Ty());
+  }
+
   b.CreateStore(value, locals_.cr[n]);
 }
 
+void FunctionGenerator::update_cr_with_cond(
+    uint32_t n, Value* lhs, Value* rhs, bool is_signed) {
+  IRBuilder<>& b = *builder_;
+
+  // bit0 = RA < RB
+  // bit1 = RA > RB
+  // bit2 = RA = RB
+  // bit3 = XER[SO]
+  // Bits are reversed:
+  // 0123
+  // 3210
+
+  // TODO(benvanik): inline this using the x86 cmp instruction - this prevents
+  // the need for a lot of the compares and ensures we lower to the best
+  // possible x86.
+  // Value* cmp = InlineAsm::get(
+  //     FunctionType::get(),
+  //     "cmp $0, $1                 \n"
+  //     "mov from compare registers \n",
+  //     "r,r", ??
+  //     true);
+
+  Value* is_lt = is_signed ?
+      b.CreateICmpSLT(lhs, rhs) : b.CreateICmpULT(lhs, rhs);
+  Value* is_gt = is_signed ?
+      b.CreateICmpSGT(lhs, rhs) : b.CreateICmpUGT(lhs, rhs);
+  Value* cp = b.CreateSelect(is_gt, b.getInt8(1 << 2), b.getInt8(1 << 1));
+  Value* c = b.CreateSelect(is_lt, b.getInt8(1 << 3), cp);
+
+  // TODO(benvanik): set bit 4 to XER[SO]
+
+  // Insert the 4 bits into their location in the CR.
+  update_cr_value(n, c);
+}
+
 void FunctionGenerator::setup_gpr(uint32_t n) {
   IRBuilder<>& b = *builder_;
 
@@ -699,10 +795,13 @@ Value* FunctionGenerator::gpr_value(uint32_t n) {
   IRBuilder<>& b = *builder_;
 
   XEASSERT(n >= 0 && n < 32);
-  if (n == 0) {
-    // Always force zero to a constant - this should help LLVM.
-    return b.getInt64(0);
-  }
+
+  // Actually r0 is writable, even though nobody should ever do that.
+  // Perhaps we can check usage and enable this if safe?
+  // if (n == 0) {
+  //   // Always force zero to a constant - this should help LLVM.
+  //   return b.getInt64(0);
+  // }
 
   setup_gpr(n);
 
@@ -713,10 +812,12 @@ void FunctionGenerator::update_gpr_value(uint32_t n, Value* value) {
   IRBuilder<>& b = *builder_;
 
   XEASSERT(n >= 0 && n < 32);
-  if (n == 0) {
-    // Ignore writes to zero.
-    return;
-  }
+
+  // See above - r0 can be written.
+  // if (n == 0) {
+  //   // Ignore writes to zero.
+  //   return;
+  // }
 
   setup_gpr(n);
 
diff --git a/src/cpu/codegen/module_generator.cc b/src/cpu/codegen/module_generator.cc
index 8f2244c17..6d7d30d80 100644
--- a/src/cpu/codegen/module_generator.cc
+++ b/src/cpu/codegen/module_generator.cc
@@ -250,6 +250,7 @@ void ModuleGenerator::BuildFunction(CodegenFunction* cgf) {
 
 void ModuleGenerator::OptimizeFunction(Module* m, Function* fn) {
   FunctionPassManager pm(m);
+  //fn->dump();
   if (FLAGS_optimize_ir_functions) {
     PassManagerBuilder pmb;
     pmb.OptLevel      = 3;
diff --git a/src/cpu/exec_module.cc b/src/cpu/exec_module.cc
index 7de1fda40..cdc936352 100644
--- a/src/cpu/exec_module.cc
+++ b/src/cpu/exec_module.cc
@@ -202,6 +202,11 @@ XECLEANUP:
   return result_code;
 }
 
+void XeTrap(xe_ppc_state_t* state, uint32_t cia) {
+  printf("TRAP");
+  XEASSERTALWAYS();
+}
+
 void XeIndirectBranch(xe_ppc_state_t* state, uint64_t target, uint64_t br_ia) {
   printf("INDIRECT BRANCH %.8X -> %.8X\n", (uint32_t)br_ia, (uint32_t)target);
   XEASSERTALWAYS();
@@ -255,6 +260,16 @@ int ExecModule::InjectGlobals() {
       int8PtrTy));
 
   // Control methods:
+  std::vector<Type*> trapArgs;
+  trapArgs.push_back(int8PtrTy);
+  trapArgs.push_back(Type::getInt32Ty(context));
+  FunctionType* trapTy = FunctionType::get(
+      Type::getVoidTy(context), trapArgs, false);
+  gv = new GlobalVariable(*gen_module_, trapTy, true,
+                          GlobalVariable::ExternalLinkage, 0,
+                          "XeTrap");
+  engine_->addGlobalMapping(gv, (void*)&XeTrap);
+
   std::vector<Type*> indirectBranchArgs;
   indirectBranchArgs.push_back(int8PtrTy);
   indirectBranchArgs.push_back(Type::getInt64Ty(context));