From 417a970aab676ff3d30f4e78312dfd96327ff25f Mon Sep 17 00:00:00 2001
From: Ben Vanik <ben.vanik@gmail.com>
Date: Sun, 15 Dec 2013 13:58:40 -0800
Subject: [PATCH] Fix lvsl/lvsr tables, stvlx/stvrx byte swaps, and ZeroExtend masks.

---
 src/alloy/backend/ivm/ivm_intcode.cc       | 12 +++++++-----
 src/alloy/frontend/ppc/ppc_emit_altivec.cc | 12 ++++++------
 src/alloy/hir/value.cc                     |  6 +++---
 3 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/src/alloy/backend/ivm/ivm_intcode.cc b/src/alloy/backend/ivm/ivm_intcode.cc
index 89fba86a8..f6c1d3203 100644
--- a/src/alloy/backend/ivm/ivm_intcode.cc
+++ b/src/alloy/backend/ivm/ivm_intcode.cc
@@ -1066,7 +1066,7 @@ int Translate_VECTOR_CONVERT_I2F(TranslationContext& ctx, Instr* i) {
   return DispatchToC(ctx, i, IntCode_VECTOR_CONVERT_I2F);
 }
 
-static uint8_t __lvsl_table[16][16] = {
+static uint8_t __lvsl_table[17][16] = {
   { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15},
   { 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16},
   { 2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17},
@@ -1083,8 +1083,9 @@ static uint8_t __lvsl_table[16][16] = {
   {13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28},
   {14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29},
   {15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30},
+  {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31},
 };
-static uint8_t __lvsr_table[16][16] = {
+static uint8_t __lvsr_table[17][16] = {
   {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31},
   {15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30},
   {14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29},
@@ -1101,10 +1102,11 @@ static uint8_t __lvsr_table[16][16] = {
   { 3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18},
   { 2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17},
   { 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16},
+  { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15},
 };
 
 uint32_t IntCode_LOAD_VECTOR_SHL(IntCodeState& ics, const IntCode* i) {
-  int8_t sh = ics.rf[i->src1_reg].i8;
+  int8_t sh = ics.rf[i->src1_reg].i8 & 0x1F;
   vec128_t& dest = ics.rf[i->dest_reg].v128;
   for (int n = 0; n < 16; n++) {
     dest.b16[n] = __lvsl_table[sh][n];
@@ -1116,9 +1118,9 @@ int Translate_LOAD_VECTOR_SHL(TranslationContext& ctx, Instr* i) {
 }
 
 uint32_t IntCode_LOAD_VECTOR_SHR(IntCodeState& ics, const IntCode* i) {
-  int8_t sh = ics.rf[i->src1_reg].i8;
+  int8_t sh = ics.rf[i->src1_reg].i8 & 0x1F;
   vec128_t& dest = ics.rf[i->dest_reg].v128;
-  for (int n = 0; n < 4; n++) {
+  for (int n = 0; n < 16; n++) {
     dest.b16[n] = __lvsr_table[sh][n];
   }
   return IA_NEXT;
diff --git a/src/alloy/frontend/ppc/ppc_emit_altivec.cc b/src/alloy/frontend/ppc/ppc_emit_altivec.cc
index 88c94ae5f..c6f34dad6 100644
--- a/src/alloy/frontend/ppc/ppc_emit_altivec.cc
+++ b/src/alloy/frontend/ppc/ppc_emit_altivec.cc
@@ -294,9 +294,9 @@ int InstrEmit_stvlx_(PPCFunctionBuilder& f, InstrData& i, uint32_t vd, uint32_t
   //       we could optimize this to prevent the other load/mask, in that case.
   Value* ea = ra ? f.Add(f.LoadGPR(ra), f.LoadGPR(rb)) : f.LoadGPR(rb);
   Value* eb = f.And(f.Truncate(ea, INT8_TYPE), f.LoadConstant((int8_t)0xF));
-  Value* new_value = f.ByteSwap(f.LoadVR(vd));
+  Value* new_value = f.LoadVR(vd);
   // ea &= ~0xF (load takes care of this)
-  Value* old_value = f.Load(ea, VEC128_TYPE);
+  Value* old_value = f.ByteSwap(f.Load(ea, VEC128_TYPE));
   // v = (new >> eb) | (old & (ONE << (16 - eb)))
   Value* v = f.Permute(
       f.LoadVectorShr(eb),
@@ -313,7 +313,7 @@ int InstrEmit_stvlx_(PPCFunctionBuilder& f, InstrData& i, uint32_t vd, uint32_t
               f.LoadZero(VEC128_TYPE),
               INT8_TYPE)));
   // ea &= ~0xF (store takes care of this)
-  f.Store(ea, v);
+  f.Store(ea, f.ByteSwap(v));
   return 0;
 }
 XEEMITTER(stvlx,          0x7C00050E, X   )(PPCFunctionBuilder& f, InstrData& i) {
@@ -335,9 +335,9 @@ int InstrEmit_stvrx_(PPCFunctionBuilder& f, InstrData& i, uint32_t vd, uint32_t
   Value* ea = ra ? f.Add(f.LoadGPR(ra), f.LoadGPR(rb)) : f.LoadGPR(rb);
   Value* eb = f.And(f.Truncate(ea, INT8_TYPE), f.LoadConstant((int8_t)0xF));
   Value* ebits = f.Mul(eb, f.LoadConstant((int8_t)8));
-  Value* new_value = f.ByteSwap(f.LoadVR(vd));
+  Value* new_value = f.LoadVR(vd);
   // ea &= ~0xF (load takes care of this)
-  Value* old_value = f.Load(ea, VEC128_TYPE);
+  Value* old_value = f.ByteSwap(f.Load(ea, VEC128_TYPE));
   // v = (new << (16 - eb)) | (old & (ONE >> eb))
   Value* v = f.Permute(
       f.LoadVectorShl(f.Sub(f.LoadConstant((int8_t)16), eb)),
@@ -354,7 +354,7 @@ int InstrEmit_stvrx_(PPCFunctionBuilder& f, InstrData& i, uint32_t vd, uint32_t
               f.Not(f.LoadZero(VEC128_TYPE)),
               INT8_TYPE)));
   // ea &= ~0xF (store takes care of this)
-  f.Store(ea, v);
+  f.Store(ea, f.ByteSwap(v));
   return 0;
 }
 XEEMITTER(stvrx,          0x7C00054E, X   )(PPCFunctionBuilder& f, InstrData& i) {
diff --git a/src/alloy/hir/value.cc b/src/alloy/hir/value.cc
index 5d1ca80d5..cd8935842 100644
--- a/src/alloy/hir/value.cc
+++ b/src/alloy/hir/value.cc
@@ -62,15 +62,15 @@ void Value::ZeroExtend(TypeName target_type) {
   switch (type) {
   case INT8_TYPE:
     type = target_type;
-    constant.i64 = constant.i64 & ~0xFF;
+    constant.i64 = constant.i64 & 0xFF;
     return;
   case INT16_TYPE:
     type = target_type;
-    constant.i64 = constant.i64 & ~0xFFFF;
+    constant.i64 = constant.i64 & 0xFFFF;
     return;
   case INT32_TYPE:
     type = target_type;
-    constant.i64 = constant.i64 & ~0xFFFFFFFF;
+    constant.i64 = constant.i64 & 0xFFFFFFFF;
     return;
   }
   // Unsupported types.