Probably broke everything.

This commit is contained in:
gibbed 2014-01-24 00:00:20 -08:00
parent dfbbefa97f
commit 855173b2bd
1 changed files with 17 additions and 12 deletions

View File

@ -45,6 +45,17 @@ namespace ivm {
//#define DPRINT if (ics.thread_state->thread_id() == 1) printf
//#define DFLUSH() fflush(stdout)
#if XE_CPU_BIGENDIAN
#define VECTORBYTEOFFSET(n) (n)
#else
static const uint8_t __vector_byte_offset_table[16] = {
3, 2, 1, 0,
7, 6, 5, 4,
11, 10, 9, 8,
15, 14, 13, 12,
};
#define VECTORBYTEOFFSET(n) (__vector_byte_offset_table[n])
#endif
uint32_t IntCode_INT_LOAD_CONSTANT(IntCodeState& ics, const IntCode* i) {
// TODO(benvanik): optimize on type to avoid 16b copy per load.
@ -1245,7 +1256,7 @@ uint32_t IntCode_LOAD_VECTOR_SHL(IntCodeState& ics, const IntCode* i) {
int8_t sh = MIN(16, ics.rf[i->src1_reg].i8);
vec128_t& dest = ics.rf[i->dest_reg].v128;
for (int n = 0; n < 16; n++) {
dest.b16[n] = __lvsl_table[sh][n];
dest.b16[n] = __lvsl_table[sh][VECTORBYTEOFFSET(n)];
}
return IA_NEXT;
}
@ -1257,7 +1268,7 @@ uint32_t IntCode_LOAD_VECTOR_SHR(IntCodeState& ics, const IntCode* i) {
int8_t sh = MIN(16, ics.rf[i->src1_reg].i8);
vec128_t& dest = ics.rf[i->dest_reg].v128;
for (int n = 0; n < 16; n++) {
dest.b16[n] = __lvsr_table[sh][n];
dest.b16[n] = __lvsr_table[sh][VECTORBYTEOFFSET(n)];
}
return IA_NEXT;
}
@ -3681,16 +3692,10 @@ uint32_t IntCode_PERMUTE_V128_BY_INT32(IntCodeState& ics, const IntCode* i) {
}
return IA_NEXT;
}
static const uint8_t __swap_table[16] = {
3, 2, 1, 0,
7, 6, 5, 4,
11, 10, 9, 8,
15, 14, 13, 12,
};
uint8_t grab(const vec128_t& src, uint8_t index) {
return (index < 8
? (src.low >> (__swap_table[index] << 3))
: (src.high >> ((__swap_table[index - 8]) << 3))) & 0xFF;
? (src.low >> (VECTORBYTEOFFSET(index) << 3))
: (src.high >> ((VECTORBYTEOFFSET(index - 8)) << 3))) & 0xFF;
}
uint32_t IntCode_PERMUTE_V128_BY_V128(IntCodeState& ics, const IntCode* i) {
const vec128_t& table = ics.rf[i->src1_reg].v128;
@ -3699,12 +3704,12 @@ uint32_t IntCode_PERMUTE_V128_BY_V128(IntCodeState& ics, const IntCode* i) {
vec128_t& dests = ics.rf[i->dest_reg].v128;
dests.low = dests.high = 0;
for (size_t n = 0; n < 16; n++) {
uint8_t index = table.b16[n] & 0x1F;
uint8_t index = table.b16[VECTORBYTEOFFSET(n)] & 0x1F;
uint8_t value = index < 16
? grab(src2, index)
: grab(src3, index - 16);
uint64_t& dest = n < 8 ? dests.low : dests.high;
uint8_t shift = __swap_table[(n < 8 ? n : (n - 8))] << 3;
uint8_t shift = VECTORBYTEOFFSET((n < 8 ? n : (n - 8))) << 3;
dest |= (((uint64_t)value) << shift);
}
return IA_NEXT;