bsnes/higan/processor/m68k/instructions.cpp

615 lines
17 KiB
C++
Raw Normal View History

auto M68K::testCondition(uint4 condition) -> bool {
switch(condition) {
case 0: return true; //T
case 1: return false; //F
case 2: return !r.c && !r.z; //HI
case 3: return r.c || r.z; //LS
case 4: return !r.c; //CC,HS
case 5: return r.c; //CS,LO
case 6: return !r.z; //NE
case 7: return r.z; //EQ
case 8: return !r.v; //VC
case 9: return r.v; //VS
case 10: return !r.n; //PL
case 11: return r.n; //MI
case 12: return r.n == r.v; //GE
case 13: return r.n != r.v; //LT
case 14: return r.n == r.v && !r.z; //GT
case 15: return r.n != r.v || r.z; //LE
}
unreachable;
}
//
template<> auto M68K::bytes<Byte>() -> uint { return 1; }
template<> auto M68K::bytes<Word>() -> uint { return 2; }
template<> auto M68K::bytes<Long>() -> uint { return 4; }
template<> auto M68K::bits<Byte>() -> uint { return 8; }
template<> auto M68K::bits<Word>() -> uint { return 16; }
template<> auto M68K::bits<Long>() -> uint { return 32; }
template<uint Size> auto M68K::lsb() -> uint32 { return 1; }
template<> auto M68K::msb<Byte>() -> uint32 { return 0x80; }
template<> auto M68K::msb<Word>() -> uint32 { return 0x8000; }
template<> auto M68K::msb<Long>() -> uint32 { return 0x80000000; }
template<> auto M68K::mask<Byte>() -> uint32 { return 0xff; }
template<> auto M68K::mask<Word>() -> uint32 { return 0xffff; }
template<> auto M68K::mask<Long>() -> uint32 { return 0xffffffff; }
template<> auto M68K::clip<Byte>(uint32 data) -> uint32 { return data & 0xff; }
template<> auto M68K::clip<Word>(uint32 data) -> uint32 { return data & 0xffff; }
template<> auto M68K::clip<Long>(uint32 data) -> uint32 { return data & 0xffffffff; }
template<> auto M68K::sign<Byte>(uint32 data) -> int32 { return (int8)data; }
template<> auto M68K::sign<Word>(uint32 data) -> int32 { return (int16)data; }
template<> auto M68K::sign<Long>(uint32 data) -> int32 { return (int32)data; }
template<uint Size> auto M68K::zero(uint32 result) -> bool {
return clip<Size>(result) == 0;
}
template<uint Size> auto M68K::negative(uint32 result) -> bool {
return sign<Size>(result) < 0;
}
//
template<uint Size> auto M68K::ADD(uint32 source, uint32 target) -> uint32 {
uint64 result = (uint64)source + (uint64)target;
r.c = sign<Size>(result >> 1) < 0;
r.v = sign<Size>(~(target ^ source) & (target ^ result)) < 0;
r.z = clip<Size>(result) == 0;
r.n = sign<Size>(result) < 0;
r.x = r.c;
return clip<Size>(result);
}
template<uint Size> auto M68K::instructionADD(DataRegister dr, uint1 direction, EffectiveAddress ea) -> void {
if(direction == 0) {
auto source = read<Size>(ea);
auto target = read<Size>(dr);
auto result = ADD<Size>(source, target);
write<Size>(dr, result);
} else {
auto source = read<Size>(dr);
auto target = read<Size>(ea);
auto result = ADD<Size>(source, target);
write<Size>(ea, result);
}
}
template<uint Size> auto M68K::instructionADDI(EffectiveAddress modify) -> void {
auto source = readPC<Size>();
auto target = read<Size>(modify);
auto result = ADD<Size>(source, target);
write<Size>(modify, result);
}
template<uint Size> auto M68K::instructionADDA(AddressRegister ar, EffectiveAddress ea) -> void {
auto source = read<Size>(ea);
auto target = read<Size>(ar);
write<Long>(ar, source + target);
}
template<uint Size> auto M68K::instructionADDQ(uint4 immediate, EffectiveAddress modify) -> void {
auto source = read<Size>(modify);
auto target = immediate;
auto result = ADD<Size>(source, target);
write<Size>(modify, result);
}
template<uint Size> auto M68K::instructionANDI(EffectiveAddress ea) -> void {
auto source = readPC<Size>();
auto target = read<Size, NoUpdate>(ea);
auto result = target & source;
write<Size>(ea, result);
Update to v100r06 release. byuu says: Up to ten 68K instructions out of somewhere between 61 and 88, depending upon which PDF you look at. Of course, some of them aren't 100% completed yet, either. Lots of craziness with MOVEM, and BCC has a BSR variant that needs stack push/pop functions. This WIP actually took over eight hours to make, going through every possible permutation on how to design the core itself. The updated design now builds both the instruction decoder+dispatcher and the disassembler decoder into the same main loop during M68K's constructor. The special cases are also really psychotic on this processor, and I'm afraid of missing something via the fallthrough cases. So instead, I'm ordering the instructions alphabetically, and including exclusion cases to ignore binding invalid cases. If I end up remapping an existing register, then it'll throw a run-time assertion at program startup. I wanted very much to get rid of struct EA (EffectiveAddress), but it's too difficult to keep track of the internal effective address without it. So I split out the size to a separate parameter, since every opcode only has one size parameter, and otherwise it was getting duplicated in opcodes that take two EAs, and was also awkward with the flag testing. It's a bit more typing, but I feel it's more clean this way. Overall, I'm really worried this is going to be too slow. I don't want to turn the EA stuff into templates, because that will massively bloat out compilation times and object sizes, and will also need a special DSL preprocessor since C++ doesn't have a static for loop. I can definitely optimize a lot of EA's address/read/write functions away once the core is completed, but it's never going to hold a candle to a templatized 68K core. ---- Forgot to include the SA-1 regression fix. I always remember immediately after I upload and archive the WIP. Will try to get that in next time, I guess.
2016-07-16 08:39:44 +00:00
r.c = 0;
r.v = 0;
r.z = zero<Size>(result);
r.n = negative<Size>(result);
Update to v100r06 release. byuu says: Up to ten 68K instructions out of somewhere between 61 and 88, depending upon which PDF you look at. Of course, some of them aren't 100% completed yet, either. Lots of craziness with MOVEM, and BCC has a BSR variant that needs stack push/pop functions. This WIP actually took over eight hours to make, going through every possible permutation on how to design the core itself. The updated design now builds both the instruction decoder+dispatcher and the disassembler decoder into the same main loop during M68K's constructor. The special cases are also really psychotic on this processor, and I'm afraid of missing something via the fallthrough cases. So instead, I'm ordering the instructions alphabetically, and including exclusion cases to ignore binding invalid cases. If I end up remapping an existing register, then it'll throw a run-time assertion at program startup. I wanted very much to get rid of struct EA (EffectiveAddress), but it's too difficult to keep track of the internal effective address without it. So I split out the size to a separate parameter, since every opcode only has one size parameter, and otherwise it was getting duplicated in opcodes that take two EAs, and was also awkward with the flag testing. It's a bit more typing, but I feel it's more clean this way. Overall, I'm really worried this is going to be too slow. I don't want to turn the EA stuff into templates, because that will massively bloat out compilation times and object sizes, and will also need a special DSL preprocessor since C++ doesn't have a static for loop. I can definitely optimize a lot of EA's address/read/write functions away once the core is completed, but it's never going to hold a candle to a templatized 68K core. ---- Forgot to include the SA-1 regression fix. I always remember immediately after I upload and archive the WIP. Will try to get that in next time, I guess.
2016-07-16 08:39:44 +00:00
}
auto M68K::instructionANDI_TO_CCR() -> void {
auto data = readPC<Word>();
writeCCR(readCCR() & data);
}
auto M68K::instructionANDI_TO_SR() -> void {
if(!supervisor()) return;
auto data = readPC<Word>();
writeSR(readSR() & data);
}
template<uint Size> auto M68K::ASL(uint32 result, uint shift) -> uint32 {
bool carry = false;
uint32 overflow = 0;
for(auto _ : range(shift)) {
carry = result & msb<Size>();
uint32 before = result;
result <<= 1;
overflow |= before ^ result;
}
r.c = carry;
r.v = sign<Size>(overflow) < 0;
r.z = clip<Size>(result) == 0;
r.n = sign<Size>(result) < 0;
if(shift) r.x = r.c;
return clip<Size>(result);
}
template<uint Size> auto M68K::instructionASL(uint4 shift, DataRegister modify) -> void {
auto result = ASL<Size>(read<Size>(modify), shift);
write<Size>(modify, result);
}
template<uint Size> auto M68K::instructionASL(DataRegister shift, DataRegister modify) -> void {
auto count = read<Long>(shift) & 63;
auto result = ASL<Size>(read<Size>(modify), count);
write<Size>(modify, result);
}
auto M68K::instructionASL(EffectiveAddress modify) -> void {
auto result = ASL<Word>(read<Word, NoUpdate>(modify), 1);
write<Word>(modify, result);
}
template<uint Size> auto M68K::ASR(uint32 result, uint shift) -> uint32 {
bool carry = false;
uint32 overflow = 0;
for(auto _ : range(shift)) {
carry = result & lsb<Size>();
uint32 before = result;
result = sign<Size>(result) >> 1;
overflow |= before ^ result;
}
r.c = carry;
r.v = sign<Size>(overflow) < 0;
r.z = clip<Size>(result) == 0;
r.n = sign<Size>(result) < 0;
if(shift) r.x = r.c;
return clip<Size>(result);
}
template<uint Size> auto M68K::instructionASR(uint4 shift, DataRegister modify) -> void {
auto result = ASR<Size>(read<Size>(modify), shift);
write<Size>(modify, result);
}
template<uint Size> auto M68K::instructionASR(DataRegister shift, DataRegister modify) -> void {
auto count = read<Long>(shift) & 63;
auto result = ASR<Size>(read<Size>(modify), count);
write<Size>(modify, result);
}
auto M68K::instructionASR(EffectiveAddress modify) -> void {
auto result = ASR<Word>(read<Word, NoUpdate>(modify), 1);
write<Word>(modify, result);
}
auto M68K::instructionBCC(uint4 condition, uint8 displacement) -> void {
auto extension = readPC<Word>();
if(condition == 1) push<Long>(r.pc);
if(condition >= 2 && !testCondition(condition)) { //0 = BRA; 1 = BSR
if(displacement) r.pc -= 2;
} else {
r.pc -= 2;
r.pc += displacement ? sign<Byte>(displacement) : sign<Word>(extension);
}
Update to v100r06 release. byuu says: Up to ten 68K instructions out of somewhere between 61 and 88, depending upon which PDF you look at. Of course, some of them aren't 100% completed yet, either. Lots of craziness with MOVEM, and BCC has a BSR variant that needs stack push/pop functions. This WIP actually took over eight hours to make, going through every possible permutation on how to design the core itself. The updated design now builds both the instruction decoder+dispatcher and the disassembler decoder into the same main loop during M68K's constructor. The special cases are also really psychotic on this processor, and I'm afraid of missing something via the fallthrough cases. So instead, I'm ordering the instructions alphabetically, and including exclusion cases to ignore binding invalid cases. If I end up remapping an existing register, then it'll throw a run-time assertion at program startup. I wanted very much to get rid of struct EA (EffectiveAddress), but it's too difficult to keep track of the internal effective address without it. So I split out the size to a separate parameter, since every opcode only has one size parameter, and otherwise it was getting duplicated in opcodes that take two EAs, and was also awkward with the flag testing. It's a bit more typing, but I feel it's more clean this way. Overall, I'm really worried this is going to be too slow. I don't want to turn the EA stuff into templates, because that will massively bloat out compilation times and object sizes, and will also need a special DSL preprocessor since C++ doesn't have a static for loop. I can definitely optimize a lot of EA's address/read/write functions away once the core is completed, but it's never going to hold a candle to a templatized 68K core. ---- Forgot to include the SA-1 regression fix. I always remember immediately after I upload and archive the WIP. Will try to get that in next time, I guess.
2016-07-16 08:39:44 +00:00
}
template<uint Size> auto M68K::instructionBTST(DataRegister dr, EffectiveAddress ea) -> void {
auto bit = read<Size>(dr);
auto test = read<Size>(ea);
bit &= bits<Size>() - 1;
r.z = test.bit(bit) == 0;
}
template<uint Size> auto M68K::instructionBTST(EffectiveAddress ea) -> void {
auto bit = (uint8)readPC<Word>();
auto test = read<Size>(ea);
bit &= bits<Size>() - 1;
r.z = test.bit(bit) == 0;
}
template<uint Size> auto M68K::instructionCLR(EffectiveAddress ea) -> void {
read<Size>(ea);
write<Size>(ea, 0);
r.c = 0;
r.v = 0;
r.z = 1;
r.n = 0;
}
template<uint Size> auto M68K::CMP(uint32 source, uint32 target) -> uint32 {
uint64 result = (uint64)target - (uint64)source;
r.c = sign<Size>(result >> 1) < 0;
r.v = sign<Size>((target ^ source) & (target ^ result)) < 0;
r.z = clip<Size>(result) == 0;
r.n = sign<Size>(result) < 0;
return clip<Size>(result);
}
template<uint Size> auto M68K::instructionCMP(DataRegister dr, EffectiveAddress ea) -> void {
auto source = read<Size>(ea);
auto target = read<Size>(dr);
CMP<Size>(source, target);
}
template<uint Size> auto M68K::instructionCMPA(AddressRegister ar, EffectiveAddress ea) -> void {
auto source = read<Size>(ea);
auto target = read<Size>(ar);
CMP<Size>(source, target);
}
template<uint Size> auto M68K::instructionCMPI(EffectiveAddress ea) -> void {
auto source = readPC<Size>();
auto target = read<Size>(ea);
CMP<Size>(source, target);
}
template<uint Size> auto M68K::instructionCMPM(EffectiveAddress ax, EffectiveAddress ay) -> void {
auto source = read<Size>(ay);
auto target = read<Size>(ax);
CMP<Size>(source, target);
}
auto M68K::instructionDBCC(uint4 condition, DataRegister dr) -> void {
auto displacement = readPC<Word>();
if(!testCondition(condition)) {
uint16 result = read<Word>(dr);
write<Word>(dr, result - 1);
if(result) r.pc -= 2, r.pc += sign<Word>(displacement);
}
}
auto M68K::instructionEORI_TO_CCR() -> void {
auto data = readPC<Word>();
writeCCR(readCCR() ^ data);
}
auto M68K::instructionEORI_TO_SR() -> void {
if(!supervisor()) return;
auto data = readPC<Word>();
writeSR(readSR() ^ data);
}
auto M68K::instructionJSR(EffectiveAddress target) -> void {
push<Long>(r.pc);
r.pc = fetch<Long>(target);
}
auto M68K::instructionLEA(AddressRegister ar, EffectiveAddress ea) -> void {
write<Long>(ar, fetch<Long>(ea));
Update to v100r06 release. byuu says: Up to ten 68K instructions out of somewhere between 61 and 88, depending upon which PDF you look at. Of course, some of them aren't 100% completed yet, either. Lots of craziness with MOVEM, and BCC has a BSR variant that needs stack push/pop functions. This WIP actually took over eight hours to make, going through every possible permutation on how to design the core itself. The updated design now builds both the instruction decoder+dispatcher and the disassembler decoder into the same main loop during M68K's constructor. The special cases are also really psychotic on this processor, and I'm afraid of missing something via the fallthrough cases. So instead, I'm ordering the instructions alphabetically, and including exclusion cases to ignore binding invalid cases. If I end up remapping an existing register, then it'll throw a run-time assertion at program startup. I wanted very much to get rid of struct EA (EffectiveAddress), but it's too difficult to keep track of the internal effective address without it. So I split out the size to a separate parameter, since every opcode only has one size parameter, and otherwise it was getting duplicated in opcodes that take two EAs, and was also awkward with the flag testing. It's a bit more typing, but I feel it's more clean this way. Overall, I'm really worried this is going to be too slow. I don't want to turn the EA stuff into templates, because that will massively bloat out compilation times and object sizes, and will also need a special DSL preprocessor since C++ doesn't have a static for loop. I can definitely optimize a lot of EA's address/read/write functions away once the core is completed, but it's never going to hold a candle to a templatized 68K core. ---- Forgot to include the SA-1 regression fix. I always remember immediately after I upload and archive the WIP. Will try to get that in next time, I guess.
2016-07-16 08:39:44 +00:00
}
template<uint Size> auto M68K::LSL(uint32 result, uint shift) -> uint32 {
bool carry = false;
for(auto _ : range(shift)) {
carry = result & msb<Size>();
result <<= 1;
}
r.c = carry;
r.v = 0;
r.z = clip<Size>(result) == 0;
r.n = sign<Size>(result) < 0;
if(shift) r.x = r.c;
return clip<Size>(result);
}
template<uint Size> auto M68K::instructionLSL(uint4 immediate, DataRegister dr) -> void {
auto result = LSL<Size>(read<Size>(dr), immediate);
write<Size>(dr, result);
}
template<uint Size> auto M68K::instructionLSL(DataRegister sr, DataRegister dr) -> void {
auto shift = read<Long>(sr) & 63;
auto result = LSL<Size>(read<Size>(dr), shift);
write<Size>(dr, result);
}
auto M68K::instructionLSL(EffectiveAddress ea) -> void {
auto result = LSL<Word>(read<Word, NoUpdate>(ea), 1);
write<Word>(ea, result);
}
template<uint Size> auto M68K::LSR(uint32 result, uint shift) -> uint32 {
bool carry = false;
for(auto _ : range(shift)) {
carry = result & lsb<Size>();
result >>= 1;
}
r.c = carry;
r.v = 0;
r.z = clip<Size>(result) == 0;
r.n = sign<Size>(result) < 0;
if(shift) r.x = r.c;
return clip<Size>(result);
}
template<uint Size> auto M68K::instructionLSR(uint4 immediate, DataRegister dr) -> void {
auto result = LSR<Size>(read<Size>(dr), immediate);
write<Size>(dr, result);
}
template<uint Size> auto M68K::instructionLSR(DataRegister shift, DataRegister dr) -> void {
auto count = read<Long>(shift) & 63;
auto result = LSR<Size>(read<Size>(dr), count);
write<Size>(dr, result);
}
auto M68K::instructionLSR(EffectiveAddress ea) -> void {
auto result = LSR<Word>(read<Word, NoUpdate>(ea), 1);
write<Word>(ea, result);
}
template<uint Size> auto M68K::instructionMOVE(EffectiveAddress to, EffectiveAddress from) -> void {
auto data = read<Size>(from);
write<Size>(to, data);
Update to v100r06 release. byuu says: Up to ten 68K instructions out of somewhere between 61 and 88, depending upon which PDF you look at. Of course, some of them aren't 100% completed yet, either. Lots of craziness with MOVEM, and BCC has a BSR variant that needs stack push/pop functions. This WIP actually took over eight hours to make, going through every possible permutation on how to design the core itself. The updated design now builds both the instruction decoder+dispatcher and the disassembler decoder into the same main loop during M68K's constructor. The special cases are also really psychotic on this processor, and I'm afraid of missing something via the fallthrough cases. So instead, I'm ordering the instructions alphabetically, and including exclusion cases to ignore binding invalid cases. If I end up remapping an existing register, then it'll throw a run-time assertion at program startup. I wanted very much to get rid of struct EA (EffectiveAddress), but it's too difficult to keep track of the internal effective address without it. So I split out the size to a separate parameter, since every opcode only has one size parameter, and otherwise it was getting duplicated in opcodes that take two EAs, and was also awkward with the flag testing. It's a bit more typing, but I feel it's more clean this way. Overall, I'm really worried this is going to be too slow. I don't want to turn the EA stuff into templates, because that will massively bloat out compilation times and object sizes, and will also need a special DSL preprocessor since C++ doesn't have a static for loop. I can definitely optimize a lot of EA's address/read/write functions away once the core is completed, but it's never going to hold a candle to a templatized 68K core. ---- Forgot to include the SA-1 regression fix. I always remember immediately after I upload and archive the WIP. Will try to get that in next time, I guess.
2016-07-16 08:39:44 +00:00
r.c = 0;
r.v = 0;
r.z = zero<Size>(data);
r.n = negative<Size>(data);
Update to v100r06 release. byuu says: Up to ten 68K instructions out of somewhere between 61 and 88, depending upon which PDF you look at. Of course, some of them aren't 100% completed yet, either. Lots of craziness with MOVEM, and BCC has a BSR variant that needs stack push/pop functions. This WIP actually took over eight hours to make, going through every possible permutation on how to design the core itself. The updated design now builds both the instruction decoder+dispatcher and the disassembler decoder into the same main loop during M68K's constructor. The special cases are also really psychotic on this processor, and I'm afraid of missing something via the fallthrough cases. So instead, I'm ordering the instructions alphabetically, and including exclusion cases to ignore binding invalid cases. If I end up remapping an existing register, then it'll throw a run-time assertion at program startup. I wanted very much to get rid of struct EA (EffectiveAddress), but it's too difficult to keep track of the internal effective address without it. So I split out the size to a separate parameter, since every opcode only has one size parameter, and otherwise it was getting duplicated in opcodes that take two EAs, and was also awkward with the flag testing. It's a bit more typing, but I feel it's more clean this way. Overall, I'm really worried this is going to be too slow. I don't want to turn the EA stuff into templates, because that will massively bloat out compilation times and object sizes, and will also need a special DSL preprocessor since C++ doesn't have a static for loop. I can definitely optimize a lot of EA's address/read/write functions away once the core is completed, but it's never going to hold a candle to a templatized 68K core. ---- Forgot to include the SA-1 regression fix. I always remember immediately after I upload and archive the WIP. Will try to get that in next time, I guess.
2016-07-16 08:39:44 +00:00
}
template<uint Size> auto M68K::instructionMOVEA(AddressRegister ar, EffectiveAddress ea) -> void {
auto data = read<Size>(ea);
write<Long>(ar, data);
Update to v100r06 release. byuu says: Up to ten 68K instructions out of somewhere between 61 and 88, depending upon which PDF you look at. Of course, some of them aren't 100% completed yet, either. Lots of craziness with MOVEM, and BCC has a BSR variant that needs stack push/pop functions. This WIP actually took over eight hours to make, going through every possible permutation on how to design the core itself. The updated design now builds both the instruction decoder+dispatcher and the disassembler decoder into the same main loop during M68K's constructor. The special cases are also really psychotic on this processor, and I'm afraid of missing something via the fallthrough cases. So instead, I'm ordering the instructions alphabetically, and including exclusion cases to ignore binding invalid cases. If I end up remapping an existing register, then it'll throw a run-time assertion at program startup. I wanted very much to get rid of struct EA (EffectiveAddress), but it's too difficult to keep track of the internal effective address without it. So I split out the size to a separate parameter, since every opcode only has one size parameter, and otherwise it was getting duplicated in opcodes that take two EAs, and was also awkward with the flag testing. It's a bit more typing, but I feel it's more clean this way. Overall, I'm really worried this is going to be too slow. I don't want to turn the EA stuff into templates, because that will massively bloat out compilation times and object sizes, and will also need a special DSL preprocessor since C++ doesn't have a static for loop. I can definitely optimize a lot of EA's address/read/write functions away once the core is completed, but it's never going to hold a candle to a templatized 68K core. ---- Forgot to include the SA-1 regression fix. I always remember immediately after I upload and archive the WIP. Will try to get that in next time, I guess.
2016-07-16 08:39:44 +00:00
}
template<uint Size> auto M68K::instructionMOVEM(uint1 direction, EffectiveAddress ea) -> void {
Update to v100r06 release. byuu says: Up to ten 68K instructions out of somewhere between 61 and 88, depending upon which PDF you look at. Of course, some of them aren't 100% completed yet, either. Lots of craziness with MOVEM, and BCC has a BSR variant that needs stack push/pop functions. This WIP actually took over eight hours to make, going through every possible permutation on how to design the core itself. The updated design now builds both the instruction decoder+dispatcher and the disassembler decoder into the same main loop during M68K's constructor. The special cases are also really psychotic on this processor, and I'm afraid of missing something via the fallthrough cases. So instead, I'm ordering the instructions alphabetically, and including exclusion cases to ignore binding invalid cases. If I end up remapping an existing register, then it'll throw a run-time assertion at program startup. I wanted very much to get rid of struct EA (EffectiveAddress), but it's too difficult to keep track of the internal effective address without it. So I split out the size to a separate parameter, since every opcode only has one size parameter, and otherwise it was getting duplicated in opcodes that take two EAs, and was also awkward with the flag testing. It's a bit more typing, but I feel it's more clean this way. Overall, I'm really worried this is going to be too slow. I don't want to turn the EA stuff into templates, because that will massively bloat out compilation times and object sizes, and will also need a special DSL preprocessor since C++ doesn't have a static for loop. I can definitely optimize a lot of EA's address/read/write functions away once the core is completed, but it's never going to hold a candle to a templatized 68K core. ---- Forgot to include the SA-1 regression fix. I always remember immediately after I upload and archive the WIP. Will try to get that in next time, I guess.
2016-07-16 08:39:44 +00:00
auto list = readPC();
auto addr = fetch<Long>(ea);
Update to v100r06 release. byuu says: Up to ten 68K instructions out of somewhere between 61 and 88, depending upon which PDF you look at. Of course, some of them aren't 100% completed yet, either. Lots of craziness with MOVEM, and BCC has a BSR variant that needs stack push/pop functions. This WIP actually took over eight hours to make, going through every possible permutation on how to design the core itself. The updated design now builds both the instruction decoder+dispatcher and the disassembler decoder into the same main loop during M68K's constructor. The special cases are also really psychotic on this processor, and I'm afraid of missing something via the fallthrough cases. So instead, I'm ordering the instructions alphabetically, and including exclusion cases to ignore binding invalid cases. If I end up remapping an existing register, then it'll throw a run-time assertion at program startup. I wanted very much to get rid of struct EA (EffectiveAddress), but it's too difficult to keep track of the internal effective address without it. So I split out the size to a separate parameter, since every opcode only has one size parameter, and otherwise it was getting duplicated in opcodes that take two EAs, and was also awkward with the flag testing. It's a bit more typing, but I feel it's more clean this way. Overall, I'm really worried this is going to be too slow. I don't want to turn the EA stuff into templates, because that will massively bloat out compilation times and object sizes, and will also need a special DSL preprocessor since C++ doesn't have a static for loop. I can definitely optimize a lot of EA's address/read/write functions away once the core is completed, but it's never going to hold a candle to a templatized 68K core. ---- Forgot to include the SA-1 regression fix. I always remember immediately after I upload and archive the WIP. Will try to get that in next time, I guess.
2016-07-16 08:39:44 +00:00
for(uint n : range(16)) {
if(!list.bit(n)) continue;
//pre-decrement mode traverses registers in reverse order {A7-A0, D7-D0}
uint index = ea.mode == AddressRegisterIndirectWithPreDecrement ? 15 - n : n;
if(ea.mode == AddressRegisterIndirectWithPreDecrement) addr -= bytes<Size>();
if(direction == 0) {
auto data = index < 8 ? read<Size>(DataRegister{index}) : read<Size>(AddressRegister{index});
write<Size>(addr, data);
} else {
auto data = read<Size>(addr);
data = sign<Size>(data);
index < 8 ? write<Long>(DataRegister{index}, data) : write<Long>(AddressRegister{index}, data);
}
if(ea.mode == AddressRegisterIndirectWithPostIncrement) addr += bytes<Size>();
}
flush<Long>(ea, addr);
}
auto M68K::instructionMOVEQ(DataRegister dr, uint8 immediate) -> void {
write<Long>(dr, immediate);
Update to v100r06 release. byuu says: Up to ten 68K instructions out of somewhere between 61 and 88, depending upon which PDF you look at. Of course, some of them aren't 100% completed yet, either. Lots of craziness with MOVEM, and BCC has a BSR variant that needs stack push/pop functions. This WIP actually took over eight hours to make, going through every possible permutation on how to design the core itself. The updated design now builds both the instruction decoder+dispatcher and the disassembler decoder into the same main loop during M68K's constructor. The special cases are also really psychotic on this processor, and I'm afraid of missing something via the fallthrough cases. So instead, I'm ordering the instructions alphabetically, and including exclusion cases to ignore binding invalid cases. If I end up remapping an existing register, then it'll throw a run-time assertion at program startup. I wanted very much to get rid of struct EA (EffectiveAddress), but it's too difficult to keep track of the internal effective address without it. So I split out the size to a separate parameter, since every opcode only has one size parameter, and otherwise it was getting duplicated in opcodes that take two EAs, and was also awkward with the flag testing. It's a bit more typing, but I feel it's more clean this way. Overall, I'm really worried this is going to be too slow. I don't want to turn the EA stuff into templates, because that will massively bloat out compilation times and object sizes, and will also need a special DSL preprocessor since C++ doesn't have a static for loop. I can definitely optimize a lot of EA's address/read/write functions away once the core is completed, but it's never going to hold a candle to a templatized 68K core. ---- Forgot to include the SA-1 regression fix. I always remember immediately after I upload and archive the WIP. Will try to get that in next time, I guess.
2016-07-16 08:39:44 +00:00
r.c = 0;
r.v = 0;
r.z = zero<Byte>(immediate);
r.n = negative<Byte>(immediate);
}
auto M68K::instructionMOVE_FROM_SR(EffectiveAddress ea) -> void {
auto data = readSR();
write<Word>(ea, data);
}
auto M68K::instructionMOVE_TO_CCR(EffectiveAddress ea) -> void {
auto data = read<Byte>(ea);
writeCCR(data);
}
auto M68K::instructionMOVE_TO_SR(EffectiveAddress ea) -> void {
if(!supervisor()) return;
auto data = read<Word>(ea);
writeSR(data);
}
auto M68K::instructionMOVE_USP(uint1 direction, AddressRegister ar) -> void {
if(!supervisor()) return;
Update to v100r06 release. byuu says: Up to ten 68K instructions out of somewhere between 61 and 88, depending upon which PDF you look at. Of course, some of them aren't 100% completed yet, either. Lots of craziness with MOVEM, and BCC has a BSR variant that needs stack push/pop functions. This WIP actually took over eight hours to make, going through every possible permutation on how to design the core itself. The updated design now builds both the instruction decoder+dispatcher and the disassembler decoder into the same main loop during M68K's constructor. The special cases are also really psychotic on this processor, and I'm afraid of missing something via the fallthrough cases. So instead, I'm ordering the instructions alphabetically, and including exclusion cases to ignore binding invalid cases. If I end up remapping an existing register, then it'll throw a run-time assertion at program startup. I wanted very much to get rid of struct EA (EffectiveAddress), but it's too difficult to keep track of the internal effective address without it. So I split out the size to a separate parameter, since every opcode only has one size parameter, and otherwise it was getting duplicated in opcodes that take two EAs, and was also awkward with the flag testing. It's a bit more typing, but I feel it's more clean this way. Overall, I'm really worried this is going to be too slow. I don't want to turn the EA stuff into templates, because that will massively bloat out compilation times and object sizes, and will also need a special DSL preprocessor since C++ doesn't have a static for loop. I can definitely optimize a lot of EA's address/read/write functions away once the core is completed, but it's never going to hold a candle to a templatized 68K core. ---- Forgot to include the SA-1 regression fix. I always remember immediately after I upload and archive the WIP. Will try to get that in next time, I guess.
2016-07-16 08:39:44 +00:00
if(direction == 0) {
r.sp = read<Long>(ar);
Update to v100r06 release. byuu says: Up to ten 68K instructions out of somewhere between 61 and 88, depending upon which PDF you look at. Of course, some of them aren't 100% completed yet, either. Lots of craziness with MOVEM, and BCC has a BSR variant that needs stack push/pop functions. This WIP actually took over eight hours to make, going through every possible permutation on how to design the core itself. The updated design now builds both the instruction decoder+dispatcher and the disassembler decoder into the same main loop during M68K's constructor. The special cases are also really psychotic on this processor, and I'm afraid of missing something via the fallthrough cases. So instead, I'm ordering the instructions alphabetically, and including exclusion cases to ignore binding invalid cases. If I end up remapping an existing register, then it'll throw a run-time assertion at program startup. I wanted very much to get rid of struct EA (EffectiveAddress), but it's too difficult to keep track of the internal effective address without it. So I split out the size to a separate parameter, since every opcode only has one size parameter, and otherwise it was getting duplicated in opcodes that take two EAs, and was also awkward with the flag testing. It's a bit more typing, but I feel it's more clean this way. Overall, I'm really worried this is going to be too slow. I don't want to turn the EA stuff into templates, because that will massively bloat out compilation times and object sizes, and will also need a special DSL preprocessor since C++ doesn't have a static for loop. I can definitely optimize a lot of EA's address/read/write functions away once the core is completed, but it's never going to hold a candle to a templatized 68K core. ---- Forgot to include the SA-1 regression fix. I always remember immediately after I upload and archive the WIP. Will try to get that in next time, I guess.
2016-07-16 08:39:44 +00:00
} else {
write<Long>(ar, r.sp);
Update to v100r06 release. byuu says: Up to ten 68K instructions out of somewhere between 61 and 88, depending upon which PDF you look at. Of course, some of them aren't 100% completed yet, either. Lots of craziness with MOVEM, and BCC has a BSR variant that needs stack push/pop functions. This WIP actually took over eight hours to make, going through every possible permutation on how to design the core itself. The updated design now builds both the instruction decoder+dispatcher and the disassembler decoder into the same main loop during M68K's constructor. The special cases are also really psychotic on this processor, and I'm afraid of missing something via the fallthrough cases. So instead, I'm ordering the instructions alphabetically, and including exclusion cases to ignore binding invalid cases. If I end up remapping an existing register, then it'll throw a run-time assertion at program startup. I wanted very much to get rid of struct EA (EffectiveAddress), but it's too difficult to keep track of the internal effective address without it. So I split out the size to a separate parameter, since every opcode only has one size parameter, and otherwise it was getting duplicated in opcodes that take two EAs, and was also awkward with the flag testing. It's a bit more typing, but I feel it's more clean this way. Overall, I'm really worried this is going to be too slow. I don't want to turn the EA stuff into templates, because that will massively bloat out compilation times and object sizes, and will also need a special DSL preprocessor since C++ doesn't have a static for loop. I can definitely optimize a lot of EA's address/read/write functions away once the core is completed, but it's never going to hold a candle to a templatized 68K core. ---- Forgot to include the SA-1 regression fix. I always remember immediately after I upload and archive the WIP. Will try to get that in next time, I guess.
2016-07-16 08:39:44 +00:00
}
}
auto M68K::instructionNOP() -> void {
}
auto M68K::instructionORI_TO_CCR() -> void {
auto data = readPC<Word>();
writeCCR(readCCR() | data);
}
auto M68K::instructionORI_TO_SR() -> void {
if(!supervisor()) return;
auto data = readPC<Word>();
writeSR(readSR() | data);
}
template<uint Size> auto M68K::ROL(uint32 result, uint shift) -> uint32 {
bool carry = false;
for(auto _ : range(shift)) {
carry = result & msb<Size>();
result = result << 1 | carry;
}
r.c = carry;
r.v = 0;
r.z = clip<Size>(result) == 0;
r.n = sign<Size>(result) < 0;
return clip<Size>(result);
}
template<uint Size> auto M68K::instructionROL(uint4 shift, DataRegister modify) -> void {
auto result = ROL<Size>(read<Size>(modify), shift);
write<Size>(modify, result);
}
template<uint Size> auto M68K::instructionROL(DataRegister shift, DataRegister modify) -> void {
auto count = read<Long>(shift) & 63;
auto result = ROL<Size>(read<Size>(modify), count);
write<Size>(modify, result);
}
auto M68K::instructionROL(EffectiveAddress modify) -> void {
auto result = ROL<Word>(read<Word, NoUpdate>(modify), 1);
write<Word>(modify, result);
}
template<uint Size> auto M68K::ROR(uint32 result, uint shift) -> uint32 {
bool carry = false;
for(auto _ : range(shift)) {
carry = result & lsb<Size>();
result >>= 1;
if(carry) result |= msb<Size>();
}
r.c = carry;
r.v = 0;
r.z = clip<Size>(result) == 0;
r.n = sign<Size>(result) < 0;
return clip<Size>(result);
}
template<uint Size> auto M68K::instructionROR(uint4 shift, DataRegister modify) -> void {
auto result = ROR<Size>(read<Size>(modify), shift);
write<Size>(modify, result);
}
template<uint Size> auto M68K::instructionROR(DataRegister shift, DataRegister modify) -> void {
auto count = read<Long>(shift) & 63;
auto result = ROR<Size>(read<Size>(modify), count);
write<Size>(modify, result);
}
auto M68K::instructionROR(EffectiveAddress modify) -> void {
auto result = ROR<Word>(read<Word, NoUpdate>(modify), 1);
write<Word>(modify, result);
}
template<uint Size> auto M68K::ROXL(uint32 result, uint shift) -> uint32 {
bool carry = r.x;
for(auto _ : range(shift)) {
bool extend = carry;
carry = result & msb<Size>();
result = result << 1 | extend;
}
r.c = carry;
r.v = 0;
r.z = clip<Size>(result) == 0;
r.n = sign<Size>(result) < 0;
r.x = r.c;
return clip<Size>(result);
}
template<uint Size> auto M68K::instructionROXL(uint4 shift, DataRegister modify) -> void {
auto result = ROXL<Size>(read<Size>(modify), shift);
write<Size>(modify, result);
}
template<uint Size> auto M68K::instructionROXL(DataRegister shift, DataRegister modify) -> void {
auto count = read<Long>(shift) & 63;
auto result = ROXL<Size>(read<Size>(modify), count);
write<Size>(modify, result);
}
auto M68K::instructionROXL(EffectiveAddress modify) -> void {
auto result = ROXL<Word>(read<Word, NoUpdate>(modify), 1);
write<Word>(modify, result);
}
template<uint Size> auto M68K::ROXR(uint32 result, uint shift) -> uint32 {
bool carry = r.x;
for(auto _ : range(shift)) {
bool extend = carry;
carry = result & lsb<Size>();
result >>= 1;
if(extend) result |= msb<Size>();
}
r.c = carry;
r.v = 0;
r.z = clip<Size>(result) == 0;
r.n = sign<Size>(result) < 0;
r.x = r.c;
return clip<Size>(result);
}
template<uint Size> auto M68K::instructionROXR(uint4 shift, DataRegister modify) -> void {
auto result = ROXR<Size>(read<Size>(modify), shift);
write<Size>(modify, result);
}
template<uint Size> auto M68K::instructionROXR(DataRegister shift, DataRegister modify) -> void {
auto count = read<Long>(shift) & 63;
auto result = ROXR<Size>(read<Size>(modify), count);
write<Size>(modify, result);
}
auto M68K::instructionROXR(EffectiveAddress modify) -> void {
auto result = ROXR<Word>(read<Word, NoUpdate>(modify), 1);
write<Word>(modify, result);
}
auto M68K::instructionRTS() -> void {
r.pc = pop<Long>();
}
template<uint Size> auto M68K::instructionSUBQ(uint4 immediate, EffectiveAddress ea) -> void {
uint64 target = read<Size, NoUpdate>(ea);
uint64 source = immediate;
uint64 result = target - source;
write<Size>(ea, result);
r.c = sign<Size>(result >> 1) < 0;
r.v = sign<Size>((target ^ source) & (target ^ result)) < 0;
r.z = clip<Size>(result) == 0;
r.n = sign<Size>(result) < 0;
r.x = r.c;
}
template<uint Size> auto M68K::instructionTST(EffectiveAddress ea) -> void {
auto data = read<Size>(ea);
r.c = 0;
r.v = 0;
r.z = zero<Size>(data);
r.n = negative<Size>(data);
}