2017-08-06 13:36:26 +00:00
|
|
|
//ARMv3 (ARM60)
|
|
|
|
//ARMv4 (ARM7TDMI)
|
|
|
|
|
|
|
|
#pragma once
|
|
|
|
|
|
|
|
namespace Processor {
|
|
|
|
|
|
|
|
struct ARM7TDMI {
|
|
|
|
//flags describing a bus access; every enumerator is a distinct single bit, so values can be OR'd together
//NOTE(review): presumably combined to form the `mode` argument of get()/set()/read()/write() -- confirm against callers
enum : uint {
  Nonsequential = 1 << 0,  //N cycle
  Sequential    = 1 << 1,  //S cycle
  Prefetch      = 1 << 2,  //instruction fetch
  Byte          = 1 << 3,  // 8-bit access
  Half          = 1 << 4,  //16-bit access
  Word          = 1 << 5,  //32-bit access
  Load          = 1 << 6,  //load operation
  Store         = 1 << 7,  //store operation
  Signed        = 1 << 8,  //sign-extend
};
|
|
|
|
|
|
|
|
virtual auto step(uint clocks) -> void = 0;
|
|
|
|
virtual auto sleep() -> void = 0;
|
|
|
|
virtual auto get(uint mode, uint32 address) -> uint32 = 0;
|
|
|
|
virtual auto set(uint mode, uint32 address, uint32 word) -> void = 0;
|
|
|
|
|
|
|
|
//arm7tdmi.cpp
|
Update to v103r28 release.
byuu says:
Changelog:
- processor/arm7tdmi: implemented 10 of 19 ARM instructions
- processor/arm7tdmi: implemented 1 of 22 THUMB instructions
Today's WIP was 6 hours of work, and yesterday's was 5 hours.
Half of today was just trying to come up with the design to use a
lambda-based dispatcher to map both instructions and disassembly,
similar to the 68K core. The problem is that the ARM core has 28 unique
bits, which is just far too many bits to have a full lookup table like
the 16-bit 68K core.
The thing I wanted more than anything else was to perform the opcode
bitfield decoding once, and have it decoded for both instructions and
the disassembler. It took three hours to come up with a design that
worked for the ARM half ... relying on #defines being able to pull in
other #defines that were declared and changed later after the first
one. But, I'm happy with it. The decoding is in the table building, as
it is with the 68K core. The decoding does happen at run-time on each
instruction invocation, but it has to be done.
As to the THUMB core, I can create a 64K-entry lambda table to cover all
possible encodings, and ... even though it's a cache killer, I've
decided to go for it, given the outstanding performance it obtained in
the M68K core, as well as considering that THUMB mode is far more common
in GBA games.
As to both cores ... I'm a little torn between two extremes:
On the one hand, I can condense the number of ARM/THUMB instructions
further to eliminate more redundant code. On the other, I can split them
apart to reduce the number of conditional tests needed to execute each
instruction. It's really the disassembler that makes me not want to
split them up further ... as I have to split the disassembler functions
up equally to the instruction functions. But it may be worth it if it's
a speed improvement.
2017-08-07 12:20:35 +00:00
|
|
|
//default constructor (defined out of line)
//NOTE(review): presumably populates the instruction tables via armInitialize()/thumbInitialize() -- confirm in arm7tdmi.cpp
ARM7TDMI();
|
2017-08-06 13:36:26 +00:00
|
|
|
//NOTE(review): presumably resets the processor state to its power-on values -- confirm in arm7tdmi.cpp
auto power() -> void;
|
|
|
|
|
|
|
|
//registers.cpp
|
|
|
|
struct GPR;
|
|
|
|
struct PSR;
|
|
|
|
inline auto r(uint4) -> GPR&;
|
|
|
|
inline auto u(uint4) -> GPR&;
|
|
|
|
inline auto cpsr() -> PSR&;
|
|
|
|
inline auto spsr() -> PSR&;
|
|
|
|
inline auto privileged() const -> bool;
|
|
|
|
inline auto exception() const -> bool;
|
|
|
|
|
|
|
|
//memory.cpp
|
|
|
|
auto idle() -> void;
|
|
|
|
auto read(uint mode, uint32 address) -> uint32;
|
|
|
|
auto load(uint mode, uint32 address) -> uint32;
|
|
|
|
auto write(uint mode, uint32 address, uint32 word) -> void;
|
|
|
|
auto store(uint mode, uint32 address, uint32 word) -> void;
|
|
|
|
|
|
|
|
//algorithms.cpp
|
Update to v103r28 release.
byuu says:
Changelog:
- processor/arm7tdmi: implemented 10 of 19 ARM instructions
- processor/arm7tdmi: implemented 1 of 22 THUMB instructions
Today's WIP was 6 hours of work, and yesterday's was 5 hours.
Half of today was just trying to come up with the design to use a
lambda-based dispatcher to map both instructions and disassembly,
similar to the 68K core. The problem is that the ARM core has 28 unique
bits, which is just far too many bits to have a full lookup table like
the 16-bit 68K core.
The thing I wanted more than anything else was to perform the opcode
bitfield decoding once, and have it decoded for both instructions and
the disassembler. It took three hours to come up with a design that
worked for the ARM half ... relying on #defines being able to pull in
other #defines that were declared and changed later after the first
one. But, I'm happy with it. The decoding is in the table building, as
it is with the 68K core. The decoding does happen at run-time on each
instruction invocation, but it has to be done.
As to the THUMB core, I can create a 64K-entry lambda table to cover all
possible encodings, and ... even though it's a cache killer, I've
decided to go for it, given the outstanding performance it obtained in
the M68K core, as well as considering that THUMB mode is far more common
in GBA games.
As to both cores ... I'm a little torn between two extremes:
On the one hand, I can condense the number of ARM/THUMB instructions
further to eliminate more redundant code. On the other, I can split them
apart to reduce the number of conditional tests needed to execute each
instruction. It's really the disassembler that makes me not want to
split them up further ... as I have to split the disassembler functions
up equally to the instruction functions. But it may be worth it if it's
a speed improvement.
2017-08-07 12:20:35 +00:00
|
|
|
//NOTE(review): presumably addition of two operands with a carry-in flag -- confirm in algorithms.cpp
auto ADD(uint32, uint32, bool) -> uint32;
|
2017-08-06 13:36:26 +00:00
|
|
|
//NOTE(review): presumably arithmetic shift right of a value by a shift amount -- confirm in algorithms.cpp
auto ASR(uint32, uint8) -> uint32;
|
Update to v103r28 release.
byuu says:
Changelog:
- processor/arm7tdmi: implemented 10 of 19 ARM instructions
- processor/arm7tdmi: implemented 1 of 22 THUMB instructions
Today's WIP was 6 hours of work, and yesterday's was 5 hours.
Half of today was just trying to come up with the design to use a
lambda-based dispatcher to map both instructions and disassembly,
similar to the 68K core. The problem is that the ARM core has 28 unique
bits, which is just far too many bits to have a full lookup table like
the 16-bit 68K core.
The thing I wanted more than anything else was to perform the opcode
bitfield decoding once, and have it decoded for both instructions and
the disassembler. It took three hours to come up with a design that
worked for the ARM half ... relying on #defines being able to pull in
other #defines that were declared and changed later after the first
one. But, I'm happy with it. The decoding is in the table building, as
it is with the 68K core. The decoding does happen at run-time on each
instruction invocation, but it has to be done.
As to the THUMB core, I can create a 64K-entry lambda table to cover all
possible encodings, and ... even though it's a cache killer, I've
decided to go for it, given the outstanding performance it obtained in
the M68K core, as well as considering that THUMB mode is far more common
in GBA games.
As to both cores ... I'm a little torn between two extremes:
On the one hand, I can condense the number of ARM/THUMB instructions
further to eliminate more redundant code. On the other, I can split them
apart to reduce the number of conditional tests needed to execute each
instruction. It's really the disassembler that makes me not want to
split them up further ... as I have to split the disassembler functions
up equally to the instruction functions. But it may be worth it if it's
a speed improvement.
2017-08-07 12:20:35 +00:00
|
|
|
//NOTE(review): presumably post-processes the result of a bitwise operation (e.g. flag updates) -- confirm in algorithms.cpp
auto BIT(uint32) -> uint32;
|
2017-08-06 13:36:26 +00:00
|
|
|
//NOTE(review): presumably logical shift left / right of a value by a shift amount -- confirm in algorithms.cpp
auto LSL(uint32, uint8) -> uint32;
auto LSR(uint32, uint8) -> uint32;
|
Update to v103r28 release.
byuu says:
Changelog:
- processor/arm7tdmi: implemented 10 of 19 ARM instructions
- processor/arm7tdmi: implemented 1 of 22 THUMB instructions
Today's WIP was 6 hours of work, and yesterday's was 5 hours.
Half of today was just trying to come up with the design to use a
lambda-based dispatcher to map both instructions and disassembly,
similar to the 68K core. The problem is that the ARM core has 28 unique
bits, which is just far too many bits to have a full lookup table like
the 16-bit 68K core.
The thing I wanted more than anything else was to perform the opcode
bitfield decoding once, and have it decoded for both instructions and
the disassembler. It took three hours to come up with a design that
worked for the ARM half ... relying on #defines being able to pull in
other #defines that were declared and changed later after the first
one. But, I'm happy with it. The decoding is in the table building, as
it is with the 68K core. The decoding does happen at run-time on each
instruction invocation, but it has to be done.
As to the THUMB core, I can create a 64K-entry lambda table to cover all
possible encodings, and ... even though it's a cache killer, I've
decided to go for it, given the outstanding performance it obtained in
the M68K core, as well as considering that THUMB mode is far more common
in GBA games.
As to both cores ... I'm a little torn between two extremes:
On the one hand, I can condense the number of ARM/THUMB instructions
further to eliminate more redundant code. On the other, I can split them
apart to reduce the number of conditional tests needed to execute each
instruction. It's really the disassembler that makes me not want to
split them up further ... as I have to split the disassembler functions
up equally to the instruction functions. But it may be worth it if it's
a speed improvement.
2017-08-07 12:20:35 +00:00
|
|
|
//NOTE(review): presumably multiply(-accumulate); which argument is the accumulator is not visible here -- confirm in algorithms.cpp
auto MUL(uint32, uint32, uint32) -> uint32;
|
2017-08-06 13:36:26 +00:00
|
|
|
//NOTE(review): presumably rotate right by a shift amount, and rotate right by one through carry -- confirm in algorithms.cpp
auto ROR(uint32, uint8) -> uint32;
auto RRX(uint32) -> uint32;
|
Update to v103r28 release.
byuu says:
Changelog:
- processor/arm7tdmi: implemented 10 of 19 ARM instructions
- processor/arm7tdmi: implemented 1 of 22 THUMB instructions
Today's WIP was 6 hours of work, and yesterday's was 5 hours.
Half of today was just trying to come up with the design to use a
lambda-based dispatcher to map both instructions and disassembly,
similar to the 68K core. The problem is that the ARM core has 28 unique
bits, which is just far too many bits to have a full lookup table like
the 16-bit 68K core.
The thing I wanted more than anything else was to perform the opcode
bitfield decoding once, and have it decoded for both instructions and
the disassembler. It took three hours to come up with a design that
worked for the ARM half ... relying on #defines being able to pull in
other #defines that were declared and changed later after the first
one. But, I'm happy with it. The decoding is in the table building, as
it is with the 68K core. The decoding does happen at run-time on each
instruction invocation, but it has to be done.
As to the THUMB core, I can create a 64K-entry lambda table to cover all
possible encodings, and ... even though it's a cache killer, I've
decided to go for it, given the outstanding performance it obtained in
the M68K core, as well as considering that THUMB mode is far more common
in GBA games.
As to both cores ... I'm a little torn between two extremes:
On the one hand, I can condense the number of ARM/THUMB instructions
further to eliminate more redundant code. On the other, I can split them
apart to reduce the number of conditional tests needed to execute each
instruction. It's really the disassembler that makes me not want to
split them up further ... as I have to split the disassembler functions
up equally to the instruction functions. But it may be worth it if it's
a speed improvement.
2017-08-07 12:20:35 +00:00
|
|
|
//NOTE(review): presumably subtraction of two operands with a carry flag -- confirm in algorithms.cpp
auto SUB(uint32, uint32, bool) -> uint32;
|
2017-08-06 13:36:26 +00:00
|
|
|
//NOTE(review): presumably evaluates a 4-bit condition code against the status flags -- confirm in algorithms.cpp
auto TST(uint4) -> bool;
|
|
|
|
|
|
|
|
//instruction.cpp
|
|
|
|
auto fetch() -> void;
|
|
|
|
auto instruction() -> void;
|
|
|
|
auto interrupt(uint mode, uint32 address) -> void;
|
Update to v103r28 release.
byuu says:
Changelog:
- processor/arm7tdmi: implemented 10 of 19 ARM instructions
- processor/arm7tdmi: implemented 1 of 22 THUMB instructions
Today's WIP was 6 hours of work, and yesterday's was 5 hours.
Half of today was just trying to come up with the design to use a
lambda-based dispatcher to map both instructions and disassembly,
similar to the 68K core. The problem is that the ARM core has 28 unique
bits, which is just far too many bits to have a full lookup table like
the 16-bit 68K core.
The thing I wanted more than anything else was to perform the opcode
bitfield decoding once, and have it decoded for both instructions and
the disassembler. It took three hours to come up with a design that
worked for the ARM half ... relying on #defines being able to pull in
other #defines that were declared and changed later after the first
one. But, I'm happy with it. The decoding is in the table building, as
it is with the 68K core. The decoding does happen at run-time on each
instruction invocation, but it has to be done.
As to the THUMB core, I can create a 64K-entry lambda table to cover all
possible encodings, and ... even though it's a cache killer, I've
decided to go for it, given the outstanding performance it obtained in
the M68K core, as well as considering that THUMB mode is far more common
in GBA games.
As to both cores ... I'm a little torn between two extremes:
On the one hand, I can condense the number of ARM/THUMB instructions
further to eliminate more redundant code. On the other, I can split them
apart to reduce the number of conditional tests needed to execute each
instruction. It's really the disassembler that makes me not want to
split them up further ... as I have to split the disassembler functions
up equally to the instruction functions. But it may be worth it if it's
a speed improvement.
2017-08-07 12:20:35 +00:00
|
|
|
//NOTE(review): presumably fill the armInstruction/armDisassemble and thumbInstruction/thumbDisassemble tables -- confirm in instruction.cpp
auto armInitialize() -> void;
auto thumbInitialize() -> void;
|
2017-08-06 13:36:26 +00:00
|
|
|
|
|
|
|
//instructions-arm.cpp
|
Update to v103r28 release.
byuu says:
Changelog:
- processor/arm7tdmi: implemented 10 of 19 ARM instructions
- processor/arm7tdmi: implemented 1 of 22 THUMB instructions
Today's WIP was 6 hours of work, and yesterday's was 5 hours.
Half of today was just trying to come up with the design to use a
lambda-based dispatcher to map both instructions and disassembly,
similar to the 68K core. The problem is that the ARM core has 28 unique
bits, which is just far too many bits to have a full lookup table like
the 16-bit 68K core.
The thing I wanted more than anything else was to perform the opcode
bitfield decoding once, and have it decoded for both instructions and
the disassembler. It took three hours to come up with a design that
worked for the ARM half ... relying on #defines being able to pull in
other #defines that were declared and changed later after the first
one. But, I'm happy with it. The decoding is in the table building, as
it is with the 68K core. The decoding does happen at run-time on each
instruction invocation, but it has to be done.
As to the THUMB core, I can create a 64K-entry lambda table to cover all
possible encodings, and ... even though it's a cache killer, I've
decided to go for it, given the outstanding performance it obtained in
the M68K core, as well as considering that THUMB mode is far more common
in GBA games.
As to both cores ... I'm a little torn between two extremes:
On the one hand, I can condense the number of ARM/THUMB instructions
further to eliminate more redundant code. On the other, I can split them
apart to reduce the number of conditional tests needed to execute each
instruction. It's really the disassembler that makes me not want to
split them up further ... as I have to split the disassembler functions
up equally to the instruction functions. But it may be worth it if it's
a speed improvement.
2017-08-07 12:20:35 +00:00
|
|
|
auto armALU(uint4 mode, uint4 target, uint4 source, uint32 data) -> void;
|
|
|
|
auto armMoveToStatus(uint4 field, uint1 source, uint32 data) -> void;
|
|
|
|
|
|
|
|
auto armInstructionBranchExchangeRegister(uint4) -> void;
|
|
|
|
auto armInstructionLoadImmediate(uint8, uint1, uint4, uint4, uint1, uint1, uint1) -> void;
|
|
|
|
auto armInstructionLoadRegister(uint4, uint1, uint4, uint4, uint1, uint1, uint1) -> void;
|
|
|
|
auto armInstructionMemorySwap(uint4, uint4, uint4, uint1) -> void;
|
|
|
|
auto armInstructionMoveHalfImmediate(uint8, uint4, uint4, uint1, uint1, uint1, uint1) -> void;
|
|
|
|
auto armInstructionMoveHalfRegister(uint4, uint4, uint4, uint1, uint1, uint1, uint1) -> void;
|
|
|
|
auto armInstructionMoveToRegisterFromStatus(uint4, uint1) -> void;
|
|
|
|
auto armInstructionMoveToStatusFromImmediate(uint8, uint4, uint4, uint1) -> void;
|
|
|
|
auto armInstructionMoveToStatusFromRegister(uint4, uint4, uint1) -> void;
|
|
|
|
auto armInstructionMultiply(uint4, uint4, uint4, uint4, uint1, uint1) -> void;
|
|
|
|
auto armInstructionMultiplyLong(uint4, uint4, uint4, uint4, uint1, uint1, uint1) -> void;
|
2017-08-06 13:36:26 +00:00
|
|
|
|
|
|
|
//instructions-thumb.cpp
|
|
|
|
//NOTE(review): presumably the THUMB counterpart of armALU() -- confirm in instructions-thumb.cpp
auto thumbALU(uint4 mode, uint4 target, uint4 source) -> void;
|
|
|
|
|
Update to v103r28 release.
byuu says:
Changelog:
- processor/arm7tdmi: implemented 10 of 19 ARM instructions
- processor/arm7tdmi: implemented 1 of 22 THUMB instructions
Today's WIP was 6 hours of work, and yesterday's was 5 hours.
Half of today was just trying to come up with the design to use a
lambda-based dispatcher to map both instructions and disassembly,
similar to the 68K core. The problem is that the ARM core has 28 unique
bits, which is just far too many bits to have a full lookup table like
the 16-bit 68K core.
The thing I wanted more than anything else was to perform the opcode
bitfield decoding once, and have it decoded for both instructions and
the disassembler. It took three hours to come up with a design that
worked for the ARM half ... relying on #defines being able to pull in
other #defines that were declared and changed later after the first
one. But, I'm happy with it. The decoding is in the table building, as
it is with the 68K core. The decoding does happen at run-time on each
instruction invocation, but it has to be done.
As to the THUMB core, I can create a 64K-entry lambda table to cover all
possible encodings, and ... even though it's a cache killer, I've
decided to go for it, given the outstanding performance it obtained in
the M68K core, as well as considering that THUMB mode is far more common
in GBA games.
As to both cores ... I'm a little torn between two extremes:
On the one hand, I can condense the number of ARM/THUMB instructions
further to eliminate more redundant code. On the other, I can split them
apart to reduce the number of conditional tests needed to execute each
instruction. It's really the disassembler that makes me not want to
split them up further ... as I have to split the disassembler functions
up equally to the instruction functions. But it may be worth it if it's
a speed improvement.
2017-08-07 12:20:35 +00:00
|
|
|
//THUMB instruction handler; its signature matches thumbDisassembleAdjustRegister declared later in this class
//NOTE(review): the unnamed parameters are presumably decoded opcode bitfields -- confirm in instructions-thumb.cpp
auto thumbInstructionAdjustRegister(uint3, uint3, uint3, uint1) -> void;
|
|
|
|
|
2017-08-06 13:36:26 +00:00
|
|
|
//serialization.cpp
|
|
|
|
auto serialize(serializer&) -> void;
|
|
|
|
|
Update to v103r28 release.
byuu says:
Changelog:
- processor/arm7tdmi: implemented 10 of 19 ARM instructions
- processor/arm7tdmi: implemented 1 of 22 THUMB instructions
Today's WIP was 6 hours of work, and yesterday's was 5 hours.
Half of today was just trying to come up with the design to use a
lambda-based dispatcher to map both instructions and disassembly,
similar to the 68K core. The problem is that the ARM core has 28 unique
bits, which is just far too many bits to have a full lookup table like
the 16-bit 68K core.
The thing I wanted more than anything else was to perform the opcode
bitfield decoding once, and have it decoded for both instructions and
the disassembler. It took three hours to come up with a design that
worked for the ARM half ... relying on #defines being able to pull in
other #defines that were declared and changed later after the first
one. But, I'm happy with it. The decoding is in the table building, as
it is with the 68K core. The decoding does happen at run-time on each
instruction invocation, but it has to be done.
As to the THUMB core, I can create a 64K-entry lambda table to cover all
possible encodings, and ... even though it's a cache killer, I've
decided to go for it, given the outstanding performance it obtained in
the M68K core, as well as considering that THUMB mode is far more common
in GBA games.
As to both cores ... I'm a little torn between two extremes:
On the one hand, I can condense the number of ARM/THUMB instructions
further to eliminate more redundant code. On the other, I can split them
apart to reduce the number of conditional tests needed to execute each
instruction. It's really the disassembler that makes me not want to
split them up further ... as I have to split the disassembler functions
up equally to the instruction functions. But it may be worth it if it's
a speed improvement.
2017-08-07 12:20:35 +00:00
|
|
|
//disassembler.cpp
|
|
|
|
auto disassemble(uint32 pc) -> string;
|
|
|
|
|
2017-08-06 13:36:26 +00:00
|
|
|
struct GPR {
|
|
|
|
inline operator uint32_t() const {
|
|
|
|
return data;
|
|
|
|
}
|
|
|
|
|
|
|
|
inline auto operator=(uint32 value) -> GPR& {
|
|
|
|
data = value;
|
|
|
|
if(modify) modify();
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint32 data;
|
|
|
|
function<auto () -> void> modify;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct PSR {
|
|
|
|
enum : uint {
|
Update to v103r28 release.
byuu says:
Changelog:
- processor/arm7tdmi: implemented 10 of 19 ARM instructions
- processor/arm7tdmi: implemented 1 of 22 THUMB instructions
Today's WIP was 6 hours of work, and yesterday's was 5 hours.
Half of today was just trying to come up with the design to use a
lambda-based dispatcher to map both instructions and disassembly,
similar to the 68K core. The problem is that the ARM core has 28 unique
bits, which is just far too many bits to have a full lookup table like
the 16-bit 68K core.
The thing I wanted more than anything else was to perform the opcode
bitfield decoding once, and have it decoded for both instructions and
the disassembler. It took three hours to come up with a design that
worked for the ARM half ... relying on #defines being able to pull in
other #defines that were declared and changed later after the first
one. But, I'm happy with it. The decoding is in the table building, as
it is with the 68K core. The decoding does happen at run-time on each
instruction invocation, but it has to be done.
As to the THUMB core, I can create a 64K-entry lambda table to cover all
possible encodings, and ... even though it's a cache killer, I've
decided to go for it, given the outstanding performance it obtained in
the M68K core, as well as considering that THUMB mode is far more common
in GBA games.
As to both cores ... I'm a little torn between two extremes:
On the one hand, I can condense the number of ARM/THUMB instructions
further to eliminate more redundant code. On the other, I can split them
apart to reduce the number of conditional tests needed to execute each
instruction. It's really the disassembler that makes me not want to
split them up further ... as I have to split the disassembler functions
up equally to the instruction functions. But it may be worth it if it's
a speed improvement.
2017-08-07 12:20:35 +00:00
|
|
|
USR26 = 0x00, //26-bit user
|
|
|
|
FIQ26 = 0x01, //26-bit fast interrupt
|
|
|
|
IRQ26 = 0x02, //26-bit interrupt
|
|
|
|
SVC26 = 0x03, //26-bit service
|
|
|
|
|
2017-08-06 13:36:26 +00:00
|
|
|
USR = 0x10, //user
|
Update to v103r28 release.
byuu says:
Changelog:
- processor/arm7tdmi: implemented 10 of 19 ARM instructions
- processor/arm7tdmi: implemented 1 of 22 THUMB instructions
Today's WIP was 6 hours of work, and yesterday's was 5 hours.
Half of today was just trying to come up with the design to use a
lambda-based dispatcher to map both instructions and disassembly,
similar to the 68K core. The problem is that the ARM core has 28 unique
bits, which is just far too many bits to have a full lookup table like
the 16-bit 68K core.
The thing I wanted more than anything else was to perform the opcode
bitfield decoding once, and have it decoded for both instructions and
the disassembler. It took three hours to come up with a design that
worked for the ARM half ... relying on #defines being able to pull in
other #defines that were declared and changed later after the first
one. But, I'm happy with it. The decoding is in the table building, as
it is with the 68K core. The decoding does happen at run-time on each
instruction invocation, but it has to be done.
As to the THUMB core, I can create a 64K-entry lambda table to cover all
possible encodings, and ... even though it's a cache killer, I've
decided to go for it, given the outstanding performance it obtained in
the M68K core, as well as considering that THUMB mode is far more common
in GBA games.
As to both cores ... I'm a little torn between two extremes:
On the one hand, I can condense the number of ARM/THUMB instructions
further to eliminate more redundant code. On the other, I can split them
apart to reduce the number of conditional tests needed to execute each
instruction. It's really the disassembler that makes me not want to
split them up further ... as I have to split the disassembler functions
up equally to the instruction functions. But it may be worth it if it's
a speed improvement.
2017-08-07 12:20:35 +00:00
|
|
|
FIQ = 0x11, //fast interrupt
|
|
|
|
IRQ = 0x12, //interrupt
|
|
|
|
SVC = 0x13, //service
|
2017-08-06 13:36:26 +00:00
|
|
|
ABT = 0x17, //abort
|
|
|
|
UND = 0x1b, //undefined
|
|
|
|
SYS = 0x1f, //system
|
|
|
|
};
|
|
|
|
|
|
|
|
inline operator uint32_t() const {
|
|
|
|
return m << 0 | t << 5 | f << 6 | i << 7 | v << 28 | c << 29 | z << 30 | n << 31;
|
|
|
|
}
|
|
|
|
|
|
|
|
inline auto operator=(uint32 data) -> PSR& {
|
|
|
|
m = data.bits(0,4);
|
|
|
|
t = data.bit(5);
|
|
|
|
f = data.bit(6);
|
|
|
|
i = data.bit(7);
|
|
|
|
v = data.bit(28);
|
|
|
|
c = data.bit(29);
|
|
|
|
z = data.bit(30);
|
|
|
|
n = data.bit(31);
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
|
|
|
//serialization.cpp
|
|
|
|
auto serialize(serializer&) -> void;
|
|
|
|
|
|
|
|
uint5 m; //mode
|
|
|
|
uint1 t; //thumb
|
|
|
|
uint1 f; //fiq
|
|
|
|
uint1 i; //irq
|
|
|
|
uint1 v; //overflow
|
|
|
|
uint1 c; //carry
|
|
|
|
uint1 z; //zero
|
|
|
|
uint1 n; //negative
|
|
|
|
};
|
|
|
|
|
|
|
|
struct Processor {
|
|
|
|
//serialization.cpp
|
|
|
|
auto serialize(serializer&) -> void;
|
|
|
|
|
|
|
|
GPR r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15;
|
|
|
|
PSR cpsr;
|
|
|
|
|
|
|
|
struct FIQ {
|
|
|
|
GPR r8, r9, r10, r11, r12, r13, r14;
|
|
|
|
PSR spsr;
|
|
|
|
} fiq;
|
|
|
|
|
|
|
|
struct IRQ {
|
|
|
|
GPR r13, r14;
|
|
|
|
PSR spsr;
|
|
|
|
} irq;
|
|
|
|
|
|
|
|
struct SVC {
|
|
|
|
GPR r13, r14;
|
|
|
|
PSR spsr;
|
|
|
|
} svc;
|
|
|
|
|
|
|
|
struct ABT {
|
|
|
|
GPR r13, r14;
|
|
|
|
PSR spsr;
|
|
|
|
} abt;
|
|
|
|
|
|
|
|
struct UND {
|
|
|
|
GPR r13, r14;
|
|
|
|
PSR spsr;
|
|
|
|
} und;
|
|
|
|
} processor;
|
|
|
|
|
|
|
|
struct Pipeline {
|
|
|
|
//serialization.cpp
|
|
|
|
auto serialize(serializer&) -> void;
|
|
|
|
|
|
|
|
struct Instruction {
|
|
|
|
uint32 address;
|
|
|
|
uint32 instruction;
|
|
|
|
};
|
|
|
|
|
|
|
|
uint1 reload = 1;
|
|
|
|
uint1 nonsequential = 1;
|
|
|
|
Instruction fetch;
|
|
|
|
Instruction decode;
|
|
|
|
Instruction execute;
|
|
|
|
} pipeline;
|
|
|
|
|
Update to v103r28 release.
byuu says:
Changelog:
- processor/arm7tdmi: implemented 10 of 19 ARM instructions
- processor/arm7tdmi: implemented 1 of 22 THUMB instructions
Today's WIP was 6 hours of work, and yesterday's was 5 hours.
Half of today was just trying to come up with the design to use a
lambda-based dispatcher to map both instructions and disassembly,
similar to the 68K core. The problem is that the ARM core has 28 unique
bits, which is just far too many bits to have a full lookup table like
the 16-bit 68K core.
The thing I wanted more than anything else was to perform the opcode
bitfield decoding once, and have it decoded for both instructions and
the disassembler. It took three hours to come up with a design that
worked for the ARM half ... relying on #defines being able to pull in
other #defines that were declared and changed later after the first
one. But, I'm happy with it. The decoding is in the table building, as
it is with the 68K core. The decoding does happen at run-time on each
instruction invocation, but it has to be done.
As to the THUMB core, I can create a 64K-entry lambda table to cover all
possible encodings, and ... even though it's a cache killer, I've
decided to go for it, given the outstanding performance it obtained in
the M68K core, as well as considering that THUMB mode is far more common
in GBA games.
As to both cores ... I'm a little torn between two extremes:
On the one hand, I can condense the number of ARM/THUMB instructions
further to eliminate more redundant code. On the other, I can split them
apart to reduce the number of conditional tests needed to execute each
instruction. It's really the disassembler that makes me not want to
split them up further ... as I have to split the disassembler functions
up equally to the instruction functions. But it may be worth it if it's
a speed improvement.
2017-08-07 12:20:35 +00:00
|
|
|
//NOTE(review): presumably the opcode of the instruction currently being executed -- confirm in instruction.cpp
uint32 opcode;
|
2017-08-06 13:36:26 +00:00
|
|
|
//NOTE(review): `carry` is presumably the shifter carry-out and `irq` the pending interrupt line -- confirm in algorithms.cpp / instruction.cpp
boolean carry;
boolean irq;
|
Update to v103r28 release.
byuu says:
Changelog:
- processor/arm7tdmi: implemented 10 of 19 ARM instructions
- processor/arm7tdmi: implemented 1 of 22 THUMB instructions
Today's WIP was 6 hours of work, and yesterday's was 5 hours.
Half of today was just trying to come up with the design to use a
lambda-based dispatcher to map both instructions and disassembly,
similar to the 68K core. The problem is that the ARM core has 28 unique
bits, which is just far too many bits to have a full lookup table like
the 16-bit 68K core.
The thing I wanted more than anything else was to perform the opcode
bitfield decoding once, and have it decoded for both instructions and
the disassembler. It took three hours to come up with a design that
worked for the ARM half ... relying on #defines being able to pull in
other #defines that were declared and changed later after the first
one. But, I'm happy with it. The decoding is in the table building, as
it is with the 68K core. The decoding does happen at run-time on each
instruction invocation, but it has to be done.
As to the THUMB core, I can create a 64K-entry lambda table to cover all
possible encodings, and ... even though it's a cache killer, I've
decided to go for it, given the outstanding performance it obtained in
the M68K core, as well as considering that THUMB mode is far more common
in GBA games.
As to both cores ... I'm a little torn between two extremes:
On the one hand, I can condense the number of ARM/THUMB instructions
further to eliminate more redundant code. On the other, I can split them
apart to reduce the number of conditional tests needed to execute each
instruction. It's really the disassembler that makes me not want to
split them up further ... as I have to split the disassembler functions
up equally to the instruction functions. But it may be worth it if it's
a speed improvement.
2017-08-07 12:20:35 +00:00
|
|
|
|
|
|
|
//execution tables: 4096 ARM entries that receive the opcode, and 65536 THUMB entries that take no argument
//NOTE(review): per the commit notes the THUMB table covers every 16-bit encoding; how the 4096 ARM slots are indexed is not visible here
function<void (uint32 opcode)> armInstruction[4096];
function<void ()> thumbInstruction[65536];

//disassembly tables of the same sizes; each entry returns a string instead of executing
function<string (uint32 opcode)> armDisassemble[4096];
function<string ()> thumbDisassemble[65536];
|
|
|
|
|
|
|
|
//disassembler.cpp
|
|
|
|
auto armDisassembleBranchExchangeRegister(uint4) -> string;
|
|
|
|
auto armDisassembleLoadImmediate(uint8, uint1, uint4, uint4, uint1, uint1, uint1) -> string;
|
|
|
|
auto armDisassembleLoadRegister(uint4, uint1, uint4, uint4, uint1, uint1, uint1) -> string;
|
|
|
|
auto armDisassembleMemorySwap(uint4, uint4, uint4, uint1) -> string;
|
|
|
|
auto armDisassembleMoveHalfImmediate(uint8, uint4, uint4, uint1, uint1, uint1, uint1) -> string;
|
|
|
|
auto armDisassembleMoveHalfRegister(uint4, uint4, uint4, uint1, uint1, uint1, uint1) -> string;
|
|
|
|
auto armDisassembleMoveToRegisterFromStatus(uint4, uint1) -> string;
|
|
|
|
auto armDisassembleMoveToStatusFromImmediate(uint8, uint4, uint4, uint1) -> string;
|
|
|
|
auto armDisassembleMoveToStatusFromRegister(uint4, uint4, uint1) -> string;
|
|
|
|
auto armDisassembleMultiply(uint4, uint4, uint4, uint4, uint1, uint1) -> string;
|
|
|
|
auto armDisassembleMultiplyLong(uint4, uint4, uint4, uint4, uint1, uint1, uint1) -> string;
|
|
|
|
|
|
|
|
auto thumbDisassembleAdjustRegister(uint3, uint3, uint3, uint1) -> string;
|
2017-08-06 13:36:26 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
}
|