2017-08-06 13:36:26 +00:00
|
|
|
//ARMv4 (ARM7TDMI)
|
|
|
|
|
|
|
|
#pragma once
|
|
|
|
|
|
|
|
namespace Processor {
|
|
|
|
|
|
|
|
struct ARM7TDMI {
|
|
|
|
//access flags: an unnamed enum with uint as its underlying type.
//each enumerator is a distinct single bit (1 << 0 through 1 << 8), so no two flags overlap.
//NOTE(review): presumably these are OR'd together and passed as the `mode` argument of
//get/set/read/load/write/store declared in this struct - confirm against the callers.
enum : uint {
|
|
|
|
Nonsequential = 1 << 0, //N cycle
|
|
|
|
Sequential = 1 << 1, //S cycle
|
|
|
|
Prefetch = 1 << 2, //instruction fetch
|
|
|
|
Byte = 1 << 3, // 8-bit access
|
|
|
|
Half = 1 << 4, //16-bit access
|
|
|
|
Word = 1 << 5, //32-bit access
|
|
|
|
Load = 1 << 6, //load operation
|
|
|
|
Store = 1 << 7, //store operation
|
|
|
|
Signed = 1 << 8, //sign-extend
|
|
|
|
};
|
|
|
|
|
|
|
|
//the four functions below are pure virtual (= 0): ARM7TDMI cannot be instantiated on its own,
//and a deriving class has to provide every one of them.
//NOTE(review): presumably step() accounts for `clocks` elapsed cycles on the host side - confirm in the implementing class.
virtual auto step(uint clocks) -> void = 0;
|
|
|
|
//takes no arguments and returns nothing; its semantics are defined entirely by the implementing class.
virtual auto sleep() -> void = 0;
|
|
|
|
//returns a 32-bit value for the given mode and 32-bit address.
//NOTE(review): presumably the bus read, with `mode` built from the access flags enum - confirm.
virtual auto get(uint mode, uint32 address) -> uint32 = 0;
|
|
|
|
//takes a mode, a 32-bit address and a 32-bit word; returns nothing.
//NOTE(review): presumably the bus write counterpart of get() - confirm.
virtual auto set(uint mode, uint32 address, uint32 word) -> void = 0;
|
|
|
|
|
|
|
|
//arm7tdmi.cpp
|
Update to v103r28 release.
byuu says:
Changelog:
- processor/arm7tdmi: implemented 10 of 19 ARM instructions
- processor/arm7tdmi: implemented 1 of 22 THUMB instructions
Today's WIP was 6 hours of work, and yesterday's was 5 hours.
Half of today was just trying to come up with the design to use a
lambda-based dispatcher to map both instructions and disassembly,
similar to the 68K core. The problem is that the ARM core has 28 unique
bits, which is just far too many bits to have a full lookup table like
the 16-bit 68K core.
The thing I wanted more than anything else was to perform the opcode
bitfield decoding once, and have it decoded for both instructions and
the disassembler. It took three hours to come up with a design that
worked for the ARM half ... relying on #defines being able to pull in
other #defines that were declared and changed later after the first
one. But, I'm happy with it. The decoding is in the table building, as
it is with the 68K core. The decoding does happen at run-time on each
instruction invocation, but it has to be done.
As to the THUMB core, I can create a 64K-entry lambda table to cover all
possible encodings, and ... even though it's a cache killer, I've
decided to go for it, given the outstanding performance it obtained in
the M68K core, as well as considering that THUMB mode is far more common
in GBA games.
As to both cores ... I'm a little torn between two extremes:
On the one hand, I can condense the number of ARM/THUMB instructions
further to eliminate more redundant code. On the other, I can split them
apart to reduce the number of conditional tests needed to execute each
instruction. It's really the disassembler that makes me not want to
split them up further ... as I have to split the disassembler functions
up equally to the instruction functions. But it may be worth it if it's
a speed improvement.
2017-08-07 12:20:35 +00:00
|
|
|
ARM7TDMI();
|
2017-08-06 13:36:26 +00:00
|
|
|
auto power() -> void;
|
|
|
|
|
|
|
|
//registers.cpp
|
|
|
|
struct GPR;
|
|
|
|
struct PSR;
|
|
|
|
inline auto r(uint4) -> GPR&;
|
|
|
|
inline auto cpsr() -> PSR&;
|
|
|
|
inline auto spsr() -> PSR&;
|
|
|
|
inline auto privileged() const -> bool;
|
|
|
|
inline auto exception() const -> bool;
|
|
|
|
|
|
|
|
//memory.cpp
|
|
|
|
auto idle() -> void;
|
|
|
|
auto read(uint mode, uint32 address) -> uint32;
|
|
|
|
auto load(uint mode, uint32 address) -> uint32;
|
|
|
|
auto write(uint mode, uint32 address, uint32 word) -> void;
|
|
|
|
auto store(uint mode, uint32 address, uint32 word) -> void;
|
|
|
|
|
|
|
|
//algorithms.cpp
|
Update to v103r28 release.
byuu says:
Changelog:
- processor/arm7tdmi: implemented 10 of 19 ARM instructions
- processor/arm7tdmi: implemented 1 of 22 THUMB instructions
Today's WIP was 6 hours of work, and yesterday's was 5 hours.
Half of today was just trying to come up with the design to use a
lambda-based dispatcher to map both instructions and disassembly,
similar to the 68K core. The problem is that the ARM core has 28 unique
bits, which is just far too many bits to have a full lookup table like
the 16-bit 68K core.
The thing I wanted more than anything else was to perform the opcode
bitfield decoding once, and have it decoded for both instructions and
the disassembler. It took three hours to come up with a design that
worked for the ARM half ... relying on #defines being able to pull in
other #defines that were declared and changed later after the first
one. But, I'm happy with it. The decoding is in the table building, as
it is with the 68K core. The decoding does happen at run-time on each
instruction invocation, but it has to be done.
As to the THUMB core, I can create a 64K-entry lambda table to cover all
possible encodings, and ... even though it's a cache killer, I've
decided to go for it, given the outstanding performance it obtained in
the M68K core, as well as considering that THUMB mode is far more common
in GBA games.
As to both cores ... I'm a little torn between two extremes:
On the one hand, I can condense the number of ARM/THUMB instructions
further to eliminate more redundant code. On the other, I can split them
apart to reduce the number of conditional tests needed to execute each
instruction. It's really the disassembler that makes me not want to
split them up further ... as I have to split the disassembler functions
up equally to the instruction functions. But it may be worth it if it's
a speed improvement.
2017-08-07 12:20:35 +00:00
|
|
|
auto ADD(uint32, uint32, bool) -> uint32;
|
2017-08-06 13:36:26 +00:00
|
|
|
auto ASR(uint32, uint8) -> uint32;
|
Update to v103r28 release.
byuu says:
Changelog:
- processor/arm7tdmi: implemented 10 of 19 ARM instructions
- processor/arm7tdmi: implemented 1 of 22 THUMB instructions
Today's WIP was 6 hours of work, and yesterday's was 5 hours.
Half of today was just trying to come up with the design to use a
lambda-based dispatcher to map both instructions and disassembly,
similar to the 68K core. The problem is that the ARM core has 28 unique
bits, which is just far too many bits to have a full lookup table like
the 16-bit 68K core.
The thing I wanted more than anything else was to perform the opcode
bitfield decoding once, and have it decoded for both instructions and
the disassembler. It took three hours to come up with a design that
worked for the ARM half ... relying on #defines being able to pull in
other #defines that were declared and changed later after the first
one. But, I'm happy with it. The decoding is in the table building, as
it is with the 68K core. The decoding does happen at run-time on each
instruction invocation, but it has to be done.
As to the THUMB core, I can create a 64K-entry lambda table to cover all
possible encodings, and ... even though it's a cache killer, I've
decided to go for it, given the outstanding performance it obtained in
the M68K core, as well as considering that THUMB mode is far more common
in GBA games.
As to both cores ... I'm a little torn between two extremes:
On the one hand, I can condense the number of ARM/THUMB instructions
further to eliminate more redundant code. On the other, I can split them
apart to reduce the number of conditional tests needed to execute each
instruction. It's really the disassembler that makes me not want to
split them up further ... as I have to split the disassembler functions
up equally to the instruction functions. But it may be worth it if it's
a speed improvement.
2017-08-07 12:20:35 +00:00
|
|
|
auto BIT(uint32) -> uint32;
|
2017-08-06 13:36:26 +00:00
|
|
|
auto LSL(uint32, uint8) -> uint32;
|
|
|
|
auto LSR(uint32, uint8) -> uint32;
|
Update to v103r28 release.
byuu says:
Changelog:
- processor/arm7tdmi: implemented 10 of 19 ARM instructions
- processor/arm7tdmi: implemented 1 of 22 THUMB instructions
Today's WIP was 6 hours of work, and yesterday's was 5 hours.
Half of today was just trying to come up with the design to use a
lambda-based dispatcher to map both instructions and disassembly,
similar to the 68K core. The problem is that the ARM core has 28 unique
bits, which is just far too many bits to have a full lookup table like
the 16-bit 68K core.
The thing I wanted more than anything else was to perform the opcode
bitfield decoding once, and have it decoded for both instructions and
the disassembler. It took three hours to come up with a design that
worked for the ARM half ... relying on #defines being able to pull in
other #defines that were declared and changed later after the first
one. But, I'm happy with it. The decoding is in the table building, as
it is with the 68K core. The decoding does happen at run-time on each
instruction invocation, but it has to be done.
As to the THUMB core, I can create a 64K-entry lambda table to cover all
possible encodings, and ... even though it's a cache killer, I've
decided to go for it, given the outstanding performance it obtained in
the M68K core, as well as considering that THUMB mode is far more common
in GBA games.
As to both cores ... I'm a little torn between two extremes:
On the one hand, I can condense the number of ARM/THUMB instructions
further to eliminate more redundant code. On the other, I can split them
apart to reduce the number of conditional tests needed to execute each
instruction. It's really the disassembler that makes me not want to
split them up further ... as I have to split the disassembler functions
up equally to the instruction functions. But it may be worth it if it's
a speed improvement.
2017-08-07 12:20:35 +00:00
|
|
|
auto MUL(uint32, uint32, uint32) -> uint32;
|
2017-08-06 13:36:26 +00:00
|
|
|
auto ROR(uint32, uint8) -> uint32;
|
|
|
|
auto RRX(uint32) -> uint32;
|
Update to v103r28 release.
byuu says:
Changelog:
- processor/arm7tdmi: implemented 10 of 19 ARM instructions
- processor/arm7tdmi: implemented 1 of 22 THUMB instructions
Today's WIP was 6 hours of work, and yesterday's was 5 hours.
Half of today was just trying to come up with the design to use a
lambda-based dispatcher to map both instructions and disassembly,
similar to the 68K core. The problem is that the ARM core has 28 unique
bits, which is just far too many bits to have a full lookup table like
the 16-bit 68K core.
The thing I wanted more than anything else was to perform the opcode
bitfield decoding once, and have it decoded for both instructions and
the disassembler. It took three hours to come up with a design that
worked for the ARM half ... relying on #defines being able to pull in
other #defines that were declared and changed later after the first
one. But, I'm happy with it. The decoding is in the table building, as
it is with the 68K core. The decoding does happen at run-time on each
instruction invocation, but it has to be done.
As to the THUMB core, I can create a 64K-entry lambda table to cover all
possible encodings, and ... even though it's a cache killer, I've
decided to go for it, given the outstanding performance it obtained in
the M68K core, as well as considering that THUMB mode is far more common
in GBA games.
As to both cores ... I'm a little torn between two extremes:
On the one hand, I can condense the number of ARM/THUMB instructions
further to eliminate more redundant code. On the other, I can split them
apart to reduce the number of conditional tests needed to execute each
instruction. It's really the disassembler that makes me not want to
split them up further ... as I have to split the disassembler functions
up equally to the instruction functions. But it may be worth it if it's
a speed improvement.
2017-08-07 12:20:35 +00:00
|
|
|
auto SUB(uint32, uint32, bool) -> uint32;
|
2017-08-06 13:36:26 +00:00
|
|
|
auto TST(uint4) -> bool;
|
|
|
|
|
|
|
|
//instruction.cpp
|
|
|
|
auto fetch() -> void;
|
|
|
|
auto instruction() -> void;
|
2017-08-11 16:02:09 +00:00
|
|
|
auto exception(uint mode, uint32 address) -> void;
|
Update to v103r28 release.
byuu says:
Changelog:
- processor/arm7tdmi: implemented 10 of 19 ARM instructions
- processor/arm7tdmi: implemented 1 of 22 THUMB instructions
Today's WIP was 6 hours of work, and yesterday's was 5 hours.
Half of today was just trying to come up with the design to use a
lambda-based dispatcher to map both instructions and disassembly,
similar to the 68K core. The problem is that the ARM core has 28 unique
bits, which is just far too many bits to have a full lookup table like
the 16-bit 68K core.
The thing I wanted more than anything else was to perform the opcode
bitfield decoding once, and have it decoded for both instructions and
the disassembler. It took three hours to come up with a design that
worked for the ARM half ... relying on #defines being able to pull in
other #defines that were declared and changed later after the first
one. But, I'm happy with it. The decoding is in the table building, as
it is with the 68K core. The decoding does happen at run-time on each
instruction invocation, but it has to be done.
As to the THUMB core, I can create a 64K-entry lambda table to cover all
possible encodings, and ... even though it's a cache killer, I've
decided to go for it, given the outstanding performance it obtained in
the M68K core, as well as considering that THUMB mode is far more common
in GBA games.
As to both cores ... I'm a little torn between two extremes:
On the one hand, I can condense the number of ARM/THUMB instructions
further to eliminate more redundant code. On the other, I can split them
apart to reduce the number of conditional tests needed to execute each
instruction. It's really the disassembler that makes me not want to
split them up further ... as I have to split the disassembler functions
up equally to the instruction functions. But it may be worth it if it's
a speed improvement.
2017-08-07 12:20:35 +00:00
|
|
|
auto armInitialize() -> void;
|
|
|
|
auto thumbInitialize() -> void;
|
2017-08-06 13:36:26 +00:00
|
|
|
|
|
|
|
//instructions-arm.cpp
|
Update to v103r28 release.
byuu says:
Changelog:
- processor/arm7tdmi: implemented 10 of 19 ARM instructions
- processor/arm7tdmi: implemented 1 of 22 THUMB instructions
Today's WIP was 6 hours of work, and yesterday's was 5 hours.
Half of today was just trying to come up with the design to use a
lambda-based dispatcher to map both instructions and disassembly,
similar to the 68K core. The problem is that the ARM core has 28 unique
bits, which is just far too many bits to have a full lookup table like
the 16-bit 68K core.
The thing I wanted more than anything else was to perform the opcode
bitfield decoding once, and have it decoded for both instructions and
the disassembler. It took three hours to come up with a design that
worked for the ARM half ... relying on #defines being able to pull in
other #defines that were declared and changed later after the first
one. But, I'm happy with it. The decoding is in the table building, as
it is with the 68K core. The decoding does happen at run-time on each
instruction invocation, but it has to be done.
As to the THUMB core, I can create a 64K-entry lambda table to cover all
possible encodings, and ... even though it's a cache killer, I've
decided to go for it, given the outstanding performance it obtained in
the M68K core, as well as considering that THUMB mode is far more common
in GBA games.
As to both cores ... I'm a little torn between two extremes:
On the one hand, I can condense the number of ARM/THUMB instructions
further to eliminate more redundant code. On the other, I can split them
apart to reduce the number of conditional tests needed to execute each
instruction. It's really the disassembler that makes me not want to
split them up further ... as I have to split the disassembler functions
up equally to the instruction functions. But it may be worth it if it's
a speed improvement.
2017-08-07 12:20:35 +00:00
|
|
|
auto armALU(uint4 mode, uint4 target, uint4 source, uint32 data) -> void;
|
|
|
|
auto armMoveToStatus(uint4 field, uint1 source, uint32 data) -> void;
|
|
|
|
|
2017-08-08 11:51:41 +00:00
|
|
|
auto armInstructionBranch(int24, uint1) -> void;
|
Update to v103r28 release.
byuu says:
Changelog:
- processor/arm7tdmi: implemented 10 of 19 ARM instructions
- processor/arm7tdmi: implemented 1 of 22 THUMB instructions
Today's WIP was 6 hours of work, and yesterday's was 5 hours.
Half of today was just trying to come up with the design to use a
lambda-based dispatcher to map both instructions and disassembly,
similar to the 68K core. The problem is that the ARM core has 28 unique
bits, which is just far too many bits to have a full lookup table like
the 16-bit 68K core.
The thing I wanted more than anything else was to perform the opcode
bitfield decoding once, and have it decoded for both instructions and
the disassembler. It took three hours to come up with a design that
worked for the ARM half ... relying on #defines being able to pull in
other #defines that were declared and changed later after the first
one. But, I'm happy with it. The decoding is in the table building, as
it is with the 68K core. The decoding does happen at run-time on each
instruction invocation, but it has to be done.
As to the THUMB core, I can create a 64K-entry lambda table to cover all
possible encodings, and ... even though it's a cache killer, I've
decided to go for it, given the outstanding performance it obtained in
the M68K core, as well as considering that THUMB mode is far more common
in GBA games.
As to both cores ... I'm a little torn between two extremes:
On the one hand, I can condense the number of ARM/THUMB instructions
further to eliminate more redundant code. On the other, I can split them
apart to reduce the number of conditional tests needed to execute each
instruction. It's really the disassembler that makes me not want to
split them up further ... as I have to split the disassembler functions
up equally to the instruction functions. But it may be worth it if it's
a speed improvement.
2017-08-07 12:20:35 +00:00
|
|
|
auto armInstructionBranchExchangeRegister(uint4) -> void;
|
2017-08-08 11:51:41 +00:00
|
|
|
auto armInstructionDataImmediate(uint8, uint4, uint4, uint4, uint1, uint4) -> void;
|
|
|
|
auto armInstructionDataImmediateShift(uint4, uint2, uint5, uint4, uint4, uint1, uint4) -> void;
|
|
|
|
auto armInstructionDataRegisterShift(uint4, uint2, uint4, uint4, uint4, uint1, uint4) -> void;
|
Update to v103r28 release.
byuu says:
Changelog:
- processor/arm7tdmi: implemented 10 of 19 ARM instructions
- processor/arm7tdmi: implemented 1 of 22 THUMB instructions
Today's WIP was 6 hours of work, and yesterday's was 5 hours.
Half of today was just trying to come up with the design to use a
lambda-based dispatcher to map both instructions and disassembly,
similar to the 68K core. The problem is that the ARM core has 28 unique
bits, which is just far too many bits to have a full lookup table like
the 16-bit 68K core.
The thing I wanted more than anything else was to perform the opcode
bitfield decoding once, and have it decoded for both instructions and
the disassembler. It took three hours to come up with a design that
worked for the ARM half ... relying on #defines being able to pull in
other #defines that were declared and changed later after the first
one. But, I'm happy with it. The decoding is in the table building, as
it is with the 68K core. The decoding does happen at run-time on each
instruction invocation, but it has to be done.
As to the THUMB core, I can create a 64K-entry lambda table to cover all
possible encodings, and ... even though it's a cache killer, I've
decided to go for it, given the outstanding performance it obtained in
the M68K core, as well as considering that THUMB mode is far more common
in GBA games.
As to both cores ... I'm a little torn between two extremes:
On the one hand, I can condense the number of ARM/THUMB instructions
further to eliminate more redundant code. On the other, I can split them
apart to reduce the number of conditional tests needed to execute each
instruction. It's really the disassembler that makes me not want to
split them up further ... as I have to split the disassembler functions
up equally to the instruction functions. But it may be worth it if it's
a speed improvement.
2017-08-07 12:20:35 +00:00
|
|
|
auto armInstructionLoadImmediate(uint8, uint1, uint4, uint4, uint1, uint1, uint1) -> void;
|
|
|
|
auto armInstructionLoadRegister(uint4, uint1, uint4, uint4, uint1, uint1, uint1) -> void;
|
|
|
|
auto armInstructionMemorySwap(uint4, uint4, uint4, uint1) -> void;
|
|
|
|
auto armInstructionMoveHalfImmediate(uint8, uint4, uint4, uint1, uint1, uint1, uint1) -> void;
|
|
|
|
auto armInstructionMoveHalfRegister(uint4, uint4, uint4, uint1, uint1, uint1, uint1) -> void;
|
2017-08-08 11:51:41 +00:00
|
|
|
auto armInstructionMoveImmediateOffset(uint12, uint4, uint4, uint1, uint1, uint1, uint1, uint1) -> void;
|
|
|
|
auto armInstructionMoveMultiple(uint16, uint4, uint1, uint1, uint1, uint1, uint1) -> void;
|
|
|
|
auto armInstructionMoveRegisterOffset(uint4, uint2, uint5, uint4, uint4, uint1, uint1, uint1, uint1, uint1) -> void;
|
Update to v103r28 release.
byuu says:
Changelog:
- processor/arm7tdmi: implemented 10 of 19 ARM instructions
- processor/arm7tdmi: implemented 1 of 22 THUMB instructions
Today's WIP was 6 hours of work, and yesterday's was 5 hours.
Half of today was just trying to come up with the design to use a
lambda-based dispatcher to map both instructions and disassembly,
similar to the 68K core. The problem is that the ARM core has 28 unique
bits, which is just far too many bits to have a full lookup table like
the 16-bit 68K core.
The thing I wanted more than anything else was to perform the opcode
bitfield decoding once, and have it decoded for both instructions and
the disassembler. It took three hours to come up with a design that
worked for the ARM half ... relying on #defines being able to pull in
other #defines that were declared and changed later after the first
one. But, I'm happy with it. The decoding is in the table building, as
it is with the 68K core. The decoding does happen at run-time on each
instruction invocation, but it has to be done.
As to the THUMB core, I can create a 64K-entry lambda table to cover all
possible encodings, and ... even though it's a cache killer, I've
decided to go for it, given the outstanding performance it obtained in
the M68K core, as well as considering that THUMB mode is far more common
in GBA games.
As to both cores ... I'm a little torn between two extremes:
On the one hand, I can condense the number of ARM/THUMB instructions
further to eliminate more redundant code. On the other, I can split them
apart to reduce the number of conditional tests needed to execute each
instruction. It's really the disassembler that makes me not want to
split them up further ... as I have to split the disassembler functions
up equally to the instruction functions. But it may be worth it if it's
a speed improvement.
2017-08-07 12:20:35 +00:00
|
|
|
auto armInstructionMoveToRegisterFromStatus(uint4, uint1) -> void;
|
|
|
|
auto armInstructionMoveToStatusFromImmediate(uint8, uint4, uint4, uint1) -> void;
|
|
|
|
auto armInstructionMoveToStatusFromRegister(uint4, uint4, uint1) -> void;
|
|
|
|
auto armInstructionMultiply(uint4, uint4, uint4, uint4, uint1, uint1) -> void;
|
|
|
|
auto armInstructionMultiplyLong(uint4, uint4, uint4, uint4, uint1, uint1, uint1) -> void;
|
2017-08-08 11:51:41 +00:00
|
|
|
auto armInstructionSoftwareInterrupt(uint24 immediate) -> void;
|
2017-08-11 16:02:09 +00:00
|
|
|
auto armInstructionUndefined() -> void;
|
2017-08-06 13:36:26 +00:00
|
|
|
|
|
|
|
//instructions-thumb.cpp
|
2017-08-08 11:51:41 +00:00
|
|
|
auto thumbInstructionALU(uint3, uint3, uint4) -> void;
|
2017-08-09 11:11:59 +00:00
|
|
|
auto thumbInstructionALUExtended(uint4, uint4, uint2) -> void;
|
|
|
|
auto thumbInstructionAddRegister(uint8, uint3, uint1) -> void;
|
2017-08-08 11:51:41 +00:00
|
|
|
auto thumbInstructionAdjustImmediate(uint3, uint3, uint3, uint1) -> void;
|
Update to v103r28 release.
byuu says:
Changelog:
- processor/arm7tdmi: implemented 10 of 19 ARM instructions
- processor/arm7tdmi: implemented 1 of 22 THUMB instructions
Today's WIP was 6 hours of work, and yesterday's was 5 hours.
Half of today was just trying to come up with the design to use a
lambda-based dispatcher to map both instructions and disassembly,
similar to the 68K core. The problem is that the ARM core has 28 unique
bits, which is just far too many bits to have a full lookup table like
the 16-bit 68K core.
The thing I wanted more than anything else was to perform the opcode
bitfield decoding once, and have it decoded for both instructions and
the disassembler. It took three hours to come up with a design that
worked for the ARM half ... relying on #defines being able to pull in
other #defines that were declared and changed later after the first
one. But, I'm happy with it. The decoding is in the table building, as
it is with the 68K core. The decoding does happen at run-time on each
instruction invocation, but it has to be done.
As to the THUMB core, I can create a 64K-entry lambda table to cover all
possible encodings, and ... even though it's a cache killer, I've
decided to go for it, given the outstanding performance it obtained in
the M68K core, as well as considering that THUMB mode is far more common
in GBA games.
As to both cores ... I'm a little torn between two extremes:
On the one hand, I can condense the number of ARM/THUMB instructions
further to eliminate more redundant code. On the other, I can split them
apart to reduce the number of conditional tests needed to execute each
instruction. It's really the disassembler that makes me not want to
split them up further ... as I have to split the disassembler functions
up equally to the instruction functions. But it may be worth it if it's
a speed improvement.
2017-08-07 12:20:35 +00:00
|
|
|
auto thumbInstructionAdjustRegister(uint3, uint3, uint3, uint1) -> void;
|
2017-08-09 11:11:59 +00:00
|
|
|
auto thumbInstructionAdjustStack(uint7, uint1) -> void;
|
2017-08-08 11:51:41 +00:00
|
|
|
auto thumbInstructionBranchExchange(uint4) -> void;
|
2017-08-09 11:11:59 +00:00
|
|
|
auto thumbInstructionBranchFarPrefix(int11) -> void;
|
|
|
|
auto thumbInstructionBranchFarSuffix(uint11) -> void;
|
|
|
|
auto thumbInstructionBranchNear(int11) -> void;
|
|
|
|
auto thumbInstructionBranchTest(int8, uint4) -> void;
|
2017-08-08 11:51:41 +00:00
|
|
|
auto thumbInstructionImmediate(uint8, uint3, uint2) -> void;
|
2017-08-09 11:11:59 +00:00
|
|
|
auto thumbInstructionLoadLiteral(uint8, uint3) -> void;
|
|
|
|
auto thumbInstructionMoveByteImmediate(uint3, uint3, uint5, uint1) -> void;
|
|
|
|
auto thumbInstructionMoveHalfImmediate(uint3, uint3, uint5, uint1) -> void;
|
|
|
|
auto thumbInstructionMoveMultiple(uint8, uint3, uint1) -> void;
|
|
|
|
auto thumbInstructionMoveRegisterOffset(uint3, uint3, uint3, uint3) -> void;
|
|
|
|
auto thumbInstructionMoveStack(uint8, uint3, uint1) -> void;
|
|
|
|
auto thumbInstructionMoveWordImmediate(uint3, uint3, uint5, uint1) -> void;
|
2017-08-08 11:51:41 +00:00
|
|
|
auto thumbInstructionShiftImmediate(uint3, uint3, uint5, uint2) -> void;
|
2017-08-09 11:11:59 +00:00
|
|
|
auto thumbInstructionSoftwareInterrupt(uint8) -> void;
|
|
|
|
auto thumbInstructionStackMultiple(uint8, uint1, uint1) -> void;
|
2017-08-11 16:02:09 +00:00
|
|
|
auto thumbInstructionUndefined() -> void;
|
Update to v103r28 release.
byuu says:
Changelog:
- processor/arm7tdmi: implemented 10 of 19 ARM instructions
- processor/arm7tdmi: implemented 1 of 22 THUMB instructions
Today's WIP was 6 hours of work, and yesterday's was 5 hours.
Half of today was just trying to come up with the design to use a
lambda-based dispatcher to map both instructions and disassembly,
similar to the 68K core. The problem is that the ARM core has 28 unique
bits, which is just far too many bits to have a full lookup table like
the 16-bit 68K core.
The thing I wanted more than anything else was to perform the opcode
bitfield decoding once, and have it decoded for both instructions and
the disassembler. It took three hours to come up with a design that
worked for the ARM half ... relying on #defines being able to pull in
other #defines that were declared and changed later after the first
one. But, I'm happy with it. The decoding is in the table building, as
it is with the 68K core. The decoding does happen at run-time on each
instruction invocation, but it has to be done.
As to the THUMB core, I can create a 64K-entry lambda table to cover all
possible encodings, and ... even though it's a cache killer, I've
decided to go for it, given the outstanding performance it obtained in
the M68K core, as well as considering that THUMB mode is far more common
in GBA games.
As to both cores ... I'm a little torn between two extremes:
On the one hand, I can condense the number of ARM/THUMB instructions
further to eliminate more redundant code. On the other, I can split them
apart to reduce the number of conditional tests needed to execute each
instruction. It's really the disassembler that makes me not want to
split them up further ... as I have to split the disassembler functions
up equally to the instruction functions. But it may be worth it if it's
a speed improvement.
2017-08-07 12:20:35 +00:00
|
|
|
|
2017-08-06 13:36:26 +00:00
|
|
|
//serialization.cpp
|
|
|
|
auto serialize(serializer&) -> void;
|
|
|
|
|
Update to v103r28 release.
byuu says:
Changelog:
- processor/arm7tdmi: implemented 10 of 19 ARM instructions
- processor/arm7tdmi: implemented 1 of 22 THUMB instructions
Today's WIP was 6 hours of work, and yesterday's was 5 hours.
Half of today was just trying to come up with the design to use a
lambda-based dispatcher to map both instructions and disassembly,
similar to the 68K core. The problem is that the ARM core has 28 unique
bits, which is just far too many bits to have a full lookup table like
the 16-bit 68K core.
The thing I wanted more than anything else was to perform the opcode
bitfield decoding once, and have it decoded for both instructions and
the disassembler. It took three hours to come up with a design that
worked for the ARM half ... relying on #defines being able to pull in
other #defines that were declared and changed later after the first
one. But, I'm happy with it. The decoding is in the table building, as
it is with the 68K core. The decoding does happen at run-time on each
instruction invocation, but it has to be done.
As to the THUMB core, I can create a 64K-entry lambda table to cover all
possible encodings, and ... even though it's a cache killer, I've
decided to go for it, given the outstanding performance it obtained in
the M68K core, as well as considering that THUMB mode is far more common
in GBA games.
As to both cores ... I'm a little torn between two extremes:
On the one hand, I can condense the number of ARM/THUMB instructions
further to eliminate more redundant code. On the other, I can split them
apart to reduce the number of conditional tests needed to execute each
instruction. It's really the disassembler that makes me not want to
split them up further ... as I have to split the disassembler functions
up equally to the instruction functions. But it may be worth it if it's
a speed improvement.
2017-08-07 12:20:35 +00:00
|
|
|
//disassembler.cpp
|
2017-08-10 11:26:02 +00:00
|
|
|
//returns disassembly text; both arguments are optional and default to `nothing`
//NOTE(review): presumably an omitted argument falls back to the CPU's current state - confirm in disassembler.cpp
auto disassemble(maybe<uint32> pc = nothing, maybe<boolean> thumb = nothing) -> string;
|
2017-08-09 11:11:59 +00:00
|
|
|
//takes no arguments and returns text
//NOTE(review): presumably a dump of the register contents - confirm in disassembler.cpp
auto disassembleRegisters() -> string;
|
Update to v103r28 release.
byuu says:
Changelog:
- processor/arm7tdmi: implemented 10 of 19 ARM instructions
- processor/arm7tdmi: implemented 1 of 22 THUMB instructions
Today's WIP was 6 hours of work, and yesterday's was 5 hours.
Half of today was just trying to come up with the design to use a
lambda-based dispatcher to map both instructions and disassembly,
similar to the 68K core. The problem is that the ARM core has 28 unique
bits, which is just far too many bits to have a full lookup table like
the 16-bit 68K core.
The thing I wanted more than anything else was to perform the opcode
bitfield decoding once, and have it decoded for both instructions and
the disassembler. It took three hours to come up with a design that
worked for the ARM half ... relying on #defines being able to pull in
other #defines that were declared and changed later after the first
one. But, I'm happy with it. The decoding is in the table building, as
it is with the 68K core. The decoding does happen at run-time on each
instruction invocation, but it has to be done.
As to the THUMB core, I can create a 64K-entry lambda table to cover all
possible encodings, and ... even though it's a cache killer, I've
decided to go for it, given the outstanding performance it obtained in
the M68K core, as well as considering that THUMB mode is far more common
in GBA games.
As to both cores ... I'm a little torn between two extremes:
On the one hand, I can condense the number of ARM/THUMB instructions
further to eliminate more redundant code. On the other, I can split them
apart to reduce the number of conditional tests needed to execute each
instruction. It's really the disassembler that makes me not want to
split them up further ... as I have to split the disassembler functions
up equally to the instruction functions. But it may be worth it if it's
a speed improvement.
2017-08-07 12:20:35 +00:00
|
|
|
|
2017-08-06 13:36:26 +00:00
|
|
|
struct GPR {
|
2017-08-11 16:02:09 +00:00
|
|
|
inline operator uint32_t() const { return data; }
|
|
|
|
inline auto operator=(const GPR& value) -> GPR& { return operator=(value.data); }
|
2017-08-06 13:36:26 +00:00
|
|
|
|
|
|
|
inline auto operator=(uint32 value) -> GPR& {
|
|
|
|
data = value;
|
|
|
|
if(modify) modify();
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint32 data;
|
|
|
|
function<auto () -> void> modify;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct PSR {
|
|
|
|
enum : uint {
|
|
|
|
USR = 0x10, //user
|
Update to v103r28 release.
byuu says:
Changelog:
- processor/arm7tdmi: implemented 10 of 19 ARM instructions
- processor/arm7tdmi: implemented 1 of 22 THUMB instructions
Today's WIP was 6 hours of work, and yesterday's was 5 hours.
Half of today was just trying to come up with the design to use a
lambda-based dispatcher to map both instructions and disassembly,
similar to the 68K core. The problem is that the ARM core has 28 unique
bits, which is just far too many bits to have a full lookup table like
the 16-bit 68K core.
The thing I wanted more than anything else was to perform the opcode
bitfield decoding once, and have it decoded for both instructions and
the disassembler. It took three hours to come up with a design that
worked for the ARM half ... relying on #defines being able to pull in
other #defines that were declared and changed later after the first
one. But, I'm happy with it. The decoding is in the table building, as
it is with the 68K core. The decoding does happen at run-time on each
instruction invocation, but it has to be done.
As to the THUMB core, I can create a 64K-entry lambda table to cover all
possible encodings, and ... even though it's a cache killer, I've
decided to go for it, given the outstanding performance it obtained in
the M68K core, as well as considering that THUMB mode is far more common
in GBA games.
As to both cores ... I'm a little torn between two extremes:
On the one hand, I can condense the number of ARM/THUMB instructions
further to eliminate more redundant code. On the other, I can split them
apart to reduce the number of conditional tests needed to execute each
instruction. It's really the disassembler that makes me not want to
split them up further ... as I have to split the disassembler functions
up equally to the instruction functions. But it may be worth it if it's
a speed improvement.
2017-08-07 12:20:35 +00:00
|
|
|
FIQ = 0x11, //fast interrupt
|
|
|
|
IRQ = 0x12, //interrupt
|
|
|
|
SVC = 0x13, //service
|
2017-08-06 13:36:26 +00:00
|
|
|
ABT = 0x17, //abort
|
|
|
|
UND = 0x1b, //undefined
|
|
|
|
SYS = 0x1f, //system
|
|
|
|
};
|
|
|
|
|
|
|
|
inline operator uint32_t() const {
|
|
|
|
return m << 0 | t << 5 | f << 6 | i << 7 | v << 28 | c << 29 | z << 30 | n << 31;
|
|
|
|
}
|
|
|
|
|
|
|
|
inline auto operator=(uint32 data) -> PSR& {
|
|
|
|
m = data.bits(0,4);
|
|
|
|
t = data.bit(5);
|
|
|
|
f = data.bit(6);
|
|
|
|
i = data.bit(7);
|
|
|
|
v = data.bit(28);
|
|
|
|
c = data.bit(29);
|
|
|
|
z = data.bit(30);
|
|
|
|
n = data.bit(31);
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
|
|
|
//serialization.cpp
|
|
|
|
auto serialize(serializer&) -> void;
|
|
|
|
|
2017-08-10 11:26:02 +00:00
|
|
|
uint5 m; //mode
|
|
|
|
boolean t; //thumb
|
|
|
|
boolean f; //fiq
|
|
|
|
boolean i; //irq
|
|
|
|
boolean v; //overflow
|
|
|
|
boolean c; //carry
|
|
|
|
boolean z; //zero
|
|
|
|
boolean n; //negative
|
2017-08-06 13:36:26 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
struct Processor {
|
|
|
|
//serialization.cpp
|
|
|
|
auto serialize(serializer&) -> void;
|
|
|
|
|
|
|
|
GPR r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15;
|
|
|
|
PSR cpsr;
|
|
|
|
|
|
|
|
struct FIQ {
|
|
|
|
GPR r8, r9, r10, r11, r12, r13, r14;
|
|
|
|
PSR spsr;
|
|
|
|
} fiq;
|
|
|
|
|
|
|
|
struct IRQ {
|
|
|
|
GPR r13, r14;
|
|
|
|
PSR spsr;
|
|
|
|
} irq;
|
|
|
|
|
|
|
|
struct SVC {
|
|
|
|
GPR r13, r14;
|
|
|
|
PSR spsr;
|
|
|
|
} svc;
|
|
|
|
|
|
|
|
struct ABT {
|
|
|
|
GPR r13, r14;
|
|
|
|
PSR spsr;
|
|
|
|
} abt;
|
|
|
|
|
|
|
|
struct UND {
|
|
|
|
GPR r13, r14;
|
|
|
|
PSR spsr;
|
|
|
|
} und;
|
|
|
|
} processor;
|
|
|
|
|
|
|
|
struct Pipeline {
|
|
|
|
//serialization.cpp
|
|
|
|
auto serialize(serializer&) -> void;
|
|
|
|
|
|
|
|
struct Instruction {
|
|
|
|
uint32 address;
|
|
|
|
uint32 instruction;
|
2017-08-11 16:02:09 +00:00
|
|
|
boolean thumb; //not used by fetch stage
|
2017-08-06 13:36:26 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
uint1 reload = 1;
|
|
|
|
uint1 nonsequential = 1;
|
|
|
|
Instruction fetch;
|
|
|
|
Instruction decode;
|
|
|
|
Instruction execute;
|
|
|
|
} pipeline;
|
|
|
|
|
Update to v103r28 release.
byuu says:
Changelog:
- processor/arm7tdmi: implemented 10 of 19 ARM instructions
- processor/arm7tdmi: implemented 1 of 22 THUMB instructions
Today's WIP was 6 hours of work, and yesterday's was 5 hours.
Half of today was just trying to come up with the design to use a
lambda-based dispatcher to map both instructions and disassembly,
similar to the 68K core. The problem is that the ARM core has 28 unique
bits, which is just far too many bits to have a full lookup table like
the 16-bit 68K core.
The thing I wanted more than anything else was to perform the opcode
bitfield decoding once, and have it decoded for both instructions and
the disassembler. It took three hours to come up with a design that
worked for the ARM half ... relying on #defines being able to pull in
other #defines that were declared and changed later after the first
one. But, I'm happy with it. The decoding is in the table building, as
it is with the 68K core. The decoding does happen at run-time on each
instruction invocation, but it has to be done.
As to the THUMB core, I can create a 64K-entry lambda table to cover all
possible encodings, and ... even though it's a cache killer, I've
decided to go for it, given the outstanding performance it obtained in
the M68K core, as well as considering that THUMB mode is far more common
in GBA games.
As to both cores ... I'm a little torn between two extremes:
On the one hand, I can condense the number of ARM/THUMB instructions
further to eliminate more redundant code. On the other, I can split them
apart to reduce the number of conditional tests needed to execute each
instruction. It's really the disassembler that makes me not want to
split them up further ... as I have to split the disassembler functions
up equally to the instruction functions. But it may be worth it if it's
a speed improvement.
2017-08-07 12:20:35 +00:00
|
|
|
//32-bit opcode storage
//NOTE(review): presumably the instruction word currently being executed - confirm in arm7tdmi.cpp
uint32 opcode;
|
2017-08-06 13:36:26 +00:00
|
|
|
//NOTE(review): presumably a carry value kept outside of cpsr.c (e.g. shifter carry-out) - confirm
boolean carry;
|
|
|
|
//NOTE(review): presumably the interrupt request line state - confirm
boolean irq;
|
Update to v103r28 release.
byuu says:
Changelog:
- processor/arm7tdmi: implemented 10 of 19 ARM instructions
- processor/arm7tdmi: implemented 1 of 22 THUMB instructions
Today's WIP was 6 hours of work, and yesterday's was 5 hours.
Half of today was just trying to come up with the design to use a
lambda-based dispatcher to map both instructions and disassembly,
similar to the 68K core. The problem is that the ARM core has 28 unique
bits, which is just far too many bits to have a full lookup table like
the 16-bit 68K core.
The thing I wanted more than anything else was to perform the opcode
bitfield decoding once, and have it decoded for both instructions and
the disassembler. It took three hours to come up with a design that
worked for the ARM half ... relying on #defines being able to pull in
other #defines that were declared and changed later after the first
one. But, I'm happy with it. The decoding is in the table building, as
it is with the 68K core. The decoding does happen at run-time on each
instruction invocation, but it has to be done.
As to the THUMB core, I can create a 64K-entry lambda table to cover all
possible encodings, and ... even though it's a cache killer, I've
decided to go for it, given the outstanding performance it obtained in
the M68K core, as well as considering that THUMB mode is far more common
in GBA games.
As to both cores ... I'm a little torn between two extremes:
On the one hand, I can condense the number of ARM/THUMB instructions
further to eliminate more redundant code. On the other, I can split them
apart to reduce the number of conditional tests needed to execute each
instruction. It's really the disassembler that makes me not want to
split them up further ... as I have to split the disassembler functions
up equally to the instruction functions. But it may be worth it if it's
a speed improvement.
2017-08-07 12:20:35 +00:00
|
|
|
|
2017-08-11 16:02:09 +00:00
|
|
|
//ARM handler table: 4096 entries, each callable with the 32-bit opcode
//NOTE(review): 4096 entries means a 12-bit index; which opcode bits form it is not visible here - confirm in the table builder
function<auto (uint32 opcode) -> void> armInstruction[4096];
|
|
|
|
//THUMB handler table: 65536 entries, one slot per possible 16-bit value; handlers take no argument
function<auto () -> void> thumbInstruction[65536];
|
Update to v103r28 release.
byuu says:
Changelog:
- processor/arm7tdmi: implemented 10 of 19 ARM instructions
- processor/arm7tdmi: implemented 1 of 22 THUMB instructions
Today's WIP was 6 hours of work, and yesterday's was 5 hours.
Half of today was just trying to come up with the design to use a
lambda-based dispatcher to map both instructions and disassembly,
similar to the 68K core. The problem is that the ARM core has 28 unique
bits, which is just far too many bits to have a full lookup table like
the 16-bit 68K core.
The thing I wanted more than anything else was to perform the opcode
bitfield decoding once, and have it decoded for both instructions and
the disassembler. It took three hours to come up with a design that
worked for the ARM half ... relying on #defines being able to pull in
other #defines that were declared and changed later after the first
one. But, I'm happy with it. The decoding is in the table building, as
it is with the 68K core. The decoding does happen at run-time on each
instruction invocation, but it has to be done.
As to the THUMB core, I can create a 64K-entry lambda table to cover all
possible encodings, and ... even though it's a cache killer, I've
decided to go for it, given the outstanding performance it obtained in
the M68K core, as well as considering that THUMB mode is far more common
in GBA games.
As to both cores ... I'm a little torn between two extremes:
On the one hand, I can condense the number of ARM/THUMB instructions
further to eliminate more redundant code. On the other, I can split them
apart to reduce the number of conditional tests needed to execute each
instruction. It's really the disassembler that makes me not want to
split them up further ... as I have to split the disassembler functions
up equally to the instruction functions. But it may be worth it if it's
a speed improvement.
2017-08-07 12:20:35 +00:00
|
|
|
|
|
|
|
//disassembler.cpp
|
2017-08-08 11:51:41 +00:00
|
|
|
//ARM disassembly routine for the branch form; takes a signed 24-bit and a 1-bit value and returns text
//NOTE(review): presumably the branch displacement and the link flag - confirm in disassembler.cpp
auto armDisassembleBranch(int24, uint1) -> string;
|
Update to v103r28 release.
byuu says:
Changelog:
- processor/arm7tdmi: implemented 10 of 19 ARM instructions
- processor/arm7tdmi: implemented 1 of 22 THUMB instructions
Today's WIP was 6 hours of work, and yesterday's was 5 hours.
Half of today was just trying to come up with the design to use a
lambda-based dispatcher to map both instructions and disassembly,
similar to the 68K core. The problem is that the ARM core has 28 unique
bits, which is just far too many bits to have a full lookup table like
the 16-bit 68K core.
The thing I wanted more than anything else was to perform the opcode
bitfield decoding once, and have it decoded for both instructions and
the disassembler. It took three hours to come up with a design that
worked for the ARM half ... relying on #defines being able to pull in
other #defines that were declared and changed later after the first
one. But, I'm happy with it. The decoding is in the table building, as
it is with the 68K core. The decoding does happen at run-time on each
instruction invocation, but it has to be done.
As to the THUMB core, I can create a 64K-entry lambda table to cover all
possible encodings, and ... even though it's a cache killer, I've
decided to go for it, given the outstanding performance it obtained in
the M68K core, as well as considering that THUMB mode is far more common
in GBA games.
As to both cores ... I'm a little torn between two extremes:
On the one hand, I can condense the number of ARM/THUMB instructions
further to eliminate more redundant code. On the other, I can split them
apart to reduce the number of conditional tests needed to execute each
instruction. It's really the disassembler that makes me not want to
split them up further ... as I have to split the disassembler functions
up equally to the instruction functions. But it may be worth it if it's
a speed improvement.
2017-08-07 12:20:35 +00:00
|
|
|
//ARM disassembly routines: each returns the text for one instruction form
//NOTE(review): parameters are unnamed here; presumably the decoded opcode bitfields, in the same
//order the matching instruction handlers receive them - confirm in disassembler.cpp
auto armDisassembleBranchExchangeRegister(uint4) -> string;
|
2017-08-08 11:51:41 +00:00
|
|
|
//data-processing forms: immediate operand, immediate-shifted register, register-shifted register
auto armDisassembleDataImmediate(uint8, uint4, uint4, uint4, uint1, uint4) -> string;
|
|
|
|
auto armDisassembleDataImmediateShift(uint4, uint2, uint5, uint4, uint4, uint1, uint4) -> string;
|
|
|
|
auto armDisassembleDataRegisterShift(uint4, uint2, uint4, uint4, uint4, uint1, uint4) -> string;
|
Update to v103r28 release.
byuu says:
Changelog:
- processor/arm7tdmi: implemented 10 of 19 ARM instructions
- processor/arm7tdmi: implemented 1 of 22 THUMB instructions
Today's WIP was 6 hours of work, and yesterday's was 5 hours.
Half of today was just trying to come up with the design to use a
lambda-based dispatcher to map both instructions and disassembly,
similar to the 68K core. The problem is that the ARM core has 28 unique
bits, which is just far too many bits to have a full lookup table like
the 16-bit 68K core.
The thing I wanted more than anything else was to perform the opcode
bitfield decoding once, and have it decoded for both instructions and
the disassembler. It took three hours to come up with a design that
worked for the ARM half ... relying on #defines being able to pull in
other #defines that were declared and changed later after the first
one. But, I'm happy with it. The decoding is in the table building, as
it is with the 68K core. The decoding does happen at run-time on each
instruction invocation, but it has to be done.
As to the THUMB core, I can create a 64K-entry lambda table to cover all
possible encodings, and ... even though it's a cache killer, I've
decided to go for it, given the outstanding performance it obtained in
the M68K core, as well as considering that THUMB mode is far more common
in GBA games.
As to both cores ... I'm a little torn between two extremes:
On the one hand, I can condense the number of ARM/THUMB instructions
further to eliminate more redundant code. On the other, I can split them
apart to reduce the number of conditional tests needed to execute each
instruction. It's really the disassembler that makes me not want to
split them up further ... as I have to split the disassembler functions
up equally to the instruction functions. But it may be worth it if it's
a speed improvement.
2017-08-07 12:20:35 +00:00
|
|
|
//ARM disassembly routines for the load, swap and move forms: each returns the text for one instruction form
//NOTE(review): parameters are unnamed here; presumably the decoded opcode bitfields, in the same
//order the matching instruction handlers receive them - confirm in disassembler.cpp
auto armDisassembleLoadImmediate(uint8, uint1, uint4, uint4, uint1, uint1, uint1) -> string;
|
|
|
|
auto armDisassembleLoadRegister(uint4, uint1, uint4, uint4, uint1, uint1, uint1) -> string;
|
|
|
|
auto armDisassembleMemorySwap(uint4, uint4, uint4, uint1) -> string;
|
|
|
|
auto armDisassembleMoveHalfImmediate(uint8, uint4, uint4, uint1, uint1, uint1, uint1) -> string;
|
|
|
|
auto armDisassembleMoveHalfRegister(uint4, uint4, uint4, uint1, uint1, uint1, uint1) -> string;
|
2017-08-08 11:51:41 +00:00
|
|
|
auto armDisassembleMoveImmediateOffset(uint12, uint4, uint4, uint1, uint1, uint1, uint1, uint1) -> string;
|
|
|
|
//the first argument is 16 bits wide
//NOTE(review): presumably one bit per register in the transfer list - confirm
auto armDisassembleMoveMultiple(uint16, uint4, uint1, uint1, uint1, uint1, uint1) -> string;
|
|
|
|
auto armDisassembleMoveRegisterOffset(uint4, uint2, uint5, uint4, uint4, uint1, uint1, uint1, uint1, uint1) -> string;
|
Update to v103r28 release.
byuu says:
Changelog:
- processor/arm7tdmi: implemented 10 of 19 ARM instructions
- processor/arm7tdmi: implemented 1 of 22 THUMB instructions
Today's WIP was 6 hours of work, and yesterday's was 5 hours.
Half of today was just trying to come up with the design to use a
lambda-based dispatcher to map both instructions and disassembly,
similar to the 68K core. The problem is that the ARM core has 28 unique
bits, which is just far too many bits to have a full lookup table like
the 16-bit 68K core.
The thing I wanted more than anything else was to perform the opcode
bitfield decoding once, and have it decoded for both instructions and
the disassembler. It took three hours to come up with a design that
worked for the ARM half ... relying on #defines being able to pull in
other #defines that were declared and changed later after the first
one. But, I'm happy with it. The decoding is in the table building, as
it is with the 68K core. The decoding does happen at run-time on each
instruction invocation, but it has to be done.
As to the THUMB core, I can create a 64K-entry lambda table to cover all
possible encodings, and ... even though it's a cache killer, I've
decided to go for it, given the outstanding performance it obtained in
the M68K core, as well as considering that THUMB mode is far more common
in GBA games.
As to both cores ... I'm a little torn between two extremes:
On the one hand, I can condense the number of ARM/THUMB instructions
further to eliminate more redundant code. On the other, I can split them
apart to reduce the number of conditional tests needed to execute each
instruction. It's really the disassembler that makes me not want to
split them up further ... as I have to split the disassembler functions
up equally to the instruction functions. But it may be worth it if it's
a speed improvement.
2017-08-07 12:20:35 +00:00
|
|
|
//ARM disassembly routines for the status-register, multiply, software-interrupt and undefined forms;
//each returns the text for one instruction form
//NOTE(review): parameters are unnamed here; presumably the decoded opcode bitfields, in the same
//order the matching instruction handlers receive them - confirm in disassembler.cpp
auto armDisassembleMoveToRegisterFromStatus(uint4, uint1) -> string;
|
|
|
|
auto armDisassembleMoveToStatusFromImmediate(uint8, uint4, uint4, uint1) -> string;
|
|
|
|
auto armDisassembleMoveToStatusFromRegister(uint4, uint4, uint1) -> string;
|
|
|
|
auto armDisassembleMultiply(uint4, uint4, uint4, uint4, uint1, uint1) -> string;
|
|
|
|
auto armDisassembleMultiplyLong(uint4, uint4, uint4, uint4, uint1, uint1, uint1) -> string;
|
2017-08-08 11:51:41 +00:00
|
|
|
//takes a single 24-bit value
auto armDisassembleSoftwareInterrupt(uint24) -> string;
|
2017-08-11 16:02:09 +00:00
|
|
|
//takes no fields at all
auto armDisassembleUndefined() -> string;
|
Update to v103r28 release.
byuu says:
Changelog:
- processor/arm7tdmi: implemented 10 of 19 ARM instructions
- processor/arm7tdmi: implemented 1 of 22 THUMB instructions
Today's WIP was 6 hours of work, and yesterday's was 5 hours.
Half of today was just trying to come up with the design to use a
lambda-based dispatcher to map both instructions and disassembly,
similar to the 68K core. The problem is that the ARM core has 28 unique
bits, which is just far too many bits to have a full lookup table like
the 16-bit 68K core.
The thing I wanted more than anything else was to perform the opcode
bitfield decoding once, and have it decoded for both instructions and
the disassembler. It took three hours to come up with a design that
worked for the ARM half ... relying on #defines being able to pull in
other #defines that were declared and changed later after the first
one. But, I'm happy with it. The decoding is in the table building, as
it is with the 68K core. The decoding does happen at run-time on each
instruction invocation, but it has to be done.
As to the THUMB core, I can create a 64K-entry lambda table to cover all
possible encodings, and ... even though it's a cache killer, I've
decided to go for it, given the outstanding performance it obtained in
the M68K core, as well as considering that THUMB mode is far more common
in GBA games.
As to both cores ... I'm a little torn between two extremes:
On the one hand, I can condense the number of ARM/THUMB instructions
further to eliminate more redundant code. On the other, I can split them
apart to reduce the number of conditional tests needed to execute each
instruction. It's really the disassembler that makes me not want to
split them up further ... as I have to split the disassembler functions
up equally to the instruction functions. But it may be worth it if it's
a speed improvement.
2017-08-07 12:20:35 +00:00
|
|
|
|
2017-08-08 11:51:41 +00:00
|
|
|
//THUMB disassembly routines: each returns the text for one instruction form
//NOTE(review): parameters are unnamed here; presumably the decoded opcode bitfields, in the same
//order the matching instruction handlers receive them - confirm in disassembler.cpp
auto thumbDisassembleALU(uint3, uint3, uint4) -> string;
|
2017-08-09 11:11:59 +00:00
|
|
|
auto thumbDisassembleALUExtended(uint4, uint4, uint2) -> string;
|
|
|
|
auto thumbDisassembleAddRegister(uint8, uint3, uint1) -> string;
|
2017-08-08 11:51:41 +00:00
|
|
|
auto thumbDisassembleAdjustImmediate(uint3, uint3, uint3, uint1) -> string;
|
Update to v103r28 release.
byuu says:
Changelog:
- processor/arm7tdmi: implemented 10 of 19 ARM instructions
- processor/arm7tdmi: implemented 1 of 22 THUMB instructions
Today's WIP was 6 hours of work, and yesterday's was 5 hours.
Half of today was just trying to come up with the design to use a
lambda-based dispatcher to map both instructions and disassembly,
similar to the 68K core. The problem is that the ARM core has 28 unique
bits, which is just far too many bits to have a full lookup table like
the 16-bit 68K core.
The thing I wanted more than anything else was to perform the opcode
bitfield decoding once, and have it decoded for both instructions and
the disassembler. It took three hours to come up with a design that
worked for the ARM half ... relying on #defines being able to pull in
other #defines that were declared and changed later after the first
one. But, I'm happy with it. The decoding is in the table building, as
it is with the 68K core. The decoding does happen at run-time on each
instruction invocation, but it has to be done.
As to the THUMB core, I can create a 64K-entry lambda table to cover all
possible encodings, and ... even though it's a cache killer, I've
decided to go for it, given the outstanding performance it obtained in
the M68K core, as well as considering that THUMB mode is far more common
in GBA games.
As to both cores ... I'm a little torn between two extremes:
On the one hand, I can condense the number of ARM/THUMB instructions
further to eliminate more redundant code. On the other, I can split them
apart to reduce the number of conditional tests needed to execute each
instruction. It's really the disassembler that makes me not want to
split them up further ... as I have to split the disassembler functions
up equally to the instruction functions. But it may be worth it if it's
a speed improvement.
2017-08-07 12:20:35 +00:00
|
|
|
//THUMB disassembly routines for the adjust and branch forms: each returns the text for one instruction form
//NOTE(review): parameters are unnamed here; presumably the decoded opcode bitfields, in the same
//order the matching instruction handlers receive them - confirm in disassembler.cpp
auto thumbDisassembleAdjustRegister(uint3, uint3, uint3, uint1) -> string;
|
2017-08-09 11:11:59 +00:00
|
|
|
auto thumbDisassembleAdjustStack(uint7, uint1) -> string;
|
2017-08-08 11:51:41 +00:00
|
|
|
auto thumbDisassembleBranchExchange(uint4) -> string;
|
2017-08-09 11:11:59 +00:00
|
|
|
//the far-branch prefix takes a signed 11-bit value, the suffix an unsigned one
auto thumbDisassembleBranchFarPrefix(int11) -> string;
|
|
|
|
auto thumbDisassembleBranchFarSuffix(uint11) -> string;
|
|
|
|
auto thumbDisassembleBranchNear(int11) -> string;
|
|
|
|
//NOTE(review): presumably a signed 8-bit displacement followed by a 4-bit condition code - confirm
auto thumbDisassembleBranchTest(int8, uint4) -> string;
|
2017-08-08 11:51:41 +00:00
|
|
|
//THUMB disassembly routines for the immediate, load/store, shift, software-interrupt and undefined forms;
//each returns the text for one instruction form
//NOTE(review): parameters are unnamed here; presumably the decoded opcode bitfields, in the same
//order the matching instruction handlers receive them - confirm in disassembler.cpp
auto thumbDisassembleImmediate(uint8, uint3, uint2) -> string;
|
2017-08-09 11:11:59 +00:00
|
|
|
auto thumbDisassembleLoadLiteral(uint8, uint3) -> string;
|
|
|
|
//the byte, half and word immediate moves share one signature
auto thumbDisassembleMoveByteImmediate(uint3, uint3, uint5, uint1) -> string;
|
|
|
|
auto thumbDisassembleMoveHalfImmediate(uint3, uint3, uint5, uint1) -> string;
|
|
|
|
auto thumbDisassembleMoveMultiple(uint8, uint3, uint1) -> string;
|
|
|
|
auto thumbDisassembleMoveRegisterOffset(uint3, uint3, uint3, uint3) -> string;
|
|
|
|
auto thumbDisassembleMoveStack(uint8, uint3, uint1) -> string;
|
|
|
|
auto thumbDisassembleMoveWordImmediate(uint3, uint3, uint5, uint1) -> string;
|
2017-08-08 11:51:41 +00:00
|
|
|
auto thumbDisassembleShiftImmediate(uint3, uint3, uint5, uint2) -> string;
|
2017-08-09 11:11:59 +00:00
|
|
|
//takes a single 8-bit value
auto thumbDisassembleSoftwareInterrupt(uint8) -> string;
|
|
|
|
auto thumbDisassembleStackMultiple(uint8, uint1, uint1) -> string;
|
2017-08-11 16:02:09 +00:00
|
|
|
//takes no fields at all
auto thumbDisassembleUndefined() -> string;
|
|
|
|
|
|
|
|
//ARM disassembly table: 4096 entries, each callable with the 32-bit opcode and returning text
//NOTE(review): same entry count as armInstruction; presumably indexed identically - confirm in the table builder
function<auto (uint32 opcode) -> string> armDisassemble[4096];
|
|
|
|
//THUMB disassembly table: 65536 entries, one slot per possible 16-bit value; entries take no argument and return text
function<auto () -> string> thumbDisassemble[65536];
|
2017-08-09 11:11:59 +00:00
|
|
|
|
|
|
|
//NOTE(review): declared right after the disassembler tables; presumably scratch state shared by the
//disassembly routines (the address being disassembled) - confirm in disassembler.cpp
uint32 _pc;
|
|
|
|
//NOTE(review): presumably a text fragment reused by the disassembly routines (condition suffix?) - confirm
string _c;
|
2017-08-06 13:36:26 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
}
|