Update to higan v091 release.

byuu says:

Basically just a project rename, with s/bsnes/higan and the new icon
from lowkee added in.

It won't compile on Windows because I forgot to update the resource.rc
file, and a path transform command isn't working on Windows.
It was really just meant as a starting point, so that v091 WIPs can flow
starting from .00 with the new name (it overshadows bsnes v091, so
publicly speaking this "shouldn't exist" and will probably be deleted
from Google Code when v092 is ready.)
This commit is contained in:
Tim Allen 2012-08-16 20:30:47 +10:00
parent 7f404e6edb
commit 94b2538af5
1002 changed files with 1100 additions and 11590 deletions

Binary file not shown.

Before

Width:  |  Height:  |  Size: 22 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 KiB

View File

@ -1,16 +0,0 @@
# Object list for the Nintendo DS core; appended to the global `objects` list.
nds_objects := nds-interface
nds_objects += nds-cpu nds-apu nds-ppu nds-gpu nds-video
nds_objects += nds-memory nds-system nds-slot1 nds-slot2 nds-wifi
objects += $(nds_objects)
# One object per component, built from <component>/<component>.cpp.
# Each rule also depends on $(call rwildcard,<dir>) -- presumably a recursive
# wildcard helper defined elsewhere, so any file under the directory triggers
# a rebuild. $(nds) is expected to be set by the including makefile.
obj/nds-interface.o: $(nds)/interface/interface.cpp $(call rwildcard,$(nds)/interface)
obj/nds-cpu.o: $(nds)/cpu/cpu.cpp $(call rwildcard,$(nds)/cpu)
obj/nds-apu.o: $(nds)/apu/apu.cpp $(call rwildcard,$(nds)/apu)
obj/nds-ppu.o: $(nds)/ppu/ppu.cpp $(call rwildcard,$(nds)/ppu)
obj/nds-gpu.o: $(nds)/gpu/gpu.cpp $(call rwildcard,$(nds)/gpu)
obj/nds-video.o: $(nds)/video/video.cpp $(call rwildcard,$(nds)/video)
obj/nds-memory.o: $(nds)/memory/memory.cpp $(call rwildcard,$(nds)/memory)
obj/nds-system.o: $(nds)/system/system.cpp $(call rwildcard,$(nds)/system)
obj/nds-slot1.o: $(nds)/slot1/slot1.cpp $(call rwildcard,$(nds)/slot1)
obj/nds-slot2.o: $(nds)/slot2/slot2.cpp $(call rwildcard,$(nds)/slot2)
obj/nds-wifi.o: $(nds)/wifi/wifi.cpp $(call rwildcard,$(nds)/wifi)

View File

@ -1,403 +0,0 @@
#include <nds/nds.hpp>
namespace NintendoDS {
APU apu;
void APU::power() {
for(unsigned n = 0; n < 16; n++) {
auto &v = voices[n];
v.enable = false;
v.hold = false;
v.running = false;
v.format = Voice::PCM8;
v.duty = 0;
v.limit = 0;
v.panning = 0x40;
v.volumeBase = 0x7f;
v.volumeExp = 0;
v.amplitude = 0x7f0;
v.source = 0;
v.length = 0;
v.counter = 0;
v.sample = 0;
v.init.source = 0;
v.init.counter = 0;
v.init.length1 = 0;
v.init.length2 = 0;
v.event.action = [&, n]() { stepVoice(n); };
}
// Audio runs at 16.8MHz. I've somewhat arbitrarily set
// it up so that things run in the following order:
// - channel read + update (+0)
// - mixer (+1)
// - eventually, capture (+2)
//
// Mixer runs at 33Khz = 66 MHz / 2048
mixEvent.action = [&]() { stepMixer(); };
arm7.event.queue.add(2048+1, mixEvent);
powered = true;
}
void APU::stepMixer() {
arm7.event.queue.add(2048, mixEvent);
int64 l = 0, r = 0;
for(unsigned n = 0; n < 16; n++) {
auto &v = voices[n];
int64 s = v.sample * v.amplitude;
l += s * (128 - v.panning);
r += s * (0 + v.panning);
}
l = sclamp<16>(l / 0x80000);
r = sclamp<16>(r / 0x80000);
interface->audioSample(l, r);
}
void APU::stepVoice(unsigned no) {
auto &v = voices[no];
uint32 t = arm7.event.queue.time;
if(v.format == Voice::PCM8) stepPCM8(no);
else if(v.format == Voice::PCM16) stepPCM16(no);
else if(v.format == Voice::ADPCM4) stepADPCM4(no);
else if(v.format == Voice::PSG && no >= 14) stepNoise(no);
else if(v.format == Voice::PSG && no >= 8) stepPulse(no);
v.counter = v.init.counter;
uint32 fetchTime = arm7.event.queue.time - t;
uint32 nextSample = 4*(0x10000 - v.counter);
nextSample -= fetchTime;
if(unsigned k = t & 3)
nextSample += 4 - k;
// When this happens, the buffering can't keep up. It isn't clear
// what the real system will do, but it can't be anything good...
if(nextSample >= 0x80000000)
nextSample = 0;
// This requires further thought and investigation.
// Do buffering issues delay the playback timer? (hopefully not)
// Do audio DMAs interleave in any way? (doubtful; it'd be 10x slower)
if(v.running)
arm7.event.queue.add(nextSample, v.event);
}
void APU::stepPulse(unsigned no) {
auto &v = voices[no];
uint3 step = v.state--;
v.sample = step > v.duty? -0x7fff : +0x7fff;
}
void APU::stepNoise(unsigned no) {
auto &v = voices[no];
bool out = v.state & 1;
v.state >>= 1;
v.state ^= 0x6000*out;
v.sample = out? -0x7fff : +0x7fff;
}
void APU::stepPCM8(unsigned no) {
auto &v = voices[no];
checkLength(no);
fillBuffer(no);
v.sample = 0x100*int8(v.buffer[v.index/8] >> 4*(v.index & 6));
v.index += 2;
v.length -= 1;
}
void APU::stepPCM16(unsigned no) {
auto &v = voices[no];
checkLength(no);
fillBuffer(no);
v.sample = int16(v.buffer[v.index/8] >> 4*(v.index & 4));
v.index += 4;
v.length -= 2;
}
// 4-bit ADPCM decoder: consumes one nibble from the voice buffer per call and
// updates v.sample by a delta taken from the step table at index v.state.
void APU::stepADPCM4(unsigned no) {
auto &v = voices[no];
// Step sizes; 89 entries, indexed by v.state (kept within 0..88 below).
static const int16 table[] = {
0x0007,0x0008,0x0009,0x000a,0x000b,0x000c,0x000d,0x000e,
0x0010,0x0011,0x0013,0x0015,0x0017,0x0019,0x001c,0x001f,
0x0022,0x0025,0x0029,0x002d,0x0032,0x0037,0x003c,0x0042,
0x0049,0x0050,0x0058,0x0061,0x006b,0x0076,0x0082,0x008f,
0x009d,0x00ad,0x00be,0x00d1,0x00e6,0x00fd,0x0117,0x0133,
0x0151,0x0173,0x0198,0x01c1,0x01ee,0x0220,0x0256,0x0292,
0x02d4,0x031c,0x036c,0x03c3,0x0424,0x048e,0x0502,0x0583,
0x0610,0x06ab,0x0756,0x0812,0x08e0,0x09c3,0x0abd,0x0bd0,
0x0cff,0x0e4c,0x0fba,0x114c,0x1307,0x14ee,0x1706,0x1954,
0x1bdc,0x1ea5,0x21b6,0x2515,0x28ca,0x2cdf,0x315b,0x364b,
0x3bb9,0x41b2,0x4844,0x4f7e,0x5771,0x602f,0x69ce,0x7462,
0x7fff
};
checkLength(no);
fillBuffer(no);
// state == 0x7fff is the marker checkEnable() sets when a voice starts:
// the first buffered word is a header holding the initial sample (low 16
// bits) and the initial table index (bits 16..22, capped at 88).
if(v.state == 0x7fff) {
uint32 header = v.buffer[0];
// Minimum length is 4 words anyway, but check this later..
v.sample = header>>0;
v.state = min(88, header>>16 & 0x7f);
// Skip the 8 header nibbles (4 bytes).
v.index += 8;
v.length -= 4;
}
// Current nibble as a signed 4-bit value; the sign selects add vs. subtract.
int4 s = v.buffer[v.index/8] >> 4*(v.index & 7);
// The table index is post-decremented here; if bit 2 of the nibble is set it
// is raised again by 2*(s & 3) + 3 below, then clamped to 0..88.
int16 x = table[v.state--];
int16 delta = x/8;
if(s & 1) delta += x/4;
if(s & 2) delta += x/2;
if(s & 4) delta += x/1, v.state += 2*(s & 3) + 3;
// Saturate the output to +/-0x7fff.
if(s < 0) v.sample = max(-0x7fff, v.sample - delta);
else v.sample = min(+0x7fff, v.sample + delta);
v.state = max(0, min(88, v.state));
// Two nibbles per byte: only count a byte of length after every second one.
if(++v.index & 1) return;
v.length -= 1;
}
void APU::stopVoice(unsigned no) {
auto &v = voices[no];
arm7.event.queue.remove(v.event);
v.running = false;
if(v.hold == false)
v.sample = 0;
}
void APU::fillBuffer(unsigned no) {
auto &v = voices[no];
if(v.index) return;
for(unsigned i = 0; i < 4; i++, v.source += 4)
v.buffer[i] = arm7.read(v.source, Word, true);
}
void APU::checkLength(unsigned no) {
auto &v = voices[no];
if(v.length) return;
if(v.loop.source == 0xffffffff) {
// We're passing the loop point for the first time. In particular
// the hardware will store the ADPCM state. For this reason it's not
// possible to stream ADPCM audio without software assist. :(
v.length = v.loop.length;
v.loop.sample = v.sample;
v.loop.state = v.state;
v.loop.source = v.source;
if(v.index) // Account for buffer
v.loop.source += 4*(v.index/8 - 4);
if(v.limit && v.length < 4)
return stopVoice(no);
}
else {
if(v.limit & Voice::once)
return stopVoice(no);
// There are still issues with loops here.. or possibly
// readback of the voice status.
if(v.limit & Voice::looped) {
v.source = v.loop.source;
v.length = v.loop.length;
v.sample = v.loop.sample;
v.state = v.loop.state;
v.index = 0;
}
}
}
// Brings voice `no` in line with (master enable && voice enable): stops it if
// it should not be running, or schedules a two-stage start if it should.
void APU::checkEnable(unsigned no) {
auto &v = voices[no];
// maxmod's interpolated mode expects this undocumented behavior. Instead
// of software mixing, it resamples each voice's audio into a small buffer.
// Master enable is used to trigger every voice at the same time.
if(v.running == (enable && v.enable)) return;
// State changed: always cancel any pending event first.
stopVoice(no);
v.running = enable && v.enable;
if(!v.running) return;
// First event fires one sample period from now.
uint32 next = 4*(0x10000 - v.init.counter);
if(unsigned k = arm7.event.queue.time & 3)
next += 4 - k; // Align to 16MHz audio clock
arm7.event.queue.add(next, v.event);
// After 1 sample..
v.length = 1;
// Temporary start-up action. It latches the init registers, schedules the
// first real sample, and then replaces itself with the normal stepVoice
// action as its final statement.
// NOTE(review): `v` is a local reference captured by reference; this relies
// on the capture referring to voices[no] itself -- confirm with the compiler
// in use.
v.event.action = [&, no]() {
uint32 period = 4*(0x10000 - v.init.counter);
// Start the channel. From testing, it seems impossible to adjust
// length1/2 afterwards, so the settings must be latched somewhere.
// The loop flags can be changed though.
v.source = v.init.source;
v.counter = v.init.counter;
v.length = 4*v.init.length1;
v.loop.length = 4*v.init.length2;
// 0xffffffff marks "loop point not reached yet" for checkLength().
v.loop.source = 0xffffffff;
v.sample = 0;
v.index = 0;
// 0x7fff marks "ADPCM header not read yet" for stepADPCM4().
v.state = 0x7fff;
// PCM has a couple more samples of startup latency.
// ADPCM takes a further 8 samples to process the header..
switch(v.format) {
case Voice::PCM8: // add further startup latency
case Voice::PCM16: period = 3*period; break;
case Voice::ADPCM4: period = 11*period; break;
case Voice::PSG: period = 1*period; break;
}
unsigned k = arm7.event.queue.time & 3;
if(k) period += 4 - k;
arm7.event.queue.add(period, v.event);
v.event.action = [&, no]() { stepVoice(no); };
};
}
// Reads back the sound control register: volume in bits 0..6, output
// selectors at bits 8 and 10, DSP mute bits at 12 and 13, master enable at 15.
uint32 APU::regControl() {
  uint32 value = 0;
  value |= volume     << 0;
  value |= output[0]  << 8;
  value |= output[1]  << 10;
  value |= muteDsp[0] << 12;
  value |= muteDsp[1] << 13;
  value |= enable     << 15;
  return value;
}
// Reads back the 10-bit sound bias value.
uint32 APU::regBias() {
  const uint32 value = bias;
  return value;
}
// Reads back the capture enables: unit 0 in bit 7, unit 1 in bit 15.
uint32 APU::regCaptureControl() {
  uint32 value = 0;
  value |= capture[0] << 7;
  value |= capture[1] << 15;
  return value;
}
// Reads back the destination address of capture unit `no`.
uint32 APU::regCaptureDest(unsigned no) {
  const uint32 address = captureDest[no];
  return address;
}
// Writes the sound control register. The low byte carries the master volume;
// the high byte carries output routing, DSP mutes and the master enable.
void APU::regControl(uint32 data, uint32 mask) {
  if(mask & 0x00ff) {
    volume = data >> 0;
  }
  if((mask & 0xff00) == 0) return;

  output[0]  = data >> 8;
  output[1]  = data >> 10;
  muteDsp[0] = data >> 12;
  muteDsp[1] = data >> 13;
  enable     = data >> 15;
  if(!enable) return;

  // Start any pending voices - maxmod uses this for interpolated mode.
  // What it does is:
  // - clear the master enable
  // - enable all 16 channels (won't do anything yet)
  // - set the master enable (this will trigger all 16 at once)
  // - wait ~16K clocks @ 33Mhz (16 samples), then set the mixing timer.
  for(unsigned voice = 0; voice < 16; voice++) {
    checkEnable(voice);
  }
}
// Writes the sound bias register.
//   data - value to write, already aligned to the register's bit positions
//   mask - bits of the register covered by this access
// Only the bits selected by `mask` are replaced, so a byte-wide access leaves
// the rest of the 10-bit value untouched. With a full mask this is the same
// as the previous `bias = data`.
void APU::regBias(uint32 data, uint32 mask) {
  bias = (bias & ~mask) | (data & mask);
}
// Writes the capture control register.
//   data - value to write, already aligned to the register's bit positions
//   mask - bits of the register covered by this access
// Capture unit 0 lives in the low byte (enable = bit 7) and unit 1 in the
// next byte (enable = bit 15). Each enable is updated only when its byte is
// part of the access, so a byte write to one unit no longer clears the other.
void APU::regCaptureControl(uint32 data, uint32 mask) {
  if(mask & 0x00ff) capture[0] = data >> 7;
  if(mask & 0xff00) capture[1] = data >> 15;
}
// Writes the destination address of capture unit `no`.
//   data - value to write, already aligned to the register's bit positions
//   mask - bits of the register covered by this access
// Only the bits selected by `mask` are replaced, so byte and halfword
// accesses preserve the remainder of the address.
void APU::regCaptureDest(unsigned no, uint32 data, uint32 mask) {
  captureDest[no] = (captureDest[no] & ~mask) | (data & mask);
}
// Writes the length of capture unit `no`.
//   data - value to write, already aligned to the register's bit positions
//   mask - bits of the register covered by this access
// Only the bits selected by `mask` are replaced, so byte and halfword
// accesses preserve the remainder of the value.
void APU::regCaptureLength(unsigned no, uint32 data, uint32 mask) {
  captureLength[no] = (captureLength[no] & ~mask) | (data & mask);
}
// Reads back the control register of voice `no`. Bit 31 reports whether the
// voice is currently running rather than the stored enable bit.
uint32 APU::regVoiceControl(unsigned no) {
  auto &voice = voices[no];
  uint32 value = 0;
  value |= voice.volumeBase << 0;
  value |= voice.volumeExp  << 8;
  value |= voice.hold       << 15;
  value |= voice.panning    << 16;
  value |= voice.duty       << 24;
  value |= voice.limit      << 27;
  value |= voice.format     << 29;
  value |= (voice.running)  << 31;
  return value;
}
void APU::regVoiceControl(unsigned no, uint32 data, uint32 mask) {
auto &v = voices[no];
int exponent[] = { 4, 3, 2, 0 };
if(mask & 0x0000007f) {
v.volumeBase = data >> 0;
v.amplitude = v.volumeBase << exponent[v.volumeExp];
}
if(mask & 0x00008300) {
v.volumeExp = data >> 8;
v.hold = data >> 15;
v.amplitude = v.volumeBase << exponent[v.volumeExp];
}
if(mask & 0x007f0000) {
v.panning = data >> 16;
}
if(mask & 0xff000000) {
v.duty = data >> 24;
v.limit = data >> 27;
v.format = data >> 29;
v.enable = data >> 31;
checkEnable(no);
}
}
void APU::regVoiceSource(unsigned no, uint32 data, uint32 mask) {
auto &v = voices[no];
v.init.source = data & 0x07fffffc;
}
void APU::regVoicePeriod(unsigned no, uint32 data, uint32 mask) {
auto &v = voices[no];
if(mask & 0x0000ffff) v.init.counter ^= (v.init.counter ^ data>>0) & mask>>0;
if(mask & 0xffff0000) v.init.length1 ^= (v.init.length1 ^ data>>16) & mask>>16;
}
void APU::regVoiceLength(unsigned no, uint32 data, uint32 mask) {
auto &v = voices[no];
v.init.length2 ^= (v.init.length2 ^ data) & mask & 0x3fffff;
}
}

View File

@ -1,100 +0,0 @@
// Nintendo DS audio unit: 16 voices feeding a stereo mixer, plus state for
// two capture units. Driven by events on the ARM7 event queue.
struct APU {
// Reset to power-on state and schedule the mixer.
void power();
// Event handlers: mixer tick and per-voice sample tick.
void stepMixer();
void stepVoice(unsigned no);
// Per-format sample decoders / generators, called from stepVoice().
void stepPCM8(unsigned no);
void stepPCM16(unsigned no);
void stepADPCM4(unsigned no);
void stepPulse(unsigned no);
void stepNoise(unsigned no);
// Voice lifecycle helpers.
void stopVoice(unsigned no);
void fillBuffer(unsigned no);
void checkLength(unsigned no);
void checkEnable(unsigned no);
// Register reads.
uint32 regControl();
uint32 regBias();
uint32 regCaptureControl();
uint32 regCaptureDest(unsigned no);
uint32 regVoiceControl(unsigned no);
// Register writes; `mask` marks the bits covered by the access.
void regControl(uint32 data, uint32 mask);
void regBias(uint32 data, uint32 mask);
void regCaptureControl(uint32 data, uint32 mask);
void regCaptureDest(unsigned no, uint32 data, uint32 mask);
void regCaptureLength(unsigned no, uint32 data, uint32 mask);
void regVoiceControl(unsigned no, uint32 data, uint32 mask);
void regVoiceSource(unsigned no, uint32 data, uint32 mask);
void regVoicePeriod(unsigned no, uint32 data, uint32 mask);
void regVoiceLength(unsigned no, uint32 data, uint32 mask);
// State of a single voice.
struct Voice {
// enable: bit written by software; running: actually playing;
// hold: keep the last sample when the voice stops.
uint1 enable, hold, running;
uint2 format; enum { PCM8, PCM16, ADPCM4, PSG };
uint3 duty;
uint2 limit; enum { looped=1, once=2 };
uint7 panning;
// Volume is essentially some kind of float format.
// The effective value is base * 2^-(4 >> (3-exponent)).
uint7 volumeBase;
uint2 volumeExp;
// Register values as written by software; copied into the live fields
// when the voice starts.
struct {
uint32 source;
uint16 counter;
uint16 length1;
uint32 length2;
} init;
// Snapshot taken when the loop point is first passed; restored on loop.
// source == 0xffffffff means the loop point has not been reached yet.
struct {
uint32 source;
uint32 length;
int16 sample;
int16 state;
} loop;
// Live playback position.
uint32 source;
uint32 length;
uint16 counter;
// Queue entry whose action produces the next sample.
Event event;
uint5 index; // nibble index into 16-byte buffer
uint32 buffer[4]; // holds 8, 16, or 32 buffered samples
int16 state; // used for pulse, noise, and ADPCM
// volumeBase shifted according to volumeExp; used by the mixer.
int32 amplitude;
// Current output sample.
int16 sample;
} voices[16];
// Queue entry for the periodic mixer tick.
Event mixEvent;
// Voices 0 + 2 (L,R) are designated here as streaming audio.
// Voices 1 + 3 (L,R) are meant for DSP effects. Both are optional though.
uint1 powered;
uint1 enable;
uint10 bias;
uint7 volume;
uint2 output[2]; enum { srcMixer, srcDspL, srcDspR, srcDspMono };
uint1 muteDsp[2]; // Don't send 1+3 to the mixer - avoids feedback.
uint1 dspToStream[3]; // Mix 1+3 back into 0+2.
// Capture enables writing mixed audio to RAM - and in conjunction with
// voices 1 + 3, allows software DSP such as filtering or echo effects.
uint1 capture[2]; // Enable capturing
uint1 captureStream[2]; // ..otherwise mixer outputs
uint32 captureDest[2];
uint32 captureLength[2];
uint32 captureCount[2];
};
extern APU apu;
extern APU apu;

View File

@ -1,230 +0,0 @@
// Status register write. opcode 0 targets CPSR (delegated to writeCpsr),
// opcode 1 targets the current mode's SPSR. For SPSR, mask bit 3 selects the
// top byte and mask bit 0 the bottom byte; USR and SYS modes are ignored.
void ARMCore::armWritePsr(uint1 opcode, uint4 mask, uint32 rm) {
  r[15] += 4;

  if(opcode == 0) {
    writeCpsr(rm, mask);
    return;
  }

  if(opcode == 1) {
    if(mode == USR || mode == SYS) return;

    uint32 writable = 0;
    if(mask & 8) writable |= 0xff000000;
    if(mask & 1) writable |= 0x000000ff;
    spsr() ^= (spsr() ^ rm) & writable;
  }
}
// Status register read into r[ird]. opcode 0 reads CPSR; opcode 1 reads the
// current mode's SPSR and is a no-op in USR and SYS modes. Writing r15
// triggers a branch.
void ARMCore::armReadPsr(uint1 opcode, uint4 ird) {
  r[15] += 4;

  if(opcode == 0) {
    r[ird] = readCpsr();
  } else {
    if(mode == USR || mode == SYS) return;
    r[ird] = spsr();
  }

  if(ird == 15) {
    branch(0, r[15]);
  }
}
// Relative branch. With `link` set, r14 receives r15 - 4 first; `exch` is
// passed through as the first argument of branch().
void ARMCore::armBranch(uint1 link, uint1 exch, int26 offset) {
  if(link) {
    r[14] = r[15] - 4;
  }
  branch(exch, r[15] + offset);
}
// Branch to the address in r[irm]; bit 0 of that address is passed as the
// first argument of branch(). With `link` set, r14 receives r15 - 4 before
// r[irm] is read.
void ARMCore::armBranchEx(uint1 link, uint4 irm) {
  if(link) {
    r[14] = r[15] - 4;
  }
  // Read the target only after the link register update, as before.
  branch(r[irm] & 1, r[irm]);
}
// Count leading zeros of r[irm] into r[ird]; a zero input yields 32.
void ARMCore::armClz(uint4 ird, uint4 irm) {
  auto &dest = r[ird];
  auto value = r[irm];  // working copy; shifted left while counting
  r[15] += 4;

  if(value == 0) {
    dest = 32;
    return;
  }

  dest = 0;
  for(; ~value & 1<<31; value <<= 1) {
    dest++;
  }
}
// Saturating add/subtract (qadd, qsub, qdadd, qdsub per the inline comments).
// opcode bit 1 doubles rm with saturation first; opcode bit 0 selects
// subtraction. Qf is set whenever a saturation occurs.
// NOTE(review): `oflow` is defined elsewhere; from its use here, bit 31 of
// its result is taken to signal signed overflow -- confirm.
void ARMCore::armDspAdd(uint2 opcode, uint4 ird, uint4 irn, uint4 irm) {
// rd aliases the destination register; rn and rm are copies.
auto &rd = r[ird], rn = r[irn], rm = r[irm];
r[15] += 4;
if(opcode & 2) { // qdadd/qdsub
// rd temporarily holds the undoubled operand for the overflow test.
rd = rm, rm += rm;
if(oflow(rm,rd,rd) & 1<<31)
Qf = -1, rm = 0x80000000 - (rm>>31);
}
// For subtraction rm is complemented after use so the overflow test below
// can be shared with the addition case.
if(opcode & 1) rd = rn - rm, (rm = ~rm); // qsub
else rd = rn + rm; // qadd
if(oflow(rd,rn,rm) & 1<<31)
Qf = -1, rd = 0x80000000 - (rd>>31);
if(ird == 15) branch(0, r[15]);
}
// Signed halfword multiplies. opcode selects the variant (see the inline
// comments); xy selects which halfword of each operand is used.
// NOTE(review): `oflow` is defined elsewhere; its result is OR-ed into Qf.
void ARMCore::armDspMul(uint2 opcode, uint2 xy, uint4 ird, uint4 irn, uint4 irm, uint4 irs) {
auto rn = r[irn];
r[15] += 4;
if(opcode == 1) { // smulwy, smlawy
// 32 x 16 multiply keeping bits 16..47 of the product; xy bit 1 picks the
// halfword of rs.
int32 rm = r[irm];
int16 rs = r[irs] >> 16*(xy>>1);
uint32 rd = (int64) rm * rs >> 16;
// xy bit 0 set: also add rn.
if(xy & 1) {
uint32 ra = rd + rn;
Qf |= oflow(ra,rd,rn);
rd = ra;
}
r[ird] = rd;
}
else { // smulxy(3), smlaxy(0), smlalxy(2)
// 16 x 16 multiply; xy bit 0 picks the halfword of rm, bit 1 that of rs.
int16 rm = r[irm] >> 16*(xy & 1);
int16 rs = r[irs] >> 16*(xy>>1);
int64 rd = (int64) rm * rs;
if(opcode == 0) Qf |= oflow(rd+rn, rd, rn); // smlaxy
if(opcode != 3 /*accumulate*/) rd += rn; // smlaxy, smlalxy
if(opcode == 2 /*long*/) {
// Long form: low word goes to r[irn], high word (plus r[ird]) to r[ird].
r[irn] = rd; rd >>= 32;
rd += r[ird];
if(irn == 15) branch(0, r[15]);
}
r[ird] = rd;
}
if(ird == 15) branch(0, r[15]);
}
// Multiply / multiply-accumulate, 32-bit or long.
// opcode bits: 8 = long result, 4 = signed (long only), 2 = accumulate,
// 1 = update flags. For long forms r[irn] receives the low word and r[ird]
// the high word.
void ARMCore::armMultiply(uint4 opcode, uint4 ird, uint4 irn, uint4 irm, uint4 irs) {
bool long_mul = opcode & 8, accumulate = opcode & 2;
bool signed_mul = opcode & 4, setf = opcode & 1;
// Operands are copied before r15 is advanced.
auto rm = r[irm], rs = r[irs], rn = r[irn];
// The product is always formed unsigned; signed long forms are corrected
// further down.
int64 rd = (uint64) rm * rs;
r[15] += 4;
// Zf accumulates the result words via OR below, so it starts at 0.
if(setf) Zf = 0;
if(accumulate) rd += rn;
if(long_mul) {
r[irn] = rd; rd >>= 32;
if(setf) Zf |= r[irn];
if(accumulate) rd += r[ird];
if(signed_mul) {
// Adjust the high word of the unsigned product for each operand that has
// its sign bit set.
if(rm & 1<<31) rd -= rs;
if(rs & 1<<31) rd -= rm;
}
}
r[ird] = rd;
if(setf) Nf = r[ird], Zf |= r[ird];
if(ird == 15) branch(0, r[15]);
}
// Data-processing instruction with an already-evaluated second operand.
// When the destination is r15, the flag-setting form (opcode bit 0) restores
// CPSR from SPSR before the branch is taken.
void ARMCore::armData(uint5 opcode, uint4 ird, uint4 irn, SOut rm) {
  auto rn = r[irn];  // read before r15 is advanced
  r[15] += 4;
  alu(opcode, r[ird], rn, rm);

  if(ird != 15) return;

  if(opcode & 1) {
    writeCpsr(spsr(), 0xf);
  }
  // use Tf here because MOVS/SUBS changes it
  branch(Tf, r[15]);
}
// Data-processing instruction whose second operand is r[irm] shifted by the
// amount in r[irs]; `sh` selects lsl / lsr / asr / ror. The shift amount is
// read before r15 is advanced, the other operands after.
void ARMCore::armDataRs(uint5 opcode, uint4 ird, uint4 irn, uint4 irm, uint2 sh, uint4 irs) {
  auto amount = r[irs];
  r[15] += 4;

  switch(sh) {
  case 0: alu(opcode, r[ird], r[irn], lsl(r[irm], amount)); break;
  case 1: alu(opcode, r[ird], r[irn], lsr(r[irm], amount)); break;
  case 2: alu(opcode, r[ird], r[irn], asr(r[irm], amount)); break;
  case 3: alu(opcode, r[ird], r[irn], ror(r[irm], amount)); break;
  }

  if(ird != 15) return;

  if(opcode & 1) {
    writeCpsr(spsr(), 0xf);
  }
  branch(Tf, r[15]);
}
// Swap: loads from the address in r[irn], stores r[irm] to the same address,
// then writes the loaded value to r[ird]. opcode selects byte vs. word.
void ARMCore::armMemSwap(uint1 opcode, uint4 ird, uint4 irn, uint4 irm) {
  auto address = r[irn];
  r[15] += 4;

  const auto width = opcode? Byte : Word;
  uint32 loaded = load(address, width);
  store(address, width, r[irm]);
  r[ird] = loaded;

  if(ird == 15) {
    branch(r[15] & 1, r[15]);
  }
}
// Single word/byte load or store with offset `rm`.
// opcode bits: 0x10 = pre-indexed, 0x08 = add offset (else subtract),
// 0x04 = byte (else word), 0x02 = writeback, 0x01 = load (else store).
void ARMCore::armMem(uint5 opcode, uint4 ird, uint4 irn, uint32 rm) {
// Both are references: base writeback and the load result go straight into
// the register file.
auto &rd = r[ird], &rn = r[irn];
// Address arithmetic uses the base value from before r15 is advanced.
uint32 update = opcode & 0x08? rn+rm : rn-rm;
uint32 addr = opcode & 0x10? update : rn;
r[15] += 4;
// Post-indexed accesses always write the base back; pre-indexed only when
// the writeback bit is set. This happens before the transfer itself.
if((opcode & 2) || !(opcode & 0x10)) rn = update;
if(opcode & 1) rd = load(addr, opcode & 4? Byte : Word); // ldr, ldrb
else store(addr, opcode & 4? Byte : Word, rd); // str, strb
if(ird == 15) branch(r[15] & 1, r[15]);
}
// Halfword and signed load/store with offset `rm`.
// opcode bits as in armMem (0x10 pre-index, 0x08 add, 0x02 writeback,
// 0x01 load); `sh` selects the transfer type (see inline comments).
void ARMCore::armMem_v4(uint5 opcode, uint2 sh, uint4 ird, uint4 irn, uint32 rm) {
auto &rd = r[ird], &rn = r[irn];
uint32 update = opcode & 0x08? rn+rm : rn-rm;
uint32 addr = opcode & 0x10? update : rn;
r[15] += 4;
// Base writeback happens before the transfer.
if((opcode & 2) || !(opcode & 0x10)) rn = update;
if(opcode & 1) {
if(sh == 1) rd = load(addr, Half); // ldrh
if(sh == 3) rd = (int16) load(addr, Half); // ldrsh
if(sh == 2) rd = (int8) load(addr, Byte); // ldrsb
if(ird == 15) branch(r[15] & 1, r[15]);
} else {
// Only the halfword store is handled here; other sh values do nothing.
if(sh == 1) store(addr, Half, rd); // strh
}
}
// Doubleword load/store (ldrd/strd per the inline comments) with offset `rm`.
// Only acts when opcode bit 0 is clear; `sh` then selects load vs. store.
// The register pair is r[ird & ~1] (low word) and r[ird | 1] (high word).
void ARMCore::armMem_v5(uint5 opcode, uint2 sh, uint4 ird, uint4 irn, uint32 rm) {
auto &rd = r[ird], &rn = r[irn];
uint32 update = opcode & 0x08? rn+rm : rn-rm;
uint32 addr = opcode & 0x10? update : rn;
r[15] += 4;
// Base writeback happens before the transfer.
if((opcode & 2) || !(opcode & 0x10)) rn = update;
if(~opcode & 1) {
if(sh == 3) { store(addr, Word, r[ird&~1]); store(addr+4, Word, r[ird|1]); } // strd
if(sh == 2) { r[ird&~1] = load(addr, Word); r[ird|1] = load(addr+4, Word); // ldrd
// A pair of r14/r15 loads the PC, so branch afterwards.
if(ird >= 14) branch(r[15] & 1, r[15]); }
}
}
// Block transfer (load/store multiple) of the registers set in `rlist`.
// opcode bits: 0x18 = addressing mode, 0x08 = upward, 0x04 = user-bank
// transfer, 0x02 = writeback, 0x01 = load (else store).
void ARMCore::armBlock(uint5 opcode, uint4 irn, uint16 rlist) {
unsigned index = opcode & 0x18, up = opcode & 0x08;
bool writeback = opcode & 0x02, ld = opcode & 0x01;
// The user-bank form does not apply to a load that includes r15.
bool user = (opcode & 4) && !(ld && (rlist & 1<<15));
auto &rn = r[irn];
// size = 4 bytes per listed register (bit::count presumably counts set bits).
uint32 addr = rn, base = rn, size = 4*bit::count(rlist);
r[15] += 4;
// Compute the lowest address to be accessed; registers are then transferred
// in ascending order. Mode 0x08 (increment after) starts at the base itself.
if(index == 0x00) addr += 4 - size; // da
if(index == 0x10) addr += 0 - size; // db
if(index == 0x18) addr += 4; // ib
// NOTE(review): swapBank(mode) is called before and after the transfer;
// presumably it toggles between the current and user register banks.
if(user) swapBank(mode);
for(unsigned b = 0, s = 0; b < 16; b++) {
if(~rlist & 1<<b) continue;
// `s` is 0 for the first access and nonzero afterwards; it is passed as the
// third argument of read/write.
if(ld==0) { write(addr, Word, s++, r[b]); }
if(ld==1) { r[b] = read(addr, Word, s++); }
// Base writeback is applied once, right after the first register transfer.
if(writeback) {
writeback = false;
rn = up? base + size : base - size;
}
addr += 4;
}
if(user) swapBank(mode);
if(ld && (rlist & 1<<15)) branch(r[15] & 1, r[15]);
}

View File

@ -1,565 +0,0 @@
// Allocates a zero-filled 16KB BIOS image.
ARM7TDMI::ARM7TDMI() {
  bios.size = 0x004000;
  bios.data = new uint32[bios.size/4]();
  memset(bios.data, 0, bios.size);
}
// Cothread entry point: runs the global ARM7 instance's main loop.
void ARM7TDMI::Thread() {
  arm7.main();
}
// Recreates the CPU cothread and resets ARM7-side peripheral state (SPI, RTC
// GPIO, SIO) before delegating to the shared CPU core reset.
void ARM7TDMI::power() {
  // 33513982 Hz
  if(thread) {
    co_delete(thread);
  }
  thread = co_create(262144 * sizeof(void*), ARM7TDMI::Thread);

  vectorBase     = 0x00000000;
  bxWithLoadedPC = false;
  booted         = 0;
  flag300        = 0;

  // SPI bus
  spi.enable  = 0;
  spi.hold    = 0;
  spi.irq     = 0;
  spi.size    = 0;
  spi.divider = 0;
  spi.device  = SPI::none;
  spi.data    = 0;

  // Real-time clock GPIO
  rtc.in[1]  = 0xf;
  rtc.in[0]  = rtc.in[1];
  rtc.out[1] = 0x0;
  rtc.out[0] = rtc.out[1];
  rtc.dir[1] = 0x0;
  rtc.dir[0] = rtc.dir[1];
  rtc.buffer = 0;
  rtc.index  = 0;

  // Serial I/O
  sio.in   = 0;
  sio.out  = 0;
  sio.dir  = 0;
  sio.irq  = false;
  sio.mode = 0;

  CPUCore::power();
  trace = false;
}
// Computes a 16-bit CRC over `size` bytes starting at `data`, seeded with
// `initial`. Each input byte is XOR-ed into the accumulator, followed by
// eight shift steps using a per-step feedback table.
uint16 crc16(uint8* data, unsigned size, uint16 initial) {
  static const uint16 feedbackTable[] = {
    0xa001,0xf001,0xd801,0xcc01,0xc601,0xc301,0xc181,0xc0c1
  };

  uint32 acc = initial;
  for(unsigned pos = 0; pos < size; pos++) {
    acc ^= data[pos];
    for(int step = 7; step >= 0; step--) {
      // Feedback is applied only when the bit about to be shifted out is set.
      const uint32 feedback = (acc & 1) * (feedbackTable[step] << step);
      acc = (acc >> 1) ^ feedback;
    }
  }
  return acc;
}
void ARM7TDMI::main() {
if(auto card = slot1.card) {
// ARM7 BIOS and firmware should be doing this, but it requires
// clock, card emulation and fancy things like that.
uint32 arm9src = card->rom.read(0x20, Word), arm7src = card->rom.read(0x30, Word);
uint32 arm9entry = card->rom.read(0x24, Word), arm7entry = card->rom.read(0x34, Word);
uint32 arm9dest = card->rom.read(0x28, Word), arm7dest = card->rom.read(0x38, Word);
uint32 arm9size = card->rom.read(0x2c, Word), arm7size = card->rom.read(0x3c, Word);
// Copy user settings to RAM
// - Homebrew and games (?) both require this. libnds actually attempts
// to read the settings from flash, but the struct contains a bitfield
// that GCC expands by 2 bytes, throwing the size and checksum off.
for(unsigned n = 0; n < 0x70; n += 1) {
store(0x02fffc80+n, Byte, system.firmware.read(0x3fe00+n, Byte));
}
// Check CRCs on firmware data and warn if incorrect
uint8 *wifiData = &system.firmware.data[0x0002a];
uint8 *wfcData[3] = {
&system.firmware.data[0x3fa00],
&system.firmware.data[0x3fb00],
&system.firmware.data[0x3fc00],
};
uint8 *userData[2] = {
&system.firmware.data[0x3fe00],
&system.firmware.data[0x3ff00],
};
// MAC address + Wifi chipset programming data
uint32 wifiDataLen = wifiData[2] | wifiData[3]<<8;
uint32 wifiDataExpected = wifiData[0] | wifiData[1]<<8;
uint32 wifiDataActual = crc16(wifiData+2, wifiDataLen, 0);
if(wifiDataExpected != wifiDataActual)
print("Warning: Wifi chipset data: crc is ",
hex<4>(wifiDataActual),"; expected ",hex<4>(wifiDataExpected),"\n");
// Nintendo Wifi Connection - access point IDs + WEP passwords
for(unsigned i = 0; i < 3; i++) {
uint32 expected = wfcData[i][0xfe] | wfcData[i][0xff]<<8;
uint32 actual = crc16(wfcData[i], 0xfe, 0);
if(expected != actual)
print("Warning: WFC access point #",i,": crc is ",
hex<4>(actual),"; expected ",hex<4>(expected),"\n");
}
// User settings area - nickname, birthday, favorite color etc.
for(unsigned i = 0; i < 2; i++) {
uint32 expected = userData[i][0x72] | userData[i][0x73]<<8;
uint32 actual = crc16(userData[i], 0x70, 0xffff);
if(expected != actual)
print("Warning: User settings #",i,": crc is ",
hex<4>(actual),"; expected ",hex<4>(expected),"\n");
}
// Copy header into RAM
for(unsigned n = 0; n < 0x200; n += 4) {
write(0x02fffe00+n, Word, 0, card->rom.read(n, Word));
}
// Copy executables
if(0x200 <= arm9src && arm9src + arm9size <= card->rom.size) {
if(0x02000000 <= arm9dest && arm9dest + arm9size <= 0x023bfe00) {
for(unsigned n = 0; n < arm9size; n += 4)
write(arm9dest + n, Word, 0, card->rom.read(arm9src + n, Word));
}
}
if(0x200 <= arm7src && arm7src + arm7size <= card->rom.size) {
if(0x02000000 <= arm7dest && arm7dest + arm7size <= 0x023bfe00
|| 0x037f8000 <= arm7dest && arm7dest + arm7size <= 0x03807e00) {
for(unsigned n = 0; n < arm7size; n += 4)
write(arm7dest + n, Word, 0, card->rom.read(arm7src + n, Word));
}
}
// Should write the card's ROM ID to RAM too... where is it again?
arm7.booted = 0;
arm9.booted = 0;
arm7.writeCpsr(0xdf, 0xf);
arm7.branch(arm7entry & 1, arm7entry);
arm7.r[15] = arm7entry; // pc
arm7.r[14] = arm7entry; // lr
arm7.r[13] = 0x03007f00; // sp
arm7.r_irq[0] = 0x03007fa0; // sp_irq
arm7.r_svc[0] = 0x03007fe0; // sp_svc
arm9.writeCpsr(0xdf, 0xf);
arm9.branch(arm9entry & 1, arm9entry);
arm9.r[15] = arm9entry; // pc
arm9.r[14] = arm9entry; // lr
arm9.r[13] = 0x00803ec0; // sp
arm9.r_irq[0] = 0x00803fa0; // sp_irq
arm9.r_svc[0] = 0x00803fc0; // sp_svc
}
arm7.event.queue.time -= arm7.clock;
for(unsigned n=1; n <= arm7.event.queue.size; n++)
arm7.event.queue.items[n]->time -= arm7.clock;
arm7.clock = 0;
for(;;) {
if(arm7.clock >= 256)
co_switch(arm9.thread);
event.irq = interrupt.gate && (interrupt.enable & interrupt.flags);
//if(event.irq && !If) {
// print("arm7: irq (if=",hex<8>(interrupt.flags),")\n");
//}
if(Tf) execTHUMB();
else execARM();
}
}
// Advances the ARM7 clock and its event queue by twice the given count.
void ARM7TDMI::istep(unsigned clocks) {
  const unsigned ticks = 2*clocks;
  arm7.clock += ticks;
  event.queue.step(ticks);
}
// On the ARM7, a step is the same as an internal step.
void ARM7TDMI::step(unsigned clocks) {
  istep(clocks);
}
// Executes one ARM-state instruction: maintains the three-stage
// fetch/decode/execute pipeline, services a pending IRQ, evaluates the
// condition field, then dispatches on the instruction bit pattern.
// NOTE(review): `imatch` is defined elsewhere; it appears to test the current
// instruction `i` against a bit-pattern string -- confirm exact semantics.
void ARM7TDMI::execARM() {
if(branched) {
//if(r[15] == 0) print(hex<8>(r[14]), ": jump to nullptr!\n");
// After a branch the pipeline is refilled from the word-aligned target.
branched = false; r[15] &= ~3;
iexecute = fetch(r[15], Word, 0); r[15] += 4;
idecode = fetch(r[15], Word, 1); r[15] += 4;
} else {
iexecute = idecode, idecode = ifetch;
}
ifetch = fetch(r[15], Word, 1);
uint32 i = iexecute;
if(event.irq && !If) return irq();
//if(trace && r[15] >= 0x2000000) traceInsn();
// Condition failed: skip the instruction but still advance r15.
if(!evalCond(i>>28)) { r[15] += 4; return; }
// opcode, sh, Rd, Rn, Rm, Rs
if(imatch("00x10xx0/////")) {
if(imatch("00110r10/// ..../")) return armWritePsr(i>>22, i>>16, armImmed(i, i>>8));
if(imatch("00010r10/// 0000/")) return armWritePsr(i>>22, i>>16, armRm(i));
if(imatch("00010r00/// 0000/")) return armReadPsr (i>>22, i>>12);
if(imatch("00010010/// 00l1/")) return armBranchEx( i>>5, i);
if(imatch("00010b00/// 1001/")) return armMemSwap (i>>22, i>>12, i>>16, i);
}
if(imatch("000...../// 1..1/")) {
if(imatch("0000luas/// 1001/")) return armMultiply(i>>20, i>>16, i>>12, i, i>>8);
if(imatch("000pu0wl/// 1sh1/")) return armMem_v4 (i>>20, i>>5, i>>12, i>>16, armRm(i));
if(imatch("000pu1wl/// 1sh1/")) return armMem_v4 (i>>20, i>>5, i>>12, i>>16, armOffset8(i, i>>8));
}
if(imatch("001aaaas/// ..../")) return armData (i>>20, i>>12, i>>16, armImmed(i, i>>8));
if(imatch("000aaaas/// .sh0/")) return armData (i>>20, i>>12, i>>16, shiftImm(i, i>>5, i>>7));
if(imatch("010pubwl/// ..../")) return armMem (i>>20, i>>12, i>>16, armOffset12(i));
if(imatch("011pubwl/// ...0/")) return armMem (i>>20, i>>12, i>>16, shiftImm(i, i>>5, i>>7));
if(imatch("100puswl/// ..../")) return armBlock (i>>20, i>>16, i);
if(imatch("101l..../// ..../")) return armBranch (i>>24, 0, i<<2);
if(imatch("000aaaas/// 0sh1/")) return armDataRs (i>>20, i>>12, i>>16, i, i>>5, i>>8);
if(imatch("1111..../// ..../")) return swi ();
// Nothing matched.
return undefined();
}
// Executes one THUMB-state instruction: maintains the pipeline with halfword
// fetches, services a pending IRQ, then dispatches on the 16-bit pattern.
// NOTE(review): `imatch` is defined elsewhere; it appears to test the current
// instruction `i` against a bit-pattern string -- confirm exact semantics.
void ARM7TDMI::execTHUMB() {
if(branched) {
//if(r[15] == 0) print(hex<8>(r[14]), ": jump to nullptr!\n");
// After a branch the pipeline is refilled from the halfword-aligned target.
// The shift by 8*(r[15] & 2) picks the addressed halfword out of the value
// returned by fetch().
branched = false; r[15] &= ~1;
iexecute = fetch(r[15], Half, 0) >> 8*(r[15] & 2) & 0xffff; r[15] += 2;
idecode = fetch(r[15], Half, 1) >> 8*(r[15] & 2) & 0xffff; r[15] += 2;
} else {
iexecute = idecode, idecode = ifetch;
}
ifetch = fetch(r[15], Half, 1) >> 8*(r[15] & 2) & 0xffff;
uint16 i = iexecute;
if(event.irq && !If) return irq();
//if(trace && r[15] >= 0x2000000) traceInsn();
// Patterns are tried in order, so earlier, more specific patterns win.
if(imatch("00011ismmmnnnddd")) return thumbAddSub (i>>9, i, i>>3, i>>6);
if(imatch("000ssiiiiimmmddd")) return thumbShiftImm(i>>11, i, i>>3, i>>6);
if(imatch("001oodddiiiiiiii")) return thumbDataImm (i>>11, i>>8, i);
if(imatch("010000oooommmddd")) return thumbDataLo (i>>6, i, i>>3);
if(imatch("010001oodmmmmddd")) return thumbDataHi (i>>8, (i&7)+(i>>4&8), i>>3);
if(imatch("0101ooommmnnnddd")) return thumbMemReg (i>>9, i, i>>3, i>>6);
if(imatch("011bliiiiinnnddd")) return thumbMemImm (i>>11, i, i>>3, i>>6);
if(imatch("1000liiiiinnnddd")) return thumbMemImm (i>>11, i, i>>3, i>>6);
if(imatch("10110000siiiiiii")) return thumbAddSP ( i>>7, i);
if(imatch("01001dddrrrrrrrr")) return thumbRelative(i>>11, i>>8, i);
if(imatch("1001odddrrrrrrrr")) return thumbRelative(i>>11, i>>8, i);
if(imatch("1010odddrrrrrrrr")) return thumbRelative(i>>11, i>>8, i);
if(imatch("1101ccccrrrrrrrr")) return thumbCond ( i>>8, i);
if(imatch("11100rrrrrrrrrrr")) return thumbBranch (i);
if(imatch("11110rrrrrrrrrrr")) return thumbBh (i);
if(imatch("11111rrrrrrrrrrr")) return thumbBlx (1, i);
if(imatch("1o..lnnnrrrrrrrr")) return thumbBlock (i>>11, i>>8, i);
// Nothing matched.
return undefined();
}
// Instruction fetch; on the ARM7 this is an ordinary bus read.
uint32 ARM7TDMI::fetch(uint32 addr, uint32 size, bool s) {
  const uint32 opcode = read(addr, size, s);
  return opcode;
}
// ARM7 bus read. Charges the access time via istep() and routes the access
// by 8MB region (addr >> 23). Case labels are written as (addr>>20)>>3, so
// for example 0x20>>3 matches 0x02000000..0x027fffff.
//   addr - bus address
//   size - Byte / Half / Word
//   s    - true when the access is treated as sequential for EWRAM timing
// Unmapped regions cost one step and return 0.
uint32 ARM7TDMI::read(uint32 addr, uint32 size, bool s) {
const int h = size==Word? 2 : 1; // 16-bit bus timing
const int e = 8*!s + h; // EWRAM timing
//if((addr & 0xf3ff000) == 0x23ff000) {
// istep(e);
// uint32 data = system.ewram.read(addr % 0x400000, size);
// print(hex<8>(arm7.event.queue.time), " ",hex<8>(r[15])," arm7: r ",hex<8>(addr),":",size," = ",hex<8>(data),"\n");
// return data;
//}
switch(addr >> 23) {
// 0x00000000..0x00ffffff: BIOS, mirrored every bios.size bytes.
case 0x00>>3:
case 0x08>>3: istep(1); addr &= bios.size-1; return bios.read(addr, size);
// 0x02000000..0x02ffffff: EWRAM, mirrored every 4MB.
case 0x20>>3:
case 0x28>>3: istep(e); addr %= 0x400000; return system.ewram.read(addr, size);
// 0x03000000..0x037fffff: shared WRAM, two 16KB pages.
case 0x30>>3: istep(1); addr %= 0x008000; return system.swram[addr>>14].read(addr % 0x4000, size);
// 0x03800000..0x03ffffff: IWRAM, mirrored every 64KB.
case 0x38>>3: istep(1); addr %= 0x010000; return system.iwram.read(addr, size);
// 0x04000000..0x047fffff: I/O registers.
case 0x40>>3: istep(1); { // return readReg(addr, size);
uint32 data = readReg(addr, size);
//print(hex<8>(arm7.event.queue.time), " ", hex<8>(r[15])," arm7: r ",hex<8>(addr),":",size," = ",hex<8>(data),"\n");
return data;
}
// 0x04800000..0x04ffffff: wifi.
case 0x48>>3: istep(1); return wifi.read(addr, size);
// 0x06000000..0x06ffffff: VRAM mapped to the ARM7, sixteen 16KB pages.
case 0x60>>3:
case 0x68>>3: istep(h); addr %= 0x040000; return system.vmap.arm7[addr>>14].read(addr, size);
}
istep(1);
return 0;
}
// ARM7 bus write. Mirrors read(): charges the access time via istep() and
// routes by 8MB region (addr >> 23), with case labels written as
// (addr>>20)>>3. There is no BIOS case, so writes there fall through to the
// default path (one step, data discarded), as do other unmapped regions.
void ARM7TDMI::write(uint32 addr, uint32 size, bool s, uint32 data) {
const int h = size==Word? 2 : 1; // 16-bit bus timing
const int e = 8*!s + h; // EWRAM timing
//if((addr & 0xf3ff000) == 0x23ff000)
// print(hex<8>(arm7.event.queue.time), " ",hex<8>(r[15]), " arm7: w ",hex<8>(addr),":",size," = ",hex<8>(data),"\n");
switch(addr >> 23) {
// 0x02000000..0x02ffffff: EWRAM, mirrored every 4MB.
case 0x20>>3:
case 0x28>>3: istep(e); addr %= 0x400000; return system.ewram.write(addr, size, data);
// 0x03000000..0x037fffff: shared WRAM, two 16KB pages.
case 0x30>>3: istep(1); addr %= 0x008000; return system.swram[addr>>14].write(addr % 0x4000, size, data);
// 0x03800000..0x03ffffff: IWRAM, mirrored every 64KB.
case 0x38>>3: istep(1); addr %= 0x010000; return system.iwram.write(addr, size, data);
// 0x04000000..0x047fffff: I/O registers.
case 0x40>>3: istep(1); { // return writeReg(addr, size, data);
//if(addr != 0x4000301)
// print(hex<8>(arm7.event.queue.time), " ", hex<8>(r[15])," arm7: w ",hex<8>(addr),":",size," = ",hex<8>(data),"\n");
writeReg(addr, size, data);
return;
}
// 0x04800000..0x04ffffff: wifi.
case 0x48>>3: istep(1); return wifi.write(addr, size, data);
// 0x06000000..0x06ffffff: VRAM mapped to the ARM7. Halfword and word writes
// go through the page's write(); byte writes are merged by hand below.
case 0x60>>3:
case 0x68>>3: istep(h); addr %= 0x040000; if(size > Byte) return system.vmap.arm7[addr>>14].write(addr, size, data);
// STRB - special case, only works on ARM7
// Keep the other byte of the halfword, then insert the written byte.
auto &page = system.vmap.arm7[addr>>14];
page[addr] &= addr & 1? 0x00ff : 0xff00;
page[addr] |= addr & 1? data & 0xff00 : data & 0x00ff;
return;
}
istep(1);
}
// Coprocessor data operations are not handled on the ARM7; every such
// instruction is routed to the undefined-instruction handler.
void ARM7TDMI::dataCop(uint4 cpno, uint4 op1, uint4 ird, uint4 irn, uint4 irm, uint4 op2) {
  undefined();
}
// Reads an ARM7-specific I/O register. The offset from 0x4000000 is rounded
// down to a multiple of 4 before dispatch; anything not handled here is
// forwarded to CPUCore::readReg().
uint32 ARM7TDMI::readReg(uint32 addr, uint32 size) {
switch(addr-0x4000000 & ~3) {
case 0x134: {
// Aux GPIO
// - missing link port (SI pin = clock /IRQ)
// - misc inputs
uint8 keys = 0;
// Buttons 10..15 map to bits 0..5.
for(unsigned n = 10; n < 16; n++) {
if(interface->inputPoll(ID::Port::Buttons, 0, n))
keys += 1 << n-10;
}
if(system.touchscreen.penDown()) keys += 1<<6;
if(interface->inputPoll(ID::Port::Sensors, 0, ID::Sensors::Lid)==0) keys += 1<<7;
// The bits are inverted on readback and placed in bits 16..23.
return (keys ^ 0xff)<<16 | regSio()<<0;
}
case 0x138: return regRtc(); // GPIO - system clock
case 0x1c0: return regSpi(); // SPI - power, touch, firmware flash
// Memory status, slot 2
case 0x204: return arm9.ramPriority<<15 | arm9.slot1access<<11
| arm9.slot2access<< 7 | regSlot2Control();
// NOTE(review): the switch value always has its low two bits cleared, so the
// 0x206 and 0x241 labels below can never match; those offsets are handled by
// the 0x204 and 0x240 cases respectively.
// Wifi waitstates
case 0x206: break;
case 0x240: break; // VRAM status
case 0x241: break; // SWRAM status
case 0x300: return flag300<<1 | booted<<0;
case 0x304: return apu.powered<<0 | wifi.powered<<1;
// Voice control registers, one per voice at a 0x10 stride.
case 0x400: case 0x410: case 0x420: case 0x430:
case 0x440: case 0x450: case 0x460: case 0x470:
case 0x480: case 0x490: case 0x4a0: case 0x4b0:
case 0x4c0: case 0x4d0: case 0x4e0: case 0x4f0:
return apu.regVoiceControl(addr>>4 & 15);
case 0x500: return apu.regControl();
case 0x504: return apu.regBias();
case 0x508: return apu.regCaptureControl();
case 0x510: return apu.regCaptureDest(0);
case 0x518: return apu.regCaptureDest(1);
}
return CPUCore::readReg(addr, size);
}
// Writes an ARM7-side I/O register.
//
// A lane mask is built from the access size and address (all 32 bits for a
// word, the addressed halfword for Half, the addressed byte for Byte) and
// `data` is reduced to those lanes. Handlers receive both so they can update
// only the bytes actually written. Anything not returned from the switch is
// forwarded to CPUCore::writeReg().
void ARM7TDMI::writeReg(uint32 addr, uint32 size, uint32 data) {
uint32 mask = 0xffffffff;
if(size == Half) mask = 0xffff << 8*(addr & 2);
if(size == Byte) mask = 0xff << 8*(addr & 3);
data &= mask;
// Key is the word-aligned offset from 0x4000000.
switch(addr-0x4000000 & ~3) {
case 0x134: return regSio(data, mask); // GPIO - missing link port
case 0x138: return regRtc(data, mask); // GPIO - system clock
case 0x1c0: return regSpi(data, mask); // SPI - power, touch, firmware flash
case 0x204: return regSlot2Control(data, mask);
// NOTE(review): unreachable label - the key above never has bit 1 set.
case 0x206: break; // Wifi waitstates
case 0x300: // System mode
booted |= data & 1; // not possible to clear it
if(mask & 0xff)
flag300 = data >> 1;
// 0x4000: halt arm9 and enter GBA mode
// 0x8000: halt arm7 and wait for IRQ
// 0xc000: halt system and wait for IRQ (low-power sleep mode)
//
// Nearly everything is frozen in sleep mode, including timers. Only
// a few things can generate interrupts to bring it back out:
// - lid sensor, buttons
// - alarm from real-time clock
// - wireless chipset (?)
// - cart in slot 1 or 2 (?)
// Any of the two mode bits being set parks the CPU in the loop below.
if(data & 0xc000) {
//print("arm7: wait (ime=",interrupt.gate,", ie=",hex<8>(interrupt.enable),")\n");
powerState = waiting;
for(;;) {
// Wake as soon as an enabled interrupt is flagged (interrupt.gate is
// not consulted here).
if((interrupt.enable & interrupt.flags)) {
powerState = running;
break;
}
// Otherwise burn time in 256-clock slices, yielding to the ARM9
// thread once arm7.clock has reached 256.
istep(256);
if(arm7.clock >= 256)
co_switch(arm9.thread);
}
}
return;
case 0x304: // Power
// Only the low byte carries the power bits.
if(mask & 0xff) {
apu.powered = data>>0;
wifi.powered = data>>1;
}
return;
case 0x308: break; // BIOS protection
// Sound voices: four registers per voice at +0, +4, +8, +12, sixteen
// voices spaced 16 bytes apart. The voice index is address bits 4..7.
case 0x400: case 0x410: case 0x420: case 0x430:
case 0x440: case 0x450: case 0x460: case 0x470:
case 0x480: case 0x490: case 0x4a0: case 0x4b0:
case 0x4c0: case 0x4d0: case 0x4e0: case 0x4f0:
return apu.regVoiceControl(addr>>4 & 15, data, mask);
case 0x404: case 0x414: case 0x424: case 0x434:
case 0x444: case 0x454: case 0x464: case 0x474:
case 0x484: case 0x494: case 0x4a4: case 0x4b4:
case 0x4c4: case 0x4d4: case 0x4e4: case 0x4f4:
return apu.regVoiceSource(addr>>4 & 15, data, mask);
case 0x408: case 0x418: case 0x428: case 0x438:
case 0x448: case 0x458: case 0x468: case 0x478:
case 0x488: case 0x498: case 0x4a8: case 0x4b8:
case 0x4c8: case 0x4d8: case 0x4e8: case 0x4f8:
return apu.regVoicePeriod(addr>>4 & 15, data, mask);
case 0x40c: case 0x41c: case 0x42c: case 0x43c:
case 0x44c: case 0x45c: case 0x46c: case 0x47c:
case 0x48c: case 0x49c: case 0x4ac: case 0x4bc:
case 0x4cc: case 0x4dc: case 0x4ec: case 0x4fc:
return apu.regVoiceLength(addr>>4 & 15, data, mask);
case 0x500: return apu.regControl(data, mask);
case 0x504: return apu.regBias(data, mask);
case 0x508: return apu.regCaptureControl(data, mask);
case 0x510: return apu.regCaptureDest(0, data, mask);
case 0x514: return apu.regCaptureLength(0, data, mask);
case 0x518: return apu.regCaptureDest(1, data, mask);
case 0x51c: return apu.regCaptureLength(1, data, mask);
}
return CPUCore::writeReg(addr, size, data);
}
// Assembles the SPI register value for a read: control fields in the low
// halfword (divider at bit 0, device at 8, size at 10, hold at 11, irq at 14,
// enable at 15) and the last transferred data starting at bit 16.
uint32 ARM7TDMI::regSpi() {
  uint32 value = spi.divider<<0;
  value |= spi.device<<8;
  value |= spi.size<<10;
  value |= spi.hold<<11;
  value |= spi.irq<<14;
  value |= spi.enable<<15;
  value |= spi.data<<16;
  return value;
}
// Handles a write to the SPI register.
//
// data - value being written, already reduced to the written byte lanes
// mask - which byte lanes were written
//
// Byte 0 holds the divider, byte 1 the remaining control fields, and the
// upper halfword is the data port: writing it performs one transfer with the
// currently selected device.
void ARM7TDMI::regSpi(uint32 data, uint32 mask) {
if(mask & 0x000000ff) {
spi.divider = data>>0;
}
if(mask & 0x0000ff00) {
spi.device = data>>8;
spi.size = data>>10;
spi.hold = data>>11;
spi.irq = data>>14;
spi.enable = data>>15;
if(spi.enable == false) {
// Hrm, must be implicit as libnds does not release the hold
// before last transfer - or is spi.hold not delayed after all?
// Disabling the bus deselects every device.
system.firmware.select(false);
system.powerMgr.select(false);
system.touchscreen.select(false);
}
}
if(mask & 0xffff0000) {
// Data writes are ignored while the bus is disabled.
if(!spi.enable) return;
SPIDevice* device = nullptr;
if(spi.device == SPI::flash) device = &system.firmware;
if(spi.device == SPI::power) device = &system.powerMgr;
if(spi.device == SPI::touch) device = &system.touchscreen;
// Any other device id leaves `device` null and the write has no effect.
if(device) {
// SPI transfers are bidirectional so there's always a read+write.
device->select(true);
spi.data = device->transfer(data>>16);
// Keep the device selected afterwards only if the hold field is set.
device->select(spi.hold);
}
}
}
// Builds the read value of the RTC GPIO register from two pin groups.
// Group 0 occupies bits 0..7 (pin levels at 0, direction at 4); group 1
// occupies bits 8..15 (pin levels at 8, direction at 12). Pin levels are
// truncated to four bits via uint4.
uint32 ARM7TDMI::regRtc() {
// I've blithely assumed: pin = (out | ~dir) & in
//
// Something like that is necessary because Nintendo read-modify-writes
// the output pins.
return uint4( (rtc.out[0] | ~rtc.dir[0]) & rtc.in[0] )<<0 | rtc.dir[0]<<4
| uint4( (rtc.out[1] | ~rtc.dir[1]) & rtc.in[1] )<<8 | rtc.dir[1]<<12;
}
// Handles a write to the RTC GPIO register.
//
// data - value being written, already reduced to the written byte lanes
// mask - which byte lanes were written
//
// The layout mirrors the read side: pin group 0 lives in byte 0 (output
// levels at bit 0, direction at bit 4) and pin group 1 lives in byte 1
// (output levels at bit 8, direction at bit 12). After the latches are
// updated the resulting group-0 pin state is clocked into the RTC chip and
// its response becomes the new externally driven input.
void ARM7TDMI::regRtc(uint32 data, uint32 mask) {
  // It would've been nice if the clock was on the SPI bus, like the touchpad,
  // firmware and power chips.. it seems that wasn't possible because of the
  // bi-directional data pin. Software has to bit-bang the GPIO pins instead.
  if(mask & 0x00ff) {
    rtc.out[0] = data>>0;   // if dir==1, use output from the DS side.
    rtc.dir[0] = data>>4;   // if dir==0, use as input with pullup (?)
  }
  if(mask & 0xff00) {
    // These may exist, but don't appear to be used. They are decoded from
    // byte 1 so that a write round-trips through the read-side packing
    // (out[1] at bit 8, dir[1] at bit 12) instead of duplicating group 0.
    rtc.out[1] = data>>8;
    rtc.dir[1] = data>>12;
  }
  // All pins can be either inputs or outputs.
  // "in" is the value being driven externally (or 1, if not).
  uint4 pins = (rtc.out[0] | ~rtc.dir[0]) & rtc.in[0];
  rtc.in[0] = system.clock.io(pins);
}
// Assembles the SIO/aux GPIO read value: input pins at bit 0, direction at
// bit 4, irq at bit 8 and mode at bit 14.
uint32 ARM7TDMI::regSio() {
  uint32 value = sio.in<<0;
  value |= sio.dir<<4;
  value |= sio.irq<<8;
  value |= sio.mode<<14;
  return value;
}
// Handles a write to the SIO/aux GPIO register.
//
// data - value being written, already reduced to the written byte lanes
// mask - which byte lanes were written
//
// Only the fields whose byte lane was actually written are updated, in the
// same way regRtc() and regSpi() treat their masks. Without this, a byte or
// halfword access that misses a lane (for example a write aimed at the key
// bits in the upper half of the same word) would reset the untouched fields
// to zero, because `data` arrives with the unwritten lanes cleared.
void ARM7TDMI::regSio(uint32 data, uint32 mask) {
  if(mask & 0x00ff) {
    sio.out = data>>0;
    sio.dir = data>>4;
  }
  if(mask & 0xff00) {
    sio.irq  = data>>8;
    sio.mode = data>>14;
  }
}

View File

@ -1,697 +0,0 @@
// Allocates backing storage for the ITCM (0x8000 bytes), the DTCM (0x4000
// bytes) and the BIOS image (0x1000 bytes). Sizes are kept in bytes while the
// buffers are arrays of 32-bit words. Only the BIOS buffer is cleared here.
ARM946ES::ARM946ES() {
  itcm.size = 0x8000;
  itcm.data = new uint32_t[itcm.size/4];

  dtcm.size = 0x4000;
  dtcm.data = new uint32_t[dtcm.size/4];

  bios.size = 0x001000;
  bios.data = new uint32[bios.size/4]();
  memset(bios.data, 0, bios.size);
}
// Entry point handed to co_create(); runs the global ARM9's main loop.
void ARM946ES::Thread() {
  arm9.main();
}
// Power-on reset for the ARM9: recreates its cothread, clears both TCMs,
// resets the math unit and system flags, runs the shared CPUCore power-up
// and finally puts the control register state into its disabled defaults.
void ARM946ES::power() {
  // Original note gives the clock as 2 * 33513982 (presumably Hz).
  if(thread) {
    co_delete(thread);
  }
  thread = co_create(262144 * sizeof(void*), ARM946ES::Thread);

  // Core configuration.
  vectorBase     = 0xffff0000;
  bxWithLoadedPC = true;
  insnLatch      = 0;

  // Tightly coupled memories start out zeroed.
  memset(itcm.data, 0, itcm.size);
  memset(dtcm.data, 0, dtcm.size);

  // Divider and square-root unit.
  divMode     = 0;
  rootMode    = 0;
  divByZero   = 0;
  rootBusy    = 0;
  divBusy     = 0;
  square      = 0;
  numerator   = 0;
  squareRoot  = 0;
  denominator = 0;
  quotient    = 0;
  remainder   = 0;

  // System flags and bus ownership.
  booted      = 0;
  flag300     = 0;
  slot1access = 1;
  slot2access = 1;
  ramPriority = 1;

  CPUCore::power();

  // Set after the shared power-up, as in the original ordering.
  trace = false;

  control.mmu         = false;
  control.dcache      = false;
  control.icache      = false;
  control.dtcm        = false;
  control.itcm        = false;
  control.dtcmLoad    = false;
  control.itcmLoad    = false;
  control.endian      = CR::little;
  control.cachePolicy = CR::random;
}
// Advances the ARM9 by `clocks`: the shared arm7.clock counter is lowered by
// that amount first, then this CPU's event queue is stepped.
void ARM946ES::istep(unsigned clocks) {
  arm7.clock -= clocks;

  event.queue.step(clocks);
}
// Public stepping entry point; simply delegates to istep().
void ARM946ES::step(unsigned clocks) {
  istep(clocks);
}
// Main loop of the ARM9 cothread. Never returns.
void ARM946ES::main() {
// Initial TCM setup: ITCM region value with base 0, DTCM region value with
// base 0x00800000, both with 14 in the size field (bits 1..5) and both
// enabled. updateTcm() turns this into the compare/mask pairs.
itcmRegion = 0x00000000 | 14<<1; control.itcm = true;
dtcmRegion = 0x00800000 | 14<<1; control.dtcm = true;
updateTcm();
for(;;) {
// Hand control to the ARM7 once the shared clock has dropped below -256.
if(arm7.clock < -256)
co_switch(arm7.thread);
// Latch the IRQ line: master gate open and at least one enabled flag set.
event.irq = interrupt.gate && (interrupt.enable & interrupt.flags);
//if(event.irq && !If) {
// print("arm9: irq (if=",hex<8>(interrupt.flags),")\n");
//}
// Execute one instruction in the current instruction set (Tf = THUMB).
if(Tf) execTHUMB();
else execARM();
}
}
// Fetches, decodes and dispatches one ARM-state instruction.
//
// A three-slot prefetch queue (iexecute, idecode, ifetch) is maintained:
// after a branch the first two slots are refilled from the word-aligned PC,
// otherwise the queue just shifts. Decoding is a first-match cascade of
// imatch() patterns, so the order of the tests below is significant.
void ARM946ES::execARM() {
if(branched) {
//if(r[15] == 0) print(hex<8>(r[14]), ": jump to nullptr!\n");
branched = false; r[15] &= ~3;
iexecute = fetch(r[15], Word, 0); r[15] += 4;
idecode = fetch(r[15], Word, 0); r[15] += 4;
} else {
iexecute = idecode, idecode = ifetch;
}
ifetch = fetch(r[15], Word, 1);
uint32 i = iexecute;
// A pending IRQ (with If clear) pre-empts the instruction.
if(event.irq && !If) return irq();
//if(trace) traceInsn();
// Condition check: values below 0xe0000000 carry a condition field of
// 0..13 that must be evaluated; on failure just advance the PC.
if(i < 0xe0000000 && !evalCond(i>>28)) { r[15] += 4; return; }
// opcode, sh, Rd, Rn, Rm, Rs
// Top nibble 1111: only the two forms below are handled, everything else
// in that space is undefined.
if(imatch("1111......../////")) {
if(imatch("111101x1u101/////")) { r[15] += 4; return; } // pld - reportedly NOP on DS?
if(imatch("1111101l..../////")) return armBranch (1, 1, i<<2 | (i>>23 & 2));
return undefined();
}
// Miscellaneous group: PSR access, branch-exchange, bkpt, clz, DSP
// add/multiply and swap. Unmatched encodings fall through to the groups
// below.
if(imatch("00x10xx0/////")) {
if(imatch("00110r10/// ..../")) return armWritePsr(i>>22, i>>16, armImmed(i, i>>8));
if(imatch("00010r10/// 0000/")) return armWritePsr(i>>22, i>>16, armRm(i));
if(imatch("00010r00/// 0000/")) return armReadPsr (i>>22, i>>12);
if(imatch("00010010/// 00l1/")) return armBranchEx( i>>5, i);
if(imatch("00010010/// 0111/")) return pfabort (); // bkpt
if(imatch("00010110/// 0001/")) return armClz ( i>>12, i);
if(imatch("00010ds0/// 0101/")) return armDspAdd (i>>21, i>>12, i>>16, i);
if(imatch("00010oo0/// 1yx0/")) return armDspMul (i>>21, i>>5, i>>16, i>>12, i, i>>8);
if(imatch("00010b00/// 1001/")) return armMemSwap (i>>22, i>>12, i>>16, i);
}
// Multiplies and the extra load/store forms (register or 8-bit offset).
// The armMem_v5 patterns are tested before the more general armMem_v4 ones.
if(imatch("000...../// 1..1/")) {
if(imatch("0000luas/// 1001/")) return armMultiply(i>>20, i>>16, i>>12, i, i>>8);
if(imatch("000pu0w0/// 11s1/")) return armMem_v5 (i>>20, i>>5, i>>12, i>>16, armRm(i));
if(imatch("000pu0wl/// 1sh1/")) return armMem_v4 (i>>20, i>>5, i>>12, i>>16, armRm(i));
if(imatch("000pu1w0/// 11s1/")) return armMem_v5 (i>>20, i>>5, i>>12, i>>16, armOffset8(i, i>>8));
if(imatch("000pu1wl/// 1sh1/")) return armMem_v4 (i>>20, i>>5, i>>12, i>>16, armOffset8(i, i>>8));
}
// Remaining major groups: data processing, single/block transfers,
// branches, coprocessor operations and software interrupts.
if(imatch("001aaaas/// ..../")) return armData (i>>20, i>>12, i>>16, armImmed(i, i>>8));
if(imatch("000aaaas/// .sh0/")) return armData (i>>20, i>>12, i>>16, shiftImm(i, i>>5, i>>7));
if(imatch("010pubwl/// ..../")) return armMem (i>>20, i>>12, i>>16, armOffset12(i));
if(imatch("011pubwl/// ...0/")) return armMem (i>>20, i>>12, i>>16, shiftImm(i, i>>5, i>>7));
if(imatch("100puswl/// ..../")) return armBlock (i>>20, i>>16, i);
if(imatch("101l..../// ..../")) return armBranch (i>>24, 0, i<<2);
if(imatch("000aaaas/// 0sh1/")) return armDataRs (i>>20, i>>12, i>>16, i, i>>5, i>>8);
if(imatch("1110..../// ..../")) return dataCop (i>>8, i>>20, i>>12, i>>16, i, i>>4);
if(imatch("1111..../// ..../")) return swi();
return undefined();
}
// Fetches, decodes and dispatches one THUMB-state instruction.
//
// Mirrors execARM(): the prefetch queue is refilled after a branch (PC forced
// to halfword alignment) or shifted otherwise. Each fetch result is shifted
// right by 0 or 16 bits according to PC bit 1 and reduced to 16 bits.
// Decoding is first-match, so pattern order matters.
void ARM946ES::execTHUMB() {
if(branched) {
//if(r[15] == 0) print(hex<8>(r[14]), ": jump to nullptr!\n");
branched = false; r[15] &= ~1;
iexecute = fetch(r[15], Half, 0) >> 8*(r[15] & 2) & 0xffff; r[15] += 2;
idecode = fetch(r[15], Half, 1) >> 8*(r[15] & 2) & 0xffff; r[15] += 2;
} else {
iexecute = idecode, idecode = ifetch;
}
ifetch = fetch(r[15], Half, 1) >> 8*(r[15] & 2) & 0xffff;
uint16 i = iexecute;
// A pending IRQ (with If clear) pre-empts the instruction.
if(event.irq && !If) return irq();
//if(trace) traceInsn();
// The add/subtract pattern is tested before the shift-immediate pattern
// that would also match it.
if(imatch("00011ismmmnnnddd")) return thumbAddSub (i>>9, i, i>>3, i>>6);
if(imatch("000ssiiiiimmmddd")) return thumbShiftImm(i>>11, i, i>>3, i>>6);
if(imatch("001oodddiiiiiiii")) return thumbDataImm (i>>11, i>>8, i);
if(imatch("010000oooommmddd")) return thumbDataLo (i>>6, i, i>>3);
// The second argument joins instruction bits 0..2 with bit 7 into a 4-bit
// register number.
if(imatch("010001oodmmmmddd")) return thumbDataHi (i>>8, (i&7)+(i>>4&8), i>>3);
if(imatch("0101ooommmnnnddd")) return thumbMemReg (i>>9, i, i>>3, i>>6);
if(imatch("011bliiiiinnnddd")) return thumbMemImm (i>>11, i, i>>3, i>>6);
if(imatch("1000liiiiinnnddd")) return thumbMemImm (i>>11, i, i>>3, i>>6);
if(imatch("10111110........")) return pfabort(); // bkpt
if(imatch("10110000siiiiiii")) return thumbAddSP ( i>>7, i);
if(imatch("01001dddrrrrrrrr")) return thumbRelative(i>>11, i>>8, i);
if(imatch("1001odddrrrrrrrr")) return thumbRelative(i>>11, i>>8, i);
if(imatch("1010odddrrrrrrrr")) return thumbRelative(i>>11, i>>8, i);
if(imatch("1101ccccrrrrrrrr")) return thumbCond ( i>>8, i);
if(imatch("11100rrrrrrrrrrr")) return thumbBranch (i);
if(imatch("11110rrrrrrrrrrr")) return thumbBh (i);
if(imatch("111t1rrrrrrrrrrr")) return thumbBlx (i>>12, i);
// Catch-all for the remaining 1xxx encodings; must stay last.
if(imatch("1o..lnnnrrrrrrrr")) return thumbBlock (i>>11, i>>8, i);
return undefined();
}
// Instruction fetch for the ARM9.
//
// addr - address to fetch from
// size - access size passed through to the memory being read
// s    - not used by this function
//
// Always charges one clock. ITCM is checked first, then the region selected
// by the top address byte; addresses from 0xffff0000 up read the BIOS.
// Everything else returns 0.
uint32 ARM946ES::fetch(uint32 addr, uint32 size, bool s) {
istep(1); // Assume cached for now
if(itcmRCompare == (itcmRMask & addr)) { return itcm.read(addr % 0x8000, size); }
/* DTCM is not executable */
switch(addr >> 24) {
case 0x2: return system.ewram.read(addr % 0x400000, size);
case 0x3: return system.swram[addr>>14 & 1].read(addr % 0x4000, size);
case 0x6: return system.vmap.arm9[addr>>21 & 7][addr>>14 & 63].read(addr, size);
}
// bios.size-1 is used as an address mask for the BIOS image.
if(addr >= 0xffff0000) return bios.read(addr & bios.size-1, size);
return 0;
}
// Data read for the ARM9.
//
// addr - address to read
// size - access size (Byte/Half/Word)
// s    - when false, six extra clocks are added to the `w` and `h` timings
//
// TCMs are checked first (an ITCM hit costs one clock, a DTCM hit none).
// Otherwise the top address byte selects the region and the matching number
// of clocks is stepped before the access. Unmapped addresses cost `w` clocks
// and read as 0, except for 0xffff0000 and up, which read the BIOS.
uint32 ARM946ES::read(uint32 addr, uint32 size, bool s) {
if(itcmRCompare == (itcmRMask & addr)) { istep(1); return itcm.read(addr % 0x8000, size); }
if(dtcmRCompare == (dtcmRMask & addr)) { return dtcm.read(addr % 0x4000, size); }
const int w = 6*!s + 2; // 32-bit bus timing
const int h = 6*!s + (size==Word? 4 : 2); // 16-bit bus timing
//const int e = 2*!s + h; // EWRAM timing
//if((addr & 0xf3ff000) == 0x23ff000) {
// istep(e);
// uint32 data = system.ewram.read(addr % 0x400000, size);
// print(hex<8>(arm9.event.queue.time), " ",hex<8>(r[15])," arm9: r ",hex<8>(addr),":",size," = ",hex<8>(data),"\n");
// return data;
//}
switch(addr >> 24) {
// EWRAM currently uses a fixed three clocks rather than the `e` timing
// sketched in the disabled code above.
case 0x2: istep(3); return system.ewram.read(addr % 0x400000, size);
case 0x3: istep(w); return system.swram[addr>>14 & 1].read(addr % 0x4000, size);
case 0x4: istep(w); { //return readReg(addr, size); {
uint32 data = readReg(addr, size);
//if(addr != 0x40001a0 && addr != 0x40001a4 && addr != 0x4100010)
// print(hex<8>(arm9.event.queue.time), " ",hex<8>(r[15])," arm9: r ",hex<8>(addr),":",size," = ",hex<8>(data),"\n");
return data;
}
// Palette and OAM are split between the two PPUs by address bit 10.
case 0x5: istep(h); return ppu[addr>>10 & 1].readPalette(addr % 0x400);
case 0x6: istep(h); return system.vmap.arm9[addr>>21 & 7][addr>>14 & 63].read(addr, size);
case 0x7: istep(w); return ppu[addr>>10 & 1].readOam(addr % 0x400);
}
istep(w); if(addr >= 0xffff0000) return bios.read(addr & bios.size-1, size);
return 0;
}
// Data write for the ARM9.
//
// addr - address to write
// size - access size (Byte/Half/Word)
// s    - when false, six extra clocks are added to the `w` and `h` timings
// data - value to store
//
// TCMs are checked first (an ITCM hit costs one clock, a DTCM hit none).
// Otherwise the top address byte selects the region and the matching number
// of clocks is stepped before the access. Writes to unmapped addresses only
// cost `w` clocks.
void ARM946ES::write(uint32 addr, uint32 size, bool s, uint32 data) {
  if(itcmWCompare == (itcmWMask & addr)) { istep(1); return itcm.write(addr % 0x8000, size, data); }
  if(dtcmWCompare == (dtcmWMask & addr)) { return dtcm.write(addr % 0x4000, size, data); }

  //if((addr & 0xf3ff000) == 0x23ff000)
  //  print(hex<8>(arm9.event.queue.time), " ",hex<8>(r[15]), " arm9: w ",hex<8>(addr),":",size," = ",hex<8>(data),"\n");

  const int w = 6*!s + 2;                    // 32-bit bus timing
  const int h = 6*!s + (size==Word? 4 : 2);  // 16-bit bus timing
  //const int e = 2*!s + h;                  // EWRAM timing

  switch(addr >> 24) {
  case 0x2: istep(3); return system.ewram.write(addr % 0x400000, size, data);
  // SWRAM is two 0x4000-byte pages, so the page is chosen by address bit 14,
  // exactly as read() and fetch() do. Selecting on bit 16 sent stores to a
  // different page than the one later loads come from.
  case 0x3: istep(w); return system.swram[addr>>14 & 1].write(addr % 0x4000, size, data);
  case 0x4: istep(w); { //return writeReg(addr, size, data);
    //if(addr != 0x04000400)
    //  print(hex<8>(arm9.event.queue.time), " ", hex<8>(r[15])," arm9: w ",hex<8>(addr),":",size," = ",hex<8>(data),"\n");
    writeReg(addr, size, data);
    return;
  }
  // Palette and OAM are split between the two PPUs by address bit 10.
  case 0x5: istep(h); return ppu[addr>>10 & 1].writePalette(addr % 0x400, size, data);
  case 0x6: istep(h); return system.vmap.arm9[addr>>21 & 7][addr>>14 & 63].write(addr, size, data);
  case 0x7: istep(w); return ppu[addr>>10 & 1].writeOam(addr % 0x400, size, data);
  }
  istep(w);
}
// Executes a coprocessor instruction on the ARM9.
//
// cpno - coprocessor number (only 15, the system control coprocessor, is
//        implemented)
// op1  - instruction bits 20..23; for register transfers bit 0 is the
//        direction (1 = MRC, read into rd) and the upper bits are opcode 1
// ird  - ARM register index
// irn  - first coprocessor register (CRn)
// irm  - second coprocessor register (CRm)
// op2  - instruction bits 4..7; bit 0 set means a register transfer
//        (MRC/MCR), the upper bits are opcode 2
//
// Register transfers are dispatched on a 16-bit key built as
// op1:CRn:CRm:opcode2, so 0x1xxx keys are reads and 0x0xxx keys are writes
// with opcode 1 = 0. Anything not handled is logged and raised as an
// undefined instruction.
void ARM946ES::dataCop(uint4 cpno, uint4 op1, uint4 ird, uint4 irn, uint4 irm, uint4 op2) {
  if(cpno == 15) {
    auto &rd = r[ird];
    r[15] += 4;

    if(op2 & 1) {  // MRC, MCR
      switch(op1<<12 | irn<<8 | irm<<4 | op2>>1) {
      case 0x1000:            // c0,c0,0  Processor ID
        rd  =   'A'<<24;      //   Implementor ARM
        rd |=     0<<20;      //   Variant 0
        rd |=     5<<16;      //   Architecture v5
        rd |= 0x946<< 4;      //   Model 946
        rd |=     1<< 0;      //   Revision 1
        return;

      case 0x1001:            // c0,c0,1  Cache information
        rd  =  0x0f<<24;      //   type B Harvard
        rd |= 0x0d2<<12;      //   4K data, 4-way, 32-byte lines
        rd |= 0x112<< 0;      //   8K insn, 4-way, 32-byte lines
        return;

      case 0x1002:            // c0,c0,2  TCM information
        rd  = 5<<18;          //   16K dtcm
        rd |= 6<< 6;          //   32K itcm
        return;

      case 0x1100:            // c1,c0,0  Control register (read)
        rd = control.mmu<<0 | control.dcache<<2 | 1<<3 /*write buffer*/
           | 1<<4 | 1<<5 | 1<<6 /*32 bit only, late abort model*/
           | control.endian<<7 | control.icache<<12 | !!vectorBase<<13
           | control.cachePolicy<<14 | !bxWithLoadedPC<<15
           | control.dtcm<<16 | control.dtcmLoad<<17
           | control.itcm<<18 | control.itcmLoad<<19;
        return;

      case 0x0100:            // c1,c0,0  Control register (write)
        // Bit positions match the read above; in particular the data cache
        // enable is bit 2 (bit 1 is not a control field).
        control.mmu         = rd >> 0;
        control.dcache      = rd >> 2;
        control.endian      = rd >> 7;
        control.icache      = rd >> 12;
        vectorBase          = rd & 1<<13? 0xffff0000 : 0;
        control.cachePolicy = rd >> 14;
        bxWithLoadedPC      = rd & 1<<15? false : true;
        control.dtcm        = rd >> 16;
        control.dtcmLoad    = rd >> 17;
        control.itcm        = rd >> 18;
        control.itcmLoad    = rd >> 19;
        updateTcm();
        return;

      case 0x0900: case 0x1900:  // c9,c0,0  Lock dcache
      case 0x0901: case 0x1901:  // c9,c0,1  Lock icache
        return;

      case 0x0910:               // c9,c1,0  DTCM mapping
        dtcmRegion = rd;
        updateTcm();
        return;
      case 0x1910:
        rd = dtcmRegion;
        return;

      case 0x0911:               // c9,c1,1  ITCM mapping
        itcmRegion = rd;
        updateTcm();
        return;
      case 0x1911:
        rd = itcmRegion;
        return;

      // Protection-region configuration is accepted and ignored.
      case 0x0200: case 0x1200:  // c2,c0,0  Region data cache bits
      case 0x0201: case 0x1201:  // c2,c0,1  Region insn cache bits
      case 0x0300: case 0x1300:  // c3,c0,0  Region data buffer bits
      case 0x0500: case 0x1500:  // c5,c0,0  Region data permissions
      case 0x0501: case 0x1501:  // c5,c0,1  Region insn permissions
      case 0x0502: case 0x1502:  // c5,c0,2  Region data permissions - extended
      case 0x0503: case 0x1503:  // c5,c0,3  Region insn permissions - extended
        return;

      case 0x0600: case 0x1600: case 0x0601: case 0x1601:  // c6,cN,0  Region mappings
      case 0x0610: case 0x1610: case 0x0611: case 0x1611:
      case 0x0620: case 0x1620: case 0x0621: case 0x1621:
      case 0x0630: case 0x1630: case 0x0631: case 0x1631:
      case 0x0640: case 0x1640: case 0x0641: case 0x1641:
      case 0x0650: case 0x1650: case 0x0651: case 0x1651:
      case 0x0660: case 0x1660: case 0x0661: case 0x1661:
      case 0x0670: case 0x1670: case 0x0671: case 0x1671:
        return;

      case 0x0704:  // c7,c0,4  Wait for Interrupt
      case 0x0782:  // c7,c8,2  (alternate version)
        //print("arm9: wait (ime=",interrupt.gate,", ie=",hex<8>(interrupt.enable),")\n");
        powerState = waiting;
        for(;;) {
          // Because this one is integrated with the ARM9,
          // it's "behind" the IRQ line and knows nothing about IME/IE/IF.
          if(interrupt.gate && (interrupt.enable & interrupt.flags)) {
            powerState = running;
            break;
          }
          // Idle in 256-clock slices, yielding to the ARM7 when it is due.
          istep(256);
          if(arm7.clock < -256)
            co_switch(arm7.thread);
        }
        return;

      // Cache maintenance is accepted and ignored.
      case 0x0750:  // c7, c5,0  Invalidate icache entirely
      case 0x0751:  // c7, c5,1  Invalidate icache by address
      case 0x0752:  // c7, c5,2  Invalidate icache by line (?)
      case 0x07d1:  // c7,c13,1  Preload icache by address
        return;

      case 0x0760:  // c7, c6,0  Invalidate dcache entirely
      case 0x0761:  // c7, c6,1  Invalidate dcache by address
      case 0x0762:  // c7, c6,2  Invalidate dcache by line (? zelda gallery uses this)
      //case 0x07a0://c7,c10,0  Clean dcache entirely (?)
      case 0x07a1:  // c7,c10,1  Clean dcache by address
      case 0x07a2:  // c7,c10,2  Clean dcache by line
      case 0x07a4:  // c7,c10,4  Drain write buffer
      //case 0x07e0://c7,c14,0  Flush dcache entirely (?)
      case 0x07e1:  // c7,c14,1  Flush dcache by address
      case 0x07e2:  // c7,c14,2  Flush dcache by line
        return;

      case 0x0f00: case 0x1f00:  // 0,c15,c0,0  Cache control
      case 0x6f00: case 0x7f00:  // 3,c15,c0,0  Cache tag/data index
      case 0x6f01: case 0x7f01:  // 3,c15,c0,1  R/W icache tag
      case 0x6f02: case 0x7f02:  // 3,c15,c0,2  R/W dcache tag
      case 0x6f03: case 0x7f03:  // 3,c15,c0,3  R/W icache data
      case 0x6f04: case 0x7f04:  // 3,c15,c0,4  R/W dcache data
        return;
      }
    }
  }

  // Unhandled: describe the instruction and raise an undefined exception.
  // Bit 0 of op2 is set for MRC/MCR (see the test above), so a data
  // operation (CDP) is the case where it is clear.
  bool iscdp = !(op2&1), isld = op1&1;
  print("arm9: ",hex<8>(r[15])," undefined ", iscdp? "cdp" : isld? "mrc" : "mcr", " p",cpno,", ",
    iscdp? (unsigned)op1 : op1>>1, ", ", iscdp? "cr":"r", ird, ", cr", irn, ", cr", irm, ", ", op2>>1, "\n");
  return undefined();
}
// Reads an ARM9-side I/O register.
//
// The switch key is the offset from 0x4000000 with its low two bits cleared,
// so each case covers one aligned 32-bit word. Offsets 0x0xxx address
// display/PPU 0 and 0x1xxx address display/PPU 1; shared PPU cases pick the
// unit from address bit 12. Anything not handled here is forwarded to
// CPUCore::readReg().
uint32 ARM946ES::readReg(uint32 addr, uint32 size) {
  switch(addr-0x4000000 & ~3) {
  // These registers shouldn't be here - but they contain bits from logically
  // separate hardware units, so it's not clear where else to put them.
  case 0x0000:  // Display 0 / PPU0 BG, OBJ control
    // writeReg() stores register bits 18..19 in bits 2..3 of
    // video.frameBuffer (mask 0xc of data>>16), so they are shifted back up
    // by 16 here. Shifting by 18 returned frameBuffer bits 0..1 instead,
    // which the control write never touches.
    return (video.frameBuffer<<16 & 0xc0000)
         | video.source[0]<<16 | ppu[0].regControl();

  case 0x1000:  // Display 1 / PPU1 BG, OBJ control
    return video.source[1]<<16 | ppu[1].regControl();

  case 0x0008: case 0x1008:  // BG0,BG1
  case 0x000c: case 0x100c:  // BG2,BG3
    // Two 16-bit BG control registers share each word.
    return ppu[addr>>12 & 1].regBg((addr>>1 & 2) + 0)<<0
         | ppu[addr>>12 & 1].regBg((addr>>1 & 2) + 1)<<16;

  case 0x0010: case 0x1010:  // BG0H,V
  case 0x0014: case 0x1014:  // BG1H,V
  case 0x0018: case 0x1018:  // BG2H,V
  case 0x001c: case 0x101c:  // BG3H,V
    return ppu[addr>>12 & 1].regBgOffs(addr>>2 & 3);

  case 0x0048: case 0x1048:  // Window area 0,1,out,obj
    return ppu[addr>>12 & 1].regWinArea();

  case 0x0050: case 0x1050:  // Blend control
    return ppu[addr>>12 & 1].regBlend();

  // Miscellaneous graphics registers
  case 0x0060: return gpu.regRenderOptions();
  case 0x0064: return video.regCapture();
  case 0x006c: return video.regBrightness(0);

  case 0x1060: return 0;
  case 0x1064: return 0;
  case 0x1068: return 0;
  case 0x106c: return video.regBrightness(1);

  // Memory control
  // NOTE(review): writeReg() stores these three flags as the logical NOT of
  // the written bit, while this read returns them uninverted, so a value
  // written here reads back with bits 7, 11 and 15 flipped. The ARM7 read of
  // the same register does the same. Left as-is; confirm the intended
  // polarity before changing either side.
  case 0x0204: return ramPriority<<15 | slot1access<<11
                    | slot2access<<7 | regSlot2Control();

  case 0x0240: return system.regVmap(0);
  case 0x0244: return system.regVmap(1);
  case 0x0248: return system.regVmap(2);

  // Math
  case 0x0280: return regDivideControl();
  case 0x0290: return regNumerator(0);
  case 0x0294: return regNumerator(1);
  case 0x0298: return regDenominator(0);
  case 0x029c: return regDenominator(1);
  case 0x02a0: return regQuotient(0);
  case 0x02a4: return regQuotient(1);
  case 0x02a8: return regRemainder(0);
  case 0x02ac: return regRemainder(1);
  case 0x02b0: return regSquareRootControl();
  case 0x02b4: return regSquareRoot();
  case 0x02b8: return regSquare(0);
  case 0x02bc: return regSquare(1);

  case 0x0300: return booted<<0 | flag300<<1;

  // Power
  case 0x0304:
    return video.screensPowered<< 0 | ppu[0].powered<<1
         | video.ppu0Screen    <<15 | ppu[1].powered<<9;

  // Render status
  case 0x0320: return gpu.regRenderLoad();

  // This shouldn't exist, but various games read it nonetheless..
  case 0x04a4: return 0;

  // Geometry status
  case 0x0600: return gpu.regGeomStatus();
  case 0x0604: return gpu.regGeomLoad();

  // Result tables: the word index within each table is derived from the
  // address.
  case 0x0620: case 0x0624: case 0x0628: case 0x062c:
    return gpu.regGeomPosition((addr - 0x4000620)/4);

  case 0x0630: case 0x0634:
    return gpu.regGeomNormal((addr - 0x4000630)/4);

  case 0x0640: case 0x0644: case 0x0648: case 0x064c:
  case 0x0650: case 0x0654: case 0x0658: case 0x065c:
  case 0x0660: case 0x0664: case 0x0668: case 0x066c:
  case 0x0670: case 0x0674: case 0x0678: case 0x067c:
    return gpu.regClipMatrix((addr - 0x4000640)/4);

  case 0x0680: case 0x0684: case 0x0688:
  case 0x068c: case 0x0690: case 0x0694:
  case 0x0698: case 0x069c: case 0x06a0:
    return gpu.regLightMatrix((addr - 0x4000680)/4);
  }
  return CPUCore::readReg(addr, size);
}
// Writes an ARM9-side I/O register.
//
// A lane mask is built from the access size and address (all 32 bits for a
// word, the addressed halfword for Half, the addressed byte for Byte) and
// `data` is reduced to those lanes. Most handlers take both so they can
// update only what was written. Offsets 0x0xxx address display/PPU 0 and
// 0x1xxx address display/PPU 1; shared PPU cases pick the unit from address
// bit 12. Anything not returned from the switch is forwarded to
// CPUCore::writeReg().
void ARM946ES::writeReg(uint32 addr, uint32 size, uint32 data) {
uint32 mask = 0xffffffff;
if(size == Half) mask = 0xffff << 8*(addr & 2);
if(size == Byte) mask = 0xff << 8*(addr & 3);
data &= mask;
// Key is the word-aligned offset from 0x4000000.
switch(addr-0x4000000 & ~3) {
case 0x0000: // Display 0 / PPU0 control
if(mask & 0x000f0000) {
// These bits go to the display controller - not PPU0.
// Register bits 18..19 land in bits 2..3 of frameBuffer (its low two
// bits are preserved); register bits 16..17 become source[0].
video.frameBuffer &= ~0xc;
video.frameBuffer |= 0xc & data>>16;
video.source[0] = 0x3 & data>>16;
}
ppu[0].regControl(data, mask);
return;
case 0x1000: // Display 1 / PPU1 control
if(mask & 0x000f0000) {
// Framebuffer/FIFO sources aren't supported here.
video.source[1] = 0x1 & data>>16;
}
ppu[1].regControl(data, mask);
return;
// Two 16-bit BG control registers share each word; each half is only
// forwarded if its lanes were written.
case 0x0008: case 0x1008: // BGn control
case 0x000c: case 0x100c: //
if(mask & 0x0000ffff) ppu[addr>>12 & 1].regBg((addr>>1 & 2) + 0, data>>0, mask>>0);
if(mask & 0xffff0000) ppu[addr>>12 & 1].regBg((addr>>1 & 2) + 1, data>>16, mask>>16);
return;
case 0x0010: case 0x1010: // BGn H,V scroll
case 0x0014: case 0x1014: //
case 0x0018: case 0x1018: //
case 0x001c: case 0x101c: return ppu[addr>>12 & 1].regBgOffs(addr>>2 & 3, data, mask);
case 0x0020: case 0x1020: // BG2 affine A, B, C, D
case 0x0024: case 0x1024: //
case 0x0028: case 0x1028: // BG2 origin X, Y
case 0x002c: case 0x102c: return ppu[addr>>12 & 1].regBgAffine(2, addr>>2 & 3, data, mask);
case 0x0030: case 0x1030: // BG3 affine A, B, C, D
case 0x0034: case 0x1034: //
case 0x0038: case 0x1038: // BG3 origin X, Y
case 0x003c: case 0x103c: return ppu[addr>>12 & 1].regBgAffine(3, addr>>2 & 3, data, mask);
case 0x0040: case 0x1040: return ppu[addr>>12 & 1].regWinDims(0, data, mask); // Window 0,1 X range
case 0x0044: case 0x1044: return ppu[addr>>12 & 1].regWinDims(1, data, mask); // 0,1 Y range
case 0x0048: case 0x1048: return ppu[addr>>12 & 1].regWinArea(data, mask); // area 0,1,out,obj
// Mosaic writes are accepted and discarded.
case 0x004c: case 0x104c: return; // Mosaic BG,OBJ X,Y
case 0x0050: case 0x1050: // Blend control
case 0x0054: case 0x1054: return ppu[addr>>12 & 1].regBlend(addr>>2 & 1, data, mask);
case 0x0058: case 0x1058: return; // not present, but frequently zeroed anyway
case 0x005c: case 0x105c: return; //
// Some miscellaneous registers, none of which belong here... hunh.
case 0x0060: return gpu.regRenderOptions(data, mask);
case 0x0064: return video.regCapture(data, mask);
case 0x0068: return video.regFifo(data);
case 0x006c: return video.regBrightness(0, data, mask);
case 0x1060: return;
case 0x1064: return;
case 0x1068: return;
case 0x106c: return video.regBrightness(1, data, mask);
// Memory control
case 0x0204:
// Each flag is stored as the logical NOT of its register bit.
if(mask & 0x00ff) {
slot2access = !(data & 1<<7);
}
if(mask & 0xff00) {
slot1access = !(data & 1<<11);
ramPriority = !(data & 1<<15);
}
return regSlot2Control(data, mask);
case 0x0240: return system.regVmap(0, data, mask);
case 0x0244: return system.regVmap(1, data, mask);
case 0x0248: return system.regVmap(2, data, mask);
// Math
case 0x0280: return regDivideControl(data, mask);
case 0x0290: return regNumerator(0, data, mask);
case 0x0294: return regNumerator(1, data, mask);
case 0x0298: return regDenominator(0, data, mask);
case 0x029c: return regDenominator(1, data, mask);
case 0x02b0: return regSquareRootControl(data, mask);
case 0x02b8: return regSquare(0, data, mask);
case 0x02bc: return regSquare(1, data, mask);
case 0x0300:
// `booted` is only ever OR-ed into, so this write cannot clear it.
if(mask & 0xff) {
booted |= data>>0;
flag300 = data>>1;
}
return;
case 0x0304: // Power control
if(mask & 0x00ff) {
video.screensPowered = data>>0;
ppu[0].powered = data>>1;
//gpu.renderPowered = data>>2;
//gpu.geomPowered = data>>3;
}
if(mask & 0xff00) {
ppu[1].powered = data>>9;
video.ppu0Screen = data>>15;
}
return;
// Toon edge table
case 0x0330: case 0x0334: case 0x0338: case 0x033c:
return gpu.regRenderEdgeTable((addr - 0x4000330)/4, data, mask);
// Misc
case 0x0340: return gpu.regRenderMinAlpha(data, mask);
case 0x0350: return gpu.regRenderClearColor(data, mask);
case 0x0354: return gpu.regRenderClearCoord(data, mask);
case 0x0358: return gpu.regRenderFogColor(data, mask);
case 0x035c: return gpu.regRenderFogCoord(data, mask);
// Fog table
case 0x0360: case 0x0364: case 0x0368: case 0x036c:
case 0x0370: case 0x0374: case 0x0378: case 0x037c:
return gpu.regRenderFogTable((addr - 0x4000360)/4, data, mask);
// Toon shade table
case 0x0380: case 0x0384: case 0x0388: case 0x038c:
case 0x0390: case 0x0394: case 0x0398: case 0x039c:
case 0x03a0: case 0x03a4: case 0x03a8: case 0x03ac:
case 0x03b0: case 0x03b4: case 0x03b8: case 0x03bc:
return gpu.regRenderToonTable((addr - 0x4000380)/4, data, mask);
// GPU command pipe - buffered - mirrored to support STM
case 0x0400: case 0x0404: case 0x0408: case 0x040c: case 0x0410: case 0x0414: case 0x0418: case 0x041c:
case 0x0420: case 0x0424: case 0x0428: case 0x042c: case 0x0430: case 0x0434: case 0x0438: case 0x043c:
return gpu.sendGeomBuffered(data);
// GPU command pipe - immediate - the address itself is used as command number.
case 0x0440: case 0x0444: case 0x0448: case 0x044c: case 0x0450: case 0x0454: case 0x0458: case 0x045c:
case 0x0460: case 0x0464: case 0x0468: case 0x046c: case 0x0470: case 0x0474: case 0x0478: case 0x047c:
case 0x0480: case 0x0484: case 0x0488: case 0x048c: case 0x0490: case 0x0494: case 0x0498: case 0x049c:
case 0x04a0: case 0x04a4: case 0x04a8: case 0x04ac: case 0x04b0: case 0x04b4: case 0x04b8: case 0x04bc:
case 0x04c0: case 0x04c4: case 0x04c8: case 0x04cc: case 0x04d0: case 0x04d4: case 0x04d8: case 0x04dc:
case 0x04e0: case 0x04e4: case 0x04e8: case 0x04ec: case 0x04f0: case 0x04f4: case 0x04f8: case 0x04fc:
case 0x0500: case 0x0504: case 0x0508: case 0x050c: case 0x0510: case 0x0514: case 0x0518: case 0x051c:
case 0x0520: case 0x0524: case 0x0528: case 0x052c: case 0x0530: case 0x0534: case 0x0538: case 0x053c:
case 0x0540: case 0x0544: case 0x0548: case 0x054c: case 0x0550: case 0x0554: case 0x0558: case 0x055c:
case 0x0560: case 0x0564: case 0x0568: case 0x056c: case 0x0570: case 0x0574: case 0x0578: case 0x057c:
case 0x0580: case 0x0584: case 0x0588: case 0x058c: case 0x0590: case 0x0594: case 0x0598: case 0x059c:
case 0x05a0: case 0x05a4: case 0x05a8: case 0x05ac: case 0x05b0: case 0x05b4: case 0x05b8: case 0x05bc:
case 0x05c0: case 0x05c4: case 0x05c8: case 0x05cc: case 0x05d0: case 0x05d4: case 0x05d8: case 0x05dc:
case 0x05e0: case 0x05e4: case 0x05e8: case 0x05ec: case 0x05f0: case 0x05f4: case 0x05f8: case 0x05fc:
// The command number is the word index within the 0x400..0x5fc window.
return gpu.sendGeomImmediate(addr>>2 & 0x7f, data);
// Geometry engine
case 0x0600: return gpu.regGeomStatus(data, mask);
case 0x0610: return gpu.regGeomMaxPointDepth(data, mask);
// zelda gallery writes 0x2468ace0 here - ???
case 0x0640: break;
}
return CPUCore::writeReg(addr, size, data);
}
void ARM946ES::updateTcm() {
uint32 itcmAddr = 0, itcmSize = itcmRegion>>1 & 0x1f;
uint32 dtcmAddr = dtcmRegion & ~0xfff, dtcmSize = dtcmRegion>>1 & 0x1f;
// Disable by default
itcmRCompare = itcmWCompare = ~0;
dtcmRCompare = dtcmWCompare = ~0;
itcmRMask = itcmWMask = 0;
dtcmRMask = dtcmWMask = 0;
// Enable bit enables access; load bit disables reading
if(control.itcm) itcmWCompare = itcmAddr, itcmWMask = -1 << 9+itcmSize;
if(control.dtcm) dtcmWCompare = dtcmAddr, dtcmWMask = -1 << 9+dtcmSize;
if(control.itcm && !control.itcmLoad) itcmRCompare = itcmAddr, itcmRMask = -1 << 9+itcmSize;
if(control.dtcm && !control.dtcmLoad) dtcmRCompare = dtcmAddr, dtcmRMask = -1 << 9+dtcmSize;
}
#include "math.cpp"

View File

@ -1,66 +0,0 @@
// Compile-time bit manipulation helpers, mostly for describing instruction
// encodings as binary string literals.
namespace Bit {

  // Counts the set bits of n.
  //
  // w is the half-width, in bits, of the span being examined and defaults to
  // half the width of T; it is expected to be a power of two. The span is
  // split in two and each half is counted recursively until single bits
  // remain. The recursion names both template arguments explicitly: T cannot
  // be left to deduction once a width is supplied, and the earlier
  // `count<w/2>(...)` form passed a value where the type parameter was
  // expected, so the template could not be instantiated at all.
  template<typename T, int w = sizeof(T)*8/2>
  constexpr int count(T n) {
    return n == 0? 0
         : w == 0? int(n & 1)
         : count<T, w/2>(n >> w) + count<T, w/2>(n);
  }

  // These functions deal with binary string literals like:
  //   "1000110.... // xxxx...0000"
  //
  // Firstly, spaces are ignored. Dots '.' are don't-care bits.
  // An / is shorthand for a blank nibble (4 bits or "....").
  //
  //   binary()  returns a mask of the 1s.
  //   mask()    returns a mask of the 1s and 0s.
  //   field()   returns a mask of letters for use with collect().
  //   match()   uses the 1s and 0s; anything else is wildcard.
  //

  // True for any character that is not '.', '/', '0' or '1'. Note that a
  // space also counts here; callers exclude it separately.
  constexpr bool isfield(char c) { return c!='.'&& c!='/'&& c!='0'&& c!='1'; }

  // Value with only bit n set, or 0 when n is 32 or more.
  constexpr long bit(int n) { return n < 32? 1<<n : 0; }

  // Bit position of the first character of s: the number of bits described
  // by the rest of the string ('/' counts four, spaces none), so the last
  // bit character is position 0. Returns -1 for an empty string.
  constexpr int bitpos(const char *s) { return !*s? -1 : *s=='/'? 4 + bitpos(s+1) : (*s!=' ') + bitpos(s+1); }

  // Number of field characters in s, minus one (-1 for an empty string).
  constexpr int fieldpos(const char *s){ return !*s? -1 : (*s!=' ' && isfield(*s)) + fieldpos(s+1); }

  // Finds index of the lowest bit set in a compile-time constant.
  // Returns -1 for zero. The search halves a 2*s-bit window each step, so
  // with the default s = 16 only the low 32 bits are examined.
  template<int s = 16> constexpr long lowest_bit_no(long x) {
    return !x? -1 : !s? 0 :
      x & (1<<s)-1? lowest_bit_no<s/2>(x)
                  : s + lowest_bit_no<s/2>(x >> s);
  }

  // Mask with a 1 wherever the pattern has a '1'.
  constexpr long binary(const char *s) {
    return !*s? 0 : (*s=='1') << bitpos(s) | binary(s+1);
  }

  // Mask with a 1 wherever the pattern has a fixed bit ('0' or '1').
  constexpr long mask(const char *s) {
    return !*s? 0 : (*s=='0'||*s=='1') << bitpos(s) | mask(s+1);
  }

  // Mask with a 1 wherever the pattern has a field letter.
  constexpr long field(const char *s) {
    return !*s? 0 : (*s!=' '&& isfield(*s)) << bitpos(s) | field(s+1);
  }

  // True when every fixed bit of the pattern agrees with data.
  constexpr bool match(long data, const char *s) {
    return (data & mask(s)) == binary(s);
  }

  // Masks all 'bits' from the input and shifts them down into one contiguous
  // value. This inlines to a sequence of &, >> and + instructions.
  // Example:
  //   collect<field("xxxx....yyyy....zzzz")>(i)
  // joins i's 1st, 3rd, and 5th nibbles (0x12345 => 0x135).
  template<long bits> constexpr long collect(long data);

  // Handles one run of nmask identical low bits of `bits`: a run of ones is
  // kept (and the remainder is placed above it), a run of zeros is skipped.
  template<long bits, int nmask> constexpr long collect_field(long data) {
    return (data & (1<<nmask)-1) * (bits&1)
         + (collect<((unsigned long)bits >> nmask)>(data >> nmask) << nmask*(bits&1));
  }

  // The run length is the index of the first bit that differs from bit 0.
  template<long bits> constexpr long collect(long data) {
    return collect_field<bits, lowest_bit_no(bits & 1? ~bits : bits)>(data);
  }

  // Nothing left to collect.
  template<> constexpr long collect<0>(long data) {
    return 0;
  }
}

View File

@ -1,214 +0,0 @@
// Clears the register file, every banked register set and all saved status
// registers, puts the flags into their initial state and then calls reset().
void ARMCore::power() {
  for(int n = 0; n < 16; n++) {
    r[n] = 0;
  }
  for(int n = 0; n < 7; n++) {
    r_fiq[n] = 0;
  }
  for(int n = 0; n < 2; n++) {
    r_und[n] = 0;
    r_abt[n] = 0;
    r_svc[n] = 0;
    r_irq[n] = 0;
  }

  spsr_und  = 0;
  spsr_abt  = 0;
  spsr_svc  = 0;
  spsr_irq  = 0;
  spsr_fiq  = 0;
  spsr_none = 0;

  carryout = 0;

  Qf = 0;
  Vf = 0;
  Cf = 0;
  Nf = 0;
  // Zf is kept non-zero here; readCpsr() reports Z as set only when Zf is 0.
  Zf = -1;

  reset();
}
// Returns a reference to the saved status register belonging to the current
// mode. Modes without one of their own share the spsr_none placeholder.
uint32& ARMCore::spsr() {
  if(mode == FIQ) return spsr_fiq;
  if(mode == IRQ) return spsr_irq;
  if(mode == SVC) return spsr_svc;
  if(mode == UND) return spsr_und;
  if(mode == ABT) return spsr_abt;
  return spsr_none;
}
// Packs the separately stored flags and mode into a CPSR value.
//
// N, C, V and Q are taken from bit 31 of their flag variables. Z is derived
// from Zf being zero. The five are then placed at bits 31, 30, 29, 28 and 27,
// followed by I (bit 7), F (bit 6), T (bit 5) and the mode in the low bits.
uint32 ARMCore::readCpsr() {
uint32 n = Nf & 1<<31, c = Cf & 1<<31;
uint32 v = Vf & 1<<31, q = Qf & 1<<31;
uint32 z = !Zf <<31;
return n>>0 | z>>1 | c>>2 | v>>3 | q>>4 | If<<7 | Ff<<6 | Tf<<5 | mode<<0;
}
// Writes selected parts of the CPSR.
// mask bit 3 (8) selects the flag bits (N Z C V Q), mask bit 0 (1) selects
// the control bits (I F T and mode). The control part is ignored in user
// mode.
void ARMCore::writeCpsr(uint32 value, unsigned mask) {
if(mask & 8) {
// Move each flag up to bit 31, which is where the lazy format keeps it.
// Zf is inverted because Z is considered set when Zf == 0.
Nf = value<<0 & 1<<31; Zf = !(value<<1 & 1<<31);
Cf = value<<2 & 1<<31; Vf = value<<3 & 1<<31;
Qf = value<<4 & 1<<31;
}
if(mode == USR)
return; // cpsr_c is privileged
if(mask & 1) {
// Put the outgoing mode's registers back into their bank before changing.
swapBank(mode);
If = value>>7 & 1; Ff = value>>6 & 1;
Tf = value>>5 & 1; mode = value & 0x1f;
mode |= 0x10; // enforce 32-bit addressing
if(mode & 0x0c)
mode |= 0x03; // enforce valid mode bits
// Bring the registers of the new mode into r[].
swapBank(mode);
}
}
// Enters an exception: switches to 'tomode', saves the return address and
// the old CPSR, masks IRQs, forces ARM state and jumps to vectorBase+offset.
void ARMCore::vector(uint32 offset, unsigned tomode) {
// Snapshot the CPSR before the mode and flags are modified.
uint32 psr = readCpsr();
// swapBank() exchanges registers, so the first call restores the old
// mode's bank and the second one activates the bank of the new mode.
swapBank(mode);
swapBank(mode = tomode);
// vector() is called between the instruction fetch and PC increment..
// therefore in ARM mode, r15 == PC+8. THUMB requires an adjustment
// (done in the caller).
r[14] = r[15] - 4;
r[15] = vectorBase + offset;
If = 1;
Tf = 0;
// mode already names the new mode, so this stores into its SPSR.
spsr() = psr;
branched = true;
}
// Exchanges the banked registers of 'mode' with the active register file.
// FIQ swaps r8-r14 with r_fiq[0..6]; IRQ, SVC, ABT and UND swap r13-r14
// with their two-entry banks. Any other mode leaves count at 0 and is a
// no-op. Calling it twice with the same mode restores the original state.
void ARMCore::swapBank(unsigned mode) {
// Both pointers start one past the last element to swap and walk downwards;
// 'user' therefore begins at r[15] and the first swap touches r[14].
uint32 *bank = nullptr, *user = &r[15];
unsigned count = 0;
if(mode == FIQ) bank = &r_fiq[count = 7];
if(mode == IRQ) bank = &r_irq[count = 2];
if(mode == SVC) bank = &r_svc[count = 2];
if(mode == ABT) bank = &r_abt[count = 2];
if(mode == UND) bank = &r_und[count = 2];
while(count--) std::swap(*--bank, *--user);
}
// Evaluates a 4-bit ARM condition code against the lazy flags.
// Z is set when Zf == 0; N, C and V are set when the matching variable is
// negative (bit 31 set). Codes outside 0..14 evaluate to false.
bool ARMCore::evalCond(unsigned cond) {
  switch(cond) {
  case  0: return Zf == 0;                     // eq
  case  1: return Zf != 0;                     // ne
  case  2: return Cf < 0;                      // hs / cs
  case  3: return Cf >= 0;                     // lo / cc
  case  4: return Nf < 0;                      // mi
  case  5: return Nf >= 0;                     // pl
  case  6: return Vf < 0;                      // vs
  case  7: return Vf >= 0;                     // vc
  case  8: return Cf < 0 && Zf != 0;           // hi
  case  9: return Cf >= 0 || Zf == 0;          // ls
  case 10: return (Nf ^ Vf) >= 0;              // ge
  case 11: return (Nf ^ Vf) < 0;               // lt
  case 12: return (Nf ^ Vf) >= 0 && Zf != 0;   // gt
  case 13: return (Nf ^ Vf) < 0 || Zf == 0;    // le
  case 14: return true;                        // al
  default: return false;
  }
}
// Redirects execution to 'target', selecting THUMB (tf = true) or ARM state,
// and records that a branch happened.
void ARMCore::branch(bool tf, uint32 target) {
  Tf = tf;
  r[15] = target;
  branched = true;
}
// Data load with LDR-style rotation: the value returned by read() is rotated
// right by 8 bits per low address bit, one extra cycle is consumed, and the
// result is truncated to the access size.
uint32 ARMCore::load(uint32 addr, uint32 size) {
  uint32 word = ror(read(addr, size, false), 8*(addr & 3));
  step(1);
  if(size == Byte) return word & 0xff;
  if(size == Half) return word & 0xffff;
  return word;
}
// Data store with STR-style mirroring: bytes and halfwords are replicated
// across all lanes of the 32-bit value before it is handed to write().
void ARMCore::store(uint32 addr, uint32 size, uint32 data) {
  if(size == Byte) {
    data = (data & 0xff) * 0x01010101;
  } else if(size == Half) {
    data = (data & 0xffff) * 0x00010001;
  }
  write(addr, size, false, data);
}
// Logical shift left of rm by rs.
// Returns the shifted value together with the shifter carry in lazy format
// (the carry is whatever ends up in bit 31). A shift of 0 passes rm through
// and keeps the current carry flag.
// The result is built with an explicit constructor call: the former braced
// return narrowed uint32 to int32 inside list-initialization, which is
// ill-formed in C++11.
ARMCore::SOut ARMCore::lsl(uint32 rm, uint8 rs) {
  if(rs == 0) return SOut(rm, Cf);
  const uint32 value = rs > 31 ? 0 : rm << rs;
  // The last bit shifted out is bit (32-rs); rs-1 moves it into bit 31.
  const uint32 carry = rs > 32 ? 0 : rm << (rs - 1);
  return SOut(value, (int32)carry);
}
// Logical shift right of rm by rs.
// Returns the shifted value together with the shifter carry in lazy format
// (bit 31 significant). A shift of 0 passes rm through and keeps the current
// carry flag.
// Uses an explicit constructor call instead of a braced return to avoid the
// ill-formed uint32 -> int32 narrowing in list-initialization.
ARMCore::SOut ARMCore::lsr(uint32 rm, uint8 rs) {
  if(rs == 0) return SOut(rm, Cf);
  const uint32 value = rs > 31 ? 0 : rm >> rs;
  // The last bit shifted out is bit (rs-1); 32-rs moves it into bit 31.
  const uint32 carry = rs > 32 ? 0 : rm << (32 - rs);
  return SOut(value, (int32)carry);
}
// Arithmetic shift right of rm by rs (sign bit is replicated).
// Returns the shifted value together with the shifter carry in lazy format
// (bit 31 significant). A shift of 0 passes rm through and keeps the current
// carry flag; shifts above 31 yield all sign bits.
// Uses an explicit constructor call and casts instead of a braced return to
// avoid ill-formed narrowing conversions in list-initialization.
ARMCore::SOut ARMCore::asr(uint32 rm, uint8 rs) {
  if(rs == 0) return SOut(rm, Cf);
  const int32 value = rs > 31 ? (int32)rm >> 31 : (int32)rm >> rs;
  // For rs > 32 every shifted-out bit equals the sign bit, i.e. bit 31 of rm.
  const uint32 carry = rs > 32 ? rm : rm << (32 - rs);
  return SOut((uint32)value, (int32)carry);
}
// Rotate right of rm by rs.
// Returns the rotated value together with the shifter carry in lazy format
// (bit 31 significant). A rotation of 0 keeps the current carry flag; a
// non-zero multiple of 32 leaves rm unchanged and takes the carry from its
// bit 31.
// Uses explicit constructor calls instead of braced returns to avoid the
// ill-formed uint32 -> int32 narrowing in list-initialization.
ARMCore::SOut ARMCore::ror(uint32 rm, uint8 rs) {
  if(rs == 0) return SOut(rm, Cf);
  rs &= 31;
  if(rs == 0) return SOut(rm, (int32)rm); // rs == multiple of 32
  // The bits wrapped to the top; bit 31 of them is also the carry.
  const uint32 wrapped = rm << (32 - rs);
  return SOut(wrapped | rm >> rs, (int32)wrapped);
}
// Rotate right with extend: shifts rm right by one, inserting the current
// carry flag at bit 31; the old bit 0 becomes the new carry (lazy format).
// Uses an explicit constructor call instead of a braced return to avoid the
// ill-formed uint32 -> int32 narrowing in list-initialization.
ARMCore::SOut ARMCore::rrx(uint32 rm) {
  const uint32 value = (Cf & 1<<31) | rm >> 1;
  const uint32 carry = rm << 31;
  return SOut(value, (int32)carry);
}
// Applies an immediate-encoded shift to register irm.
// opcode: 0 = lsl, 1 = lsr, 2 = asr, 3 = ror. An amount of 0 encodes a shift
// by 32 for lsr/asr and selects rrx for ror.
ARMCore::SOut ARMCore::shiftImm(uint4 irm, uint2 opcode, uint5 rs) {
  const uint32 value = r[irm];
  if(opcode == 0) return lsl(value, rs);
  if(opcode == 1) return lsr(value, rs? (uint8)rs : 32);
  if(opcode == 2) return asr(value, rs? (uint8)rs : 32);
  return rs == 0 ? rrx(value) : ror(value, rs);
}
// Executes one data-processing operation.
// 'opcode' is the 4-bit ALU operation times two plus the S bit. rd receives
// the result (compare/test operations leave it untouched), rn is the first
// operand and rm the shifter output. Flags are updated through bitf()/sumf(),
// which only act when their first argument (the S bit) is set.
// Subtractions are passed to sumf() as additions of the complemented operand.
// Opcodes not listed here fall through without any effect.
void ARMCore::alu(unsigned opcode, uint32& rd, uint32 rn, SOut rm) {
if(opcode == 13*2+0) return bitf(0, rd = rm, rm); // mov
if(opcode == 2*2+0) return sumf(0, rd = rn - rm, rn,~rm); // sub
if(opcode == 4*2+0) return sumf(0, rd = rn + rm, rn, rm); // add
if(opcode == 0*2+0) return bitf(0, rd = rn & rm, rm); // and
if(opcode == 12*2+0) return bitf(0, rd = rn | rm, rm); // orr
if(opcode == 14*2+0) return bitf(0, rd = rn &~rm, rm); // bic
if(opcode == 1*2+0) return bitf(0, rd = rn ^ rm, rm); // eor
if(opcode == 13*2+1) return bitf(1, rd = rm, rm); // movs
if(opcode == 10*2+1) return sumf(1, rn - rm, rn,~rm); // cmps
if(opcode == 11*2+1) return sumf(1, rn + rm, rn, rm); // cmns
if(opcode == 2*2+1) return sumf(1, rd = rn - rm, rn,~rm); // subs
if(opcode == 4*2+1) return sumf(1, rd = rn + rm, rn, rm); // adds
if(opcode == 0*2+1) return bitf(1, rd = rn & rm, rm); // ands
if(opcode == 12*2+1) return bitf(1, rd = rn | rm, rm); // orrs
if(opcode == 14*2+1) return bitf(1, rd = rn &~rm, rm); // bics
if(opcode == 1*2+1) return bitf(1, rd = rn ^ rm, rm); // eors
if(opcode == 8*2+1) return bitf(1, rn & rm, rm); // tsts
if(opcode == 9*2+1) return bitf(1, rn ^ rm, rm); // teqs
if(opcode == 3*2+0) return sumf(0, rd = rm - rn, ~rn, rm); // rsb
if(opcode == 5*2+0) return sumf(0, rd = rn + rm +!!(Cf>>31), rn, rm); // adc
if(opcode == 6*2+0) return sumf(0, rd = rn - rm - !(Cf>>31), rn,~rm); // sbc
if(opcode == 7*2+0) return sumf(0, rd = rm - rn - !(Cf>>31),~rn, rm); // rsc
if(opcode == 15*2+0) return bitf(0, rd = ~rm, rm); // mvn
if(opcode == 3*2+1) return sumf(1, rd = rm - rn, ~rn, rm); // rsbs
if(opcode == 5*2+1) return sumf(1, rd = rn + rm +!!(Cf>>31), rn, rm); // adcs
if(opcode == 6*2+1) return sumf(1, rd = rn - rm - !(Cf>>31), rn,~rm); // sbcs
if(opcode == 7*2+1) return sumf(1, rd = rm - rn - !(Cf>>31),~rn, rm); // rscs
if(opcode == 15*2+1) return bitf(1, rd = ~rm, rm); // mvns
}
// Signed-overflow indicator for rd = rn + rm: bit 31 of the result is set
// when both operands share a sign and the result's sign differs from it.
uint32 ARMCore::oflow(uint32 rd, uint32 rn, uint32 rm) {
  const uint32 operandsAgree = ~(rn ^ rm);
  const uint32 resultDiffers = rn ^ rd;
  return operandsAgree & resultDiffers;
}
// Flag update for logical operations: when s is set, C is taken from the
// shifter carry and N/Z are derived lazily from the result.
void ARMCore::bitf(bool s, uint32 rd, SOut rm) {
  if(!s) return;
  Cf = rm.carry;
  Zf = rd;
  Nf = Zf;
}
// Flag update for additions (subtractions pass a complemented operand): when
// s is set, V comes from oflow(), C is derived from V and the operand/result
// bits, and N/Z are derived lazily from the result.
void ARMCore::sumf(bool s, uint32 rd, uint32 rn, uint32 rm) {
  if(!s) return;
  Vf = oflow(rd,rn,rm);
  Cf = Vf ^ rd^rn^rm;
  Zf = rd;
  Nf = Zf;
}
#include "arm.cpp"
#include "thumb.cpp"

View File

@ -1,135 +0,0 @@
// Shared ARM/THUMB interpreter core. Holds the register file, the banked
// registers, the lazily evaluated status flags and the instruction handlers.
// Memory access, timing and coprocessor behaviour are supplied by a derived
// class through the pure virtual functions below.
struct ARMCore {
// Model specific implementation
virtual void power();
virtual void step(unsigned n) = 0;
// Instruction and data access
virtual uint32 fetch(uint32 addr, uint32 size, bool s) = 0;
virtual uint32 read(uint32 addr, uint32 size, bool s) = 0;
virtual void write(uint32 addr, uint32 size, bool s, uint32 data) = 0;
// CDP, MCR, MRC
virtual void dataCop(uint4 cpno, uint4 op1, uint4 ird, uint4 irn, uint4 irm, uint4 op2) = 0;
// Vectors
// Each helper enters the exception through vector() and then, if the saved
// PSR has the T bit (1<<5) set, adjusts the link register for THUMB.
void reset() { vector(0x000, SVC); Ff = 1; } // THUMB:
void fiq() { vector(0x01c, FIQ); Ff = 1; if(spsr() & 1<<5) r[14] += 4; } // LR = insn+4
void irq() { vector(0x018, IRQ); if(spsr() & 1<<5) r[14] += 4; }
void swi() { vector(0x008, SVC); if(spsr() & 1<<5) r[14] += 2; } // LR = insn+2
void undefined() { vector(0x004, UND); if(spsr() & 1<<5) r[14] += 2; }
void pfabort() { vector(0x00c, ABT); if(spsr() & 1<<5) r[14] += 2; }
void abort() { vector(0x010, ABT); if(spsr() & 1<<5) r[14] += 8; } // LR = insn+8
// PSRs and mode switching
uint32& spsr();
uint32 readCpsr();
void writeCpsr(uint32 value, uint32 mask);
void vector(uint32 offset, unsigned tomode);
void swapBank(unsigned mode);
alwaysinline bool evalCond(unsigned cond);
void branch(bool tf, uint32 target);
// LDR rotation, STR mirroring
uint32 load(uint32 addr, uint32 size);
void store(uint32 addr, uint32 size, uint32 data);
// Shifts, arithmetic
struct SOut {
// Shifter output
uint32 rm;
int32 carry;
// NOTE: the single-argument constructor leaves 'carry' uninitialised.
SOut(uint32 rm) : rm(rm) {}
SOut(uint32 rm, int32 carry) : rm(rm), carry(carry) {}
operator uint32() { return rm; }
};
alwaysinline SOut lsl(uint32 rm, uint8 rs);
alwaysinline SOut lsr(uint32 rm, uint8 rs);
alwaysinline SOut asr(uint32 rm, uint8 rs);
alwaysinline SOut ror(uint32 rm, uint8 rs);
alwaysinline SOut rrx(uint32 rm);
alwaysinline SOut shiftImm(uint4 irm, uint2 opcode, uint5 rs);
alwaysinline void alu(unsigned opcode, uint32& rd, uint32 rn, SOut rm);
// Flags
alwaysinline uint32 oflow(uint32 rd, uint32 rn, uint32 rm);
alwaysinline void bitf(bool s, uint32 rd, SOut rm);
alwaysinline void sumf(bool s, uint32 rd, uint32 rn, uint32 rm);
// ARM argument Rm
alwaysinline uint32 armRm(uint4 irm) { return r[irm]; }
alwaysinline SOut armImmed(uint8 value, uint4 rs) { return ror(value, 2*rs); }
alwaysinline uint32 armOffset8(uint4 lo, uint4 hi) { return lo | hi<<4; }
alwaysinline uint32 armOffset12(uint12 value) { return value; }
// ARM handlers
void armWritePsr(uint1 opcode, uint4 mask, uint32 rm);
void armReadPsr(uint1 opcode, uint4 ird);
void armBranch(uint1 link, uint1 exch, int26 offset);
void armBranchEx(uint1 link, uint4 irm);
void armClz(uint4 ird, uint4 irm);
void armDspAdd(uint2 opcode, uint4 ird, uint4 irn, uint4 irm);
void armDspMul(uint2 opcode, uint2 xy, uint4 ird, uint4 irn, uint4 irm, uint4 irs);
void armMultiply(uint4 opcode, uint4 ird, uint4 irn, uint4 irm, uint4 irs);
void armDataRs(uint5 opcode, uint4 ird, uint4 irn, uint4 irm, uint2 sh, uint4 irs);
void armData(uint5 opcode, uint4 ird, uint4 irn, SOut rm);
void armMemSwap(uint1 opcode, uint4 ird, uint4 irn, uint4 irm);
void armMem(uint5 opcode, uint4 ird, uint4 irn, uint32 rm);
void armMem_v4(uint5 opcode, uint2 sh, uint4 ird, uint4 irn, uint32 rm);
void armMem_v5(uint5 opcode, uint2 sh, uint4 ird, uint4 irn, uint32 rm);
void armBlock(uint5 opcode, uint4 irn, uint16 rlist);
// THUMB handlers
void thumbDataLo(uint4 opcode, uint3 ird, uint3 irm);
void thumbDataHi(uint2 opcode, uint4 ird, uint4 irm);
void thumbDataImm(uint2 opcode, uint3 ird, uint8 rm);
void thumbShiftImm(uint2 opcode, uint3 ird, uint3 irm, uint5 rs);
void thumbAddSub(uint2 opcode, uint3 ird, uint3 irn, uint3 irm);
void thumbMemImm(uint5 opcode, uint3 ird, uint3 irn, uint5 rm);
void thumbMemReg(uint3 opcode, uint3 ird, uint3 irn, uint3 irm);
void thumbRelative(uint5 opcode, uint3 ird, uint8 rm);
void thumbAddSP(uint1 opcode, uint7 rm);
void thumbBlock(uint4 opcode, uint3 irn, uint8 rlist);
void thumbCond(uint4 opcode, int8 offset);
void thumbBranch(int11 offset);
void thumbBh(int11 offset);
void thumbBlx(uint1 link, uint11 offset);
// CPSR
// NZCVQ are stored in a lazy format:
// - Z is only true when 0.
// - NCVQ are true when < 0 (bit 31 set).
int32 Nf, Zf, Cf, Vf, Qf;
uint8 If, Ff, Tf, mode;
// Output from shifter
int32 carryout; // same format as Cf
// Pipeline data
bool branched;
uint32 ifetch, idecode, iexecute;
// Configuration
uint32 vectorBase; // Address of vector table
bool bxWithLoadedPC; // Does LDR/LDM PC behave as MOV or BX Rm?
// Register banks
// - r_fiq[n] etc. hold registers in other banks.
// - r[n] holds registers for the active mode
// - This means that in FIQ mode, r_fiq[n] holds shadowed _user_ registers.
// - SPSR is not swapped, use spsr() to access it.
uint32 r[16], spsr_none, r_abt[2], spsr_abt;
uint32 r_fiq[7], spsr_fiq, r_irq[2], spsr_irq;
uint32 r_svc[2], spsr_svc, r_und[2], spsr_und;
// Exception modes
// Values are the 5-bit mode numbers stored in the low bits of the CPSR.
enum {
USR = 0x10,
FIQ = 0x11, IRQ = 0x12, SVC = 0x13,
UND = 0x1b, ABT = 0x17, SYS = 0x1f,
};
};

View File

@ -1,337 +0,0 @@
#include <nds/nds.hpp>
#include "bit.hpp"
// imatch("pattern") tests the variable 'i' (which must be in scope at the
// point of use) against a bit pattern string: the positions fixed by the
// pattern are masked out of i and compared with the pattern's value.
#define imatch(bits) ((i & force< Bit::mask(bits) >()) \
== force< Bit::binary(bits) >())
// Returns its template argument. Because a template argument has to be a
// constant expression, routing a value through force<> makes the compiler
// evaluate it at compile time.
template<uint32 arg> static constexpr uint32 force() { return arg; }
namespace NintendoDS {
#include "core.cpp"
#include "disasm.cpp"
ARM7TDMI arm7;
ARM946ES arm9;
// Starts without a cothread; the handle stays null until one is assigned.
CPUCore::CPUCore() : thread(nullptr) {
}
// Releases the cothread, if one was ever created.
CPUCore::~CPUCore() {
  if(thread != nullptr) {
    co_delete(thread);
  }
}
// Power-on reset of everything both CPUs have in common: the ARM core, the
// clock, configuration, interrupt controller, display status, inter-CPU
// message queue and sync port, the event queue, and the four DMA channels
// and timers.
void CPUCore::power() {
  ARMCore::power();

  clock = 0;
  trace = false;
  powerState = running;

  // Configuration
  config.xorSeeds[0] = 0;
  config.xorSeeds[1] = 0;
  config.slot2ramTiming = 0;
  config.slot2romTiming0 = 0;
  config.slot2romTiming1 = 0;
  config.slot2phi = 0;

  // Interrupt controller
  interrupt.gate = false;
  interrupt.enable = false;
  interrupt.flags = 0;

  // Display status
  status.inVBlank = false;
  status.inHBlank = false;
  status.inVMatch = false;
  status.irqOnVBlank = false;
  status.irqOnHBlank = false;
  status.irqOnVMatch = false;
  status.vcompare = 0;

  // Message queue: starts out empty
  msg.enable = false;
  msg.irqOnRecv = false;
  msg.irqOnSent = false;
  msg.error = 0;
  msg.empty = 1;
  msg.full = 0;
  msg.writeOffset = 0;
  msg.readOffset = 0;
  for(auto &word : msg.buffer) {
    word = 0;
  }

  // Sync port
  sync.enable = false;
  sync.output = 0;

  // Pending events
  event.irq = false;
  event.anydma = false;
  event.queue.reset();

  for(unsigned n = 0; n < 4; n++) {
    auto &channel = dma[n];
    channel.enable = false;
    channel.irq = false;
    channel.size = 0;
    channel.repeat = false;
    channel.trigger = 0;
    channel.srcStep = 0;
    channel.destStep = 0;
    channel.source = 0;
    channel.dest = 0;
    channel.count = 0;
    channel.fill = 0;
    channel.init.source = 0;
    channel.init.dest = 0;
    channel.init.count = 0;

    auto &tm = timer[n];
    tm.enable = false;
    tm.irq = false;
    tm.cascade = false;
    tm.divider = 0;
    tm.reload = 0;
    tm.count = 0;
    tm.lastUpdate = 0;
    // The timer's event calls back into this CPU with the timer's index.
    tm.event.action = [&, n]() { updateTimer(n); };
  }
}
// Start of the drawing portion of a scanline: leaves H-blank, enters or
// leaves V-blank at lines 192 / 262, evaluates the line-compare match and
// fires the line-dependent DMA triggers.
void CPUCore::hdraw() {
  status.inHBlank = false;
  status.inVMatch = false;

  if(video.line == 192) {
    status.inVBlank = true;
    if(status.irqOnVBlank) {
      interrupt.flags |= irqVBlank;
    }
    // Trigger vblank DMAs
    dmaTrigger(0xf, 1);
  }

  if(video.line == 262) {
    status.inVBlank = false;
  }

  // Trigger peripheral-to-framebuffer DMA
  if(config.arm9 && video.line >= 2 && video.line < 192+2) {
    dmaTrigger(0xf, 3);
  }

  if(status.vcompare == video.line) {
    status.inVMatch = true;
    if(status.irqOnVMatch) interrupt.flags |= irqVMatch;
  }
}
// Start of horizontal blanking: sets the status bit, raises the H-blank
// interrupt when enabled and, when config.arm9 is set, starts H-blank DMA
// for lines below 192.
void CPUCore::hblank() {
  status.inHBlank = true;

  if(status.irqOnHBlank) {
    interrupt.flags |= irqHBlank;
  }

  // Trigger HDMAs
  if(config.arm9 && video.line < 192) {
    dmaTrigger(0xf, 2);
  }
}
// Formats a PSR value for the trace log: one letter per set flag
// (N Z C V Q I F T, a blank when clear), a slash and the mode number as two
// hex digits.
string CPUCore::tracePsr(uint32 value) {
  auto flag = [value](unsigned bit, const char *letter) -> const char* {
    return (value & (1u << bit)) ? letter : " ";
  };
  return { flag(31, "N"),
           flag(30, "Z"),
           flag(29, "C"),
           flag(28, "V"),
           flag(27, "Q"),
           flag( 7, "I"),
           flag( 6, "F"),
           flag( 5, "T"),
           "/", hex<2>(value & 0x1f) };
}
// Prints one trace record to stdout: r0-r7 followed by the CPSR, r8-r15
// followed by the SPSR (or a placeholder in SYS/USR mode), and finally the
// address, raw encoding and disassembly of the instruction being executed.
void CPUCore::traceInsn() {
  auto dumpRegs = [this](unsigned first, unsigned last) {
    for(unsigned n = first; n < last; n++) {
      print(hex<1>(n),":",hex<8>(r[n])," ");
    }
  };

  dumpRegs(0, 8);
  print(tracePsr(readCpsr()), "\n");

  dumpRegs(8, 16);
  if(mode == SYS || mode == USR) {
    print("--------:--", "\n");
  } else {
    print(tracePsr(spsr()), "\n");
  }

  // r15 runs ahead of the executing instruction by 4 (THUMB) or 8 (ARM).
  if(Tf) {
    print(hex<8>(r[15] - 4), ": ", hex<4>(iexecute), " ", disasm::thumb(this, iexecute), "\n");
  } else {
    print(hex<8>(r[15] - 8), ": ", hex<8>(iexecute), " ", disasm::arm(this, iexecute), "\n");
  }
  fflush(stdout);
}
// Reads one of the I/O registers shared by both CPUs.
// The address is word-aligned first, so every register is returned as a full
// 32-bit value and 'size' does not influence the result. Registers that are
// not handled here read as 0.
uint32 CPUCore::readReg(uint32 addr, uint32 size) {
addr &= ~3;
//static bool vbl = 0;
switch(addr-0x04000000) {
case 0x0004: // Display status
// Upper half: current line. Lower half: status bits 0-2, IRQ enables in
// bits 3-5, vcompare bit 8 in bit 7 and vcompare bits 0-7 in bits 8-15.
return video.line << 16 | status.irqOnVMatch<< 5 | status.inVMatch<< 2
| (status.vcompare & 0x0ff)<< 8 | status.irqOnHBlank<< 4 | status.inHBlank<< 1
| (status.vcompare & 0x100)>> 1 | status.irqOnVBlank<< 3 | status.inVBlank<< 0;
case 0x1004:
return 0;
// Each DMA channel occupies 12 bytes starting at 0x40000b0; the integer
// division recovers the channel number from the register address.
case 0x00b0: // DMA0 source
case 0x00bc: // DMA1 source
case 0x00c8: // DMA2 source
case 0x00d4: // DMA3 source
return regDmaSource((addr-0x40000b0)/12);
case 0x00b4: // DMA0 dest
case 0x00c0: // DMA1 dest
case 0x00cc: // DMA2 dest
case 0x00d8: // DMA3 dest
return regDmaDest((addr-0x40000b0)/12);
case 0x00b8: // DMA0 control
case 0x00c4: // DMA1 control
case 0x00d0: // DMA2 control
case 0x00dc: // DMA3 control
return regDmaControl((addr-0x40000b0)/12);
case 0x00e0: // DMA0 fill
case 0x00e4: // DMA1 fill
case 0x00e8: // DMA2 fill
case 0x00ec: // DMA3 fill
return regDmaFill(addr>>2 & 3);
case 0x0100: // Timer 0
case 0x0104: // Timer 1
case 0x0108: // Timer 2
case 0x010c: // Timer 3
return regTimer(addr>>2 & 3);
case 0x130: { // Keypad input
// Builds a 10-bit mask of held buttons and inverts it, so a pressed
// button reads as 0.
unsigned keys = 0;
for(unsigned n = 0; n < 10; n++) {
if(interface->inputPoll(ID::Port::Buttons, 0, n))
keys += 1<<n;
}
return keys ^ 0x3ff;
}
// NOTE(review): addr was word-aligned above, so 0x132 can never match.
case 0x132: // Keypad interrupt
break;
// SYNC, message control, receive port
case 0x000180: return regSync();
case 0x000184: return regMsgControl();
case 0x100000: return regMsgRecv();
case 0x0001a0: return regSlot1Control();
case 0x0001a4: return regSlot1RomControl();
case 0x0001a8: return regSlot1RomCommand(0);
case 0x0001ac: return regSlot1RomCommand(1);
case 0x100010: return regSlot1RomRecv();
// IME, IE, IF
case 0x0208: return interrupt.gate;
case 0x0210: return interrupt.enable;
case 0x0214: return interrupt.flags;
case 0x0218: return 0; // IE: DSi bits
case 0x021c: return 0; // IF: DSi bits
// DSi-enhanced software checks these.
case 0x4000: return 0;
case 0x4008: return 0<<15;
}
//print("r ",hex<8>(addr),":",size,": unimplemented\n");
return 0;
}
// Writes one of the I/O registers shared by both CPUs.
// A byte-lane mask is derived from the access size and the low address bits;
// the address is then word-aligned and data is limited to the written lanes.
// Handlers receive both data and mask so that they can update only the bytes
// that were actually written. Unhandled registers are ignored.
void CPUCore::writeReg(uint32 addr, uint32 size, uint32 data) {
uint32 mask = 0xffffffff;
if(size == Half) mask = 0xffff << 8*(addr & 2);
if(size == Byte) mask = 0xff << 8*(addr & 3);
addr &= ~3, data &= mask;
switch(addr-0x04000000) {
case 0x0004: // Display status
// It might seem odd to have this here, but each CPU has one.
// vcompare bits 0-7 come from data bits 8-15, bit 8 from data bit 7.
if(mask & 0xff00) status.vcompare = (status.vcompare & 0x100) | (data>>8 & 0x0ff);
if(mask & 0x0080) status.vcompare = (status.vcompare & 0x0ff) | (data<<1 & 0x100);
if(mask & 0x003f) {
status.irqOnVMatch = data >> 5;
status.irqOnHBlank = data >> 4;
status.irqOnVBlank = data >> 3;
}
return;
case 0x1004:
return;
// Each DMA channel occupies 12 bytes starting at 0x40000b0; the integer
// division recovers the channel number from the register address.
case 0x00b0: // DMA0 source
case 0x00bc: // DMA1 source
case 0x00c8: // DMA2 source
case 0x00d4: // DMA3 source
return regDmaSource((addr-0x40000b0)/12, data, mask);
case 0x00b4: // DMA0 dest
case 0x00c0: // DMA1 dest
case 0x00cc: // DMA2 dest
case 0x00d8: // DMA3 dest
return regDmaDest((addr-0x40000b0)/12, data, mask);
case 0x00b8: // DMA0 control
case 0x00c4: // DMA1 control
case 0x00d0: // DMA2 control
case 0x00dc: // DMA3 control
return regDmaControl((addr-0x40000b0)/12, data, mask);
case 0x00e0: // DMA0 fill
case 0x00e4: // DMA1 fill
case 0x00e8: // DMA2 fill
case 0x00ec: // DMA3 fill
return regDmaFill(addr>>2 & 3, data, mask);
case 0x0100: // Timer 0
case 0x0104: // Timer 1
case 0x0108: // Timer 2
case 0x010c: // Timer 3
return regTimer(addr>>2 & 3, data, mask);
// NOTE(review): addr was word-aligned above, so 0x132 can never match.
case 0x132: // Keypad interrupt
break;
// SYNC, message control, send port
case 0x0180: return regSync(data, mask);
case 0x0184: return regMsgControl(data, mask);
case 0x0188: return regMsgSend(data);
// Slot 1
case 0x01a0: return regSlot1Control(data, mask);
case 0x01a4: return regSlot1RomControl(data, mask);
case 0x01a8: return regSlot1RomCommand(0, data, mask);
case 0x01ac: return regSlot1RomCommand(1, data, mask);
case 0x01b0: return regSlot1RomSeed(0, data, mask);
case 0x01b4: return regSlot1RomSeed(1, data, mask);
case 0x01b8: return regSlot1RomSeed(2, data, mask);
// IME, IE, IF
// IF is write-one-to-clear: every set bit in data acknowledges that flag.
case 0x0208: if(mask & 1) interrupt.gate = data & 1; return;
case 0x0210: interrupt.enable = interrupt.enable & ~mask | data; return;
case 0x0214: interrupt.flags &= ~data; return;
case 0x0218: return; // IE: DSi bits
case 0x021c: return; // IF: DSi bits
}
//print("w ",hex<8>(addr),":",size," = ",hex<8>(data),": unimplemented\n");
}
#include "slot.cpp"
#include "message.cpp"
#include "dma.cpp"
#include "timer.cpp"
#include "arm7tdmi.cpp"
#include "arm946es.cpp"
#undef imatch
}

View File

@ -1,288 +0,0 @@
#include "core.hpp"
// State and I/O registers that both CPUs have in common: display status,
// interrupt controller, inter-CPU message queue and sync port, four DMA
// channels, four timers and the slot control registers. The concrete CPUs
// (ARM7TDMI, ARM946ES) derive from this.
struct CPUCore : ARMCore {
CPUCore();
~CPUCore();
void power();
// Scanline notifications
void hdraw();
void hblank();
// Trace output
string tracePsr(uint32 value);
void traceInsn();
// Common I/O register dispatch
uint32 readReg(uint32 addr, uint32 size);
void writeReg(uint32 addr, uint32 size, uint32 data);
// Message queue and sync port
void popMsg();
void pushMsg(uint32 data);
void clearMsg();
uint32 regMsgControl();
uint32 regMsgRecv();
uint32 regSync();
void regMsgControl(uint32 data, uint32 mask);
void regMsgSend(uint32 data);
void regSync(uint32 data, uint32 mask);
// DMA
void dmaTransfer(unsigned no);
bool dmaTrigger(unsigned channels, unsigned value);
uint32 regDmaControl(unsigned no);
uint32 regDmaSource(unsigned no);
uint32 regDmaDest(unsigned no);
uint32 regDmaFill(unsigned no);
void regDmaControl(unsigned no, uint32 data, uint32 mask);
void regDmaSource(unsigned no, uint32 data, uint32 mask);
void regDmaDest(unsigned no, uint32 data, uint32 mask);
void regDmaFill(unsigned no, uint32 data, uint32 mask);
// Timers
void updateTimer(unsigned no);
uint32 regTimer(unsigned no);
void regTimer(unsigned no, uint32 data, uint32 mask);
// Slot 1 / slot 2 registers
uint32 regSlot1Control();
uint32 regSlot1RomControl();
uint32 regSlot1RomCommand(unsigned index);
uint32 regSlot1RomRecv();
void regSlot1Control(uint32 data, uint32 mask);
void regSlot1RomControl(uint32 data, uint32 mask);
void regSlot1RomCommand(unsigned index, uint32 data, uint32 mask);
void regSlot1RomSeed(unsigned index, uint32 data, uint32 mask);
uint32 regSlot2Control();
void regSlot2Control(uint32 data, uint32 mask);
enum {
// REG_IE, REG_IF bits
irqVBlank = 1<<0, irqHBlank = 1<<1,
irqVMatch = 1<<2, irqTimer = 1<<3,
irqClock = 1<<7, irqDma = 1<<8,
irqKeypad = 1<<12, irqSlot2 = 1<<13,
irqSync = 1<<16,
irqMsgSent = 1<<17, irqMsgRecv = 1<<18,
irqCardDone = 1<<19, irqSlot1 = 1<<20,
irqGeomBuf = 1<<21, irqLid = 1<<22,
irqSpi = 1<<23, irqWifi = 1<<24,
};
// Which CPU this is, plus slot configuration
struct {
bool arm7, arm9;
uint64 xorSeeds[2];
uint2 slot2ramTiming;
uint2 slot2romTiming0;
uint1 slot2romTiming1;
uint2 slot2phi;
} config;
// Interrupt controller: master gate, enable mask and pending flags
struct {
bool gate;
uint32 enable, flags;
} interrupt;
// Inter-CPU message queue (16 words with 4-bit read/write offsets)
struct {
uint1 enable, irqOnRecv, irqOnSent;
uint1 error, empty, full;
uint4 writeOffset, readOffset;
uint32 buffer[16];
} msg;
struct {
uint1 enable;
uint4 output;
} sync;
struct {
uint1 inVBlank, inHBlank, inVMatch; // vblank, hblank, line?
uint1 irqOnVBlank, irqOnHBlank, irqOnVMatch; // virq, hirq, lirq?
uint9 vcompare; // lineCompare?
} status;
// DMA channels; 'init' holds a second copy of source, dest and count
struct {
uint1 enable, irq, size, repeat;
uint3 trigger;
uint2 srcStep, destStep;
uint32 source, dest, count, fill;
struct {
uint32 source, dest, count;
} init;
} dma[4];
struct Timer {
uint1 enable, irq, cascade;
uint2 divider;
uint16 reload, count;
uint32 lastUpdate;
Event event;
uint32 regRead();
void regWrite(uint32 data, uint32 mask);
void update();
void operator()();
} timer[4];
// Pending-event state; the queue is constructed with an argument of 60
struct Ev {
bool irq;
bool anydma;
EventQueue queue;
Ev() : queue(60) {}
} event;
bool trace;
uint32 insnLatch;
unsigned powerState;
enum { running, waiting, sleeping };
WordMemory bios;
CPUCore* other;
cothread_t thread;
int32 clock;
};
// The ARM7TDMI CPU: implements the memory and timing hooks of ARMCore and
// adds the SPI, RTC and SIO register state declared below.
struct ARM7TDMI : CPUCore {
static void Thread();
void main();
void power();
void step(unsigned clocks);
alwaysinline void istep(unsigned clocks);
alwaysinline void execARM();
alwaysinline void execTHUMB();
// ARMCore hooks
uint32 fetch(uint32 addr, uint32 size, bool s);
uint32 read(uint32 addr, uint32 size, bool s);
void write(uint32 addr, uint32 size, bool s, uint32 data);
void dataCop(uint4 cpno, uint4 op1, uint4 ird, uint4 irn, uint4 irm, uint4 op2);
uint32 readReg(uint32 addr, uint32 size);
void writeReg(uint32 addr, uint32 size, uint32 data);
ARM7TDMI();
struct SPI {
uint1 enable, hold, irq, size;
uint2 divider; // 4, 2, 1, 0.5 MHz
uint2 device; enum { power, flash, touch, none };
uint16 data;
} spi;
struct RTC {
uint4 in[2];
uint4 out[2];
uint4 dir[2];
uint8 buffer;
uint3 index;
} rtc;
struct SIO {
uint4 in, out, dir;
uint1 irq;
uint2 mode;
} sio;
uint1 booted;
uint1 flag300;
// Register accessors: the no-argument form reads, the (data, mask) form
// writes.
uint32 regSpi();
void regSpi(uint32 data, uint32 mask);
uint32 regRtc();
void regRtc(uint32 data, uint32 mask);
uint32 regSio();
void regSio(uint32 data, uint32 mask);
};
// The ARM946E-S CPU: adds instruction/data TCM, the control register state
// and the divide / square-root register state to the common CPU core.
struct ARM946ES : CPUCore {
// Tightly coupled memories and their region/compare/mask settings
SRAM itcm;
SRAM dtcm;
uint32 itcmRegion, itcmRCompare, itcmRMask, itcmWCompare, itcmWMask;
uint32 dtcmRegion, dtcmRCompare, dtcmRMask, dtcmWCompare, dtcmWMask;
struct CR {
uint1 mmu;
uint1 dcache, icache;
uint1 endian; enum { little, big };
uint1 cachePolicy; enum { random, roundRobin };
uint1 dtcm, dtcmLoad;
uint1 itcm, itcmLoad;
} control;
// Math functions
uint2 divMode; enum { div32, div64_32, div64 };
uint1 divByZero;
uint1 divBusy;
int64 numerator;
int64 denominator;
int64 quotient;
int64 remainder;
uint1 rootMode; enum { sqrt32, sqrt64 };
uint1 rootBusy;
uint64 square;
uint32 squareRoot;
uint1 booted;
uint1 flag300;
uint1 slot1access;
uint1 slot2access;
uint1 ramPriority;
static void Thread();
void main();
void power();
void step(unsigned clocks);
alwaysinline void istep(unsigned clocks);
alwaysinline void execARM();
alwaysinline void execTHUMB();
// ARMCore hooks
uint32 fetch(uint32 addr, uint32 size, bool s);
uint32 read(uint32 addr, uint32 size, bool s);
void write(uint32 addr, uint32 size, bool s, uint32 data);
void dataCop(uint4 cpno, uint4 op1, uint4 ird, uint4 irn, uint4 irm, uint4 op2);
uint32 readReg(uint32 addr, uint32 size);
void writeReg(uint32 addr, uint32 size, uint32 data);
void updateTcm();
// Math register accessors: the (…, data, mask) forms write, the others read.
void regDivideControl(uint32 data, uint32 mask);
void regNumerator(unsigned index, uint32 data, uint32 mask);
void regDenominator(unsigned index, uint32 data, uint32 mask);
void regSquareRootControl(uint32 data, uint32 mask);
void regSquare(unsigned index, uint32 data, uint32 mask);
uint32 regDivideControl();
uint32 regNumerator(unsigned index);
uint32 regDenominator(unsigned index);
uint32 regQuotient(unsigned index);
uint32 regRemainder(unsigned index);
uint32 regSquareRootControl();
uint32 regSquare(unsigned index);
uint32 regSquareRoot();
void startDivide();
void startSquareRoot();
ARM946ES();
};
extern ARM7TDMI arm7;
extern ARM946ES arm9;

View File

@ -1,411 +0,0 @@
//#include "bit.hpp"
namespace disasm {
// collect(i, "pattern") gathers the bits of i selected by the pattern's field
// characters into one contiguous value.
#define collect(i, bits) (Bit::collect< Bit::field(bits) >((long)i))
// match(i, "pattern") tests i against the positions fixed by the pattern.
#define match(i, bits) ((i & force< Bit::mask(bits) >()) \
== force< Bit::binary(bits) >())
// Returns its template argument; since a template argument has to be a
// constant expression this makes the compiler evaluate it at compile time.
template<uint32 arg> static constexpr uint32 force() { return arg; }
// Condition-code suffixes, indexed by the 4-bit condition field. The last two
// entries are empty, so those codes print no suffix.
static string conds[] = {
"eq","ne","cs","cc","mi","pl","vs","vc","hi","ls","ge","lt","gt","le","",""
};
// Register names, indexed by register number; r13-r15 use their aliases.
static string regs[] = {
"r0","r1","r2","r3","r4","r5","r6","r7","r8","r9","r10","r11","r12","sp","lr","pc"
};
// Data-processing mnemonics, indexed by the 4-bit ALU opcode.
static string aluops[] = {
"and","eor","sub","rsb","add","adc","sbc","rsc","tst","teq","cmp","cmn","orr","mov","bic","mvn"
};
// Shift mnemonics, indexed by the 2-bit shift type.
static string shiftops[] = {
"lsl","lsr","asr","ror"
};
// Block-transfer addressing suffixes, indexed by a 2-bit value built from
// the P and U bits (see armBlock).
static string blockops[] = {
"da","ia","db","ib"
};
// Formats a memory operand. index 3 is pre-indexed with writeback
// "[rn, off]!", index 2 is plain pre-indexed "[rn, off]", and the remaining
// values (0 and 1) are post-indexed "[rn], off". u is the sign prefix and rm
// the already formatted offset.
string armAddr(uint2 index, uint4 rn, string u, string rm) {
  string &base = regs[rn];
  if(index == 3) return {"[", base, ", ", u, rm, "]!"};
  if(index == 2) return {"[", base, ", ", u, rm, "]"};
  return {"[", base, "], ", u, rm};
}
// Formats a rotated 8-bit immediate operand.
// Without rotation the value is printed as eight hex digits. With rotation,
// the rotated 32-bit result is printed when either of the two low bits of imm
// is set; otherwise the raw byte and the rotation amount are shown.
// The rotation is carried out on a uint32: shifting the promoted (signed)
// byte left by up to 30 bits overflows int, and a negative intermediate could
// reach hex<8>() sign-extended.
string armImmed(uint8 imm, uint4 rs) {
  if(!rs) return {"#0x",hex<8>(imm)};
  if(imm & 0x03) {
    const uint32 value = imm;
    const unsigned amount = 2*rs;  // 2..30 here because rs != 0
    return {"#0x",hex<8>(value << (32-amount) | value >> amount)};
  }
  return {"#0x",hex<2>(imm), ", ", 2*rs};
}
// Formats an 8-bit offset that the encoding splits into two nibbles.
string armOffset8(uint4 lo, uint4 hi) {
  const auto offset = hi<<4 | lo;
  return {"#0x",hex<2>(offset)};
}
// Formats a 12-bit immediate offset as three hex digits.
string armOffset12(uint12 offset) {
  auto digits = hex<3>(offset);
  return {"#0x",digits};
}
// Plain register operand: just the register's name.
string armRm(uint4 rm) {
  string name = regs[rm];
  return name;
}
// Register operand shifted by an immediate. An amount of zero prints the bare
// register for lsl, "rrx" for ror, and a shift by 32 for lsr/asr.
string armShiftImm(uint4 rm, uint2 sh, uint5 imm) {
  const bool zero = (imm == 0);
  if(zero && sh == 0) return regs[rm];
  if(zero && sh == 3) return {regs[rm], ", rrx"};
  const unsigned amount = zero ? 32u : (unsigned)imm;
  return {regs[rm], ", ", shiftops[sh], " #", amount};
}
// Register operand shifted by the amount held in another register.
string armShiftReg(uint4 rm, uint2 sh, uint4 rs) {
  string shift = {shiftops[sh], " ", regs[rs]};
  return {regs[rm], ", ", shift};
}
// Placeholder text for an ARM encoding that could not be decoded.
string armUndefined(uint32 i) {
  auto word = hex<8>(i);
  return {"<undefined> #0x", word};
}
// Breakpoint; the immediate is reassembled from its upper 12 and lower 4 bits.
string armBkpt(uint4 cc, uint12 hi, uint4 lo) {
  const auto immed = hi<<4 | lo;
  return {"bkpt", conds[cc], " #0x", hex<4>(immed)};
}
// Software interrupt with its 24-bit comment field.
string armSwi(uint4 cc, uint24 immed) {
  auto number = hex<6>(immed);
  return {"swi", conds[cc], " #0x", number};
}
// msr: write to CPSR or SPSR. Each set bit of mask adds the matching field
// letter (f, s, x, c) to the register suffix.
string armWritePsr(uint4 cc, uint1 psr, uint4 mask, string rm) {
  const char *target = psr? "spsr":"cpsr";
  string fields = {mask&8? "f":"", mask&4? "s":"", mask&2? "x":"", mask&1? "c":""};
  return {"msr", conds[cc], " ", target, "_", fields, ", ", rm};
}
// mrs: read CPSR or SPSR into a register.
string armReadPsr(uint4 cc, uint1 psr, uint4 rd) {
  const char *source = psr? "spsr":"cpsr";
  return {"mrs", conds[cc], " ", regs[rd], ", ", source};
}
// clz rd, rm
string armClz(uint4 cc, uint4 rd, uint4 rm) {
  string operands = {regs[rd], ", ", regs[rm]};
  return {"clz", conds[cc], " ", operands};
}
// Saturating add/subtract: qadd, qsub, qdadd, qdsub. Bit 0 of op selects
// subtraction, bit 1 the doubling variant.
string armDspAdd(uint4 cc, uint2 op, uint4 rd, uint4 rn, uint4 rm) {
  string operation = op&1? "sub":"add";
  string doubling = op&2? "d":"";
  string operands = {regs[rd], ", ", regs[rn], ", ", regs[rm]};
  return {"q", doubling, operation, conds[cc], " ", operands};
}
// Signed halfword multiplies. op selects the family (0 smla, 1 smlaw/smulw,
// 2 smlal, 3 smul); the two bits of xy pick the top (t) or bottom (b) half of
// each operand. For op 1, bit 0 of xy distinguishes smulw from smlaw.
string armDspMul(uint4 cc, uint2 op, uint2 xy, uint4 rd, uint4 rn, uint4 rm, uint4 rs) {
  string x = xy&1? "t":"b";
  string y = xy&2? "t":"b";
  if(op==0) return {"smla",x,y, conds[cc], " ", regs[rd], ", ", regs[rm], ", ", regs[rs], ", ", regs[rn]};
  if(op==2) return {"smlal",x,y,conds[cc], " ", regs[rn], ", ", regs[rd], ", ", regs[rm], ", ", regs[rs]};
  if(op==3) return {"smul",x,y, conds[cc], " ", regs[rd], ", ", regs[rm], ", ", regs[rs]};
  // Only op == 1 is left for a 2-bit opcode.
  if(xy&1) return {"smulw",y, conds[cc], " ", regs[rd], ", ", regs[rm], ", ", regs[rs]};
  return {"smlaw",y, conds[cc], " ", regs[rd], ", ", regs[rm], ", ", regs[rs], ", ", regs[rn]};
}
// Multiply and multiply-long instructions. Bit 0 of op is the S bit; the
// remaining bits select mul, mla, umull, umlal, smull or smlal.
// The other even opcodes (4 and 6) have no entry. The function previously ran
// off its end for them, which is undefined behaviour for a value-returning
// function; they now produce an explicit placeholder.
string armMultiply(uint4 cc, uint4 op, uint4 rd, uint4 rn, uint4 rm, uint4 rs) {
  string s = op & 1? "s":"";
  op &= ~1;
  if(op== 0) return {"mul", conds[cc], s, " ", regs[rd], ", ", regs[rm], ", ", regs[rs]};
  if(op== 2) return {"mla", conds[cc], s, " ", regs[rd], ", ", regs[rm], ", ", regs[rs], ", ", regs[rn]};
  if(op== 8) return {"umull", conds[cc], s, " ", regs[rn], ", ", regs[rd], ", ", regs[rm], ", ", regs[rs]};
  if(op==10) return {"umlal", conds[cc], s, " ", regs[rn], ", ", regs[rd], ", ", regs[rm], ", ", regs[rs]};
  if(op==12) return {"smull", conds[cc], s, " ", regs[rn], ", ", regs[rd], ", ", regs[rm], ", ", regs[rs]};
  if(op==14) return {"smlal", conds[cc], s, " ", regs[rn], ", ", regs[rd], ", ", regs[rm], ", ", regs[rs]};
  return {"<undefined multiply>"};
}
// Data-processing instruction. Compare/test operations print only rn and the
// operand; mov/mvn, and any operation whose destination equals its first
// operand, print only rd and the operand; everything else prints all three.
string armData(uint4 cc, uint5 op, uint4 rd, uint4 rn, string rm) {
  string alu = aluops[collect(op, "aaaa.")];
  string s = collect(op, "....s")? "s" : "";
  const bool compares = alu == "cmp" || alu == "cmn" || alu == "tst" || alu == "teq";
  const bool twoOperand = alu == "mov" || alu == "mvn" || rd == rn;
  if(compares) {
    return {alu, conds[cc], s, " ", regs[rn], ", ", rm};
  }
  if(twoOperand) {
    return {alu, conds[cc], s, " ", regs[rd], ", ", rm};
  }
  return {alu, conds[cc], s, " ", regs[rd], ", ", regs[rn], ", ", rm};
}
// Word/byte load and store. op packs the P, U, B, W and L bits; a condition
// field of 15 is printed as pld, and post-indexed accesses with W set receive
// the "t" suffix.
string armMem(uint4 cc, uint5 op, uint4 rd, uint4 rn, string rm) {
  uint2 index = collect(op, "p..w.");
  string sign = collect(op, ".u...")? "" : "-";
  string byte = collect(op, "..b..")? "b" : "";
  uint1 l = collect(op, "....l");
  const char *mnemonic = cc==15? "pld" : l? "ldr":"str";
  const char *translate = index==1?"t":"";
  return {mnemonic, conds[cc], byte, translate,
          " ", regs[rd], ", ", armAddr(index, rn, sign, rm)};
}
// Halfword and signed load/store. op2 selects the size suffix: 2 gives "sb",
// 3 gives "sh", anything else "h".
string armMem_v4(uint4 cc, uint5 op1, uint2 op2, uint4 rd, uint4 rn, string rm) {
  uint2 index = collect(op1, "p..w.");
  string sign = collect(op1, ".u...")? "" : "-";
  uint1 l = collect(op1, "....l");
  string size = op2==2?"sb" : op2==3?"sh" : "h";
  const char *mnemonic = l?"ldr":"str";
  return {mnemonic, conds[cc], size, " ", regs[rd], ", ", armAddr(index, rn, sign, rm)};
}
// Doubleword load/store (ldrd / strd); bit 0 of op2 selects the store.
string armMem_v5(uint4 cc, uint5 op1, uint2 op2, uint4 rd, uint4 rn, string rm) {
  uint2 index = collect(op1, "p..w.");
  string sign = collect(op1, ".u...")? "" : "-";
  uint1 store = op2 & 1;
  const char *mnemonic = store?"str":"ldr";
  return {mnemonic, conds[cc], "d ", regs[rd], ", ", armAddr(index, rn, sign, rm)};
}
// swp / swpb: op selects the byte variant.
// NOTE(review): the operands are printed as rm, rd, [rn]; the usual assembler
// order is Rd, Rm, [Rn]. Left as found -- confirm against the caller's
// argument order before changing it.
string armMemSwap(uint4 cc, uint1 op, uint4 rd, uint4 rn, uint4 rm) {
  string byte = op? "b":"";
  string address = {"[", regs[rn], "]"};
  return {"swp", conds[cc], byte, " ", regs[rm],", ", regs[rd], ", ", address};
}
// Coprocessor load/store (ldc / stc). A condition field of 15 adds the "2"
// suffix and the N bit adds "l". When index is 0 the offset is prefixed with
// "#" instead of the sign.
string armMemCop(uint4 cc, uint4 cpno, uint5 op, uint4 rd, uint4 rn, uint8 offset) {
  uint2 index = collect(op, "p..w.");
  string u = collect(op, ".u...")? "" : "-";
  string length = collect(op, "..n..")? "l" : "";
  uint1 l = collect(op, "....l");
  const char *mnemonic = l? "ldc":"stc";
  const char *variant = cc==15?"2":"";
  return {mnemonic, conds[cc], variant, length, " p",cpno, ", c",regs[rd], ", ",
          armAddr(index, rn, index?u:"#", {"0x",hex<2>(offset)})};
}
// Block transfer (ldm / stm). The P and U bits select the addressing suffix,
// W appends "!" to the base register and S appends "^" after the list.
string armBlock(uint4 cc, uint5 op, uint4 rn, uint16 rlist) {
  uint2 index = collect(op, "pu...");
  string userBank = collect(op, "..s..")? "^" : "";
  string writeback = collect(op, "...w.")? "!" : "";
  uint1 l = collect(op, "....l");

  // Comma-separated names of every register whose bit is set in rlist.
  string regnames = "";
  for(unsigned b = 0; b < 16; b++) {
    if(!(rlist & 1<<b)) continue;
    regnames = {regnames, ",", regs[b]};
  }
  regnames.ltrim(",");

  const char *mnemonic = l?"ldm":"stm";
  return {mnemonic, conds[cc], blockops[index], " ", regs[rn], writeback, ", {", regnames, "}", userBank};
}
// Branch with an immediate target: b, bl or blx.
// For blx the link bit carries the extra halfword of the offset, so the
// target is advanced by 2 when both link and exch are set. That adjustment
// has to be applied to the address before it is formatted; it was previously
// added to the already formatted hex string.
string armBranch(uint4 cc, uint1 link, uint1 exch, uint32 target) {
  const uint32 address = target + 2*link*exch;
  return {exch?"blx" : link?"bl" : "b", conds[cc], " 0x", hex<8>(address)};
}
// Branch to register: bx, or blx when bit 0 of sh is set.
string armBranchEx(uint4 cc, uint2 sh, uint4 rm) {
  const char *mnemonic = (sh & 1)? "blx":"bx";
  return {mnemonic, conds[cc], " ", regs[rm]};
}
// Coprocessor operation: cdp, mcr or mrc.
// Bit 0 of op2 separates register transfers (set) from cdp (clear). For
// register transfers bit 0 of op1 is the direction (mrc when set) and the
// opcode is the remaining upper bits; for cdp op1 is the opcode as given.
// The opcode is printed exactly once after that normalisation; it used to be
// shifted right a second time, which dropped its lowest bit.
string armCop(uint4 cc, uint4 cpno, uint4 op1, uint4 rd, uint4 rn, uint4 rm, uint4 op2) {
  bool cdp = ~op2 & 1;
  bool l = op1 & 1;
  if(!cdp) op1 >>= 1;
  op2 >>= 1;
  return {cdp?"cdp" : l? "mrc":"mcr", conds[cc], cc==15?"2":"",
          " p",cpno, ",", op1, ", ", cdp?"cr":"r",rd, ", ", "cr",rn, ", ", "cr",rm, ", ", op2};
}
// Two-register coprocessor transfer: mrrc when bit 0 of op1 is set, mcrr
// otherwise. A condition field of 15 adds the "2" suffix.
string armCop_v5(uint4 cc, uint4 cpno, uint4 op1, uint4 rd, uint4 rn, uint4 rm, uint4 op2) {
  const bool l = op1 & 1;
  const char *mnemonic = l? "mrrc":"mcrr";
  const char *variant = cc==15?"2":"";
  return {mnemonic, conds[cc], variant,
          " p",cpno, ",", op2, ", ", regs[rd], ", ", regs[rn], ", ", "cr",rm};
}
static string thumbmemregops[] = {
"str", "strh", "strb", "ldrsb", "ldr", "ldrh", "ldrb", "ldrsh"
};
// Mnemonics for Thumb immediate-offset loads/stores, indexed by (op - 12)
// in thumbMemImm().
static string thumbmemimmops[] = {
  "str",   // op 12
  "ldr",   // op 13
  "strb",  // op 14
  "ldrb",  // op 15
  "strh",  // op 16
  "ldrh"   // op 17
};
// Formats an unrecognised Thumb opcode as "<undefined> #0x" + 4 hex digits.
string thumbUndefined(uint16 i) {
  string encoding = hex<4>(i);
  return {"<undefined> #0x", encoding};
}
// Formats a Thumb breakpoint with its 8-bit immediate.
string thumbBkpt(uint8 imm) {
  string number = hex<2>(imm);
  return {"bkpt #0x", number};
}
// Formats a Thumb software interrupt with its 8-bit immediate.
string thumbSwi(uint8 imm) {
  string number = hex<2>(imm);
  return {"swi #0x", number};
}
// Disassembles Thumb three-operand add/sub.
//   op bit 0: 0 = add, 1 = sub
//   op bit 1: 0 = rm is a register, 1 = rm is a 3-bit immediate
string thumbAddSub(uint2 op, uint3 rd, uint3 rn, uint3 rm) {
  // "add rd, rn, #0" is shown as a plain move.
  if(op == 2 && rm == 0) return {"mov ", regs[rd], ", ", regs[rn]};

  const char* mnemonic = op & 1? "sub" : "add";
  const bool immediate = op & 2;
  if(immediate) return {mnemonic, " ", regs[rd], ", ", regs[rn], ", #", rm};
  return {mnemonic, " ", regs[rd], ", ", regs[rn], ", ", regs[rm]};
}
// Disassembles Thumb shift-by-immediate. For every shift type except op 0,
// an encoded amount of 0 is printed as 32.
string thumbShiftImm(uint2 op, uint3 rd, uint3 rm, uint5 imm) {
  unsigned amount = 32;
  if(op==0 || imm) amount = (unsigned)imm;
  return {shiftops[op], " ", regs[rd], ", ", regs[rm], ", #", amount};
}
// Disassembles Thumb mov/cmp/add/sub with an 8-bit immediate.
//   op: 0 = mov, 1 = cmp, 2 = add, 3 = sub
// A table lookup is used so that every path returns a value; the previous
// if-chain had no return statement after the last comparison.
string thumbDataImm(uint2 op, uint3 rd, uint8 imm) {
  static const char* const mnemonics[] = {"mov ", "cmp ", "add ", "sub "};
  return {mnemonics[op], regs[rd], ", #0x", hex<2>(imm)};
}
// Disassembles Thumb two-operand ALU instructions on low registers.
// Opcodes 2, 3, 4, 7, 9 and 13 have their own mnemonics; all others use
// the shared aluops[] table.
string thumbDataLo(uint4 op, uint3 rd, uint3 rm) {
  const char* special = nullptr;
  switch((unsigned)op) {
  case  2: special = "lsl"; break;
  case  3: special = "lsr"; break;
  case  4: special = "asr"; break;
  case  7: special = "ror"; break;
  case  9: special = "neg"; break;
  case 13: special = "mul"; break;
  }
  if(special) return {special, " ", regs[rd], ", ", regs[rm]};
  return {aluops[op], " ", regs[rd], ", ", regs[rm]};
}
// Disassembles Thumb high-register operations.
//   op 0..2: add / cmp / mov rd, rm
//   op 3:    bx rm, or blx rm when bit 3 of rd is set
string thumbDataHi(uint2 op, uint4 rd, uint4 rm) {
  if(op == 3) {
    const char* branchName = rd & 8? "blx " : "bx ";
    return {branchName, regs[rm]};
  }
  static const char* const mnemonics[] = {"add ", "cmp ", "mov "};
  return {mnemonics[op], regs[rd], ", ", regs[rm]};
}
// Disassembles Thumb loads/stores with a register offset:
// "<op> rd, [rn, rm]", mnemonic taken from thumbmemregops[].
string thumbMemReg(uint3 op, uint3 rd, uint3 rn, uint3 rm) {
  string address = {"[", regs[rn], ", ", regs[rm], "]"};
  return {thumbmemregops[op], " ", regs[rd], ", ", address};
}
// Disassembles Thumb loads/stores with an immediate offset. The 5-bit
// offset is scaled by 4 for op < 14, by 2 for op >= 16, and left unscaled
// otherwise. The mnemonic comes from thumbmemimmops[op - 12].
string thumbMemImm(uint5 op, uint3 rd, uint3 rn, uint5 offset) {
  unsigned scale = 1;
  if(op < 14) scale = 4;
  if(op >= 16) scale = 2;
  unsigned byteOffset = offset;
  byteOffset *= scale;
  return {thumbmemimmops[op-12], " ", regs[rd], ", [", regs[rn], ", #0x", hex<3>(byteOffset), "]"};
}
// Disassembles Thumb "add sp, #n" / "sub sp, #n"; the offset is in words.
string thumbAddSP(uint1 op, uint7 offset) {
  const char* mnemonic = "add";
  if(op) mnemonic = "sub";
  return {mnemonic, " sp, #0x", hex<3>(4*offset)};
}
// Disassembles Thumb pc/sp-relative loads, stores and address generation.
//   op  9: ldr rd, [pc, #n]
//   op 18: str rd, [sp, #n]
//   op 19: ldr rd, [sp, #n]
//   op 20: add rd, pc, #n
//   op 21: add rd, sp, #n
// The offset is in words. Any other op yields "<undefined>" so that the
// function never falls off its end without returning a value.
string thumbRelative(uint5 op, uint3 rd, uint8 offset) {
  if(op == 9) return {"ldr ", regs[rd], ", [pc, #0x", hex<3>(4*offset), "]"};
  if(op ==18) return {"str ", regs[rd], ", [sp, #0x", hex<3>(4*offset), "]"};
  if(op ==19) return {"ldr ", regs[rd], ", [sp, #0x", hex<3>(4*offset), "]"};
  if(op ==20) return {"add ", regs[rd], ", pc, ", "#0x", hex<3>(4*offset)};
  if(op ==21) return {"add ", regs[rd], ", sp, ", "#0x", hex<3>(4*offset)};
  return string("<undefined>");
}
// Disassembles Thumb push/pop (op < 8) and ldm/stm (op >= 8).
// For push/pop, bit 0 of rn adds lr (push) or pc (pop) to the list.
string thumbBlock(uint4 op, uint3 rn, uint8 rlist) {
  const bool load = op & 1;

  // ",r0,r1,..." for each set bit; the leading comma is trimmed below.
  string list = "";
  for(unsigned n = 0; n < 8; n++) {
    if((rlist & 1<<n) == 0) continue;
    list = {list, ",", regs[n]};
  }

  if(op >= 8) {
    list.ltrim(",");
    return {load? "ldm":"stm", " ", regs[rn], "!, {", list, "}"};
  }

  if(rn & 1) list = {list, load? ",pc" : ",lr"};
  list.ltrim(",");
  return {load? "pop":"push", " {", list, "}"};
}
// Formats a Thumb conditional branch to an absolute target.
string thumbCond(uint4 cc, uint32 target) {
  string address = hex<8>(target);
  return {"b", conds[cc], " 0x", address};
}
// Formats a Thumb unconditional branch to an absolute target.
string thumbBranch(uint32 target) {
  string address = hex<8>(target);
  return {"b 0x", address};
}
// Formats the first half of a Thumb long branch as "bh 0x" followed by
// twice the 11-bit offset, in 3 hex digits.
string thumbBh(int11 offset_hi) {
  string doubled = hex<3>(2*offset_hi);
  return {"bh 0x", doubled};
}
// Formats the second half of a Thumb long branch: "bl" when tbit is set,
// otherwise "blx", followed by the absolute target.
string thumbBl(uint1 tbit, uint32 target) {
  const char* mnemonic = "blx";
  if(tbit) mnemonic = "bl";
  return {mnemonic, " 0x", hex<8>(target)};
}
// Disassembles one 32-bit ARM instruction.
//   arm: core whose r[15] is used as the base for branch targets
//   i:   instruction word
// Patterns are tested top to bottom and the first match wins, so the more
// specific encodings must stay ahead of the general ones. Anything that
// matches no pattern is reported through armUndefined().
string arm(ARMCore* arm, uint32 i) {
  // Commonly used fields, extracted once.
  uint8 op = i>>20;
  uint4 cc = i>>28;
  uint4 rd = i>>12, rn = i>>16;
  uint4 rm = i, rs = i>>8;
  uint2 sh = i>>5;
  uint32 pc = arm->r[15] & ~3;

  // Unconditional (cc == 15) instruction space.
  if(match(i, "1111 0101u101/// ..../")) return armMem(15, op, rd, rn, armOffset12(i));
  if(match(i, "1111 0111u101/// ...0/")) return armMem(15, op, rd, rn, armShiftImm(rm, sh, i>>7));
  // blx <imm>: bit 24 (H) contributes a halfword to the target, so it is
  // added here as 2*H. It was previously ignored.
  if(match(i, "1111 101l..../// ..../")) return armBranch(14, 0, 1, pc + 4*int24(i) + 2*(i>>24 & 1));
  if(match(i, "1111 1100000l/// ..../")) return armUndefined(i);
  if(match(i, "1111 1100010l/// ..../")) return armCop_v5(15, i>>8, op, rd, rn, rm, i>>4);
  if(match(i, "1111 110punwl/// ..../")) return armMemCop(15, i>>8, op, rd, rn, i);
  if(match(i, "1111 1110..../// ..../")) return armCop(15, i>>8, op, rd, rn, rm, i>>4);
  if(match(i, "1111 ......../// ..../")) return armUndefined(i);

  // Status register access, branch-exchange, breakpoint, clz, DSP ops.
  if(match(i, ".... 00110r10/// ..../")) return armWritePsr(cc, i>>22, rn, armImmed(i, rs));
  if(match(i, ".... 00010r10/// 0000/")) return armWritePsr(cc, i>>22, rn, armRm(rm));
  if(match(i, ".... 00010r00/// 0000/")) return armReadPsr (cc, i>>22, rd);
  if(match(i, ".... 00010010/// 00l1/")) return armBranchEx(cc, sh, rm);
  if(match(i, ".... 00010010/// 0111/")) return armBkpt (cc, i>>8, i);
  if(match(i, ".... 00010110/// 0001/")) return armClz (cc, rd, rm);
  if(match(i, ".... 00010ds0/// 0101/")) return armDspAdd (cc, i>>21, rd, rn, rm);
  if(match(i, ".... 00010oo0/// 1yx0/")) return armDspMul (cc, i>>21, sh, rn, rd, rm, rs);

  // Multiplies, swap, and halfword / signed / doubleword transfers.
  if(match(i, ".... 0000luas/// 1001/")) return armMultiply(cc, op, rn, rd, rm, rs);
  if(match(i, ".... 00010b00/// 1001/")) return armMemSwap (cc, i>>22, rd, rn, rm);
  if(match(i, ".... 0000ui1l/// 1sh1/")) return armUndefined(i);
  if(match(i, ".... 000pu0w0/// 11s1/")) return armMem_v5 (cc, op, sh, rd, rn, armRm(i));
  if(match(i, ".... 000pu0wl/// 1sh1/")) return armMem_v4 (cc, op, sh, rd, rn, armRm(i));
  if(match(i, ".... 000pu1w0/// 11s1/")) return armMem_v5 (cc, op, sh, rd, rn, armOffset8(i, i>>8));
  if(match(i, ".... 000pu1wl/// 1sh1/")) return armMem_v4 (cc, op, sh, rd, rn, armOffset8(i, i>>8));

  // Data processing with register-shifted, immediate-shifted and immediate operands.
  if(match(i, ".... 000aaaas/// 0sh1/")) return armData (cc, op, rd, rn, armShiftReg(rm, sh, rs));
  if(match(i, ".... 000aaaas/// .sh0/")) return armData (cc, op, rd, rn, armShiftImm(rm, sh, i>>7));
  if(match(i, ".... 001aaaas/// ..../")) return armData (cc, op, rd, rn, armImmed(i, rs));

  // Word/byte transfers, block transfers, branches, coprocessor, swi.
  if(match(i, ".... 010pubwl/// ..../")) return armMem (cc, op, rd, rn, armOffset12(i));
  if(match(i, ".... 011pubwl/// ...0/")) return armMem (cc, op, rd, rn, armShiftImm(rm, sh, i>>7));
  if(match(i, ".... 100puswl/// ..../")) return armBlock(cc, op, rn, i);
  if(match(i, ".... 101l..../// ..../")) return armBranch(cc, i>>24, 0, pc + 4*int24(i));
  if(match(i, ".... 1100000l/// ..../")) return armUndefined(i);
  if(match(i, ".... 1100010l/// ..../")) return armCop_v5(cc, i>>8, op, rd, rn, rm, i>>4);
  if(match(i, ".... 110punwl/// ..../")) return armMemCop(cc, i>>8, op, rd, rn, i);
  if(match(i, ".... 1110..../// ..../")) return armCop(cc, i>>8, op, rd, rn, rm, i>>4);
  if(match(i, ".... 1111..../// ..../")) return armSwi(cc, i);
  return armUndefined(i);
}
// Disassembles one 16-bit Thumb instruction.
//   arm: core whose r[15] (and r[14] for the second half of a long branch)
//        supplies the base for branch targets
//   i:   instruction halfword
// Patterns are tested top to bottom and the first match wins; several later
// patterns overlap earlier, more specific ones, so the order matters.
// Anything that matches no pattern is reported through thumbUndefined().
string thumb(ARMCore* arm, uint16 i) {
uint32 pc = arm->r[15] & ~1;
if(match(i, "00011ismmmnnnddd")) return thumbAddSub (i>>9, i, i>>3, i>>6);
if(match(i, "000ssiiiiimmmddd")) return thumbShiftImm(i>>11, i, i>>3, i>>6);
if(match(i, "001oodddiiiiiiii")) return thumbDataImm (i>>11, i>>8, i);
if(match(i, "010000oooommmddd")) return thumbDataLo (i>>6, i, i>>3);
// rd is rebuilt from bits 2..0 plus bit 7 as its fourth bit.
if(match(i, "010001oodmmmmddd")) return thumbDataHi (i>>8, (i&7)+(i>>4&8), i>>3);
if(match(i, "0101ooommmnnnddd")) return thumbMemReg (i>>9, i, i>>3, i>>6);
if(match(i, "011bliiiiinnnddd")) return thumbMemImm (i>>11, i, i>>3, i>>6);
if(match(i, "1000liiiiinnnddd")) return thumbMemImm (i>>11, i, i>>3, i>>6);
if(match(i, "10111110........")) return thumbBkpt(i);
if(match(i, "10110000siiiiiii")) return thumbAddSP ( i>>7, i);
if(match(i, "01001dddrrrrrrrr")) return thumbRelative(i>>11, i>>8, i);
if(match(i, "1001odddrrrrrrrr")) return thumbRelative(i>>11, i>>8, i);
if(match(i, "1010odddrrrrrrrr")) return thumbRelative(i>>11, i>>8, i);
// Condition values 14 and 15 are not branches; they must be tested before
// the generic conditional-branch pattern.
if(match(i, "11011110rrrrrrrr")) return thumbUndefined(i);
if(match(i, "11011111rrrrrrrr")) return thumbSwi(i);
if(match(i, "1101ccccrrrrrrrr")) return thumbCond ( i>>8, pc + 2*int8(i));
if(match(i, "11100rrrrrrrrrrr")) return thumbBranch (pc + 2*int11(i));
if(match(i, "11110rrrrrrrrrrr")) return thumbBh (i);
if(match(i, "11101rrrrrrrrrr1")) return thumbUndefined(i);
// Second half of a long branch: the target is r[14] plus twice the low 11 bits.
if(match(i, "111t1rrrrrrrrrrr")) return thumbBl (i>>12, arm->r[14] + 2*uint11(i));
// Everything else with bit 15 set is decoded as push/pop or ldm/stm.
if(match(i, "1o..lnnnrrrrrrrr")) return thumbBlock (i>>11, i>>8, i);
return thumbUndefined(i);
}
#undef collect
#undef match
}

View File

@ -1,131 +0,0 @@
// Runs every DMA channel selected in the `channels` bitmask that is enabled
// and whose trigger setting equals `value`.
// Returns true only when channel 3 was one of the channels that ran.
// NOTE(review): the result ignores channels 0-2 - confirm this is what
// callers expect.
bool CPUCore::dmaTrigger(unsigned channels, unsigned value) {
  bool lastChannelRan = false;
  for(unsigned no = 0; no < 4; no++) {
    const bool selected = (channels & 1<<no) && dma[no].enable && dma[no].trigger == value;
    if(selected) dmaTransfer(no);
    if(no == 3) lastChannelRan = selected;
  }
  return lastChannelRan;
}
// Runs DMA channel `no` to completion in a single call: copies one unit at a
// time from dma.source to dma.dest until the masked count reaches zero, then
// applies the destination-reload, repeat and IRQ settings.
void CPUCore::dmaTransfer(unsigned no) {
auto &dma = this->dma[no];
uint32 size = dma.size? Word : Half;
// Address and count limits are narrower when config.arm7 is set, and depend
// on the channel number.
uint32 srcmask = config.arm7 && no == 0? 0x07fffffe : 0x0ffffffe;
uint32 destmask = config.arm7 && no < 3? 0x07fffffe : 0x0ffffffe;
uint32 countmask = 0x1fffff;
if(config.arm7)
countmask = no < 3? 0x3fff : 0xffff;
// Step setting 1 decrements, 2 holds the address fixed, anything else
// (0 and 3) increments. The base step is one halfword.
int srcinc = (dma.srcStep == 1? -2 : dma.srcStep != 2? +2 : 0);
int destinc = (dma.destStep == 1? -2 : dma.destStep != 2? +2 : 0);
if(size == Word)
srcinc <<= 1, destinc <<= 1;
// At least one unit is always transferred; a masked count of 0 after the
// decrement ends the loop.
do {
uint32 data = read(dma.source, size, true);
write(dma.dest, size, true, data);
dma.source = (dma.source + srcinc) & srcmask;
dma.dest = (dma.dest + destinc) & destmask;
} while(--dma.count & countmask);
// Destination step 3 restores the destination address afterwards.
if(dma.destStep == 3) dma.dest = dma.init.dest;
// Repeating channels reload their count; one-shot channels disable themselves.
if(dma.repeat == 1) dma.count = dma.init.count;
if(dma.repeat == 0) dma.enable = false;
if(dma.irq) interrupt.flags |= irqDma << no;
}
// Reads back the control register of DMA channel `no`, assembled from the
// latched count and the individual control fields.
uint32 CPUCore::regDmaControl(unsigned no) {
  auto &channel = dma[no];
  uint32 value = channel.init.count<<0;
  value |= channel.destStep<<21 | channel.srcStep<<23;
  value |= channel.repeat<<25 | channel.size<<26 | channel.trigger<<27;
  value |= channel.irq<<30 | channel.enable<<31;
  return value;
}
// Reads back the programmed (initial) source address of DMA channel `no`.
uint32 CPUCore::regDmaSource(unsigned no) {
  auto &channel = dma[no];
  return channel.init.source;
}
// Reads back the programmed (initial) destination address of DMA channel `no`.
uint32 CPUCore::regDmaDest(unsigned no) {
  auto &channel = dma[no];
  return channel.init.dest;
}
// Reads back the fill register of DMA channel `no`.
uint32 CPUCore::regDmaFill(unsigned no) {
  auto &channel = dma[no];
  return channel.fill;
}
// Writes the control register of DMA channel `no`.
//   data: value being written
//   mask: bits of the register covered by this write
// The low bits hold the transfer count; bits 21 and up hold the step modes,
// repeat, size, trigger, irq and enable fields. Enabling a channel latches
// the programmed source/dest/count, and trigger modes 0 and 7 start the
// transfer immediately.
void CPUCore::regDmaControl(unsigned no, uint32 data, uint32 mask) {
  auto &dma = this->dma[no];

  uint32 countmask = 0x1fffff;
  if(config.arm7) // still limited to 16K / 64K transfers
    countmask = no < 3? 0x3fff : 0xffff;

  if(mask & countmask) {
    // Merge only the count bits that this write actually covers. Previously
    // the whole count field was replaced whenever any of it was written, so
    // a partial write overwrote the untouched count bits with whatever
    // `data` held in those positions.
    dma.init.count ^= (dma.init.count ^ data) & (countmask & mask);
  }
  if(mask & 0xffe00000) { // ??
    bool previously = dma.enable;

    dma.destStep = data>>21;
    dma.srcStep = data>>23;
    dma.repeat = data>>25;
    dma.size = data>>26;
    dma.trigger = data>>27;
    dma.irq = data>>30;
    dma.enable = data>>31;

    // With config.arm7 set, the trigger value is stored shifted down one bit.
    if(config.arm7)
      dma.trigger >>= 1;

    if(dma.enable) {
      if(previously == false) {
        // Latch new settings
        dma.source = dma.init.source;
        dma.dest = dma.init.dest;
        dma.count = dma.init.count;
      }
      if(dma.trigger == 0) {
        dmaTransfer(no);
      }
      else if(dma.trigger == 7) {
        // Geometry fifo - DMA display list
        // - just do it right here since we lack timing
        dmaTransfer(no);
      }
      //else {
      // print(config.arm7?"arm7":"arm9",
      // ": dma trigger ",dma.trigger,": unimplemented\n");
      //}
    }
  }
}
void CPUCore::regDmaSource(unsigned no, uint32 data, uint32 mask) {
mask &= 0x0ffffffe;
if(config.arm7 && no == 0)
mask &= 0x07fffffe; // DMA0 is internal only
dma[no].init.source ^= (dma[no].init.source ^ data) & mask;
}
void CPUCore::regDmaDest(unsigned no, uint32 data, uint32 mask) {
mask &= 0x0ffffffe;
if(config.arm7 && no != 3)
mask &= 0x07fffffe; // only DMA3 can write to slot 2
dma[no].init.dest ^= (dma[no].init.dest ^ data) & mask;
}
// Writes the fill register of DMA channel `no`. The write is ignored when
// config.arm7 is set (register not present).
void CPUCore::regDmaFill(unsigned no, uint32 data, uint32 mask) {
  if(!config.arm7) {
    auto &fill = dma[no].fill;
    fill ^= (fill ^ data) & mask;
  }
}

View File

@ -1,115 +0,0 @@
// Writes the divider control register: updates the mode when its bits are
// covered by `mask`, then always restarts the division.
void ARM946ES::regDivideControl(uint32 data, uint32 mask) {
  const bool writesMode = mask & 3;
  if(writesMode) divMode = data;
  startDivide();
}
void ARM946ES::regNumerator(unsigned index, uint32 data, uint32 mask) {
int64 update = data & mask; update <<= 32*index;
int64 umask = mask; umask <<= 32*index;
numerator ^= (numerator ^ update) & umask;
startDivide();
}
void ARM946ES::regDenominator(unsigned index, uint32 data, uint32 mask) {
int64 update = data & mask; update <<= 32*index;
int64 umask = mask; umask <<= 32*index;
denominator ^= (denominator ^ update) & umask;
startDivide();
}
// Writes the square-root control register: updates the mode when bit 0 is
// covered by `mask`, then always restarts the calculation.
void ARM946ES::regSquareRootControl(uint32 data, uint32 mask) {
  const bool writesMode = mask & 1;
  if(writesMode) rootMode = data;
  startSquareRoot();
}
void ARM946ES::regSquare(unsigned index, uint32 data, uint32 mask) {
int64 update = data & mask; update <<= 32*index;
int64 umask = mask; umask <<= 32*index;
square ^= (square ^ update) & umask;
startSquareRoot();
}
// Reads the divider control register: mode in the low bits, divide-by-zero
// flag at bit 13, busy flag at bit 15.
uint32 ARM946ES::regDivideControl() {
  uint32 value = divMode<<0;
  value |= divByZero<<13;
  value |= divBusy<<15;
  return value;
}
// Reads one 32-bit half of the numerator (index 0 = low, 1 = high).
uint32 ARM946ES::regNumerator(unsigned index) {
  const unsigned shift = 32*index;
  return numerator >> shift;
}
// Reads one 32-bit half of the denominator (index 0 = low, 1 = high).
uint32 ARM946ES::regDenominator(unsigned index) {
  const unsigned shift = 32*index;
  return denominator >> shift;
}
// Reads one 32-bit half of the quotient (index 0 = low, 1 = high).
uint32 ARM946ES::regQuotient(unsigned index) {
  const unsigned shift = 32*index;
  return quotient >> shift;
}
// Reads one 32-bit half of the remainder (index 0 = low, 1 = high).
uint32 ARM946ES::regRemainder(unsigned index) {
  const unsigned shift = 32*index;
  return remainder >> shift;
}
// Reads the square-root control register: mode in the low bits, busy flag
// at bit 15.
uint32 ARM946ES::regSquareRootControl() {
  uint32 value = rootMode<<0;
  value |= rootBusy<<15;
  return value;
}
// Reads one 32-bit half of the square-root input (index 0 = low, 1 = high).
uint32 ARM946ES::regSquare(unsigned index) {
  const unsigned shift = 32*index;
  return square >> shift;
}
// Reads the most recently computed square root.
uint32 ARM946ES::regSquareRoot() {
  uint32 result = squareRoot;
  return result;
}
void ARM946ES::startDivide() {
divByZero = false;
int64 n = numerator, d = denominator;
if(divMode == div32) { n &= 0xffffffffll; n -= 2*(n & 1ll<<31); }
if(divMode != div64) { d &= 0xffffffffll; d -= 2*(d & 1ll<<31); }
// Ugh, GBATEK does not say at all how remainder works with signed values.
// Will have to fix this later..
if(d == 0) {
divByZero = true;
quotient = n >= 0? -1 : +1;
remainder = n;
}
else if(n == (-1ll<<63) && d == -1ll) {
quotient = -1ll<<63;
}
else {
quotient = n / d;
remainder = n % d;
}
}
void ARM946ES::startSquareRoot() {
uint64 x = square, r = 0;
if(rootMode == sqrt32)
x = uint32(x);
for(uint64 bit = 1ull << 62; bit != 0; bit >>= 2) {
r >>= 1;
if(x >= 2*r + bit) {
x -= 2*r + bit;
r += bit;
}
}
squareRoot = r;
}

View File

@ -1,98 +0,0 @@
// Removes the oldest entry from this core's message queue: advances the
// read offset, clears `full`, and sets `empty` once the read offset
// catches up with the write offset.
void CPUCore::popMsg() {
  ++msg.readOffset;
  if(msg.readOffset == msg.writeOffset) msg.empty = true;
  msg.full = false;
}
// Appends `data` to this core's message queue: stores it at the write
// offset, clears `empty`, and sets `full` once the write offset catches up
// with the read offset.
void CPUCore::pushMsg(uint32 data) {
  msg.buffer[msg.writeOffset] = data;
  ++msg.writeOffset;
  if(msg.writeOffset == msg.readOffset) msg.full = true;
  msg.empty = false;
}
// Resets this core's message queue to the empty state and zeroes all 16
// buffer entries.
void CPUCore::clearMsg() {
  for(unsigned slot = 0; slot < 16; slot++) {
    msg.buffer[slot] = 0;
  }
  msg.full = false;
  msg.empty = true;
  msg.writeOffset = 0;
  msg.readOffset = 0;
}
// Reads the message-queue control register. The low byte reports this
// core's own queue state, bits 8-9 report the other core's queue state.
uint32 CPUCore::regMsgControl() {
  uint32 value = this->msg.empty<<0 | this->msg.full<<1 | msg.irqOnSent<<2;
  value |= other->msg.empty<<8 | other->msg.full<<9 | msg.irqOnRecv<<10;
  value |= msg.error<<14 | msg.enable<<15;
  return value;
}
// Writes the message-queue control register.
//   low byte:  send-IRQ enable (bit 2), clear-queue command (bit 3)
//   high byte: receive-IRQ enable (bit 10), error acknowledge (bit 14),
//              queue enable (bit 15)
// Turning an IRQ enable on while its condition already holds raises the
// interrupt at once.
void CPUCore::regMsgControl(uint32 data, uint32 mask) {
  if(mask & 0x00ff) {
    const bool wasOn = msg.irqOnSent;
    msg.irqOnSent = data>>2;
    if(data & 1<<3) clearMsg();

    // Edge triggered
    const bool turnedOn = msg.irqOnSent && wasOn == false;
    if(msg.empty && turnedOn) interrupt.flags |= irqMsgSent;
  }
  if(mask & 0xff00) {
    const bool wasOn = msg.irqOnRecv;
    msg.irqOnRecv = data>>10;
    msg.enable = data>>15;
    if(data & 1<<14) msg.error = false;

    // Edge triggered
    const bool turnedOn = msg.irqOnRecv && wasOn == false;
    if(!other->msg.empty && turnedOn) interrupt.flags |= irqMsgRecv;
  }
}
// Queues `data` for the other core. Does nothing while the queue is
// disabled; sets the error flag instead when the queue is already full.
// The other core's receive interrupt is raised when the queue goes from
// empty to non-empty and that core has the interrupt enabled.
void CPUCore::regMsgSend(uint32 data) {
  if(!msg.enable) return;
  if(msg.full) {
    msg.error = true;
    return;
  }

  const bool firstEntry = msg.empty;
  pushMsg(data);
  if(firstEntry && other->msg.irqOnRecv) {
    other->interrupt.flags |= irqMsgRecv;
  }
}
// Reads the entry at the head of the other core's queue. While that queue
// is enabled the entry is also removed; reading it while empty sets this
// core's error flag instead. The other core's sent interrupt is raised
// when the read empties the queue and that core has the interrupt enabled.
uint32 CPUCore::regMsgRecv() {
  auto &queue = other->msg;
  uint32 data = queue.buffer[queue.readOffset];

  if(!queue.enable) return data;
  if(queue.empty) {
    msg.error = true;
    return data;
  }

  other->popMsg();
  if(queue.empty && queue.irqOnSent) {
    other->interrupt.flags |= irqMsgSent;
  }
  return data;
}
// Reads the sync register: the other core's output in the low bits, this
// core's output from bit 8, and the enable flag at bit 14.
uint32 CPUCore::regSync() {
  uint32 value = other->sync.output<<0;
  value |= this->sync.output<<8;
  value |= sync.enable<<14;
  return value;
}
// Writes the sync register. Only the high byte is acted on: it sets this
// core's output and enable flag, and bit 13 raises the sync interrupt on
// the other core if that core has sync enabled.
void CPUCore::regSync(uint32 data, uint32 mask) {
  if(!(mask & 0xff00)) return;

  sync.enable = data>>14;
  sync.output = data>>8;

  const bool sendIrq = data & 1<<13;
  if(sendIrq && other->sync.enable) {
    other->interrupt.flags |= irqSync;
  }
}

View File

@ -1,131 +0,0 @@
// Reads the slot 1 control register, including the last SPI data value
// from bit 16 upwards.
uint32 CPUCore::regSlot1Control() {
  auto &spi = slot1.spi;
  uint32 value = spi.baud<<0 | spi.hold<<6;
  value |= spi.busy<<7 | spi.enable<<13;
  value |= slot1.transferIrq<<14 | slot1.enable<<15;
  value |= spi.data<<16;
  return value;
}
// Reads the slot 1 ROM control register. Bit 29 always reads as 1.
uint32 CPUCore::regSlot1RomControl() {
  uint32 value = slot1.decryptLatency<<0 | slot1.xorData<<13;
  value |= slot1.responseLatency<<16 | slot1.xorCmds<<22;
  value |= slot1.dataReady<<23 | slot1.blockSize<<24;
  value |= slot1.clock<<27 | slot1.secureMode<<28;
  value |= 1<<29 | slot1.transferPending<<31;
  return value;
}
// Reads one 32-bit half of the 64-bit slot 1 command (index 0 = upper
// half), with the four bytes returned in reversed order.
uint32 CPUCore::regSlot1RomCommand(unsigned index) {
  auto word = slot1.command;
  if(index == 0) word >>= 32;

  const auto b0 = word>>24 & 0xff;
  const auto b1 = word>>16 & 0xff;
  const auto b2 = word>> 8 & 0xff;
  const auto b3 = word>> 0 & 0xff;
  return b0 << 0 | b1 << 8 | b2 << 16 | b3 << 24;
}
// Reads four consecutive values from slot1.readRom() and packs them into
// one word, first value in the lowest byte.
uint32 CPUCore::regSlot1RomRecv() {
  uint32 data = 0;
  for(unsigned shift = 0; shift < 32; shift += 8) {
    data += slot1.readRom() << shift;
  }
  return data;
}
// Writes the slot 1 control register.
//   byte 0: SPI baud and hold settings
//   byte 1: SPI enable, transfer IRQ enable, slot enable; the card's SPI
//           device (if any) is deselected when slot or SPI is disabled
//   byte 2: SPI data - performs a transfer while SPI is enabled
void CPUCore::regSlot1Control(uint32 data, uint32 mask) {
  if(mask & 0x000000ff) {
    slot1.spi.baud = data>>0;
    slot1.spi.hold = data>>6;
  }
  if(mask & 0x0000ff00) {
    slot1.spi.enable = data>>13;
    slot1.transferIrq = data>>14;
    slot1.enable = data>>15;

    const bool inactive = slot1.enable == false || slot1.spi.enable == false;
    if(inactive && slot1.card && slot1.card->spi) {
      //print("slot1 spi: deselect\n");
      slot1.card->spi->select(false);
    }
  }
  // SPI transfer
  if((mask & 0x00ff0000) && slot1.spi.enable) {
    slot1.spi.data = slot1.spiTransfer(data>>16);
    //print("slot1 spi: w ",hex<2>(data>>16)," r ",hex<2>(slot1.spi.data),"\n");
  }
}
// Writes the slot 1 ROM control register. Bit 15 reloads both LFSRs from
// config.xorSeeds, and bit 31 starts a ROM transfer.
void CPUCore::regSlot1RomControl(uint32 data, uint32 mask) {
  const bool writesLatency = mask & 0x00001fff;
  const bool writesXor = mask & 0x0000e000;
  const bool writesByte2 = mask & 0x00ff0000;
  const bool writesByte3 = mask & 0xff000000;

  if(writesLatency) {
    slot1.decryptLatency ^= (slot1.decryptLatency ^ data) & mask;
  }
  if(writesXor) {
    slot1.xorData = data>>13;
    if(data & 1<<15) {
      for(unsigned n = 0; n < 2; n++) slot1.lfsr[n] = config.xorSeeds[n];
    }
  }
  if(writesByte2) {
    slot1.responseLatency = data>>16;
    slot1.xorCmds = data>>22;
  }
  if(writesByte3) {
    slot1.blockSize = data>>24;
    slot1.clock = data>>27;
    slot1.secureMode = data>>28;
    if(data & 1<<31) slot1.startRomTransfer();
  }
}
// Writes one 32-bit half of the 64-bit slot 1 command (index 0 = upper
// half). Each byte lane covered by `mask` is moved to the mirrored byte
// position within its half, so the lowest written byte ends up as the most
// significant byte of that half.
void CPUCore::regSlot1RomCommand(unsigned index, uint32 data, uint32 mask) {
auto &cmd = slot1.command;
// Widen first so the left shifts below cannot drop bits.
uint64 ldata = data;
// Big endian commands, argh!
if(index == 0) {
// Upper half: command bits 63..32.
if(mask & 0x000000ff) cmd ^= (cmd ^ ldata<<56) & 0xff00000000000000;
if(mask & 0x0000ff00) cmd ^= (cmd ^ ldata<<40) & 0x00ff000000000000;
if(mask & 0x00ff0000) cmd ^= (cmd ^ ldata<<24) & 0x0000ff0000000000;
if(mask & 0xff000000) cmd ^= (cmd ^ ldata<< 8) & 0x000000ff00000000;
} else {
// Lower half: command bits 31..0.
if(mask & 0x000000ff) cmd ^= (cmd ^ ldata<<24) & 0x00000000ff000000;
if(mask & 0x0000ff00) cmd ^= (cmd ^ ldata<< 8) & 0x0000000000ff0000;
if(mask & 0x00ff0000) cmd ^= (cmd ^ ldata>> 8) & 0x000000000000ff00;
if(mask & 0xff000000) cmd ^= (cmd ^ ldata>>24) & 0x00000000000000ff;
}
}
// Writes the XOR seed registers.
//   index 0/1: masked merge into config.xorSeeds[index]
//   index 2:   supplies the bits above bit 31 for both seeds (seed 0 from
//              the low halfword, seed 1 from the high halfword)
void CPUCore::regSlot1RomSeed(unsigned index, uint32 data, uint32 mask) {
  if(index != 2) {
    auto &seed = config.xorSeeds[index];
    seed ^= (seed ^ data) & mask;
    return;
  }

  data &= 0x7f007f & mask;
  if(mask & 0x00007f) {
    config.xorSeeds[0] = (config.xorSeeds[0] & 0xffffffff) | uint64(data>>0 )<<32;
  }
  if(mask & 0x7f0000) {
    config.xorSeeds[1] = (config.xorSeeds[1] & 0xffffffff) | uint64(data>>16)<<32;
  }
}
// Reads the slot 2 / external memory control register. The arm9 access and
// priority flags are reported inverted; bits 13 and 14 always read as 1.
uint32 CPUCore::regSlot2Control() {
  uint32 value = config.slot2ramTiming<<0 | config.slot2romTiming0<<2;
  value |= config.slot2romTiming1<<4 | config.slot2phi<<5;
  value |= !arm9.slot2access<<7 | !arm9.slot1access<<11;
  value |= 1<<13 | 1<<14 | !arm9.ramPriority<<15;
  return value;
}
// Writes the slot 2 timing settings. Only the low byte is handled here.
void CPUCore::regSlot2Control(uint32 data, uint32 mask) {
  if(!(mask & 0xff)) return;

  config.slot2ramTiming = data>>0;
  config.slot2romTiming0 = data>>2;
  config.slot2romTiming1 = data>>4;
  config.slot2phi = data>>5;
}

View File

@ -1,150 +0,0 @@
// Executes Thumb shift-by-immediate (lsl, lsr, asr): shifts r[irm] by rs
// and stores the result in r[ird], updating flags through bitf().
void ARMCore::thumbShiftImm(uint2 opcode, uint3 ird, uint3 irm, uint5 rs) {
  auto &rd = r[ird];
  r[15] += 2;

  SOut shifted = shiftImm(irm, opcode, rs);
  bitf(true, rd = shifted, shifted); // lsl, lsr, asr
}
// Executes Thumb three-operand add/sub with flags.
//   opcode bit 0: 0 = add, 1 = sub
//   opcode bit 1: 0 = second operand is r[irm], 1 = irm is a 3-bit immediate
void ARMCore::thumbAddSub(uint2 opcode, uint3 ird, uint3 irn, uint3 irm) {
  auto &rd = r[ird];
  auto rn = r[irn];
  auto rm = opcode & 2? (uint32)irm : r[irm];
  r[15] += 2;

  const bool subtract = opcode & 1;
  if(subtract) sumf(true, rd = rn-rm, rn, ~rm); // subs
  else         sumf(true, rd = rn+rm, rn, rm);  // adds
}
// Executes Thumb mov/cmp/add/sub with an 8-bit immediate, updating flags.
void ARMCore::thumbDataImm(uint2 opcode, uint3 ird, uint8 rm) {
  auto &rd = r[ird];
  auto rn = rd;  // operand value before rd is overwritten
  r[15] += 2;

  switch((unsigned)opcode) {
  case 0: bitf(true, rd = rm, {rm, Cf});    break; // movs
  case 1: sumf(true, rn-rm, rn, ~rm);       break; // cmps
  case 2: sumf(true, rd = rn+rm, rn, rm);   break; // adds
  case 3: sumf(true, rd = rn-rm, rn, ~rm);  break; // subs
  }
}
// Executes Thumb two-operand ALU instructions on low registers.
// Shifts/rotates by register, neg and mul are handled here; every other
// opcode is forwarded to alu() with the opcode remapped as 2*opcode+1.
void ARMCore::thumbDataLo(uint4 opcode, uint3 ird, uint3 irm) {
// rd is a reference (written back), rm is a copy of the operand value.
auto &rd = r[ird], rm = r[irm];
r[15] += 2;
// Note: the local `SOut r` in each branch shadows the register file `r`.
if(opcode == 2) { SOut r = lsl(rd, rm); bitf(true, rd = r, r); } // lsls
else if(opcode == 3) { SOut r = lsr(rd, rm); bitf(true, rd = r, r); } // lsrs
else if(opcode == 4) { SOut r = asr(rd, rm); bitf(true, rd = r, r); } // asrs
else if(opcode == 7) { SOut r = ror(rd, rm); bitf(true, rd = r, r); } // rors
else if(opcode == 9) sumf(true, rd = -rm, 0, ~rm); // negs
else if(opcode == 13) bitf(true, rd = rm * rd, {rm*rd, Cf}); // muls
else alu(2*opcode+1, rd, rd, {rm,Cf}); // others are same as ARM
}
// Executes Thumb high-register operations: add, cmp, mov, and bx/blx.
void ARMCore::thumbDataHi(uint2 opcode, uint4 ird, uint4 irm) {
// rn and rm are copies taken before r[15] is advanced, so an instruction
// that names r15 as an operand sees the pre-increment value.
auto &rd = r[ird], rn = rd, rm = r[irm];
r[15] += 2;
if(opcode == 0) rd = rn + rm; // add
if(opcode == 1) sumf(true, rn-rm, rn, ~rm); // cmps
if(opcode == 2) rd = rm; // mov
if(opcode == 3) { // bx, blx
// Bit 3 of ird selects blx, which also writes r[14] (with bit 0 set).
if(ird & 8) r[14] = r[15] - 4 + 1;
// Bit 0 of the target is passed as the first argument to branch().
return branch(rm & 1, rm);
}
// Writing r15 through add/mov is turned into a branch.
if(ird == 15) branch(1, r[15]);
}
// Executes Thumb loads/stores with a 5-bit immediate offset. The offset is
// scaled by the access size (4 for words, 2 for halfwords, 1 for bytes).
void ARMCore::thumbMemImm(uint5 opcode, uint3 ird, uint3 irn, uint5 rm) {
  auto &rd = r[ird];
  auto addr = r[irn];
  r[15] += 2;

  switch((unsigned)opcode) {
  case 12: store(addr + 4*rm, Word, rd);     break; // str
  case 13: rd = load(addr + 4*rm, Word);     break; // ldr
  case 14: store(addr + 1*rm, Byte, rd);     break; // strb
  case 15: rd = load(addr + 1*rm, Byte);     break; // ldrb
  case 16: store(addr + 2*rm, Half, rd);     break; // strh
  case 17: rd = load(addr + 2*rm, Half);     break; // ldrh
  }
}
// Executes Thumb loads/stores addressed by r[irn] + r[irm]. Opcodes 3 and 7
// sign-extend the loaded byte / halfword.
void ARMCore::thumbMemReg(uint3 opcode, uint3 ird, uint3 irn, uint3 irm) {
  bool ld = opcode & 1;
  auto &rd = r[ird];
  auto addr = r[irn] + r[irm];
  r[15] += 2;

  switch((unsigned)opcode) {
  case 0: store(addr, Word, rd);            break; // str
  case 1: store(addr, Half, rd);            break; // strh
  case 2: store(addr, Byte, rd);            break; // strb
  case 3: rd = (int8) load(addr, Byte);     break; // ldrsb
  case 4: rd = load(addr, Word);            break; // ldr
  case 5: rd = load(addr, Half);            break; // ldrh
  case 6: rd = load(addr, Byte);            break; // ldrb
  case 7: rd = (int16) load(addr, Half);    break; // ldrsh
  }
}
// Executes Thumb pc/sp-relative loads, stores and address generation. The
// 8-bit offset is in words; the pc base is r[15] with its low two bits
// cleared, sampled before r[15] is advanced.
void ARMCore::thumbRelative(uint5 opcode, uint3 ird, uint8 rm) {
  auto &rd = r[ird];
  auto pc = r[15] & ~3;
  r[15] += 2;

  switch((unsigned)opcode) {
  case  9: rd = load(pc + 4*rm, Word);        break; // ldr [pc,#n]
  case 18: store(r[13] + 4*rm, Word, rd);     break; // str [sp,#n]
  case 19: rd = load(r[13] + 4*rm, Word);     break; // ldr [sp,#n]
  case 20: rd = pc + 4*rm;                    break; // adr rd,pc,..
  case 21: rd = r[13] + 4*rm;                 break; // adr rd,sp,..
  }
}
// Executes Thumb "add sp, #n" (opcode 0) / "sub sp, #n" (opcode 1); the
// 7-bit offset is in words.
void ARMCore::thumbAddSP(uint1 opcode, uint7 rm) {
  r[15] += 2;
  if(opcode == 0) {
    r[13] += 4*rm; // add sp,#n
  } else {
    r[13] -= 4*rm; // sub sp,#n
  }
}
// Executes Thumb push/pop (opcode < 8, base register r13) and ldm/stm
// (opcode >= 8, base register r[irn]).
void ARMCore::thumbBlock(uint4 opcode, uint3 irn, uint8 rlist) {
auto &rn = opcode < 8? r[13] : r[irn];
bool ld = opcode & 1;
bool up = true;
bool writeback = true;
uint32 regs = rlist;
if(opcode < 8) {
// pop ascends, push descends.
up = ld;
// Bit 0 of irn adds r14 to a push or r15 to a pop.
if(irn & 1) regs |= (1 << 14+ld);
}
uint32 addr = rn, base = rn, size = 4*bit::count(regs);
// Descending transfers start at the lowest address and still walk upwards.
if(!up) addr -= size;
r[15] += 2;
for(unsigned b = 0, s = 0; b < 16; b++) {
if(~regs & 1<<b) continue;
// s is 0 for the first transferred register and nonzero afterwards.
if(ld) r[b] = read(addr, Word, s++);
else write(addr, Word, s++, r[b]);
// The base register is updated once, right after the first transfer, so a
// later load into the base register overrides the written-back value.
if(writeback) {
writeback = false;
rn = up? base + size : base - size;
}
addr += 4;
}
// A pop that loaded r15 becomes a branch; bit 0 of the loaded value is
// passed as the first argument to branch().
if(regs & 1<<15) branch(r[15] & 1, r[15]);
}
// Executes a Thumb conditional branch. Condition values 14 and 15 are the
// undefined and swi encodings; otherwise the branch is taken when the
// condition holds, and r[15] simply advances when it does not.
void ARMCore::thumbCond(uint4 opcode, int8 offset) {
  if(opcode == 14) undefined();
  else if(opcode == 15) swi();
  else if(evalCond(opcode)) branch(1, r[15] + 2*offset);
  else r[15] += 2;
}
// Executes a Thumb unconditional branch; the 11-bit signed offset is in
// halfwords relative to r[15].
void ARMCore::thumbBranch(int11 offset) {
  const auto target = r[15] + 2*offset;
  branch(1, target);
}
// Executes the first half of a Thumb long branch: stores r[15] plus the
// upper part of the offset in r[14] for the second half to complete.
void ARMCore::thumbBh(int11 offset) {
  const auto upper = 2*(offset << 11);
  r[14] = r[15] + upper;
  r[15] += 2;
}
// Executes the second half of a Thumb long branch: the target is the value
// left in r[14] by the first half plus twice the low offset. r[14] is then
// replaced with r[15]-2 with bit 0 set.
void ARMCore::thumbBlx(uint1 tf, uint11 offset) {
  const uint32 target = r[14] + 2*offset;
  r[14] = r[15]-2 + 1;
  branch(tf, target);
}

View File

@ -1,78 +0,0 @@
// Reads timer `no`: the current count in the low halfword and the control
// fields above it. Non-cascaded timers are brought up to date first.
uint32 CPUCore::regTimer(unsigned no) {
  auto &t = timer[no];
  if(t.cascade == false) updateTimer(no);

  uint32 value = t.count<<0 | t.divider<<16;
  value |= t.cascade<<18 | t.irq<<22 | t.enable<<23;
  return value;
}
void CPUCore::regTimer(unsigned no, uint32 data, uint32 mask) {
auto &timer = this->timer[no];
if(timer.cascade == false)
updateTimer(no);
event.queue.remove(timer.event);
if(mask & 0x00ffff) {
timer.reload ^= (timer.reload ^ data) & mask;
}
if(mask & 0xff0000) {
if(!timer.enable && (data & 1<<23)) {
timer.count = timer.reload;
}
timer.divider = data>>16;
timer.cascade = data>>18;
timer.irq = data>>22;
timer.enable = data>>23;
}
if(timer.cascade == false)
updateTimer(no);
}
// Brings timer `no` up to the current event-queue time.
// Cascaded timers advance by exactly one tick per call; free-running timers
// advance by the number of divider periods elapsed since lastUpdate. On
// overflow the counter is reloaded, the IRQ is raised if enabled, and the
// next timer is ticked if it is in cascade mode. Free-running timers then
// schedule an event for their next overflow.
void CPUCore::updateTimer(unsigned no) {
auto &timer = this->timer[no];
if(timer.enable) {
uint16 old = timer.count;
uint32 ticks = 1;
// log2 of the divider period, in event-queue time units.
unsigned dbits[] = { 1, 7, 9, 11 }; // 33MHz, 512KHz, 128KHz, 32KHz
unsigned s = dbits[timer.divider];
uint32 divider = 1 << s;
if(timer.cascade) {
timer.count++;
} else {
ticks = (event.queue.time >> s) - (timer.lastUpdate >> s);
timer.count += ticks;
}
// Overflow is detected as the count not having increased after ticking.
if(ticks && timer.count <= old) {
if(timer.irq) interrupt.flags |= irqTimer << no;
timer.count = timer.reload;
if(no < 3 && this->timer[no+1].cascade)
updateTimer(no+1);
}
if(timer.cascade == false) {
uint32 ticksLeft = 0x10000 - timer.count;
uint32 nextTick = divider;// - (event.queue.time & divider-1);
uint32 nextOverflow = nextTick + divider * (ticksLeft-1);
// The above commented part breaks maxmod's interpolated audio
// - perhaps the internal counters have expanded?
// NOTE(review): nextOverflow has already been computed above, so the
// adjustment to nextTick below does not change the scheduled time.
// Confirm whether that is intended before touching it.
if(event.queue.time & 1)
nextTick -= 1; // align to 33MHz clock however
event.queue.add(nextOverflow, timer.event);
}
}
timer.lastUpdate = event.queue.time;
}

View File

@ -1,527 +0,0 @@
// Sets the current matrix mode from the first argument.
// Returns false while the argument has not arrived yet.
bool GPU::gxMatrixMode() {
  const bool ready = numArgs >= 1;
  if(ready) matrixMode = args[0];
  return ready;
}
// Pushes the current matrix of the active mode onto its stack.
//   texture mode:    nothing happens
//   projection mode: single-entry stack
//   other modes:     the light and view matrices are pushed together
// stackOverflow is set when the stack pointer goes out of range.
bool GPU::gxMatrixPush() {
  if(matrixMode == mmProjection) {
    projStack[0] = projMatrix;
    if(projSP++ == 1) stackOverflow = true;
  }
  else if(matrixMode != mmTexture) {
    // mmModelView or mmLitView
    const auto slot = viewSP % 32;
    lightStack[slot] = lightMatrix;
    viewStack[slot] = viewMatrix;
    if(viewSP++ >= 31) stackOverflow = true;
  }
  return true;
}
// Pops the matrix of the active mode from its stack. For the view/light
// stack the first argument (mod 64) is the number of entries to pop.
// Returns false while the argument has not arrived yet.
bool GPU::gxMatrixPop() {
  if(numArgs < 1) return false;

  if(matrixMode == mmProjection) {
    if(projSP-- == 0) stackOverflow = true;
    projMatrix = projStack[0];
  }
  else if(matrixMode != mmTexture) {
    // mmModelView or mmLitView
    args[0] %= 64;
    viewSP -= args[0];
    if(viewSP >= 31) stackOverflow = true;

    const auto slot = viewSP % 32;
    lightMatrix = lightStack[slot];
    viewMatrix = viewStack[slot];
  }
  return true;
}
// Stores the current matrix of the active mode into a stack slot without
// moving the stack pointer. For the view/light stack the slot is the first
// argument mod 32; slot 31 also sets stackOverflow.
// Returns false while the argument has not arrived yet.
bool GPU::gxMatrixStore() {
  if(numArgs < 1) return false;

  if(matrixMode == mmProjection) {
    projStack[0] = projMatrix;
  }
  else if(matrixMode != mmTexture) {
    // mmModelView or mmLitView
    args[0] %= 32;
    if(args[0] == 31) stackOverflow = true;
    lightStack[args[0]] = lightMatrix;
    viewStack[args[0]] = viewMatrix;
  }
  return true;
}
// Loads the current matrix of the active mode from a stack slot without
// moving the stack pointer, then rebuilds the clip matrix from the
// projection and view matrices. Texture mode does nothing.
// Returns false while the argument has not arrived yet.
bool GPU::gxMatrixRestore() {
  if(numArgs < 1) return false;
  if(matrixMode == mmTexture) return true;

  if(matrixMode == mmProjection) {
    projMatrix = projStack[0];
  }
  else {
    // mmModelView or mmLitView
    args[0] %= 32;
    if(args[0] == 31) stackOverflow = true;
    lightMatrix = lightStack[args[0]];
    viewMatrix = viewStack[args[0]];
  }

  clipMatrix = projMatrix;
  transform(clipMatrix, viewMatrix);
  return true;
}
// Loads the identity matrix through loadMatrix(). 0x1000 is used as the
// value on the diagonal. Takes no arguments, so it always completes.
bool GPU::gxMatrixLoadIdentity() {
loadMatrix({ 0x1000, 0, 0, 0,
0, 0x1000, 0, 0,
0, 0, 0x1000, 0,
0, 0, 0, 0x1000 });
return true;
}
// Storage is transposed to make each row contiguous, which is opposite
// of OpenGL. This allows easy multiplication with column vectors.
//
// Loads a full 4x4 matrix from 16 arguments, transposing them as they are
// passed to loadMatrix(). Returns false until all 16 arguments have arrived.
bool GPU::gxMatrixLoad4x4() {
if(numArgs < 16) return false;
loadMatrix({ args[ 0], args[ 4], args[ 8], args[12],
args[ 1], args[ 5], args[ 9], args[13],
args[ 2], args[ 6], args[10], args[14],
args[ 3], args[ 7], args[11], args[15] });
return true;
}
// Loads a matrix from 12 arguments (four groups of three), transposed as
// they are passed to loadMatrix(); the last row is fixed at 0, 0, 0, 0x1000.
// Returns false until all 12 arguments have arrived.
bool GPU::gxMatrixLoad4x3() {
if(numArgs < 12) return false;
loadMatrix({ args[ 0], args[ 3], args[ 6], args[ 9],
args[ 1], args[ 4], args[ 7], args[10],
args[ 2], args[ 5], args[ 8], args[11],
0, 0, 0, 0x1000 });
return true;
}
// Passes a full 4x4 matrix built from 16 arguments (transposed) to
// multMatrix(). Returns false until all 16 arguments have arrived.
bool GPU::gxMatrixMult4x4() {
if(numArgs < 16) return false;
multMatrix({ args[ 0], args[ 4], args[ 8], args[12],
args[ 1], args[ 5], args[ 9], args[13],
args[ 2], args[ 6], args[10], args[14],
args[ 3], args[ 7], args[11], args[15] });
return true;
}
// Passes a matrix built from 12 arguments (transposed, last row fixed at
// 0, 0, 0, 0x1000) to multMatrix(). Returns false until all 12 arguments
// have arrived.
bool GPU::gxMatrixMult4x3() {
if(numArgs < 12) return false;
multMatrix({ args[ 0], args[ 3], args[ 6], args[ 9],
args[ 1], args[ 4], args[ 7], args[10],
args[ 2], args[ 5], args[ 8], args[11],
0, 0, 0, 0x1000 });
return true;
}
// Passes a matrix whose upper-left 3x3 part comes from 9 arguments
// (transposed) to multMatrix(); the fourth row and column are those of the
// identity. Returns false until all 9 arguments have arrived.
bool GPU::gxMatrixRotate() {
if(numArgs < 9) return false;
multMatrix({ args[0], args[3], args[6], 0,
args[1], args[4], args[7], 0,
args[2], args[5], args[8], 0,
0, 0, 0, 0x1000 });
return true;
}
// Passes a diagonal scaling matrix built from 3 arguments to multMatrix().
// Returns false until all 3 arguments have arrived.
bool GPU::gxMatrixScale() {
if(numArgs < 3) return false;
// Don't scale the lighting matrix
// (the mode is temporarily switched from mmLitView to mmModelView for the
// multMatrix() call and restored afterwards)
unsigned prevMode = matrixMode;
if(matrixMode == mmLitView)
matrixMode = mmModelView;
multMatrix({ args[0], 0, 0, 0,
0, args[1], 0, 0,
0, 0, args[2], 0,
0, 0, 0, 0x1000 });
matrixMode = prevMode;
return true;
}
// Passes a translation matrix to multMatrix(): identity with the 3
// arguments placed in the fourth column of the first three rows.
// Returns false until all 3 arguments have arrived.
bool GPU::gxMatrixTranslate() {
if(numArgs < 3) return false;
multMatrix({ 0x1000, 0, 0, args[0],
0, 0x1000, 0, args[1],
0, 0, 0x1000, args[2],
0, 0, 0, 0x1000 });
return true;
}
// Expands a packed 15-bit color (5 bits per channel, red lowest) into three
// values in color[0..2]. Each 5-bit channel c becomes 2*c, plus 1 when
// nonzero, then is multiplied by 0x1000.
void GPU::unpackColor(int32* color, uint16 rgb) {
  // Yes, it's really this crazy. PPU and GPU colors do NOT match!
  // Super annoying when trying to hide things or make them seamless.
  for(unsigned n = 0; n < 3; n++) {
    int level = 2*(rgb >> 5*n & 31);
    level += (level>0);
    color[n] = 0x1000*level;
  }
}
// Sets the current vertex color from the packed color in the first
// argument. Returns false while the argument has not arrived yet.
bool GPU::gxColor() {
  const bool ready = numArgs >= 1;
  if(ready) unpackColor(vertex.color, args[0]);
  return ready;
}
// Sets the current normal from three packed signed 10-bit components and
// computes the lit vertex color from it. In ttNormal texture mode the
// normal is also used to generate texture coordinates.
// Returns false while the argument has not arrived yet.
bool GPU::gxNormal() {
if(numArgs < 1) return false;
// Normalize to +/- 1 range
normal[0] = int10(args[0]>> 0) << 3;
normal[1] = int10(args[0]>>10) << 3;
normal[2] = int10(args[0]>>20) << 3;
normal[3] = 0;
if(primitive.texTransform == PS::ttNormal) {
// Used for environment mapping.
// The current texture coordinates are written into the fourth column of
// the texture matrix, and the (untransformed) normal becomes the vector
// that is transformed by it.
textureMatrix(0,3) = vertex.texCoord[0];
textureMatrix(1,3) = vertex.texCoord[1];
vertex.texCoord[0] = normal[0];
vertex.texCoord[1] = normal[1];
vertex.texCoord[2] = normal[2];
vertex.texCoord[3] = 0x1000;
transform(textureMatrix, vertex.texCoord);
}
transform(lightMatrix, normal);
// Start from the emission color and accumulate each enabled light.
vertex.color = emission;
for(auto& light : lights) {
if(light.enable == false)
continue;
// Diffuse and specular levels; negative dot products are clamped to 0.
int32 dl = max(0, -dot(light.eyeVector, normal));
int32 sl = max(0, -dot(light.halfVector, normal));
sl = min(0xfff, sl*sl / 0x1000);
if(useShineTable)
sl = shininess[sl>>5] << 4;
for(unsigned n = 0; n < 3; n++) {
vertex.color[n] += (int64) specular[n] * light.color[n] * sl / 0x8000000;
vertex.color[n] += (int64) diffuse[n] * light.color[n] * dl / 0x8000000;
vertex.color[n] += (int64) ambient[n] * light.color[n] * 0x200 / 0x8000000; // ??
}
}
return true;
}
// Sets the current texture coordinates from two packed signed 16-bit
// values, scaled by 0x1000; the remaining two components are set to
// 0x1000. In ttTexCoord mode the coordinates are then transformed by the
// texture matrix. Returns false while the argument has not arrived yet.
bool GPU::gxTexCoord() {
if(numArgs < 1) return false;
vertex.texCoord[0] = 0x1000 * int16(args[0]>> 0);
vertex.texCoord[1] = 0x1000 * int16(args[0]>>16);
vertex.texCoord[2] = 0x1000;
vertex.texCoord[3] = 0x1000;
// Affine texture transformations
if(primitive.texTransform == PS::ttTexCoord)
transform(textureMatrix, vertex.texCoord);
//if(primitive.texTransform == PS::ttNormal || primitive.texTransform == PS::ttVertex) {
// // Plug in U,V for environment/vertex mapping
// textureMatrix(0,3) = vertex.texCoord[0];
// textureMatrix(1,3) = vertex.texCoord[1];
//}
return true;
}
// Submits a vertex with three signed 16-bit coordinates: x and y packed in
// the first argument, z in the low half of the second.
// Returns false until both arguments have arrived.
bool GPU::gxVertex3i_16() {
  if(numArgs < 2) return false;

  // 4.12, +/- 16 range
  const auto xy = args[0];
  const auto z = args[1];
  vertex.position[0] = int16(xy>> 0);
  vertex.position[1] = int16(xy>>16);
  vertex.position[2] = int16(z >> 0);
  submitVertex();
  return true;
}
// Submits a vertex with three signed 10-bit coordinates packed in one
// argument, each shifted left by 6.
// Returns false while the argument has not arrived yet.
bool GPU::gxVertex3i_10() {
  if(numArgs < 1) return false;

  // 4.6, +/- 16 range
  for(unsigned n = 0; n < 3; n++) {
    vertex.position[n] = int10(args[0]>>10*n) << 6;
  }
  submitVertex();
  return true;
}
// Submits a vertex with new x and y (signed 16-bit halves of the argument),
// keeping the previous z.
// Returns false while the argument has not arrived yet.
bool GPU::gxVertex2i_XY() {
  if(numArgs < 1) return false;

  // Use same Z as before
  const auto packed = args[0];
  vertex.position[0] = int16(packed>> 0);
  vertex.position[1] = int16(packed>>16);
  submitVertex();
  return true;
}
// Submits a vertex with new x and z (signed 16-bit halves of the argument),
// keeping the previous y.
// Returns false while the argument has not arrived yet.
bool GPU::gxVertex2i_XZ() {
  if(numArgs < 1) return false;

  // Use same Y as before
  const auto packed = args[0];
  vertex.position[0] = int16(packed>> 0);
  vertex.position[2] = int16(packed>>16);
  submitVertex();
  return true;
}
// Submits a vertex with new y and z (signed 16-bit halves of the argument),
// keeping the previous x.
// Returns false while the argument has not arrived yet.
bool GPU::gxVertex2i_YZ() {
  if(numArgs < 1) return false;

  // Use same X as before
  const auto packed = args[0];
  vertex.position[1] = int16(packed>> 0);
  vertex.position[2] = int16(packed>>16);
  submitVertex();
  return true;
}
// Relative vertex command: adds three signed 10-bit deltas from args[0] to
// the current position, wrapping each sum to 16 bits.
// Returns false until the argument word is available.
bool GPU::gxVertex3i_Rel() {
  if(numArgs < 1) return false;

  // 0.10, previous vertex +/- 0.125 relative range
  for(unsigned axis = 0; axis < 3; axis++)
    vertex.position[axis] = int16(vertex.position[axis] + int10(args[0] >> 10*axis));

  submitVertex();
  return true;
}
bool GPU::gxAttribute() {
if(numArgs < 1) return false;
for(unsigned n = 0; n < 4; n++)
lights[n].enable = args[0] >> n;
attributes = args[0];
return true;
}
bool GPU::gxTexImage() {
if(numArgs < 1) return false;
texImage = args[0] & 0x3fffffff;
primitive.texTransform = args[0] >> 30;
//if(primitive.type == PS::tris || primitive.type == PS::quads)
// primitive.texImage = texImage;
return true;
}
// Texture palette command. Latches the low 13 bits of args[0] into
// `texPalette`. Returns false until the argument word is available.
bool GPU::gxTexPalette() {
  if(numArgs >= 1) {
    texPalette = args[0] & 0x1fff;

    // Disabled: applying the new palette immediately for tris/quads.
    //if(primitive.type == PS::tris || primitive.type == PS::quads)
    //  primitive.texPalette = texPalette;
    return true;
  }
  return false;
}
bool GPU::gxLightDiffuseAmbient() {
if(numArgs < 1) return false;
unpackColor(diffuse, args[0] >> 0);
unpackColor(ambient, args[0] >> 16);
// This is meant to allow display lists to be ignorant of
// the lighting state (namely whether it's in use or not).
if(args[0] & 1<<15)
unpackColor(vertex.color, args[0]);
return true;
}
bool GPU::gxLightSpecularEmission() {
if(numArgs < 1) return false;
unpackColor(specular, args[0] >> 0);
unpackColor(emission, args[0] >> 16);
useShineTable = args[0] & 1<<15;
return true;
}
bool GPU::gxLightDirection() {
if(numArgs < 1) return false;
unsigned no = args[0] >> 30;
auto &light = lights[no];
light.direction[0] = int10(args[0] >> 0);
light.direction[1] = int10(args[0] >>10);
light.direction[2] = int10(args[0] >>20);
light.direction[3] = 0;
light.eyeVector = light.direction;
transform(lightMatrix, light.eyeVector);
light.halfVector[0] = (light.eyeVector[0] + 0)/2;
light.halfVector[1] = (light.eyeVector[1] + 0)/2;
light.halfVector[2] = (light.eyeVector[2] - 0x1000)/2;
light.halfVector[3] = 0;
return true;
}
bool GPU::gxLightColor() {
if(numArgs < 1) return false;
unsigned no = args[0] >> 30;
unpackColor(lights[no].color, args[0]);
return true;
}
// Shininess table upload: 32 argument words, four table bytes per word,
// lowest byte first. Returns false until all 32 words are available.
bool GPU::gxLightShininess() {
  if(numArgs < 32) return false;

  for(unsigned word = 0; word < 32; word++) {
    for(unsigned byte = 0; byte < 4; byte++)
      shininess[4*word + byte] = args[word] >> 8*byte;
  }
  return true;
}
// Begin-primitive command. Closes any strip still open in the upload list,
// then starts a new primitive of the type in args[0], latching the current
// attributes, texture image and palette for it.
// Returns false until the argument word is available.
bool GPU::gxBeginPrimitive() {
  if(numArgs < 1) return false;

  // Only strips leave an open entry in the upload list; separate triangles
  // and quads are closed by addPoly() as soon as they are complete. Without
  // the strip check, an unfinished quad with three pending vertices would
  // satisfy "size >= 3" and advance numPrims over a slot addPrim() never
  // filled. swapBuffers() applies the same strip-only guard.
  const bool     isStrip  = primitive.type >= PS::triStrip;
  const unsigned minVerts = primitive.type == PS::quadStrip? 4 : 3;
  if(isStrip && uploadList->numPrims < 2048 && primitive.size >= minVerts)
    uploadList->numPrims++;  // terminate the preceding strip

  primitive.type       = args[0];
  primitive.size       = 0;
  primitive.winding    = 0;
  primitive.attributes = attributes;
  primitive.texImage   = texImage;
  primitive.texPalette = texPalette;
  return true;
}
// Swap-buffers command. Records the argument and flags the scene as finished;
// the actual swap is performed later by swapBuffers().
// Returns false until the argument word is available.
bool GPU::gxSwapBuffers() {
  if(numArgs >= 1) {
    swapArgument  = args[0];
    sceneFinished = true;
    return true;
  }
  return false;
}
bool GPU::gxViewport() {
if(numArgs < 1) return false;
viewport.x = (args[0]>> 0 & 0xff) + 0;
viewport.y = (args[0]>> 8 & 0xff) + 0;
viewport.w = (args[0]>>16 & 0xff) + 1 - viewport.x;
viewport.h = (args[0]>>24 & 0xff) + 1 - viewport.y;
return true;
}
bool GPU::gxCullTest() {
if(numArgs < 3) return false;
// Box range given as x, y, z, w, h, d
int16 a[6] = {
int16(args[0]>> 0), int16(args[0]>>16), int16(args[1]>> 0),
int16(args[1]>>16), int16(args[2]>> 0), int16(args[2]>>16),
};
// -> l, b, n, r, t, f
a[3] += a[0]; a[4] += a[1]; a[5] += a[2];
// Generate all 8 corners and outcodes
int32 out[8];
ClipSpaceVertex vertex[8] = { // X Y Z
{{ a[0], a[1], a[2], 0x1000 }}, // l,b,n
{{ a[0], a[1], a[5], 0x1000 }}, // l,b,f
{{ a[0], a[4], a[2], 0x1000 }}, // l,t,n
{{ a[0], a[4], a[5], 0x1000 }}, // l,t,f
{{ a[3], a[1], a[2], 0x1000 }}, // r,b,n
{{ a[3], a[1], a[5], 0x1000 }}, // r,b,f
{{ a[3], a[4], a[2], 0x1000 }}, // r,t,n
{{ a[3], a[4], a[5], 0x1000 }}, // r,t,f
};
for(unsigned n = 0; n < 8; n++) {
transform(clipMatrix, vertex[n].position);
out[n] = outcode(vertex[n]);
}
// Find out which sides of the box would be drawn
bool front = (out[0] & out[2] & out[4] & out[6]) == 0;
bool back = (out[1] & out[3] & out[5] & out[7]) == 0;
bool bottom = (out[0] & out[1] & out[4] & out[5]) == 0;
bool top = (out[2] & out[3] & out[6] & out[7]) == 0;
bool left = (out[0] & out[1] & out[2] & out[3]) == 0;
bool right = (out[4] & out[5] & out[6] & out[7]) == 0;
// True if at least one face was accepted
boxResult = front || back || bottom || top || left || right;
return true;
}
bool GPU::gxPositionTest() {
if(numArgs < 2) return false;
vertex.position[0] = int16(args[0]>> 0);
vertex.position[1] = int16(args[0]>>16);
vertex.position[2] = int16(args[1]>> 0);
vertexResult = vertex.position;
transform(clipMatrix, vertexResult);
return true;
}
// Direction test command. Unpacks three signed 10-bit components from
// args[0], shifts each left by 3, and stores the light-matrix transform of
// the resulting vector in normalResult.
// Returns false until the argument word is available.
bool GPU::gxDirectionTest() {
  if(numArgs < 1) return false;

  for(unsigned axis = 0; axis < 3; axis++)
    normalResult[axis] = int10(args[0] >> 10*axis) << 3;
  normalResult[3] = 0;

  transform(lightMatrix, normalResult);
  return true;
}

View File

@ -1,41 +0,0 @@
// Geometry command handlers, dispatched from GPU::geomCommand().
// Each handler reads its operands from args[] and returns false while
// numArgs is still short of what it needs, true once it has run.

// Matrix commands (0x10 - 0x1c)
bool gxMatrixMode();
bool gxMatrixPush();
bool gxMatrixPop();
bool gxMatrixStore();
bool gxMatrixRestore();
bool gxMatrixLoadIdentity();
bool gxMatrixLoad4x4();
bool gxMatrixLoad4x3();
bool gxMatrixMult4x4();
bool gxMatrixMult4x3();
bool gxMatrixRotate();
bool gxMatrixScale();
bool gxMatrixTranslate();
// Vertex attribute and vertex submission commands (0x20 - 0x2b)
bool gxColor();
bool gxNormal();
bool gxTexCoord();
bool gxVertex3i_16();
bool gxVertex3i_10();
bool gxVertex2i_XY();
bool gxVertex2i_XZ();
bool gxVertex2i_YZ();
bool gxVertex3i_Rel();
bool gxAttribute();
bool gxTexImage();
bool gxTexPalette();
// Material and light commands (0x30 - 0x34)
bool gxLightDiffuseAmbient();
bool gxLightSpecularEmission();
bool gxLightDirection();
bool gxLightColor();
bool gxLightShininess();
// Primitive / scene control (0x40, 0x50, 0x60)
bool gxBeginPrimitive();
bool gxSwapBuffers();
bool gxViewport();
// Test commands (0x70 - 0x72); results are read back through registers
bool gxCullTest();
bool gxPositionTest();
bool gxDirectionTest();

View File

@ -1,316 +0,0 @@
// Feeds the current vertex into the primitive assembler. The vertex is
// transformed by the clip matrix, stored in primitive.v[], and once enough
// vertices have been collected for the active primitive type the matching
// submit* routine is called.
void GPU::submitVertex() {
// Vertex-source texture mapping: texture coordinates are generated from the
// vertex position, with the previously latched U,V placed in the matrix.
if(primitive.texTransform == PS::ttVertex) {
textureMatrix(0,3) = vertex.texCoord[0];
textureMatrix(1,3) = vertex.texCoord[1];
vertex.texCoord[0] = vertex.position[0];
vertex.texCoord[1] = vertex.position[1];
vertex.texCoord[2] = vertex.position[2];
vertex.texCoord[3] = 0x1000;
transform(textureMatrix, vertex.texCoord);
}
auto &v = primitive.v;
// Work on a copy so that `vertex` itself keeps its untransformed position.
auto input = vertex;
transform(clipMatrix, input.position);
switch(primitive.type) {
case PS::tris:
// submitTri() resets primitive.size to 0, so size stays within 0..2 here.
v[primitive.size] = input;
if(++primitive.size >= 3)
submitTri(v[0], v[1], v[2]);
break;
case PS::triStrip:
// Once the strip is running, every new vertex lands in slot 2.
v[min(2, primitive.size)] = input;
if(++primitive.size >= 3) {
submitTriStrip(v[0], v[1], v[2]);
// Alternate the winding and slide the window forward by one vertex.
primitive.winding ^= 1;
v[0] = v[1];
v[1] = v[2];
}
break;
case PS::quads:
v[primitive.size] = input;
// Slightly different order than triangles and strips
if(++primitive.size >= 4)
submitQuad(v[0], v[1], v[2], v[3]);
break;
case PS::quadStrip:
// Vertices arrive in pairs: even ones go to slot 2, odd ones to slot 3
// (the first pair fills slots 0 and 1).
if(primitive.size % 2) v[min(3, primitive.size)] = input;
else v[min(2, primitive.size)] = input;
if(++primitive.size >= 4) {
// A quad is complete only after each second vertex of a pair.
if(primitive.size % 2 == 0) {
submitQuadStrip(v[0], v[1], v[2], v[3]);
// The trailing edge becomes the leading edge of the next quad.
v[0] = v[2];
v[1] = v[3];
}
}
break;
}
}
// Submits one independent triangle. The triangle is dropped when it is
// back/front-face culled or when all three vertices lie beyond the same clip
// plane; otherwise it is handed to addPoly(), with clipping requested if any
// vertex is outside. Always re-latches the texture state and resets the
// pending vertex count.
void GPU::submitTri(ClipSpaceVertex& v0, ClipSpaceVertex& v1, ClipSpaceVertex& v2) {
  const int32 code0 = outcode(v0);
  const int32 code1 = outcode(v1);
  const int32 code2 = outcode(v2);

  int32 face = facing(v0, v1, v2);
  if(primitive.winding == 1)
    face = -face;  // this can happen when clipping triangle strips

  const bool drawFront = primitive.attributes & PS::drawFront;
  const bool drawBack  = primitive.attributes & PS::drawBack;
  const bool visible   = (drawFront && face > 0) || (drawBack && face < 0);
  const bool culled    = face != 0 && !visible;
  const bool outside   = (code0 & code1 & code2) != 0;

  if(!outside && !culled) {
    const bool needsClip = (code0 | code1 | code2) != 0;
    ClipSpaceVertex *corners[] = { &v0, &v1, &v2 };
    addPoly(corners, 3, needsClip);
  }

  primitive.size       = 0;
  primitive.texImage   = texImage;
  primitive.texPalette = texPalette;
}
// Submits one independent quad. Facing is judged from the first three
// vertices. The quad is dropped when culled or when all four vertices lie
// beyond the same clip plane; otherwise it goes to addPoly(), with clipping
// requested if any vertex is outside. Always re-latches the texture state and
// resets the pending vertex count.
void GPU::submitQuad(ClipSpaceVertex& v0, ClipSpaceVertex& v1, ClipSpaceVertex& v2, ClipSpaceVertex& v3) {
  const int32 code0 = outcode(v0);
  const int32 code1 = outcode(v1);
  const int32 code2 = outcode(v2);
  const int32 code3 = outcode(v3);

  const int32 face = facing(v0, v1, v2);

  const bool drawFront = primitive.attributes & PS::drawFront;
  const bool drawBack  = primitive.attributes & PS::drawBack;
  const bool visible   = (drawFront && face > 0) || (drawBack && face < 0);
  const bool culled    = face != 0 && !visible;
  const bool outside   = (code0 & code1 & code2 & code3) != 0;

  if(!outside && !culled) {
    const bool needsClip = (code0 | code1 | code2 | code3) != 0;
    ClipSpaceVertex *corners[] = { &v0, &v1, &v2, &v3 };
    addPoly(corners, 4, needsClip);
  }

  primitive.size       = 0;
  primitive.texImage   = texImage;
  primitive.texPalette = texPalette;
}
// Submits the next triangle of a strip. A triangle that is neither culled nor
// clipped is appended to the open strip primitive (opening it first if this
// is the strip's first triangle). Otherwise the open strip is closed, the
// triangle is sent through submitTri() on its own, and the strip restarts.
void GPU::submitTriStrip(ClipSpaceVertex& v0, ClipSpaceVertex& v1, ClipSpaceVertex& v2) {
  const int32 raw  = facing(v0, v1, v2);
  const int32 face = primitive.winding == 1? -raw : raw;

  // Clipping is a pain here. The easiest way that comes to mind is to check
  // whether a vertex was clipped. Then if so, split this polygon off and,
  // after clipping, restart the primitive with the current winding order.
  const bool drawFront = primitive.attributes & PS::drawFront;
  const bool drawBack  = primitive.attributes & PS::drawBack;
  const bool visible   = (drawFront && face > 0) || (drawBack && face < 0);
  const bool culled    = face != 0 && !visible;
  const bool clipped   = (outcode(v0) | outcode(v1) | outcode(v2)) != 0;

  if(!culled && !clipped) {
    if(primitive.size == 3) {
      // First triangle of the strip: open the primitive and seed two vertices.
      addPrim(Prim::triStrip);
      addVertex(v0);
      addVertex(v1);
    }
    addVertex(v2);
    return;
  }

  if(uploadList->numPrims < 2048 && primitive.size > 3)
    uploadList->numPrims++;  // Terminate the current strip (if any)

  submitTri(v0, v1, v2);     // Submit as separate triangle
  primitive.size = 2;        // Next vertex restarts the strip
}
// Submits the next quad of a strip. A quad that is neither culled nor clipped
// is appended to the open strip primitive (opening it first if this is the
// strip's first quad). Otherwise the open strip is closed, the quad is sent
// through submitQuad() on its own with v2/v3 swapped, and the strip restarts.
void GPU::submitQuadStrip(ClipSpaceVertex& v0, ClipSpaceVertex& v1, ClipSpaceVertex& v2, ClipSpaceVertex& v3) {
  const int32 face = facing(v0, v1, v3);

  const bool drawFront = primitive.attributes & PS::drawFront;
  const bool drawBack  = primitive.attributes & PS::drawBack;
  const bool visible   = (drawFront && face > 0) || (drawBack && face < 0);
  const bool culled    = face != 0 && !visible;
  const bool clipped   = (outcode(v0) | outcode(v1) | outcode(v2) | outcode(v3)) != 0;

  if(!culled && !clipped) {
    if(primitive.size == 4) {
      // First quad of the strip: open the primitive and seed two vertices.
      addPrim(Prim::quadStrip);
      addVertex(v0);
      addVertex(v1);
    }
    addVertex(v2);
    addVertex(v3);
    return;
  }

  if(uploadList->numPrims < 2048 && primitive.size > 4)
    uploadList->numPrims++;    // Terminate the current strip (if any)

  submitQuad(v0, v1, v3, v2);  // Submit as separate quad
  primitive.size = 2;          // Next two vertices restart the strip
}
// Adds a single polygon to the upload list, optionally clipping it first.
//   v        - array of numVerts pointers to the polygon's vertices
//   numVerts - number of entries in v
//   clip     - when true, the polygon is clipped against all six planes
// Returns false only when addPrim() refuses the primitive; a polygon that is
// clipped away completely still returns true.
bool GPU::addPoly(ClipSpaceVertex **v, unsigned numVerts, bool clip) {
// A vertex paired with its cached outcode.
struct Entry {
ClipSpaceVertex v; unsigned out;
Entry() {}
Entry(const ClipSpaceVertex& s) : v(s), out(outcode(s)) {}
operator ClipSpaceVertex() const { return v; }
};
// Two lists used alternately: each clip pass reads `in` and writes `out`.
vector<Entry> list[2], *in = list+0, *out = list+1;
for(unsigned i = 0; i < numVerts; i++)
out->append({ *(v[i]) });
if(clip) {
// One pass per clip plane; bit `plane` of an outcode marks that plane.
for(unsigned plane = 0; plane < 6; plane++) {
const int axis = plane/2, side = plane%2? +1 : -1;
std::swap(in, out);
out->reset();
// Walk every edge a -> b of the polygon, wrapping around at the end.
for(unsigned i = 0; i < in->size(); ) {
auto &a = (*in)[i++];
auto &b = (*in)[i % in->size()];
// Both endpoints outside this plane: the edge contributes nothing.
if(1<<plane & a.out & b.out) continue;
// Entering edge: emit the crossing point before b.
if(1<<plane & a.out) out->append({ clipEdge(b, a, axis, side) });
// Leaving edge: emit the crossing point; otherwise b itself is kept.
if(1<<plane & b.out) out->append({ clipEdge(a, b, axis, side) });
else out->append({ b });
}
}
}
// Nothing survived clipping.
if(!out->size())
return true;
if(!addPrim(Prim::poly))
return false;
for(unsigned i = 0; i < out->size(); i++)
addVertex((*out)[i]);
// Close the primitive opened by addPrim().
uploadList->numPrims++;
return true;
}
// Appends one vertex to the primitive currently open in the upload list.
// The position, texture coordinates and colour are divided by W, then X and Y
// are scaled to the viewport. Returns false, adding nothing, when either the
// primitive or the vertex storage is full.
bool GPU::addVertex(const ClipSpaceVertex &v) {
  if(uploadList->numPrims == 2048 || uploadList->numVerts == 6144)
    return false;

  // XYZ are all in range +/-w in homogenous coordinates
  uploadList->prims[uploadList->numPrims].numVerts++;
  auto &out = uploadList->verts[uploadList->numVerts++];

  // xyzw = 20.12
  // uv   = 20.12 (in texels)
  // rgb  = 20.12 (0 - 63.0)
  //
  // W == 0 passes the outcode test when X, Y and Z are zero too, so it can
  // reach this point. Substitute 1 for every division below, not only for
  // the stored value, to avoid an integer division by zero.
  int32 w = v.position[3];
  if(w == 0) w = 1;

  // Perspective transformation - divide everything by W.
  // This flattens the perspective and allows linear pixel interpolation.
  out.w = w;  // still needed to restore UVZ and RGB.
  out.x = (int64) v.position[0] * 0x40000000/w;  // x/w
  out.y = (int64) v.position[1] * 0x40000000/w;  // y/w
  out.z = (int64) v.position[2] * 0x40000000/w;  // z/w
  out.u = (int64) v.texCoord[0] * 0x4000/w;      // u/w
  out.v = (int64) v.texCoord[1] * 0x4000/w;      // v/w
  out.r = (int64) v.color[0] * 0x4000/w;         // r/w
  out.g = (int64) v.color[1] * 0x4000/w;         // g/w
  out.b = (int64) v.color[2] * 0x4000/w;         // b/w

  // Apply viewport scaling. X and Y get rounded off somehow. Vertices and
  // interpolants jump around a lot, despite being perspective correct. :(
  out.x = (out.x + 0x40000000ll) * viewport.w/2 + 0x400000*viewport.x >> 8;
  out.y = (out.y + 0x40000000ll) * viewport.h/2 + 0x400000*viewport.y >> 8;

  // With perspective projections, W is a linear function of Z. Default then
  // is to use the nonlinear Z/W, giving more precision close up. This setting
  // gives linear depth values instead.
  //
  // Note: In orthographic/2D views, Z/W is linear anyway because W=1. In that
  // case this option kills Z buffering (kinda bad, given the auto Y sort).
  //if(uploadList->depthUseW)
  //  out.z = max(-0x8000, min(0x7fff, w));  // ?
  return true;
}
bool GPU::addPrim(unsigned shape) {
if(uploadList->numPrims == 2048)
return false;
auto &out = uploadList->prims[uploadList->numPrims];
//primitive.texImage = texImage;
//primitive.texPalette = texPalette;
out.numVerts = 0;
out.firstVert = uploadList->numVerts;
out.id = primitive.attributes >> 24 & 63;
out.alpha = primitive.attributes >> 16 & 31;
out.drawMode = shape;
out.drawMode += Prim::fog * uint1(primitive.attributes >> 15);
out.drawMode += Prim::zequal * uint1(primitive.attributes >> 14);
out.drawMode += Prim::zwrite * uint1(primitive.attributes >> 11);
out.drawMode += Prim::shadeMask & uint2(primitive.attributes >> 4);
out.drawMode += Prim::colorKey * uint1(primitive.texImage >> 29);
out.texFormat = primitive.texImage >> 26 & 7;
//out.texSize[1] = primitive.texImage >> 23 & 7;
//out.texSize[0] = primitive.texImage >> 20 & 7;
//out.texBorder[1] = primitive.texImage >> 17 & 5;
//out.texBorder[0] = primitive.texImage >> 16 & 5;
out.texImage = primitive.texImage;
out.texPalette = primitive.texPalette;
if(out.texFormat != Prim::I2)
out.texPalette *= 2;
return true;
}
// Matrices, OpenGL style:
//
//   clip    projection    view..      model..    vertex
//   |Cx|   |0 4 8 c|   |0 4 8 c|   |0 4 8 c|   |Vx|
//   |Cy| = |1 5 9 d| * |1 5 9 d| * |1 5 9 d| * |Vy|
//   |Cz|   |2 6 a e|   |2 6 a e|   |2 6 a e|   |Vz|
//   |Cw|   |3 7 b f|   |3 7 b f|   |3 7 b f|   |Vw|
//
// transform() multiplies new matrix on the LEFT, ie. m = t*m.

// Replaces the matrix selected by matrixMode with m. Texture mode touches the
// texture matrix only; every other mode also rebuilds the clip matrix from
// the projection and view matrices.
void GPU::loadMatrix(const Matrix& m) {
  if(matrixMode == mmTexture) {
    textureMatrix = m;
    return;
  }

  if(matrixMode == mmProjection) {
    projMatrix = m;
  } else {
    // mmModelView and mmLitView both set the view matrix;
    // mmLitView sets the light matrix as well.
    if(matrixMode == mmLitView) lightMatrix = m;
    viewMatrix = m;
  }

  clipMatrix = projMatrix;
  transform(clipMatrix, viewMatrix);
}
// Multiplies m into the matrix selected by matrixMode. Texture mode touches
// the texture matrix only; every other mode also rebuilds the clip matrix
// from the projection and view matrices.
void GPU::multMatrix(const Matrix& m) {
  if(matrixMode == mmTexture) {
    transform(textureMatrix, m);
    return;
  }

  if(matrixMode == mmProjection) {
    transform(projMatrix, m);
  } else {
    // mmModelView and mmLitView both update the view matrix;
    // mmLitView updates the light matrix as well.
    if(matrixMode == mmLitView) transform(lightMatrix, m);
    transform(viewMatrix, m);
  }

  clipMatrix = projMatrix;
  transform(clipMatrix, viewMatrix);
}

View File

@ -1,83 +0,0 @@
// Geometry engine register reads.
uint32 regGeomStatus();
uint32 regGeomLoad();
uint32 regGeomPosition(unsigned index);
uint32 regGeomNormal(unsigned index);
uint32 regClipMatrix(unsigned index);
uint32 regLightMatrix(unsigned index);
// Geometry engine register writes; `mask` selects the bits being written.
void regGeomMaxPointDepth(uint32 data, uint32 mask);
void regGeomStatus(uint32 data, uint32 mask);
// Load / multiply the matrix selected by matrixMode.
void loadMatrix(const Matrix& m);
void multMatrix(const Matrix& m);
void unpackColor(int32* color, uint16 rgb);
// Primitive assembly: vertices are collected, then culled/clipped and
// appended to the upload list.
void submitVertex();
void submitTriStrip(ClipSpaceVertex& v0, ClipSpaceVertex& v1, ClipSpaceVertex& v2);
void submitQuadStrip(ClipSpaceVertex& v0, ClipSpaceVertex& v1, ClipSpaceVertex& v2, ClipSpaceVertex& v3);
void submitTri(ClipSpaceVertex& v0, ClipSpaceVertex& v1, ClipSpaceVertex& v2);
void submitQuad(ClipSpaceVertex& v0, ClipSpaceVertex& v1, ClipSpaceVertex& v2, ClipSpaceVertex& v3);
// Upload list writers; each returns false when its storage is full.
bool addPoly(ClipSpaceVertex **v, unsigned numVerts, bool clip);
bool addVertex(const ClipSpaceVertex &v);
bool addPrim(unsigned shape);
// One light source.
struct Light {
uint1 enable;      // set from the low bits of the polygon attribute word
Vector color;
Vector direction;  // as given by the light direction command
Vector eyeVector;  // direction transformed by the light matrix
Vector halfVector; // derived from eyeVector in gxLightDirection()
};
// Matrix state
uint2 matrixMode; enum { mmProjection, mmModelView, mmLitView, mmTexture };
bool stackOverflow;
uint1 projSP;
uint6 viewSP;
Matrix clipMatrix;    // rebuilt from projMatrix and viewMatrix on every change
Matrix textureMatrix;
Matrix projMatrix, projStack[1];
Matrix viewMatrix, viewStack[32];
Matrix lightMatrix, lightStack[32];
// Results of the test commands, read back through registers.
Vector vertexResult;
Vector normalResult;
bool boxResult;
// Lighting and material state
Light lights[4];
Vector normal;
Vector diffuse;
Vector ambient;
Vector specular;
Vector emission;
uint8 shininess[128];
bool useShineTable;
// Primitive state
struct PS {
unsigned size;     // vertices collected so far for the current primitive
uint1 winding;     // toggled per triangle while assembling triangle strips
uint2 type; enum { tris, quads, triStrip, quadStrip };
uint32 attributes; enum { drawFront = 0x80, drawBack = 0x40 };
uint32 texImage;
uint16 texPalette;
uint2 texTransform; enum { ttNone, ttTexCoord, ttNormal, ttVertex };
ClipSpaceVertex v[4];  // vertex window used by submitVertex()
} primitive;
// Values latched by the attribute/texture commands; copied into `primitive`
// when a primitive begins.
uint32 attributes;
uint32 texImage;
uint16 texPalette;
// The vertex currently being built by the vertex attribute commands.
ClipSpaceVertex vertex;
// Viewport rectangle set by gxViewport().
struct {
unsigned x, y, w, h;
} viewport;

View File

@ -1,497 +0,0 @@
#include <nds/nds.hpp>
namespace NintendoDS {
GPU gpu;
// Construction does no work; all state is initialised in power().
GPU::GPU() { }
void GPU::power() {
powered = true;
swapArgument = 0;
uploadList = &sceneList[0];
renderList = &sceneList[1];
renderedLines = 0;
stencilMode = false;
sceneFinished = false;
commandBufIrq = false;
packedCommands = 0;
numArgs = 0;
for(unsigned n = 0; n < 2; n++) {
sceneList[n].numVerts = 0;
sceneList[n].numPrims = 0;
sceneList[n].firstAlpha = 0;
sceneList[n].alphaSort = 1;
sceneList[n].depthUseW = 0;
memset(sceneList[n].verts, 0, sizeof sceneList[n].verts);
memset(sceneList[n].prims, 0, sizeof sceneList[n].prims);
}
// Geometry state
Matrix identity = {
0x1000, 0,0,0,
0, 0x1000, 0,0,
0,0, 0x1000, 0,
0,0,0, 0x1000,
};
clipMatrix = identity; projSP = viewSP = stackOverflow = 0;
textureMatrix = identity; matrixMode = mmProjection;
projMatrix = identity; for(auto &m : projStack) m = identity;
viewMatrix = identity; for(auto &m : viewStack) m = identity;
lightMatrix = identity; for(auto &m : lightStack) m = identity;
for(auto &l : lights) {
l.enable = false;
l.color = Vector{0,0,0,0};
l.direction = Vector{0,0,0,0};
l.eyeVector = Vector{0,0,0,0};
l.halfVector = Vector{0,0,0,0};
}
diffuse = Vector{0,0,0,0};
ambient = Vector{0,0,0,0};
specular = Vector{0,0,0,0};
emission = Vector{0,0,0,0};
useShineTable = false;
for(auto &e : shininess) e = 0;
normal = Vector{0,0,0,0};
vertex.position = Vector{0,0,0,0x1000};
vertex.texCoord = Vector{0,0,0,0x1000};
vertex.color = Vector{0x3f000,0x3f000,0x3f000,0};
primitive.size = 0;
primitive.winding = 0;
primitive.type = PS::tris;
primitive.attributes = attributes = 0;
primitive.texImage = texImage = 0;
primitive.texPalette = texPalette = 0;
primitive.texTransform = PS::ttNone;
// Render state
renderControl.texturing = false;
renderControl.toonShading = RC::darken;
renderControl.edgeMode = RC::solid;
renderControl.fogMode = 0;
renderControl.fogLevel = 0;
renderControl.alphaTest = false;
renderControl.alphaBlend = false;
renderControl.backImage = false;
backColor[0] = 0; fogColor[0] = 0;
backColor[1] = 0; fogColor[1] = 0;
backColor[2] = 0; fogColor[2] = 0;
backAlpha = 0; fogAlpha = 0;
backId = 0; fogZ = 0;
backX = 0;
backY = 0; minAlpha = 0;
backZ = 0x7fff;
for(auto &e : fogTable) e = 0;
for(auto &e : edgeTable) e = Vector{0,0,0,0};
for(auto &e : toonTable[0]) e = Vector{0,0,0,0};
for(auto &e : toonTable[1]) e = Vector{0,0,0,0};
for(unsigned n = 0; n < 2; n++) {
drawList[n].first = 0;
drawList[n].last = 0;
drawList[n].nextPrim = 0;
drawList[n].lastPrim = 0;
}
}
void GPU::scanline() {
uint8 renderTime = (video.line + 50) % 263;
if(renderTime < 241 && renderedLines < 192+1) {
// Rendering starts in advance. Draw time varies; however, up to 48 extra
// scanlines can be cached, providing a buffer for complicated scenes.
//
// That isn't emulated at the moment. Scanline rendering is still needed,
// however, to get stencilling and other edge cases right.
renderScanline();
renderedLines++;
}
if(renderTime == 242) {
// Finished rendering - process the new scene list.
// Games are free to load textures during this time.
swapBuffers();
renderedLines = -1;
drawList[0].nextPrim = 0;
drawList[0].lastPrim = renderList->firstAlpha;
drawList[0].first = 0;
drawList[0].last = 0;
drawList[1].nextPrim = renderList->firstAlpha;
drawList[1].lastPrim = renderList->numPrims;
drawList[1].first = 0;
drawList[1].last = 0;
for(auto &line : pixels) for(auto &p : line) {
p.a.a = p.b.a = 0;
p.a.id = p.b.id = 0xff;
p.az = p.bz = 0x7fffffff;
}
}
if(commandBufIrq)
arm9.interrupt.flags |= CPUCore::irqGeomBuf;
}
// Exchanges the upload and render scene lists once the program has issued a
// swap command (sceneFinished). The new upload list is emptied and configured
// from swapArgument; the new render list has its open strip closed,
// incomplete trailing primitives trimmed, per-primitive Y range / blend /
// wireframe flags computed, and its draw order sorted.
void GPU::swapBuffers() {
  if(sceneFinished) {
    std::swap(uploadList, renderList);

    // These settings apply to the NEW list, not the old one.
    // Bit 0 set disables sorting and bit 1 selects W depth. Only bit 0 may be
    // tested for alphaSort: negating the whole 2-bit value would also switch
    // sorting off whenever W depth is requested.
    uploadList->alphaSort = !(swapArgument & 1);
    uploadList->depthUseW = swapArgument >> 1;

    uploadList->numVerts = 0;
    uploadList->numPrims = 0;

    // Release the ARM9 if it was blocking on a command..
    sceneFinished = false;
    //texCache.reset();

    // Terminate the current strip, if any.
    if(renderList->numPrims < 2048 && primitive.type >= 2
    && primitive.size >= (primitive.type==PS::quadStrip? 4 : 3))
      renderList->numPrims++;

    while(renderList->numPrims) {
      auto &p = renderList->prims[renderList->numPrims-1];

      // Ensure the program submitted enough vertices..
      // Amusingly, this case not only crashes us, but the real GPU as well.
      // Should issue a diagnostic here.
      //
      // This check also catches the case where vertex RAM fills up,
      // leaving the last primitive unfinished.
      if(p.numVerts < 3 || p.numVerts < 4 && (p.drawMode & Prim::quadStrip)) {
        renderList->numPrims--;
      } else {
        break;
      }
    }
    primitive.size = 0;

    // Sort the new list
    auto count = renderList->numPrims;
    auto &prims = renderList->prims;
    auto &verts = renderList->verts;

    renderList->firstAlpha = 0;

    for(unsigned n = 0; n < count; n++) {
      renderList->order[n] = n;

      // Is this per primitive or per polygon?
      prims[n].minY = 255;
      prims[n].maxY = 0;

      // Clip range is x/w, y/w := +/- 0x40000000
      for(unsigned k = 0; k < prims[n].numVerts; k++) {
        int32 y = 192 - verts[prims[n].firstVert + k].y/0x400000;
        prims[n].minY = min(prims[n].minY, y);
        prims[n].maxY = max(prims[n].maxY, y);
      }

      // Special case for wireframe... only edge pixels are drawn.
      // The flag must be cleared first: primitive slots are reused from one
      // scene to the next and addPrim() does not reset it, so a stale 1 would
      // otherwise stick to whatever primitive lands in this slot later.
      prims[n].wire = 0;
      if(prims[n].alpha == 0) {
        prims[n].alpha = 31;
        prims[n].wire = 1;
      }

      // Alpha is not the only thing that can send a primitive to the
      // translucent pile. Using textures with the alpha channel does it too.
      prims[n].blend = prims[n].alpha < 31
        || prims[n].texFormat == Prim::A3_I5
        || prims[n].texFormat == Prim::A5_I3;

      if(prims[n].blend == 0)
        renderList->firstAlpha++;
    }

    std::sort(renderList->order, renderList->order+count, [&](uint16 i0, uint16 i1) -> bool {
      auto &p0 = prims[i0], &p1 = prims[i1];

      if(p0.blend != p1.blend) {
        return p0.blend < p1.blend;  // separate solid + blend passes
      }
      if(p0.blend == false || renderList->alphaSort) {
        // Sort polygons towards the top of the screen first. They prolly
        // do this to save time and avoid searching the entire list.
        if(p0.minY != p1.minY) return p0.minY < p1.minY;
        if(p0.maxY != p1.maxY) return p0.maxY < p1.maxY;
      }
      return i0 < i1;  // retain order in case of blending or ties
    });
  }
}
// Reads back the render control register, packing each renderControl field
// at the bit position it is written from in the matching write handler.
uint32 GPU::regRenderOptions() {
  uint32 value = 0;
  value |= renderControl.texturing   <<  0;
  value |= renderControl.toonShading <<  1;
  value |= renderControl.alphaTest   <<  2;
  value |= renderControl.alphaBlend  <<  3;
  value |= renderControl.edgeMode    <<  4;
  value |= renderControl.fogMode     <<  6;
  value |= renderControl.fogLevel    <<  8;
  value |= renderControl.backImage   << 14;
  return value;
}
// Reads the render load register. Always reports the constant 48 - 2.
uint32 GPU::regRenderLoad() {
  const uint32 load = 48 - 2;
  return load;
}
// Writes the render control register. `mask` tells which bits of `data` are
// being written; each byte lane is applied only when the mask touches it.
void GPU::regRenderOptions(uint32 data, uint32 mask) {
  const bool lowByte  = mask & 0x00ff;
  const bool highByte = mask & 0x7f00;

  if(lowByte) {
    renderControl.texturing   = data >> 0;
    renderControl.toonShading = data >> 1;
    renderControl.alphaTest   = data >> 2;
    renderControl.alphaBlend  = data >> 3;
    renderControl.edgeMode    = data >> 4;
    renderControl.fogMode     = data >> 6;
  }
  if(highByte) {
    renderControl.fogLevel  = data >>  8;
    renderControl.backImage = data >> 14;
  }
}
// Writes the minimum-alpha register: bits selected by `mask` are taken from
// `data`, all other bits keep their current value.
void GPU::regRenderMinAlpha(uint32 data, uint32 mask) {
  minAlpha = (minAlpha & ~mask) | (data & mask);
}
// Writes the clear colour register: three 5-bit colour channels plus a fog
// flag in the low halfword, then alpha and id in the upper bytes. Each field
// is updated only when `mask` covers it.
void GPU::regRenderClearColor(uint32 data, uint32 mask) {
  if(mask & 0x00007fff) {
    backFog = data>>15 & 1;
    for(unsigned n = 0; n < 3; n++) {
      // Expand 5 bits to 6: double the value, adding 1 unless it is zero.
      const unsigned channel = data >> 5*n & 31;
      backColor[n] = 2*channel + (channel > 0);
    }
  }
  if(mask & 0x001f0000) backAlpha = data>>16 & 31;
  if(mask & 0x3f000000) backId    = data>>24 & 63;
}
// Writes the clear depth / offset register: Z in the low halfword, X and Y in
// the two upper bytes. Each field is updated only when `mask` covers it.
void GPU::regRenderClearCoord(uint32 data, uint32 mask) {
  const bool writeZ = mask & 0x00007fff;
  const bool writeX = mask & 0x00ff0000;
  const bool writeY = mask & 0xff000000;

  if(writeZ) backZ = data >>  0;
  if(writeX) backX = data >> 16;
  if(writeY) backY = data >> 24;
}
// Writes the fog colour register: three 5-bit colour channels in the low
// halfword and a 5-bit alpha in bits 16-20. Each field is updated only when
// `mask` covers it.
void GPU::regRenderFogColor(uint32 data, uint32 mask) {
  if(mask & 0x00007fff) {
    for(unsigned n = 0; n < 3; n++) {
      // Expand 5 bits to 6: double the value, adding 1 unless it is zero.
      const unsigned channel = data >> 5*n & 31;
      fogColor[n] = 2*channel + (channel > 0);
    }
  }
  if(mask & 0x001f0000)
    fogAlpha = data>>16 & 31;
}
// Writes the fog depth register when `mask` covers its low 15 bits.
void GPU::regRenderFogCoord(uint32 data, uint32 mask) {
  const bool writeZ = mask & 0x00007fff;
  if(writeZ) fogZ = data;
}
void GPU::regRenderFogTable(unsigned index, uint32 data, uint32 mask) {
if(mask & 0x000000ff) fogTable[4*index+0] = data >> 0 & 0x7f;
if(mask & 0x0000ff00) fogTable[4*index+1] = data >> 8 & 0x7f;
if(mask & 0x00ff0000) fogTable[4*index+2] = data >> 16 & 0x7f;
if(mask & 0xff000000) fogTable[4*index+3] = data >> 24 & 0x7f;
fogTable[32] = fogTable[31]; // extra element for lerping
}
// Writes one word of the edge colour table: two packed colours, one per
// halfword, each unpacked only when `mask` touches its halfword.
void GPU::regRenderEdgeTable(unsigned index, uint32 data, uint32 mask) {
  // Earlier packed-colour implementation, kept for reference:
  //uint6 r0 = 2*uint5(data>> 0), r1 = 2*uint5(data>>16);
  //uint6 g0 = 2*uint5(data>> 5), g1 = 2*uint5(data>>21);
  //uint6 b0 = 2*uint5(data>>10), b1 = 2*uint5(data>>26);
  //
  //if(r0) r0++;  if(r1) r1++;
  //if(g0) g0++;  if(g1) g1++;
  //if(b0) b0++;  if(b1) b1++;
  //
  //auto *edge = &edgeTable[2*index];
  //
  //if(mask & 0x0000ffff) edge[0] = b0<<12 | g0<<6 | r0<<0;
  //if(mask & 0xffff0000) edge[1] = b1<<12 | g1<<6 | r1<<0;
  const bool writeLow  = mask & 0x0000ffff;
  const bool writeHigh = mask & 0xffff0000;

  if(writeLow)  unpackColor(edgeTable[2*index+0], data >>  0);
  if(writeHigh) unpackColor(edgeTable[2*index+1], data >> 16);
}
// Writes one word of the toon table: two packed colours, one per halfword,
// stored in toonTable[0]. The matching toonTable[1] entries are then rebuilt
// as the same colours with 0x1000 added to each of the three channels.
void GPU::regRenderToonTable(unsigned index, uint32 data, uint32 mask) {
  auto &dark0  = toonTable[0][2*index+0];
  auto &dark1  = toonTable[0][2*index+1];
  auto &light0 = toonTable[1][2*index+0];
  auto &light1 = toonTable[1][2*index+1];

  if(mask & 0x0000ffff) unpackColor(dark0, data >>  0);
  if(mask & 0xffff0000) unpackColor(dark1, data >> 16);

  // Generate entries for lighten mode
  light0 = dark0;
  light1 = dark1;
  for(unsigned channel = 0; channel < 3; channel++) {
    light0[channel] += 0x1000;
    light1[channel] += 0x1000;
  }
}
// Write handler for the max-point-depth register. Not implemented: the write
// is accepted and discarded.
void GPU::regGeomMaxPointDepth(uint32 data, uint32 mask) {
}
// Writes the geometry status register. Writing 1 to bit 15 clears the matrix
// stack overflow flag and both stack pointers; the top byte carries the
// command buffer IRQ mode in bits 30-31.
void GPU::regGeomStatus(uint32 data, uint32 mask) {
  const uint32 written = data & mask;

  const bool ackOverflow = written & 1<<15;
  if(ackOverflow) {
    stackOverflow = false;
    projSP = 0;
    viewSP = 0;
  }

  if(mask & 0xff000000) {
    commandBufIrq = written >> 30;
    if(commandBufIrq)
      arm9.interrupt.flags |= CPUCore::irqGeomBuf;
  }
}
// Reads the geometry status register: box test result, matrix stack pointers,
// overflow flag and command buffer IRQ mode. Bits 25 and 26 always read 1.
uint32 GPU::regGeomStatus() {
  uint32 status = 1<<25 | 1<<26;
  status |= boxResult      <<  1;
  status |= (viewSP % 32)  <<  8;
  status |= projSP         << 13;
  status |= stackOverflow  << 15;
  status |= commandBufIrq  << 30;
  return status;
}
// Reads the upload list fill level: primitive count in the low halfword,
// vertex count in the high halfword.
uint32 GPU::regGeomLoad() {
  const auto &list = *uploadList;
  return list.numPrims<<0 | list.numVerts<<16;
}
// Reads component `index` of the last position test result.
uint32 GPU::regGeomPosition(unsigned index) {
  const uint32 component = vertexResult[index];
  return component;
}
// Reads two components of the last direction test result, each reduced to a
// signed 12-bit value and packed as 16-bit halves of the returned word.
// NOTE(review): the component pair starts at index/2, so indices 0 and 1 both
// return components 0 and 1 - confirm against the caller's indexing.
uint32 GPU::regGeomNormal(unsigned index) {
  const unsigned first = index/2;
  return uint16(int12(normalResult[first + 0]))<<0
       | uint16(int12(normalResult[first + 1]))<<16;
}
// Reads one element of the clip matrix. `index` walks the matrix with the
// column as the slow axis; storage is addressed as 4*row + col.
uint32 GPU::regClipMatrix(unsigned index) {
  const unsigned col = (index / 4) & 3;  // kept to two bits
  const unsigned row = index % 4;
  return clipMatrix[4*row + col];
}
// Reads one element of the upper-left 3x3 part of the light matrix. `index`
// walks it with the column as the slow axis; storage is addressed as
// 4*row + col.
uint32 GPU::regLightMatrix(unsigned index) {
  const unsigned col = (index / 3) & 3;  // kept to two bits
  const unsigned row = index % 3;
  return lightMatrix[4*row + col];
}
// Accepts one word written to the packed command port. The first word of a
// group holds up to four command bytes; later words are their arguments.
// Commands are executed as soon as they have enough arguments.
void GPU::sendGeomBuffered(uint32 data) {
  // On real hardware commands are buffered, and filling up this buffer
  // forces an indefinite waitstate until the buffer drains. A swapBuffers
  // command sits around 'til vblank, thereby throttling the game at 60fps.
  // Since we don't emulate that yet, this is the closest thing.
  for(;;) {
    if(!sceneFinished) break;
    arm9.step(16);
    if(arm7.clock < -255)
      co_switch(arm7.thread);
  }

  // Command bytes are packed 4 to a word..
  if(packedCommands == 0) packedCommands = data;
  else                    args[numArgs++] = data;

  // Take the first one and try to run it.
  // It succeeds when enough arguments are provided.
  //
  // It's important to run ALL possible commands here, since some of them
  // don't consume any arguments and the next invocation would erroneously
  // pass data to the wrong command.
  for(;;) {
    if(packedCommands == 0) break;
    if(!geomCommand(packedCommands)) break;
    numArgs = 0;
    packedCommands >>= 8;
  }

  if(commandBufIrq)
    arm9.interrupt.flags |= CPUCore::irqGeomBuf;
}
// Accepts one argument word written directly to a command's own port. The
// word is queued and the command is attempted; the queue is emptied once the
// command reports it has run.
void GPU::sendGeomImmediate(uint8 command, uint32 data) {
  // Stall while a finished scene is still waiting to be swapped.
  for(;;) {
    if(!sceneFinished) break;
    arm9.step(16);
    if(arm7.clock < -255)
      co_switch(arm7.thread);
  }

  args[numArgs++] = data;

  const bool executed = geomCommand(command);
  if(executed) numArgs = 0;

  if(commandBufIrq)
    arm9.interrupt.flags |= CPUCore::irqGeomBuf;
}
// Dispatches one geometry command byte to its handler. Returns the handler's
// result (false when it still needs more arguments). Unrecognised command
// bytes are treated as executed and return true.
bool GPU::geomCommand(uint8 command) {
  typedef bool (GPU::*Handler)();
  Handler handler = nullptr;

  switch(command) {
  // Matrix commands
  case 0x10: handler = &GPU::gxMatrixMode;            break;
  case 0x11: handler = &GPU::gxMatrixPush;            break;
  case 0x12: handler = &GPU::gxMatrixPop;             break;
  case 0x13: handler = &GPU::gxMatrixStore;           break;
  case 0x14: handler = &GPU::gxMatrixRestore;         break;
  case 0x15: handler = &GPU::gxMatrixLoadIdentity;    break;
  case 0x16: handler = &GPU::gxMatrixLoad4x4;         break;
  case 0x17: handler = &GPU::gxMatrixLoad4x3;         break;
  case 0x18: handler = &GPU::gxMatrixMult4x4;         break;
  case 0x19: handler = &GPU::gxMatrixMult4x3;         break;
  case 0x1a: handler = &GPU::gxMatrixRotate;          break;
  case 0x1b: handler = &GPU::gxMatrixScale;           break;
  case 0x1c: handler = &GPU::gxMatrixTranslate;       break;

  // Vertex attributes and vertices
  case 0x20: handler = &GPU::gxColor;                 break;
  case 0x21: handler = &GPU::gxNormal;                break;
  case 0x22: handler = &GPU::gxTexCoord;              break;
  case 0x23: handler = &GPU::gxVertex3i_16;           break;
  case 0x24: handler = &GPU::gxVertex3i_10;           break;
  case 0x25: handler = &GPU::gxVertex2i_XY;           break;
  case 0x26: handler = &GPU::gxVertex2i_XZ;           break;
  case 0x27: handler = &GPU::gxVertex2i_YZ;           break;
  case 0x28: handler = &GPU::gxVertex3i_Rel;          break;
  case 0x29: handler = &GPU::gxAttribute;             break;
  case 0x2a: handler = &GPU::gxTexImage;              break;
  case 0x2b: handler = &GPU::gxTexPalette;            break;

  // Materials and lights
  case 0x30: handler = &GPU::gxLightDiffuseAmbient;   break;
  case 0x31: handler = &GPU::gxLightSpecularEmission; break;
  case 0x32: handler = &GPU::gxLightDirection;        break;
  case 0x33: handler = &GPU::gxLightColor;            break;
  case 0x34: handler = &GPU::gxLightShininess;        break;

  // Primitive and scene control
  case 0x40: handler = &GPU::gxBeginPrimitive;        break;
  case 0x50: handler = &GPU::gxSwapBuffers;           break;
  case 0x60: handler = &GPU::gxViewport;              break;

  // Tests
  case 0x70: handler = &GPU::gxCullTest;              break;
  case 0x71: handler = &GPU::gxPositionTest;          break;
  case 0x72: handler = &GPU::gxDirectionTest;         break;
  }

  if(handler == nullptr) return true;
  return (this->*handler)();
}
#include "math.cpp"
#include "commands.cpp"
#include "geometry.cpp"
#include "render.cpp"
#include "textures.cpp"
}

View File

@ -1,77 +0,0 @@
// 3D graphics unit: receives geometry commands, builds a scene list from
// them, and renders the previously finished scene list one scanline at a time.
struct GPU {
GPU();
void power();        // full reset of all GPU state
void scanline();     // per-scanline tick: render, swap, raise IRQ
void swapBuffers();  // exchange upload/render lists if a scene is finished
// Command input: packed command stream, or one argument for a given command.
void sendGeomBuffered(uint32 data);
void sendGeomImmediate(uint8 command, uint32 data);
// Runs one command; false means it still needs more arguments.
bool geomCommand(uint8 command);
// Once the geometry submitted by the program gets through transform,
// lighting, culling, and clipping, we've built a SceneList with:
struct Vertex {
// A single point within a primitive.
int64 x, y, z; // normalized device coordinates (xyz/w => -1..+1)
int32 u, v; // texture coordinates (u/w, v/w)
int32 r, g, b; // color (r/w, g/w, b/w)
int32 w; //
};
struct Prim {
// A strip of triangles or quads - or a single, clipped n-gon.
// AFAIK, none of these attributes can change between primitives.
uint16 firstVert, numVerts;  // range within SceneList::verts
uint8 minY, maxY;            // computed in swapBuffers()
uint1 blend, wire;           // computed in swapBuffers()
uint8 drawMode; enum { shadeMask=0x03, normal=0, decal=1, toon=2, stencil=3,
poly=0, triStrip=0x04, quadStrip=0x08,
colorKey=0x10, zequal=0x20, zwrite=0x40, fog=0x80 };
uint8 id, alpha;
uint8 texFormat; enum { untextured, A3_I5, I2,I4,I8, packed, A5_I3, A1_RGB15 };
uint8 texBorder[2]; enum { clamp=0, repeat=1, clamp_=4, mirror=5 };
uint8 texSize[2]; // 8 << texSize
uint16 texPalette;
uint32 texImage;
};
struct SceneList {
unsigned numVerts;
unsigned numPrims;
unsigned firstAlpha;  // count of non-blended primitives (start of blended ones in `order`)
Vertex verts[6144];
Prim prims[2048];
uint16 order[2048]; // by solid first, top Y, bottom Y, then index
uint1 alphaSort; // Sort transparent geometry by Y coordinate?
uint1 depthUseW; // Use Z or W for depth buffering?
};
uint1 powered;
uint2 swapArgument;     // argument of the last swap command
SceneList sceneList[2];
SceneList *uploadList;  // list currently being filled by geometry commands
SceneList *renderList;  // list currently being rendered
bool sceneFinished;     // set by the swap command, cleared by swapBuffers()
uint2 commandBufIrq;
int32 renderedLines;
uint32 packedCommands;  // command bytes still pending from the packed stream
uint32 args[32], numArgs;  // arguments collected for the pending command
uint32 output[256*192]; // ARGB5666
struct Matrix;
struct Vector;
struct ClipSpaceVertex;
#include "math.hpp"
#include "commands.hpp"
#include "geometry.hpp"
#include "render.hpp"
};
extern GPU gpu;

View File

@ -1,143 +0,0 @@
// Transposes the 4x4 matrix in place (mirrors it across the main diagonal):
//   |a b c d|      |a e i m|
//   |e f g h|  ->  |b f j n|
//   |i j k l|      |c g k o|
//   |m n o p|      |d h l p|
void GPU::Matrix::transpose() {
  // Exchange each element above the diagonal with its counterpart below it.
  for(unsigned row = 0; row < 3; row++) {
    for(unsigned col = row + 1; col < 4; col++)
      std::swap((*this)(row, col), (*this)(col, row));
  }
}
// Matrix product, computed in place: M := M * T.
void GPU::transform(Matrix& m, Matrix t) {
  // Each row of the result is the dot product of a row of M with every
  // column of T. Transposing the local copy of T turns its columns into
  // rows, so the matrix * vector overload can be reused on each row of M.
  t.transpose();
  unsigned row = 0;
  while(row < 4) {
    transform(t, m(row));
    row++;
  }
}
// Product of 4x4 matrix and 4D vector: v := T * v.
void GPU::transform(const Matrix& t, int32* v) {
int32 vector[] = { v[0], v[1], v[2], v[3] };
for(unsigned i = 0; i < 4; i++)
v[i] = dot(t(i), vector);
}
// Dot product of two 4D vectors: s := a . b.
// The products are accumulated in 64 bits and the sum is divided by 0x1000
// (12 fractional bits) before being narrowed to the int32 result.
int32 GPU::dot(const int32* a, const int32* b) {
  int64 sum = 0;
  for(unsigned n = 0; n < 4; n++)
    sum += (int64) a[n] * b[n];
  return sum / 0x1000;
}
// Given a 4D homogenous coordinate, returns a mask telling whether
// it's out of bounds. One bit per side of the clipping volume.
unsigned GPU::outcode(const ClipSpaceVertex &v) {
const int32 &x = v.position[0], &y = v.position[1];
const int32 &z = v.position[2], &w = v.position[3];
unsigned code = 0;
if(x < -w) code += 1; if(x > +w) code += 2; // left, right
if(y < -w) code += 4; if(y > +w) code += 8; // bottom, top
if(z < -w) code += 16; if(z > +w) code += 32; // near, far
return code;
}
// Looks at three triangle vertices and returns a signed value:
// negative (back face), zero (edge on), or positive (front face).
//
// AKA sign of Z from the cross product (v1-v0) x (v2-v1) in screen coordinates:
// Z = (v1-v0).x (v2-v1).y - (v1-v0).y (v2-v1).x
int GPU::facing(const ClipSpaceVertex &v0, const ClipSpaceVertex &v1, const ClipSpaceVertex &v2) {
// These are in homogenous space, so.. that's more like:
// Z = a + b - c, where
// a = (v0.x v1.y - v0.y v1.x) / (v0.w v1.w)
// b = (v1.x v2.y - v1.y v2.x) / (v1.w v2.w)
// c = (v0.x v2.y - v0.y v2.x) / (v0.w v2.w)
//
// Since only the sign matters,
// S = (a+b-c <=> 0) = (a+b <=> c)
//
// and knowing w is positive within the clipping volume,
// p = v0.w v1.w v2.w
// a' = a*p, b' = b*p, c' = c*p
//
// that makes the divisions disappear.
// x y y x w
int64 a = ((int64) v0.position[0]*v1.position[1] - (int64) v0.position[1]*v1.position[0]) / 0x1000 * v2.position[3];
int64 b = ((int64) v1.position[0]*v2.position[1] - (int64) v1.position[1]*v2.position[0]) / 0x1000 * v0.position[3];
int64 c = ((int64) v0.position[0]*v2.position[1] - (int64) v0.position[1]*v2.position[0]) / 0x1000 * v1.position[3];
int64 r = a + b - c;
return r<0? -1 : r>0? +1 : 0;
}
// Clips the edge AB against one plane of the clipping volume and returns the
// vertex where the edge crosses that plane.
//   a    - endpoint inside the clipping plane
//   b    - endpoint outside; it is the one replaced by the returned vertex
//   axis - which of X,Y,Z is tested (index into position)
//   side - sign of the W comparand: +1 for the +w plane, -1 for the -w plane
// Position, texture coordinates and color are all interpolated.
// If A and B are equally far from the plane (so no crossing point can be
// computed) a copy of A is returned.
GPU::ClipSpaceVertex GPU::clipEdge(const ClipSpaceVertex &a, const ClipSpaceVertex &b, unsigned axis, int side) {
  // Normally, to find the intersection we would attempt to find the fraction
  // of the edge that remains unclipped:
  //   t = (1 - ax) / (bx - ax)
  //
  // The problem is that we haven't divided by W. We end up with this mess:
  //   t = (1 - ax/aw) / (bx/bw - ax/aw)
  //
  // Not very helpful. Besides which we still need W for perspective-correct
  // texturing.
  //
  // In homogenous space, the clipping volume is defined by:
  //   -w < {x,y,z} < +w
  //
  // So we expect the clipped vertex c to have:
  //   cx = +/-cw     ; cx,cy,cz depending on the plane
  //
  // And for each {x,y,z,w}:
  //   cx = ax + t*(bx - ax)
  //
  // Expanding both sides of the first equation:
  //   ax + t*(bx - ax) = aw + t*(bw - aw)
  //   t*(bx - ax) - t*(bw - aw) = aw - ax
  //   t = (aw - ax) / ((bx - ax) - (bw - aw))
  //
  // We end up with these two equations:
  //   t = (+aw - ax) / ((+aw - ax) - (+bw - bx))   ; cx=+cw
  //   t = (-aw - ax) / ((-aw - ax) - (-bw - bx))   ; cx=-cw
  //
  // or more simply:
  //   t = (ax - +aw) / ((ax - +aw) - (bx - +bw))   ; cx=+cw
  //   t = (ax - -aw) / ((ax - -aw) - (bx - -bw))   ; cx=-cw
  //
  //   t = (ax +/- aw) / ((ax +/- aw) - (bx +/- bw))
  //
  // So clipping requires one division per edge that crosses a clipping plane.
  // Quite pleasant, actually.

  // Start from a copy of A. Vector's default constructor leaves its elements
  // unset, so a default-constructed result would be returned uninitialized
  // whenever the denominator below is zero.
  ClipSpaceVertex c = a;
  int32 ax_aw = a.position[axis] - side*a.position[3];
  int32 bx_bw = b.position[axis] - side*b.position[3];
  if(ax_aw - bx_bw) {
    // t is a 2.30 fixed-point fraction of the way from A to B.
    int64 t = 0x40000000ll * ax_aw / (ax_aw - bx_bw);
    // t should be between 0 and 1. Rather than being a linear function of X/Y/Z
    // it also includes W. But at this point it's just an interpolation.
    for(unsigned n = 0; n < 4; n++) {
      c.position[n] = a.position[n] + int64(b.position[n] - a.position[n]) * t/0x40000000;
      c.texCoord[n] = a.texCoord[n] + int64(b.texCoord[n] - a.texCoord[n]) * t/0x40000000;
      c.color[n]    = a.color[n]    + int64(b.color[n]    - a.color[n]   ) * t/0x40000000;
    }
    // Ensure vertex is exactly on the clipping plane,
    // so we don't try and clip it again.
    c.position[axis] = side*c.position[3];
  }
  return c;
}

View File

@ -1,53 +0,0 @@
struct Vector {
int32 v[4];
Vector() { }
Vector(const std::initializer_list<int32>& elems) {
unsigned n = 0;
for(auto i : elems) {
if(n == 4) break;
v[n++] = i;
}
}
inline int32& operator[](unsigned i) { return v[i]; }
inline const int32& operator[](unsigned i) const { return v[i]; }
inline operator int32*() { return v; }
};
struct Matrix {
int32 m[4*4];
Matrix() { }
Matrix(const std::initializer_list<int32>& elems) {
unsigned n = 0;
for(auto i: elems)
if(n < 4*4) m[n++] = i;
}
inline int32& operator[](unsigned i) { return m[i]; }
inline int32& operator()(unsigned i, unsigned j) { return m[4*i + j]; }
inline int32* operator()(unsigned i) { return &m[4*i]; }
inline const int32& operator[](unsigned i) const { return m[i]; }
inline const int32& operator()(unsigned i, unsigned j) const { return m[4*i + j]; }
inline const int32* operator()(unsigned i) const { return &m[4*i]; }
void transpose();
};
// A vertex before the perspective divide. clipEdge() interpolates all three
// attributes component-wise when an edge crosses a clipping plane.
struct ClipSpaceVertex {
Vector position; // homogeneous x, y, z, w (outcode() tests x,y,z against +/-w)
Vector texCoord;
Vector color;
};
// M := M * T.
static void transform(Matrix& m, Matrix t);
// v := T * v, where v points at four components.
static void transform(const Matrix& t, int32* v);
// Dot product of two 4-component vectors, rescaled by 1/0x1000.
static int32 dot(const int32* a, const int32* b);
// Winding of a triangle: -1 back face, 0 edge on, +1 front face.
static int facing(const ClipSpaceVertex &v0, const ClipSpaceVertex &v1, const ClipSpaceVertex &v2);
// Bit mask of the clipping planes the vertex lies outside of.
static unsigned outcode(const ClipSpaceVertex &v);
// Intersection of edge AB with a clipping plane; a is the endpoint inside
// the plane and b the one outside. (The parameter names now follow the
// definition, which takes them in the order a, b.)
static ClipSpaceVertex clipEdge(const ClipSpaceVertex &a, const ClipSpaceVertex &b, unsigned axis, int side);

View File

@ -1,578 +0,0 @@
void GPU::updateDrawList(int y, DrawList& list, bool sorted) {
// Blah, why are polys with numVerts==0 making it in?
// Remove polygons that have finished rendering
while(list.first < list.last && (!list.polys[list.first].numVerts
|| 192-list.polys[list.first].v[list.polys[list.first].numVerts-1]->y/0x400000 < y))
list.first++;
// Add primitives matching Y as their first line
unsigned pi = list.nextPrim;
while(pi < list.lastPrim) {
auto *prim = &renderList->prims[renderList->order[pi]];
if(sorted && y < prim->minY-1)
break;
if(!sorted || y == prim->minY-1) {
auto *v = &renderList->verts[prim->firstVert];
if(prim->drawMode & Prim::triStrip) {
for(unsigned n = 2; n < prim->numVerts; n += 1, v += 1)
setupPoly(list.polys[list.last++], prim, v, 3);
}
else if(prim->drawMode & Prim::quadStrip) {
for(unsigned n = 2; n < prim->numVerts; n += 2, v += 2)
setupPoly(list.polys[list.last++], prim, v, 4);
}
else {
setupPoly(list.polys[list.last++], prim, v, prim->numVerts);
}
if(list.nextPrim == pi)
list.nextPrim++;
}
pi++;
}
}
// Prepares an ActivePoly for rasterization from numVerts consecutive vertices
// starting at vs: sorts the vertices from top to bottom, works out which side
// (left/right) each one belongs to, and sets up the first edge on each side.
void GPU::setupPoly(ActivePoly& poly, Prim* prim, Vertex *vs, unsigned numVerts) {
  for(unsigned n = 0; n < numVerts; n++)
    poly.v[n] = vs + n;

  // Sort vertices by Y (largest first), then X if on same line
  auto before = [](Vertex *a, Vertex *b) -> bool {
    if(a->y == b->y) return a->x < b->x;
    return a->y > b->y;
  };
  std::sort(poly.v, poly.v+numVerts, before);

  poly.numVerts = numVerts;
  if(numVerts == 0) return;

  poly.p = prim;
  poly.lv = poly.next_lv = 0;  // start at top vertex
  poly.rv = poly.next_rv = 0;

  // Top and bottom are considered to be on both sides.
  poly.side[0] = 0;
  poly.side[numVerts-1] = 0;

  // Find out which side each vertex is on. This is based on the observation
  // that with a convex polygon, edges always curve inwards, towards the other
  // side. So we switch sides every time the curve changes direction.
  for(unsigned n = 1; n+1 < numVerts; n++) {
    const Vertex *prev = poly.v[n-1];
    const Vertex *cur  = poly.v[n];
    const Vertex *next = poly.v[n+1];

    int64 dx0 = cur->x - prev->x,  dy0 = prev->y - cur->y;
    int64 dx1 = next->x - cur->x,  dy1 = cur->y - next->y;
    if(dy0 == 0) dy0 = 1;
    if(dy1 == 0) dy1 = 1;

    // compare dx1/dy1 <=> dx0/dy0
    // - both sides have been multiplied by dy0*dy1 to avoid division by 0
    const int64 lhs = dx1*dy0;
    const int64 rhs = dx0*dy1;
    if(lhs > rhs)      poly.side[n] = -1;              // curves to the right, so must be on the left
    else if(lhs < rhs) poly.side[n] = +1;              // curves to the left, so must be on the right
    else               poly.side[n] = poly.side[n-1];  // special case for straight or co-linear edges
  }

  setupNextEdge(poly, poly.lv, poly.next_lv, -1, poly.lnext, poly.dl_dv);
  setupNextEdge(poly, poly.rv, poly.next_rv, +1, poly.rnext, poly.dr_dv);
  poly.prev_lx = poly.lnext.x;
  poly.prev_rx = poly.rnext.x;
}
// Advances one side of a polygon to its next edge.
//   vi       - receives the index of the edge's start vertex (the old `next`)
//   next     - advanced to the edge's end vertex
//   side     - -1 for the left side, +1 for the right side
//   edge     - receives the interpolants at the start vertex
//   gradient - receives the per-scanline change along the edge; it is left
//              untouched when both ends fall on the same line
void GPU::setupNextEdge(ActivePoly& poly, unsigned& vi, unsigned& next,
  int side, Interpolants& edge, Interpolants& gradient)
{
  // The previous end vertex starts the new edge. The new end vertex is the
  // next one that is not on the opposite side.
  vi = next;
  for(next++; poly.side[next]*side < 0; next++) { }

  Interpolants endpoint;
  assignEdge(edge,     poly.v[vi]);
  assignEdge(endpoint, poly.v[next]);
  // Clear the low 22 bits of x at both ends.
  edge.x     &= ~0x3fffff;
  endpoint.x &= ~0x3fffff;

  const int32 y0 = 192 - poly.v[vi]->y/0x400000;
  const int32 y1 = 192 - poly.v[next]->y/0x400000;
  const int32 numLines = y1 - y0;
  if(numLines == 0) return;

  for(unsigned j = 0; j < Interpolants::count; j++)
    gradient[j] = (endpoint[j] - edge[j]) / numLines;
}
// Loads a set of interpolants from a vertex. Everything is copied as is,
// except that w is stored as a reciprocal (with w clamped to at least 1 so
// the division cannot fail).
void GPU::assignEdge(Interpolants& edge, Vertex* v) {
  edge.x = v->x;
  edge.z = v->z;

  edge.u = v->u;
  edge.v = v->v;

  edge.r = v->r;
  edge.g = v->g;
  edge.b = v->b;

  edge.inv_w = 0x80000000000/max(1,v->w);
}
// Renders one scanline of the 3D scene.
//
// Line y = renderedLines is drawn into pixels[y & 3], one of four line
// buffers used as a ring: it is first cleared to the background (flat color
// or background image), then both draw lists are rasterized over it.
// Afterwards the PREVIOUS line (y-1) is finished - toon edge filter, merge of
// the two pixel layers, fog - and packed into output[]. The one-line delay
// exists because the edge filter looks at the lines above and below.
void GPU::renderScanline() {
int y = renderedLines;
// Line pointers are offset by one pixel, so that x = -1 and x = 256 address
// the guard pixels at either end of a buffer row.
auto *line = 1 + pixels[y-0 & 3];
auto *line1 = 1 + pixels[y-1 & 3];
auto *line2 = 1 + pixels[y-2 & 3];
// drawList[0] is always treated as sorted; drawList[1] only if the scene
// requested Y-sorting of its translucent geometry.
updateDrawList(y, drawList[0], true);
updateDrawList(y, drawList[1], renderList->alphaSort);
// Row of the background image for this line: color plane at backOffs,
// depth plane 0x20000 bytes further on.
unsigned backOffs = 0x40000 + 512*(backY+y & 0xff);
uint16 *backImageColor = &system.vmap.tex[backOffs+0x00000 >> 14][backOffs+0x00000];
uint16 *backImageDepth = &system.vmap.tex[backOffs+0x20000 >> 14][backOffs+0x20000];
// Clear the line, including both guard pixels.
for(int x = -1; x <= 256; x++) {
auto &px = line[x];
uint16 color = backImageColor[backX+x & 0xff];
uint16 depth = backImageDepth[backX+x & 0xff];
// Stencil is preserved, even between frames!
// Transitioning to stencilMode=1 is what clears it.
px.flags &= Pixel::stencil;
px.flags &= ~Pixel::blends;
if(renderControl.backImage) {
// Expand each 5-bit channel to 6 bits (2n+1, with 0 staying 0).
px.a.r = color<<1 & 62; px.a.r += px.a.r > 0;
px.a.g = color>>4 & 62; px.a.g += px.a.g > 0;
px.a.b = color>>9 & 62; px.a.b += px.a.b > 0;
px.a.a = color>>15? 31 : 0;
px.a.id = backId;
px.az = depth<<16 & 0x7fff0000;
if(depth & 1<<15) px.flags |= Pixel::fog;
}
else {
px.a.r = backColor[0];
px.a.g = backColor[1];
px.a.b = backColor[2];
px.a.a = backAlpha;
px.a.id = backId;
px.az = backZ<<16;
if(backFog) px.flags |= Pixel::fog;
}
// The lower layer starts out empty and as far away as possible.
px.b.a = 0;
px.b.id = 0xff;
px.bz = 0x7fffffff;
px.sz = 0x7fffffff;
}
// Pass 1 without blending, pass 2 with blending.
renderDrawList(y, drawList[0], false);
renderDrawList(y, drawList[1], true);
if(y < 1) return;
y--; // edge filter requires 1 line latency
int32 fogZ = this->fogZ << 16;
uint32 *__restrict dest = &output[256*y];
for(int x = 0; x < 256; x++) {
// Any remaining fragments are combined here... or maybe this should
// be done between solid/alpha passes? I'm not sure if you can get a
// transparent pixel between a solid and a smooth edge, and still have
// it look right.
auto &px = line1[x];
auto &above = px.a;
auto &below = px.b;
uint8 r = above.r;
uint8 g = above.g;
uint8 b = above.b;
uint8 a = above.a;
uint8 id = above.id;
int64 z = px.az;
if(px.flags & Pixel::edge) {
auto &up = line2[x];
auto &left = line1[x-1], &right = line1[x+1];
auto &down = line[x];
// Toon edge filter - need to verify logic here, might be &&, etc.
// The pixel is outlined if any of its four neighbours is further away
// AND belongs to a different object ID.
if( z < up.az && id != up.a.id || z < down.az && id != down.a.id
|| z < left.az && id != left.a.id || z < right.az && id != right.a.id
) {
Vector outline = edgeTable[id/8];
if(renderControl.edgeMode & RC::toon) {
// Replace RGB with values from the table.
// Is there an option to disable this for some polygons?
r = outline[0] / 0x1000;
g = outline[1] / 0x1000;
b = outline[2] / 0x1000;
// In this case, alpha is also overwritten. Edges that don't
// pass the filter remain antialiased, as in "smooth" mode.
if(renderControl.edgeMode == RC::blended)
a = 16; // fixed value - or is there a register?
}
}
}
if(below.a /*not empty*/) {
if(below.id == id && a < 31 && (px.flags & Pixel::blends)) {
// Don't blend equal object IDs
r = below.r;
g = below.g;
b = below.b;
a = below.a;
}
else if(renderControl.alphaBlend || !(px.flags & Pixel::blends)) {
// Z blends, too! You can see this if you set fogAlpha=0
// with edge smoothing on.
z = px.bz + (z - px.bz) * (a+1) / 32u;
// They cheaped out here. To get correct Photoshop-style alpha
// (non pre-multiplied) requires dividing by A. It's nasty.
// NOTE(review): the divisor is unsigned, so a negative difference is
// divided as a large unsigned number and the result is then truncated
// to 8 bits by the assignment. Do not swap in a signed division
// without checking that the rounding stays the same.
r = below.r + (r - below.r) * (a+1) / 32u;
g = below.g + (g - below.g) * (a+1) / 32u;
b = below.b + (b - below.b) * (a+1) / 32u;
a = max(a, below.a);
}
}
// Polygons can be fogged toward a constant RGBA, or optionally just
// their alpha channel. Unlike most systems, this is a per-pixel effect
// based on the Z buffer.
if(px.flags & Pixel::fog) {
// Z := 0..0x7fffffff
int32 dist = z-fogZ >> 16-renderControl.fogLevel;
dist = max(0, min(0x7fff, dist));
// Linear interpolation between two neighbouring table entries; the
// table has one extra entry so that index+1 is always valid.
int32 ifog = fogTable[dist/0x400u];
ifog += (fogTable[dist/0x400u + 1] - ifog) * (dist & 0x3ff)/0x400u;
if(renderControl.fogMode == RC::color) {
r += (fogColor[0] - r) * (ifog+1)/0x80u;
g += (fogColor[1] - g) * (ifog+1)/0x80u;
b += (fogColor[2] - b) * (ifog+1)/0x80u;
}
a += (fogAlpha - a) * (ifog+1)/0x80u;
}
// Pack as 5 bits of alpha followed by 6 bits each of blue, green, red.
*dest++ = a<<18 | b<<12 | g<<6 | r<<0;
}
}
// Rasterizes the span of every active polygon in a draw list onto scanline y,
// then steps each polygon's edges down by one line.
//   y     - scanline being drawn; pixels go to the line buffer pixels[y & 3]
//   list  - draw list whose polygons [first, last) are considered
//   blend - false: opaque pass (replaces the top layer, marks edges);
//           true:  blended pass (merges fragments into the two pixel layers)
//
// Fragments whose alpha does not exceed the alpha test threshold are dropped.
// The threshold is minAlpha while the alpha test is enabled and 0 otherwise,
// so fully transparent fragments are always skipped. (Previously the compare
// used the 1-bit enable flag itself, which left minAlpha without effect.)
void GPU::renderDrawList(int y, DrawList& list, bool blend) {
  auto *line = 1 + pixels[y & 3];
  unsigned alphaTest = minAlpha * renderControl.alphaTest;

  for(unsigned n = list.first; n < list.last; n++) {
    auto &poly = list.polys[n];
    if(!poly.numVerts) continue;

    // First and last line covered by the polygon (vertices are sorted top
    // to bottom). Edges are stepped from one line before the first.
    int y0 = 192-poly.v[0]->y/0x400000;
    int y1 = 192-poly.v[poly.numVerts-1]->y/0x400000;
    if(y < y0-1 || y >= y1) continue;

    // Switch to the next edge on either side once its end vertex is reached.
    while(y == 192-poly.v[poly.next_lv]->y/0x400000-1 && poly.next_lv < poly.numVerts-1)
      setupNextEdge(poly, poly.lv, poly.next_lv, -1, poly.lnext, poly.dl_dv);

    while(y == 192-poly.v[poly.next_rv]->y/0x400000-1 && poly.next_rv < poly.numVerts-1)
      setupNextEdge(poly, poly.rv, poly.next_rv, +1, poly.rnext, poly.dr_dv);

    // X range for previous, current, and next line
    // We need this to determine "edge" cases (sorry) ->
    int px0 = poly.prev_lx / 0x400000;   // Top:    x in x0..prev_x0
    int px1 = poly.prev_rx / 0x400000;   //      or prev_x1..x1
    int x0  = poly.left.x / 0x400000;    // Bottom: x in x0..next_x0
    int x1  = poly.right.x / 0x400000;   //      or next_x1..x1
    int nx0 = poly.lnext.x / 0x400000;   // Left:   x+0 == x0
    int nx1 = poly.rnext.x / 0x400000;   // Right:  x+1 == x1

    // Theory:
    //   A pixel is marked "edge" on one of the above conditions. Translucent
    //   (alpha < 31) polygons do not modify the edge flags. Smooth edges have
    //   their alpha set to the fraction of pixel covered.

    // Test for dropout - DS doesn't have explicit line or point primitives;
    // to draw them, you simply use geometry thinner than 1px. The rasterizer
    // ALMOST never leaves holes.. even in extremely narrow triangles. Very
    // occasionally, degenerate triangles will have gaps.
    if(x0 > px1) x0 = px1;
    if(x1 < px0) x1 = px0;
    if(x1 <= x0) x1 = x0;

    if(y >= y0) {
      Texture *tex = nullptr;
      if(renderControl.texturing)
        tex = getTexture(poly.p->texImage, poly.p->texPalette);

      Interpolants i = poly.left;  // RGB, UVZW at current pixel
      Interpolants delta;          // Gradient from left to right edges
      // NOTE(review): for a one pixel span (x1 == x0) delta stays unset. It
      // is still added to i below, but the span loop then runs exactly once
      // and the sums are never read.
      if(x1 > x0)
        for(unsigned j = 0; j < Interpolants::count; j++)
          delta[j] = (poly.left[j] - poly.right[j]) / (x0-x1);

      uint8 id = poly.p->id;
      int drawMode = poly.p->drawMode;
      int shadeMode = drawMode & Prim::shadeMask;

      if(!renderControl.fogMode)
        drawMode &= ~Prim::fog;

      if((drawMode & Prim::shadeMask) == Prim::stencil) {
        if(id) {
          stencilMode = false;  // Draw to screen, using stencil bits as mask.
        }
        else if(stencilMode == false) {
          stencilMode = true;   // Draw to stencil buffer.
          // Toggling into stencil mode with ID=0 was observed to clear the
          // stencil buffer. This IS affected by polygon sorting so it will
          // not happen on every line, or even every frame, unless so arranged!
          //
          // The buffer holds data for only 2 lines, and if not cleared, then
          // old data is used. So shadow volumes must be closed to work.
          for(unsigned x = 0; x < 256; x++)
            line[x].flags &= ~Pixel::stencil;
        }
      }

      if(i.inv_w < 0x2000) i.inv_w = 0x2000;

      // Hrm. Too many variables here, this should be split up. Perhaps:
      //   pass 1: calculate edge coverage (a==31 only)
      //   pass 2: if z <= bz, calculate [argbf]z (modulate, decal, toon, stencil)
      //   pass 3: do atest, ztest (less, equal), merge with line buffer (a==31, a<31)?
      for(unsigned x = x0; x <= x1; x++) {
        int32 w = 0x80000000000ll / max(2ll, i.inv_w);
        int32 z = i.z + 0x40000000;  // z/w

        // Should find some way to justify this - <<7 was Selected for
        // suitable fog in Mario Kart's Chocolate Mountain course..
        if(renderList->depthUseW)
          z = w<<7;

        // Unproject the interpolated values
        int32 u = ((int64) i.u * w)/0x40000000;
        int32 v = ((int64) i.v * w)/0x40000000;
        Vector color = {
          min(63, (int64) i.r * w/0x4000000),
          min(63, (int64) i.g * w/0x4000000),
          min(63, (int64) i.b * w/0x4000000),
          poly.p->alpha
        };

        // Step the interpolants now; everything below uses the values that
        // were just unprojected.
        i.u += delta.u;  i.r += delta.r;
        i.v += delta.v;  i.g += delta.g;
        i.z += delta.z;  i.b += delta.b;
        i.inv_w += delta.inv_w;

        bool edge = false;

        if(blend == false) {
          edge = x0 <= x && (x <= px0 || x <= nx0)
            || (px1 <= x || nx1 <= x) && x <= x1
            || x == x0 || x == x1;

          if(renderControl.edgeMode & RC::smooth) {
            // Coverage of the end pixels, from the fractional part of the
            // edge position.
            if(x == x0) color[3] = 31-(poly.left.x>>17 & 31);
            if(x == x1) color[3] =    (poly.right.x>>17 & 31);
          }
        }

        Vector texColor = { 63, 63, 63, 31 };
        if(tex) {
          u = max(0, min(tex->width-1, u & tex->umask));
          v = max(0, min(tex->height-1, v & tex->vmask));

          // Cached texels hold 5 bits per color channel, with alpha from
          // bit 15 up; the colors are expanded to 6 bits here.
          uint32 abgr = tex->image[tex->width*v + u];
          texColor[0] = 2*(abgr>> 0 & 31) + 1;
          texColor[1] = 2*(abgr>> 5 & 31) + 1;
          texColor[2] = 2*(abgr>>10 & 31) + 1;
          texColor[3] =   (abgr>>15);
        }

        auto &px = line[x];
        auto &above = px.a;
        auto &below = px.b;

        bool ztest_a = drawMode & Prim::zequal? z == px.az : z < px.az;
        bool ztest_b = drawMode & Prim::zequal? z == px.bz : z < px.bz;

        if(shadeMode == Prim::stencil) {
          if(stencilMode) {
            // Draw to stencil buffer, not the screen
            // Does texturing work here? Alpha, maybe?
            if(ztest_a == false) {
              px.flags |= Pixel::stencil;
              px.sz = px.az;
            }
            continue;
          }
          // Mask polygons using the stencil buffer
          if(px.flags & Pixel::stencil) {
            // Succeeded - force Z onto the fragment being shadowed
            z = px.sz;

            if(id == above.id) {
              // FF IV surrounds characters in shadow volumes, and uses the ID
              // to prevent self-shadowing. So technically this fragment is on
              // top, yet the DS somehow draws it underneath the model. If not
              // done this way, edge smoothing would clash with the shadow.
              ztest_a = false;
              ztest_b = true;
              z = px.bz;
            }
          } else {
            // Failed stencil test
            continue;
          }
        }

        if(shadeMode == Prim::toon) {
          // Look up shade based on the red channel
          int a = color[3];
          color = toonTable[renderControl.toonShading][color[0]/2 & 31];
          color[0] /= 0x1000u;
          color[1] /= 0x1000u;
          color[2] /= 0x1000u;
          color[3] = a;
        }

        if(shadeMode == Prim::decal) {
          // Blend the texture over the vertex color by the texture's alpha.
          color[0] += (texColor[0] - color[0]) * (texColor[3]+1) / 32u;
          color[1] += (texColor[1] - color[1]) * (texColor[3]+1) / 32u;
          color[2] += (texColor[2] - color[2]) * (texColor[3]+1) / 32u;
        } else {
          // Modulate vertex color and alpha by the texture.
          color[0] = color[0] * texColor[0] / 64u;
          color[1] = color[1] * texColor[1] / 64u;
          color[2] = color[2] * texColor[2] / 64u;
          color[3] = (1 + color[3]) * texColor[3] / 32u;
        }

        // Alpha test, against the threshold computed at the top.
        if(color[3] <= alphaTest)
          continue;

        if(blend) {
          if(ztest_a) {
            if(above.id == id) continue;

            if(!renderControl.alphaBlend) {
              // Mario Kart uses this on the car selection screen. The alpha
              // and coverage are still retained and used for PPU blending.
              above.r = color[0];
              above.g = color[1];
              above.b = color[2];
              above.a = max(color[3], px.a.a);
              above.id = id;

              if(!(drawMode & Prim::fog))  px.flags &= ~Pixel::fog;
              if(drawMode & Prim::zwrite)  px.az = z;
              continue;
            }

            if(below.a /*not empty*/) {
              if(below.id != above.id || color[3] == 31 || !(px.flags & Pixel::blends)) {
                // Merge the top pixel down to make room
                px.bz   += int64(px.az - px.bz) * (above.a + 1)/32u;
                below.r += (above.r - below.r) * (above.a + 1)/32u;
                below.g += (above.g - below.g) * (above.a + 1)/32u;
                below.b += (above.b - below.b) * (above.a + 1)/32u;
                below.a  = max(above.a, below.a);
                below.id = above.id;
              }
            } else {
              // Bottom empty or top solid, simply push down
              below = above;
              px.bz = px.az;
            }
            // Then write the top one
            above.r = color[0];
            above.g = color[1];
            above.b = color[2];
            above.a = color[3];
            above.id = id;

            // AND with previous fog bit
            if(!(drawMode & Prim::fog))  px.flags &= ~Pixel::fog;
            if(drawMode & Prim::zwrite)  px.az = z;

            px.flags |= Pixel::blends;
          }
          else if(ztest_b) {
            if(below.a /*not empty*/) {
              if(below.id == id && color[3] < 31) continue;

              if(!renderControl.alphaBlend) {
                if(drawMode & Prim::zwrite)
                  px.bz = z;
                below.r = color[0];
                below.g = color[1];
                below.b = color[2];
              }
              else {
                // Blend into bottom pixel
                if(drawMode & Prim::zwrite)
                  px.bz += int64(z - px.bz) * (color[3] + 1)/32u;
                below.r += (color[0] - below.r) * (color[3] + 1)/32u;
                below.g += (color[1] - below.g) * (color[3] + 1)/32u;
                below.b += (color[2] - below.b) * (color[3] + 1)/32u;
              }
              below.a  = max(color[3], below.a);
              below.id = id;
            } else {
              // Bottom empty, simply replace
              if(drawMode & Prim::zwrite)
                px.bz = z;
              below.r = color[0];
              below.g = color[1];
              below.b = color[2];
              below.a = color[3];
              below.id = id;
            }
          }
          continue;
        }

        // Opaque pass from here on.
        if(ztest_a) {
          if(drawMode & Prim::fog) px.flags |= Pixel::fog;
          else                     px.flags &= ~Pixel::fog;

          // Push the top pixel down. Anything beneath is lost.
          // Check first to avoid breaking backAlpha == 0.
          if(above.a /*not empty*/) {
            px.bz = px.az;
            below = above;
            below.a = 31;
          }
          above.r = color[0];
          above.g = color[1];
          above.b = color[2];
          above.a = color[3];
          above.id = id;
          px.az = z;

          if(edge) px.flags |= Pixel::edge;
          else     px.flags &= ~Pixel::edge;
        }
        else if(ztest_b) {
          below.r = color[0];
          below.g = color[1];
          below.b = color[2];
          below.a = 31;
          below.id = id;
          px.bz = z;
        }
      }
      // Remember this line's span for the edge tests on the next line.
      poly.prev_lx = poly.left.x;
      poly.prev_rx = poly.right.x;
    }

    // Step both edges down to the next line.
    poly.left = poly.lnext;
    poly.right = poly.rnext;

    for(unsigned j = 0; j < Interpolants::count; j++) {
      poly.lnext[j] += poly.dl_dv[j];
      poly.rnext[j] += poly.dr_dv[j];
    }
  }
}

View File

@ -1,163 +0,0 @@
// Types declared further down.
struct ActivePoly;
union Interpolants;
struct DrawList;
// Rasterizer, one scanline at a time.
void updateDrawList(int y, DrawList& list, bool sorted);
void setupPoly(ActivePoly& poly, Prim* prim, Vertex *vs, unsigned numVerts);
void setupNextEdge(ActivePoly& poly, unsigned& vi, unsigned& next, int side, Interpolants& edge, Interpolants& gradient);
void assignEdge(Interpolants& edge, Vertex* v);
void renderScanline();
void renderDrawList(int y, DrawList& list, bool blend);
// Register access. Going by the signatures, the overloads without arguments
// read a register and the (data, mask) ones write it - the definitions are
// not part of this header.
uint32 regRenderOptions();
uint32 regRenderLoad();
void regRenderOptions(uint32 data, uint32 mask);
void regRenderMinAlpha(uint32 data, uint32 mask);
void regRenderClearColor(uint32 data, uint32 mask);
void regRenderClearCoord(uint32 data, uint32 mask);
void regRenderFogColor(uint32 data, uint32 mask);
void regRenderFogCoord(uint32 data, uint32 mask);
void regRenderFogTable(unsigned index, uint32 data, uint32 mask);
void regRenderEdgeTable(unsigned index, uint32 data, uint32 mask);
void regRenderToonTable(unsigned index, uint32 data, uint32 mask);
// A texture decoded from VRAM into one uint32 per texel, as kept in the
// texture cache. Owns its image buffer.
struct Texture {
  // Lookup data - 44 significant bits
  uint64 key;  // texImage ^ texPalette<<30, as built by getTexture()
  uint32 imageBase, indexBase, colorBase;

  // Mirrored textures:  clamp at 2*size, wrap at 2*size, cache all 4 mirrors
  // Repeating textures: clamp at 1*size, wrap at 1*size
  // Clamped textures:   clamp at 1*size, wrap at UINT_MAX
  int32 width, height;
  int32 umask, vmask;
  uint32 *image;

  // Address ranges for dirty checks
  uint32 texelRange[2];  // pixel data
  uint32 indexRange[2];  // for compressed format
  uint32 colorRange[2];  // for paletted formats

  ~Texture() { delete[] image; }

  // Starts out without an image and with both masks set to all ones (no
  // wrapping); the bases and address ranges are left for the caller to fill.
  Texture(uint64 key, int width, int height) : key(key),
    width(width), height(height), umask(~0), vmask(~0), image(nullptr) {}

  // The image is released with delete[] in the destructor, so a memberwise
  // copy would free the same buffer twice. Textures are handled by pointer;
  // copying is disabled.
  Texture(const Texture&) = delete;
  Texture& operator=(const Texture&) = delete;
};
// Cache of decoded textures: a fixed-size hash table whose buckets are
// vectors of Texture pointers. The cache deletes the textures it drops.
struct TexCache {
void reset(); // deletes every texture and empties all buckets
void flushDirty(unsigned bank); // drops textures whose source memory is dirty
bool checkDirty(Texture *texture, uint32 *range, VRAMMapping *map, uint32 mask);
void add(Texture *t); // inserts t under a hash of t->key
Texture *get(uint64 key); // returns the texture with this key, or nullptr
enum { tableSize = 1<<10 }; // matches the 10-bit hash used by add() and get()
vector<Texture*> table[tableSize];
int size; // incremented by add(), zeroed by reset()
} texCache;
// Returns the decoded texture for a texImage/texPalette pair, converting and
// caching it on first use; nullptr if the format field is 0.
Texture *getTexture(uint32 texImage, uint16 texPalette);
// Fills in the flipped copies of an image whose cached size is larger than
// the source image (width, height are the source dimensions).
void mirrorTexture(Texture *tex, int width, int height);
// One decoder per texture format; getTexture() picks one by the format field.
void convertPacked(Texture *tex, int width, int height);
void convertI2(Texture *tex, int width, int height, bool colorKey);
void convertI3(Texture *tex, int width, int height);
void convertI4(Texture *tex, int width, int height, bool colorKey);
void convertI5(Texture *tex, int width, int height);
void convertI8(Texture *tex, int width, int height, bool colorKey);
void convertARGB(Texture *tex, int width, int height);
// The set of values interpolated along polygon edges and across spans.
// The same eight int64 slots can be reached by name or, through val[] and
// operator[], by index - which is how the rasterizer loops over all of them.
// The order of the named fields therefore has to match the indices.
union Interpolants {
int64& operator[](unsigned i) { return val[i]; }
int64 val[8]; enum { count=8 };
struct {
int64 r, g, b, u, v, z, inv_w, x; // inv_w: reciprocal of w, see assignEdge()
};
};
// A polygon being rasterized: its vertices sorted from top to bottom, plus
// the stepping state of its left and right edges.
struct ActivePoly {
Prim *p; // primitive the polygon came from (attributes, texture)
uint8 numVerts;
int8 side[8]; // is vertex on left or right?
Vertex *v[8]; // sorted by setupPoly(): top first
Interpolants left, right; // current values at l, r
Interpolants lnext, rnext; // values for the following line
Interpolants dl_dv, dr_dv; // gradient down each side
unsigned lv, rv; // index of current vertex
unsigned next_lv, next_rv; // index of next vertex
int32 prev_lx, prev_rx; // x span on previous line
};
// One entry of a line buffer: two color layers (a on top, b beneath) with
// their depths, a stencil depth and a set of flag bits.
struct Pixel {
// Z for depth sorting. We need two layers (!) to make edge smoothing
// independent of draw order. It also saves the trouble of blending every
// pixel drawn - the worst case is a single pass at the end.
int32 az, bz; // at least 24 bits? 15 bit had too much Z fighting :/
int32 sz; // shadow buffer z
struct {
uint8 r, g, b; // 6 bits
uint8 a; // 5 bits
// Object ID is a kludge Nintendo came up with to deal with edge fill.
// They chose to make all polygon edges overlap, so their antialiasing
// looks right. However, this means translucent edge pixels would be
// drawn twice. The object ID is used to suppress that.
uint8 id;
} a, b;
// Some miscellaneous bits, only one layer here.
// There have been comments to the effect that multiple stencil bits are
// needed, but I don't think that's true - shadow volumes are drawn one
// at a time with the stencil cleared in-between, and the object ID
// prevents shadows blending more than once per pixel per light.
uint8 flags; enum {
fog = 1<<7, // apply post pass Z-based fogging
edge = 1<<6, // apply post pass toon edge filter
stencil = 1<<5, // pixels can be drawn here in stencil mode
blends = 1<<4, // if true, treat a as alpha; else as coverage
};
};
// Global render options, as consulted by the rasterizer.
struct RC {
uint1 texturing; // textures are only fetched when set
uint1 toonShading; enum { darken, lighten }; // selects which toonTable is used
// edgeMode is tested bitwise: smooth and toon are separate bits, and
// blended has both of them set.
uint2 edgeMode; enum { solid, smooth, toon, blended };
uint2 fogMode; enum { color=2, alpha=3 }; // 0 turns per-polygon fog off
uint4 fogLevel; // the fog distance is shifted right by 16-fogLevel
uint1 alphaTest;
uint1 alphaBlend;
uint1 backImage; // clear to the background image instead of backColor
} renderControl;
// The polygons of one render pass. polys[first..last) are active;
// nextPrim..lastPrim is the range of the scene's primitive order that
// updateDrawList() still has to turn into polygons.
struct DrawList {
unsigned first, last;
unsigned nextPrim, lastPrim;
ActivePoly polys[6144];
} drawList[2]; // 2 passes - solid and translucent
// Alpha test threshold (see renderDrawList).
uint5 minAlpha;
// Background used to clear each line when no background image is selected.
uint6 backColor[3];
uint5 backAlpha;
uint6 backId;
uint8 backX, backY; // scroll offset into the background image
uint15 backZ;
uint1 backFog;
// Fog density by distance; the extra entry lets the interpolation in
// renderScanline() read index+1 for the last step.
int8 fogTable[32+1];
uint6 fogColor[3];
uint5 fogAlpha;
int16 fogZ;
// True while polygons are being drawn to the stencil bits rather than to
// the screen.
bool stencilMode;
Vector toonTable[2][32]; // indexed by renderControl.toonShading, then shade
Vector edgeTable[8]; // outline colors, indexed by object id/8
// Need two buffers here for edge transparency
// Need three for toon edges, argh. Will redo later.
// Rows are used as a ring (index y & 3); the 2 extra pixels are guards for
// x = -1 and x = 256.
Pixel pixels[4][256+2];

View File

@ -1,304 +0,0 @@
void GPU::TexCache::reset() {
for(auto &bucket : table) {
for(auto &texture : bucket)
delete texture;
bucket.reset();
}
size = 0;
}
// Reports whether any part of an address range is marked dirty.
//   texture - not used by the check itself
//   range   - { start, end } addresses; the range is widened to whole
//             256-byte units before being tested
//   map     - VRAM mapping table, indexed by address >> 14
//   mask    - address mask of the memory region; addresses wrap around it
bool GPU::TexCache::checkDirty(Texture *texture, uint32 *range, VRAMMapping* map, uint32 mask) {
  const uint32 end = range[1] + 255 & ~255 & mask;
  uint32 addr = range[0] & ~255 & mask;
  while(addr != end) {
    if(map[addr>>14].dirty(addr))
      return true;
    addr = addr+256 & mask;
  }
  return false;
}
void GPU::TexCache::flushDirty(unsigned bank) {
// Called whenever VRAM is (re-)assigned as texture memory
// (ie. the game finished uploading and has locked it).
bool palMem = bank >= 4;
for(auto &bucket : table) {
for(auto &texture : bucket) {
if(!(!palMem && checkDirty(texture, texture->texelRange, system.vmap.tex, 0x7ffff)
|| !palMem && checkDirty(texture, texture->indexRange, system.vmap.tex, 0x7ffff)
|| palMem && checkDirty(texture, texture->colorRange, system.vmap.texpal, 0x1ffff) ))
continue;
delete texture;
texture = nullptr;
}
// Flush all the nullptrs at once to avoid O(n^2) removal.
unsigned count = 0;
for(unsigned n = 0; n < bucket.size(); n++) {
if(bucket[n]) bucket[count++] = bucket[n];
}
bucket.resize(count);
}
}
// Inserts a texture into the bucket selected by a 10-bit hash of its key
// (the key's low 40 bits folded together in 10-bit steps).
void GPU::TexCache::add(Texture *t) {
  const uint64 key = t->key;
  uint10 hash = key ^ key>>10 ^ key>>20 ^ key>>30;
  table[hash].append(t);
  size++;
}
// Looks up a texture by key. Returns nullptr when it is not cached.
// The hash must stay identical to the one used by add().
GPU::Texture* GPU::TexCache::get(uint64 key) {
  uint10 hash = key ^ key>>10 ^ key>>20 ^ key>>30;
  auto &bucket = table[hash];
  for(unsigned n = 0; n < bucket.size(); n++) {
    if(bucket[n]->key == key)
      return bucket[n];
  }
  return nullptr;
}
// Returns the decoded texture for the given texture image / palette
// parameters. On a cache miss the texture is converted from VRAM, mirrored if
// required, and added to the cache. Returns nullptr when the format field
// (bits 26-28 of texImage) is 0.
GPU::Texture *GPU::getTexture(uint32 texImage, uint16 texPalette) {
  //texCache.reset();
  uint64 key = (uint64) texImage ^ (uint64)texPalette<<30;

  if(Texture *cached = texCache.get(key))
    return cached;

  // Fields of texImage: format, color key flag, per-axis wrap mode and size.
  int format = texImage>>26 & 7;
  if(format == 0) return nullptr;

  bool colorKey = texImage & 1<<29;
  unsigned uaxis = texImage>>16 & 5;
  unsigned vaxis = texImage>>17 & 5;
  unsigned imgwidth  = 8 << (texImage>>20 & 7);
  unsigned imgheight = 8 << (texImage>>23 & 7);

  Texture *tex = new Texture(key, imgwidth, imgheight);
  tex->imageBase = 8*(texImage & 0xffff);
  tex->colorBase = 8*(texPalette & 0x3fff);
  tex->indexBase = 0x20000 + (tex->imageBase/2 & 0xffff);
  if(tex->imageBase >= 0x40000)
    tex->indexBase += 0x10000;

  // Mirroring implies repeat as well: the cached image holds the original
  // next to its flipped copy, and that doubled image is what repeats.
  if(uaxis == Prim::mirror) tex->width  *= 2;
  if(vaxis == Prim::mirror) tex->height *= 2;
  if(uaxis & Prim::repeat)  tex->umask = tex->width-1;
  if(vaxis & Prim::repeat)  tex->vmask = tex->height-1;

  tex->image = new uint32[tex->width * tex->height];

  // The dirty ranges start out empty at their base address; the converters
  // extend them to cover what they actually read.
  tex->texelRange[0] = tex->texelRange[1] = tex->imageBase;
  tex->indexRange[0] = tex->indexRange[1] = tex->indexBase;
  tex->colorRange[0] = tex->colorRange[1] = tex->colorBase;

  if(format == Prim::packed)        convertPacked(tex, imgwidth, imgheight);
  else if(format == Prim::I2)       convertI2(tex, imgwidth, imgheight, colorKey);
  else if(format == Prim::A5_I3)    convertI3(tex, imgwidth, imgheight);
  else if(format == Prim::I4)       convertI4(tex, imgwidth, imgheight, colorKey);
  else if(format == Prim::A3_I5)    convertI5(tex, imgwidth, imgheight);
  else if(format == Prim::I8)       convertI8(tex, imgwidth, imgheight, colorKey);
  else if(format == Prim::A1_RGB15) convertARGB(tex, imgwidth, imgheight);

  mirrorTexture(tex, imgwidth, imgheight);
  texCache.add(tex);
  return tex;
}
// Completes the image of a mirrored texture. width and height are the size of
// the source image in the top left corner; wherever the cached image is
// larger, the extra half is filled with a flipped copy.
void GPU::mirrorTexture(Texture *tex, int width, int height) {
  const bool flipU = tex->width > width;
  const bool flipV = tex->height > height;

  for(int y = 0; y < height; y++) {
    uint32 *row = tex->image + tex->width*y;

    if(flipU) {
      // Reflect the left half of the row into the right half.
      for(int x = 0; x < width; x++)
        row[tex->width-1 - x] = row[x];
    }
    if(flipV) {
      // Copy the (already completed) row to its counterpart counted from
      // the bottom of the image.
      uint32 *mirror = tex->image + tex->width*(tex->height-1 - y);
      memcpy(mirror, row, tex->width*sizeof(*row));
    }
  }
}
// Decodes a block-compressed texture. The image consists of 4x4 texel blocks
// of 2 bits per texel (32 bits per block); a separate 16-bit index word per
// block selects four palette entries and one of four color modes.
// Also records the texel, index and palette address ranges that were read.
void GPU::convertPacked(Texture *tex, int width, int height) {
  const uint32 fields = 31<<20 | 31<<10 | 31<<0;
  const uint32 gap = 15;  // space apart to allow averaging

  // Reads a 15-bit palette entry and spreads its three 5-bit channels out
  // into separate fields, so whole colors can be mixed with one multiply-add.
  auto fetch = [&](uint32 addr) -> uint32 {
    return uint15(system.vmap.texpal[addr >> 14 & 31][addr]) * (1<<gap|1) & fields;
  };

  uint32 taddr = tex->imageBase;
  uint32 caddr = tex->indexBase;

  tex->texelRange[1] += (width/4)*(height/4)*4;
  tex->indexRange[1] += (width/4)*(height/4)*2;
  // Raised so that the min() below ends up with the lowest palette address
  // actually referenced.
  tex->colorRange[0] += 0x10000;

  for(unsigned v = 0; v < height; v += 4) {
    uint32 *dest = &tex->image[tex->width*v];

    for(unsigned u = 0; u < width; u += 4) {
      // Retrieve 4x4 block = 16 pixels x 2bpp = 32 bits
      auto &page = system.vmap.tex[taddr>>14 & 31];
      uint32 block = page[taddr] | page[taddr+2]<<16;

      // Retrieve color selection and mode bits
      uint16 colorsel = system.vmap.tex[caddr>>14 & 31][caddr];
      uint32 paddr = tex->colorBase + (4*colorsel & 0xfffc);

      // This could potentially span most of the palette space, but that would
      // complicate the game's texture allocation, so rather unlikely.
      tex->colorRange[0] = min(tex->colorRange[0], paddr+0);
      tex->colorRange[1] = max(tex->colorRange[1], paddr+8);

      uint32 alpha[4] = { 31, 31, 31, 31 };
      uint32 colors[4] = { fetch(paddr+0), fetch(paddr+2), fetch(paddr+4), fetch(paddr+6) };

      const unsigned mode = colorsel >> 14;
      if(mode == 3) {
        // 2 colors (4-way mix)
        colors[2] = (5*colors[0] + 3*colors[1])/8 & fields;
        colors[3] = (3*colors[0] + 5*colors[1])/8 & fields;
      }
      else if(mode < 2) {
        // mode 1: 2 colors (3-way mix) + transparent
        // mode 0: 3 colors (separate) + transparent
        if(mode == 1)
          colors[2] = (4*colors[0] + 4*colors[1])/8 & fields;
        alpha[3] = 0;
      }
      // mode 2: 4 colors (separate), nothing to do

      // Fold the spread-out channels back together and attach the alpha.
      for(int i = 0; i < 4; i++) {
        const uint32 folded = colors[i] | colors[i] >> gap;
        colors[i] = alpha[i]<<15 | folded & 0x7fff;
      }

      // Write the block, two bits per texel, row by row.
      for(int sv = 0; sv < 4; sv++) {
        uint32 *out = dest + sv*tex->width;
        for(int su = 0; su < 4; su++, block >>= 2)
          out[su] = colors[block & 3];
      }

      dest += 4;
      taddr += 4;
      caddr += 2;
    }
  }
}
// Decodes a 2-bit indexed texture (4 texels per byte) into tex->image.
// Each output texel is alpha<<15 | palette entry. Alpha is 31, except that
// index 0 gets alpha 0 when colorKey is set.
void GPU::convertI2(Texture *tex, int width, int height, bool colorKey) {
  tex->texelRange[1] += width*height/4;
  tex->colorRange[1] += 8;

  for(unsigned row = 0; row < height; row++) {
    uint32 *out = &tex->image[tex->width*row];
    uint32 src = tex->imageBase + width/4*row;

    for(unsigned col = 0; col < width; col++) {
      // The fetched unit holds 8 texels; (col&7) selects the 2-bit field.
      uint2 texel = system.vmap.tex[src>>14 & 31][src] >> 2*(col&7);
      uint32 entry = tex->colorBase + 2*texel;
      uint16 color = system.vmap.texpal[entry>>14 & 31][entry];
      uint5 opacity = (!colorKey || texel > 0)? 31 : 0;
      out[col] = opacity<<15 | color;

      // Advance one source byte after every fourth texel.
      if((col+1) % 4 == 0) src++;
    }
  }
}
// Decodes an 8-bit-per-texel texture whose low 3 bits are a palette index and
// whose high 5 bits are alpha, into tex->image as alpha<<15 | palette entry.
void GPU::convertI3(Texture *tex, int width, int height) {
  tex->texelRange[1] += width*height;
  tex->colorRange[1] += 16;

  for(unsigned row = 0; row < height; row++) {
    uint32 *out = &tex->image[tex->width*row];
    const uint32 lineBase = tex->imageBase + width*row;

    for(unsigned col = 0; col < width; col++) {
      const uint32 src = lineBase + col;
      // The fetched unit holds 2 texels; (col&1) selects the byte.
      uint8 texel = system.vmap.tex[src>>14 & 31][src] >> 8*(col&1);
      uint32 entry = tex->colorBase + 2*(texel & 0x07);
      uint16 color = system.vmap.texpal[entry>>14 & 31][entry];
      uint5 opacity = texel>>3;
      out[col] = opacity<<15 | color;
    }
  }
}
// Decodes a 4-bit indexed texture (2 texels per byte) into tex->image.
// Each output texel is alpha<<15 | palette entry. Alpha is 31, except that
// index 0 gets alpha 0 when colorKey is set.
void GPU::convertI4(Texture *tex, int width, int height, bool colorKey) {
  tex->texelRange[1] += width*height/2;
  tex->colorRange[1] += 32;

  for(unsigned row = 0; row < height; row++) {
    uint32 *out = &tex->image[tex->width*row];
    uint32 src = tex->imageBase + width/2*row;

    for(unsigned col = 0; col < width; col++) {
      // The fetched unit holds 4 texels; (col&3) selects the nibble.
      uint4 texel = system.vmap.tex[src>>14 & 31][src] >> 4*(col&3);
      uint32 entry = tex->colorBase + 2*texel;
      uint16 color = system.vmap.texpal[entry>>14 & 31][entry];
      uint5 opacity = (!colorKey || texel > 0)? 31 : 0;
      out[col] = opacity<<15 | color;

      // Advance one source byte after every second texel.
      if((col+1) % 2 == 0) src++;
    }
  }
}
// Decodes an 8-bit-per-texel texture whose low 5 bits are a palette index and
// whose high 3 bits are alpha, into tex->image as alpha<<15 | palette entry.
// The 3-bit alpha (0..7) is scaled by 9/2 onto the 0..31 range.
void GPU::convertI5(Texture *tex, int width, int height) {
  tex->texelRange[1] += width*height;
  tex->colorRange[1] += 64;

  for(unsigned row = 0; row < height; row++) {
    uint32 *out = &tex->image[tex->width*row];
    const uint32 lineBase = tex->imageBase + width*row;

    for(unsigned col = 0; col < width; col++) {
      const uint32 src = lineBase + col;
      // The fetched unit holds 2 texels; (col&1) selects the byte.
      uint8 texel = system.vmap.tex[src>>14 & 31][src] >> 8*(col&1);
      uint32 entry = tex->colorBase + 2*(texel & 0x1f);
      uint16 color = system.vmap.texpal[entry>>14 & 31][entry];
      uint5 opacity = (texel>>5) * 9/2;
      out[col] = opacity<<15 | color;
    }
  }
}
// Decodes an 8-bit indexed texture into tex->image.
// Each output texel is alpha<<15 | palette entry. Alpha is 31, except that
// index 0 gets alpha 0 when colorKey is set.
void GPU::convertI8(Texture *tex, int width, int height, bool colorKey) {
  tex->texelRange[1] += width*height;
  tex->colorRange[1] += 512;

  for(unsigned row = 0; row < height; row++) {
    uint32 *out = &tex->image[tex->width*row];
    const uint32 lineBase = tex->imageBase + width*row;

    for(unsigned col = 0; col < width; col++) {
      const uint32 src = lineBase + col;
      // The fetched unit holds 2 texels; (col&1) selects the byte.
      uint8 texel = system.vmap.tex[src>>14 & 31][src] >> 8*(col&1);
      uint32 entry = tex->colorBase + 2*texel;
      uint16 color = system.vmap.texpal[entry>>14 & 31][entry];
      uint5 opacity = (!colorKey || texel > 0)? 31 : 0;
      out[col] = opacity<<15 | color;
    }
  }
}
// Decodes a direct-color texture (16 bits per texel, top bit = alpha flag)
// into tex->image. The 1-bit alpha is widened to 0 or 31 and stored above the
// color bits as alpha<<15 | texel.
void GPU::convertARGB(Texture *tex, int width, int height) {
  tex->texelRange[1] += width*height*2;

  for(unsigned row = 0; row < height; row++) {
    uint32 *out = &tex->image[tex->width*row];
    const uint32 lineBase = tex->imageBase + 2*width*row;

    for(unsigned col = 0; col < width; col++) {
      const uint32 src = lineBase + 2*col;
      uint16 texel = system.vmap.tex[src>>14 & 31][src];
      uint5 opacity = 31*(texel >> 15);
      out[col] = opacity<<15 | texel;
    }
  }
}

View File

@ -1,410 +0,0 @@
#include <nds/nds.hpp>
#include <sys/time.h>
namespace NintendoDS {
Interface *interface = nullptr;
// Returns the video refresh rate in Hz: twice 33513982, divided by the number
// of those ticks per frame (2*6 * 263 * 355).
// NOTE(review): 263 x 355 presumably is lines x dots per line — confirm.
double Interface::videoFrequency() {
  const double tickRate = 2.*33513982;
  const int ticksPerFrame = 2*6 * 263*355;
  return tickRate / ticksPerFrame;
}
// Returns the audio sample rate in Hz: twice 33513982, divided by 2*1024
// ticks per sample.
double Interface::audioFrequency() {
  const double tickRate = 2.*33513982;
  const int ticksPerSample = 2 * 1024;
  return tickRate / ticksPerSample;
}
// Always reports that media is loaded; no state is consulted.
bool Interface::loaded() {
return true;
}
// Maps a media id to its group number:
//   0 - system images (BIOS, firmware, clock) and anything unrecognised
//   1 - slot 1 media
//   2 - slot 2 media
unsigned Interface::group(unsigned id) {
  switch(id) {
  case Slot1ROM:
  case Slot1EEPROM:
  case Slot1FRAM:
  case Slot1Flash:
    return 1;

  case Slot2ROM:
  case Slot2RAM:
  case Slot2SRAM:
  case Slot2EEPROM:
  case Slot2FRAM:
  case Slot2Flash:
    return 2;

  default:
    return 0;
  }
}
// Loads media described by an XML manifest.
//
// For id == NintendoDS this:
//   1. reads the system manifest (manifest.xml in the System path) and issues
//      load requests for both BIOS images, the firmware and the clock;
//   2. parses the game manifest (falling back to a default one when empty)
//      and sets up the slot-1 card, its save device and optional IR bridge.
// For any id, blank firmware/BIOS images are installed afterwards if none
// were loaded, because the rest of the emulator would crash without them.
void Interface::load(unsigned id, const string& manifest) {
  //print(manifest,"\n");
  string syspath = interface->path(System);

  if(id == NintendoDS) {
    gameManifest = manifest;
    systemManifest.readfile({syspath, "manifest.xml"});
    XML::Document sysdoc(systemManifest);

    if(!sysdoc["system"].exists()) {
      interface->notify("manifest.xml not found");
    }
    else {
      auto &sys = sysdoc["system"];

      string arm7BiosFile = sys["arm7"]["bios"]["data"].data;
      string arm9BiosFile = sys["arm9"]["bios"]["data"].data;
      string firmwareFile = sys["flash"]["data"].data;
      string clockXmlFile = sys["rtc"]["data"].data;

      if(!file::exists({syspath, arm7BiosFile})) interface->notify("arm7 bios not found");
      if(!file::exists({syspath, arm9BiosFile})) interface->notify("arm9 bios not found");
      if(!file::exists({syspath, firmwareFile})) interface->notify("firmware not found");

      interface->loadRequest(ARM7BIOS, arm7BiosFile);
      interface->loadRequest(ARM9BIOS, arm9BiosFile);
      interface->loadRequest(Firmware, firmwareFile);
      interface->loadRequest(Clock, clockXmlFile);
    }

    if(gameManifest == "") {
      // Default to 1GB ROM with no save. Since GameCard does bounds-check,
      // we only allocate enough to hold the stream passed in.
      gameManifest =
        "<cartridge>"
        "<slot1>"
        "<rom name=\"rom\" size=\"0x40000000\" />"
        "</slot1>"
        "</cartridge>";
    }

    // <cartridge>
    XML::Document document(gameManifest);
    //if(document.error != "") {
    //  print(document.error,"\n");
    //  return;
    //}
    auto &eslot1 = document["cartridge"]["slot1"];

    // <slot1>
    if(eslot1.exists()) {
      // <rom name=.. id=.. size=.. sha256=.. />
      if(eslot1["rom"].exists()) {
        string file = string(eslot1["rom"]["name"].data);
        uint32 size = numeral(eslot1["rom"]["size"].data);
        uint32 chipId = numeral(eslot1["rom"]["id"].data);

        print("Loading slot-1 ROM (", file, ").. ");
        slot1.load(new GameCard(chipId));
        interface->loadRequest(Slot1ROM, file);
        print("\n");
      }

      // <save name=.. type=EEPROM,FRAM,Flash size=.. [page|id=..] />
      if(eslot1["save"].exists()) {
        string file = string(eslot1["save"]["name"].data);
        string type = string(eslot1["save"]["type"].data);
        uint32 size = numeral(eslot1["save"]["size"].data);
        uint32 psize = numeral(eslot1["save"]["page"].data);   // EEPROM only
        uint32 chipId = numeral(eslot1["save"]["id"].data);    // Flash only
        unsigned id = 0;

        // A save device can only be attached to a card that exists.
        if(auto card = slot1.card) {
          if(type == "EEPROM") id = Slot1EEPROM, card->spi = new EEPROM(size, psize);
          if(type == "Flash")  id = Slot1Flash,  card->spi = new Flash(size, chipId);
          if(type == "FRAM")   id = Slot1FRAM,   card->spi = new FRAM(size);
        }
        if(id) {
          print("Loading slot-1 ",eslot1["save"]["type"].data," (", file, ").. ");
          interface->loadRequest(id, file);
          print("\n");
        }
      }

      // <irport />
      if(eslot1["irport"].exists()) {
        // Required by Pokemon HG/SS and B/W. These cards have an infrared port
        // built-in. Since there's only one /CS, access to flash memory passes
        // through the infrared bridge via an override command.
        //
        // Guard against a manifest that declares <irport/> without <rom/>:
        // in that case no card was created and there is nothing to wrap.
        if(auto card = slot1.card)
          card->spi = new IRPort(card->spi);
      }
    }
  }

  // Provide blank images if needed (we'd crash otherwise).
  if(system.firmware.size == 0) {
    system.firmware.size = 0x40000;
    system.firmware.data = new uint8[system.firmware.size];
    memset(system.firmware.data, 0xff, system.firmware.size);
  }
  if(arm7.bios.size == 0) {
    arm7.bios.size = 4;
    arm7.bios.data = new uint32[arm7.bios.size/4];
    memset(arm7.bios.data, 0xef, arm7.bios.size);
  }
  if(arm9.bios.size == 0) {
    arm9.bios.size = 4;
    arm9.bios.data = new uint32[arm9.bios.size/4];
    memset(arm9.bios.data, 0xef, arm9.bios.size);
  }
}
// Receives the contents of a previously requested file.
//
// System images (BIOS, firmware, clock) are forwarded to `system`. Slot-1
// ROM data replaces the card's ROM buffer; slot-1 save data is copied into
// the card's save chip (looking through an IR bridge if one is installed).
// `markup` is not used.
void Interface::load(unsigned id, const stream& memory, const string& markup) {
  if(id == ARM7BIOS) return system.loadArm7Bios(memory);
  if(id == ARM9BIOS) return system.loadArm9Bios(memory);
  if(id == Firmware) return system.loadFirmware(memory);
  if(id == Clock)    return system.loadRTC(memory);

  XML::Document document(gameManifest);
  auto &eslot1 = document["cartridge"]["slot1"];
  if(!eslot1.exists()) return;

  if(eslot1["rom"].exists() && id == Slot1ROM) {
    string hash = string(eslot1["rom"]["sha256"].data);
    uint32 size = numeral(eslot1["rom"]["size"].data);

    if(auto card = slot1.card) {
      // The buffer is allocated with new[] below, so it must be released
      // with delete[] (plain delete here was undefined behaviour).
      delete[] card->rom.data;

      // Only allocate as much as the stream actually provides.
      card->rom.size = min(memory.size(), size);
      card->rom.data = new uint8[card->rom.size];
      card->size = bit::round(size);

      memory.read(card->rom.data, card->rom.size);

      if(hash && hash != card->sha256) print("SHA256 mismatch.");
      else                             print("OK.");
    }
  }

  if(eslot1["save"].exists() && slot1.card && (id==Slot1EEPROM || id==Slot1Flash || id==Slot1FRAM)) {
    uint32 size = numeral(eslot1["save"]["size"].data);

    if(auto save = slot1.card->spi) {
      // The save chip may sit behind the infrared bridge.
      if(auto irport = dynamic_cast<IRPort*>(save))
        save = irport->slave;

      if(auto media = dynamic_cast<StaticMemory*>(save)) {
        memory.read(media->data, min(media->size, memory.size()));
        print("OK.");
      }
    }
  }
}
void Interface::save() {
XML::Document sysdoc(systemManifest);
if(sysdoc["system"].exists()) {
auto &sys = sysdoc["system"];
interface->saveRequest(Firmware, sys["flash"]["data"].data);
interface->saveRequest(Clock, sys["rtc"]["data"].data);
}
XML::Document document(gameManifest);
auto &eslot1 = document["cartridge"]["slot1"];
if(eslot1.exists() && eslot1["save"].exists()) {
string file = eslot1["save"]["name"].data;
string type = eslot1["save"]["type"].data;
print("Saving slot-1 ",type,".. ");
if(type == "EEPROM") interface->saveRequest(Slot1EEPROM, file);
if(type == "Flash") interface->saveRequest(Slot1Flash, file);
if(type == "FRAM") interface->saveRequest(Slot1FRAM, file);
print("\n");
}
}
void Interface::save(unsigned id, const stream& memory) {
if(id == Firmware) return system.saveFirmware(memory);
if(id == Clock) return system.saveRTC(memory);
if(slot1.card && (id == Slot1EEPROM || id == Slot1Flash || id == Slot1FRAM)) {
if(auto save = slot1.card->spi) {
if(auto irport = dynamic_cast<IRPort*>(save))
save = irport->slave;
if(auto media = dynamic_cast<StaticMemory*>(save)) {
memory.write(media->data, media->size);
print("OK.");
}
}
}
}
void Interface::unload() {
struct timeval tv;
gettimeofday(&tv, nullptr);
system.clock.freeze(tv.tv_sec, tv.tv_usec);
system.running = false;
save();
delete slot1.unload();
}
// Forwards the power-on request to the emulated system.
void Interface::power() {
system.power();
}
// Runs the emulated system. On the first call after a stop, the emulated
// clock is thawed at the current host time before execution resumes.
void Interface::run() {
  const bool resuming = !system.running;
  if(resuming) {
    struct timeval now;
    gettimeofday(&now, nullptr);
    system.clock.thaw(now.tv_sec, now.tv_usec);
    system.running = true;
  }
  system.run();
}
// Stub: returns a default-constructed serializer; no emulator state is
// captured here.
serializer Interface::serialize() {
return {};
}
// Stub: ignores the serializer and always returns false.
bool Interface::unserialize(serializer &s) {
return false;
}
void Interface::paletteUpdate() {
for(unsigned color = 0; color < 01000000; color++) {
uint16 r = uint6(color>> 0) * 010101/4;
uint16 g = uint6(color>> 6) * 010101/4;
uint16 b = uint6(color>>12) * 010101/4;
palette[color] = interface->videoColor(color, r,g,b);
}
}
void Interface::videoRefresh(const uint32_t *data, unsigned pitch, unsigned width, unsigned height) {
static uint32_t pixels[256*384];
for(unsigned y = 0; y < 384; y++) {
const uint32 *src = &data[y*pitch/4];
uint32 *dest = &pixels[y*256];
for(unsigned x = 0; x < 256; x++)
dest[x] = palette[src[x] & 0777777];
}
return bind->videoRefresh(pixels, 256*4, 256, 384);
}
// Registers this instance as the global `interface` and describes the system
// to the frontend: display information, media type, input devices (built-in
// and slot peripherals), ports, and which devices may be plugged into which
// port.
Interface::Interface() {
interface = this;
information.name = "Nintendo DS";
information.width = 256;
information.height = 384;
information.aspectRatio = 1.0;
information.overscan = false;
information.resettable = false;
information.capability.states = false;
information.capability.cheats = false;
media.append({NintendoDS, "Nintendo DS", "nds"});
//media.append({NintendoDS, "Nintendo DS", "nds", "Menu"});
// Input devices and ports
// The second Device field is a bitmask of the ports the device fits into.
emptySlot = Device{ID::Device::Empty, 1<<ID::Port::Slot1|1<<ID::Port::Slot2, "Empty"};
// Slot 1 devices
gameCard = Device{ID::Device::GameCard, 1<<ID::Port::Slot1, "Game Card"};
// Slot 2 devices
gamePak = Device{ID::Device::GamePak, 1<<ID::Port::Slot2, "Game Pak"};
expansionPak = Device{ID::Device::ExpansionPak, 1<<ID::Port::Slot2, "Expansion Pak"};
rumblePak = Device{ID::Device::RumblePak, 1<<ID::Port::Slot2, "Rumble Pak"};
guitarGrip = Device{ID::Device::GuitarGrip, 1<<ID::Port::Slot2, "Guitar Grip"};
piano = Device{ID::Device::Piano, 1<<ID::Port::Slot2, "Piano"};
paddle = Device{ID::Device::Paddle, 1<<ID::Port::Slot2, "Paddle"};
// Each input is {id, type, name}.
// NOTE(review): the type values 0/1/2 presumably mean digital / relative /
// absolute analog — confirm against Emulator::Interface.
guitarGrip.input.append({ID::GuitarGrip::Green, 0, "Green"});
guitarGrip.input.append({ID::GuitarGrip::Red, 0, "Red"});
guitarGrip.input.append({ID::GuitarGrip::Yellow, 0, "Yellow"});
guitarGrip.input.append({ID::GuitarGrip::Blue, 0, "Blue"});
guitarGrip.order.append(0,1,2,3);
piano.input.append({ID::Piano::C, 0, "C"});
piano.input.append({ID::Piano::Cs, 0, "C#"});
piano.input.append({ID::Piano::D, 0, "D"});
piano.input.append({ID::Piano::Ds, 0, "D#"});
piano.input.append({ID::Piano::E, 0, "E"});
piano.input.append({ID::Piano::F, 0, "F"});
piano.input.append({ID::Piano::Fs, 0, "F#"});
piano.input.append({ID::Piano::G, 0, "G"});
piano.input.append({ID::Piano::Gs, 0, "G#"});
piano.input.append({ID::Piano::A, 0, "A"});
piano.input.append({ID::Piano::As, 0, "A#"});
piano.input.append({ID::Piano::B, 0, "B"});
piano.input.append({ID::Piano::Ch, 0, "C - high"});
piano.order.append(0,1,2,3,4,5,6,7,8,9,10,11,12);
paddle.input.append({ID::Paddle::Rotation, 1, "Rotation"});
paddle.order.append(0);
// Internal inputs
buttons = Device{ID::Device::BuiltIn, 1<<ID::Port::Buttons, "Built-in"};
sensors = Device{ID::Device::BuiltIn, 1<<ID::Port::Sensors, "Built-in"};
touchpad = Device{ID::Device::BuiltIn, 1<<ID::Port::Touchpad, "Built-in"};
// NOTE(review): Headphones is appended before Temperature here, whereas the
// ID::Sensors enum declares Temperature first — confirm that consumers look
// inputs up by id rather than by append position.
sensors.input.append({ID::Sensors::Lid, 0, "Lid closed"});
sensors.input.append({ID::Sensors::Battery, 0, "Low battery"});
sensors.input.append({ID::Sensors::Mains, 0, "Charging"});
sensors.input.append({ID::Sensors::Headphones, 0, "Headphones"}); // ?
sensors.input.append({ID::Sensors::Temperature, 2, "Temperature"});
sensors.input.append({ID::Sensors::FIQ, 0, "FIQ - debug"});
sensors.input.append({ID::Sensors::Reset, 0, "Reset - debug"});
sensors.order.append(0,1,2,3,4,5,6);
buttons.input.append({ID::Buttons::A, 0, "A"});
buttons.input.append({ID::Buttons::B, 0, "B"});
buttons.input.append({ID::Buttons::Select, 0, "Select"});
buttons.input.append({ID::Buttons::Start, 0, "Start"});
buttons.input.append({ID::Buttons::Right, 0, "Right"});
buttons.input.append({ID::Buttons::Left, 0, "Left"});
buttons.input.append({ID::Buttons::Up, 0, "Up"});
buttons.input.append({ID::Buttons::Down, 0, "Down"});
buttons.input.append({ID::Buttons::R, 0, "R"});
buttons.input.append({ID::Buttons::L, 0, "L"});
buttons.input.append({ID::Buttons::X, 0, "X"});
buttons.input.append({ID::Buttons::Y, 0, "Y"});
buttons.input.append({ID::Buttons::C, 0, "C - debug"}); // unconnected inputs
buttons.input.append({ID::Buttons::D, 0, "D - debug"}); // <- present on debug units?
buttons.input.append({ID::Buttons::Z, 0, "Z - debug"}); //
buttons.input.append({ID::Buttons::W, 0, "W - debug"}); //
// The order list indexes the inputs appended above: Up, Down, Left, Right,
// Select, Start, B, A, X, Y, L, R, then the four debug inputs.
buttons.order.append(6,7,5,4,2,3, 1,0,10,11,9,8, 12,13,14,15);
touchpad.input.append({ID::Touchpad::X, 2, "X position"});
touchpad.input.append({ID::Touchpad::Y, 2, "Y position"});
touchpad.input.append({ID::Touchpad::Pressure, 2, "Pressure"});// (analog)"});
//touchpad.input.append({ID::Touchpad::PressureD, 0, "Pressure (digital)"});
touchpad.order.append(0,1,2);//,3);
// Ports
port.append({ID::Port::Buttons, "Buttons"});
port.append({ID::Port::Sensors, "Sensors"});
port.append({ID::Port::Touchpad, "Touchpad"});
port.append({ID::Port::Slot1, "Slot 1"});
port.append({ID::Port::Slot2, "Slot 2"});
device.append(buttons);
device.append(sensors);
device.append(touchpad);
device.append(emptySlot);
device.append(gameCard);
device.append(gamePak);
device.append(expansionPak);
device.append(rumblePak);
device.append(guitarGrip);
device.append(piano);
device.append(paddle);
// Attach every device to each port whose bit is set in the device's portmask.
for(auto &port : this->port)
for(auto &device : this->device)
if(device.portmask & 1<<port.id)
port.device.append(device);
}
}

View File

@ -1,91 +0,0 @@
#ifndef NDS_HPP
namespace NintendoDS {
#endif
// Numeric identifiers for ports, devices and inputs. Each nested struct only
// scopes an anonymous enum, so values are written as ID::Port::Slot1, etc.
struct ID {
// Ports; the value is also the bit position used in device port masks.
struct Port { enum{
Buttons, Sensors, Touchpad,
Slot1, Slot2,
}; };
// Device ids. Values are reused across ports: Empty and BuiltIn are both 0,
// and GameCard (slot 1) and GamePak (slot 2) are both 1.
struct Device { enum{
Empty=0, BuiltIn=0,
/* Slot1 */ GameCard=1,
/* Slot2 */ GamePak=1, ExpansionPak, RumblePak, GuitarGrip, Piano, Paddle,
};};
// Internal inputs
struct Sensors { enum{ Lid, Battery, Mains, Temperature, Headphones, FIQ, Reset }; };
struct Buttons { enum{ A,B, Select,Start, Right,Left,Up,Down, R,L, X,Y,C,D,Z,W }; };
struct Touchpad { enum{ X, Y, Pressure, PressureD }; };
// Slot 2 device inputs
struct GuitarGrip { enum{ Green, Red, Yellow, Blue }; };
struct Piano { enum{ C, Cs, D, Ds, E, F, Fs, G, Gs, A, As, B, Ch }; };
struct Paddle { enum{ Rotation }; };
};
// Frontend-facing interface of the Nintendo DS core.
struct Interface : Emulator::Interface {
double videoFrequency();
double audioFrequency();
// Media ids used in load/save requests. group() maps them to 0 (system),
// 1 (slot 1) or 2 (slot 2).
enum {
System,
NintendoDS,
ARM7BIOS, ARM9BIOS, Firmware, Clock,
Slot1ROM, Slot1EEPROM, Slot1FRAM, Slot1Flash,
Slot2ROM, Slot2RAM, Slot2SRAM, Slot2EEPROM, Slot2FRAM, Slot2Flash,
};
unsigned group(unsigned id);
bool loaded();
// load(id, manifest) sets media up from an XML manifest;
// load(id, memory, markup) receives the contents of a requested file.
void load(unsigned id, const string &manifest = "");
void load(unsigned id, const stream &memory, const string &markup = "");
// save() issues save requests; save(id, memory) writes one of them out.
void save();
void save(unsigned id, const stream &memory);
void unload();
void power();
void run();
void videoRefresh(const uint32_t *data, unsigned pitch, unsigned width, unsigned height);
serializer serialize();
bool unserialize(serializer&);
void paletteUpdate();
Interface();
private:
// Raw XML text of the system and game manifests, kept so they can be
// re-parsed when loading and saving.
string systemManifest;
string gameManifest;
vector<Device> device;
// System inputs
Device sensors;
Device buttons;
Device touchpad;
Device emptySlot;
// Slot 1
Device gameCard;
// Slot 2 peripherals
Device gamePak; // for linking Pokémon D/P/HG/SS and R/S/E
Device expansionPak; // Opera browser, homebrew
Device rumblePak; // Metroid Pinball, others
Device guitarGrip; // Guitar Hero: On Tour
Device piano; // Easy Piano
Device paddle; // Arkanoid
// Lookup table from 18-bit color (octal 01000000 entries) to frontend color.
unsigned palette[01000000];
};
extern Interface *interface;
#ifndef NDS_HPP
}
#endif

View File

@ -1,96 +0,0 @@
// Nothing to release here; the data buffer is freed by ~StaticMemory.
EEPROM::~EEPROM() { }
// Creates a blank EEPROM of `esize` bytes, every byte set to 0xff.
// `psize` is the write page size; 0 lets power() choose one from the size.
EEPROM::EEPROM(uint32 esize, uint32 psize) {
  size = esize;
  pageSize = psize;
  data = new uint8[size];
  memset(data, 0xff, size);
}
// Creates an EEPROM of `esize` bytes preloaded from `memory`. Bytes the
// stream does not cover stay 0xff; excess stream data is ignored.
EEPROM::EEPROM(const stream& memory, uint32 esize, uint32 psize) {
  size = esize;
  pageSize = psize;
  data = new uint8[size];
  memset(data, 0xff, size);

  auto available = min(memory.size(), size);
  memory.read(data, available);
}
// Resets the command state and write-enable latch. If no page size was given
// at construction, one is chosen from the capacity:
//   <= 0x200 bytes -> 0x10, <= 0x2000 bytes -> 0x20, otherwise 0x80.
void EEPROM::power() {
  command = 0;
  writeEnable = false;

  if(pageSize) return;
  pageSize = size <= 0x200?  0x10
           : size <= 0x2000? 0x20
           :                 0x80;
}
void EEPROM::select(bool state) {
if(state) return;
if(command && writeEnable) {
// Finish some previously submitted commands
if(command == 0x02 || command == 0x0a) {
//print("finishing write to ",hex<6>(page),"\n");
address = page;
// Write page
for(unsigned n = 0; n < count; n++) {
if(!(address & pageSize-1)) address = page;
this->data[address++] = buffer[n];
}
writeEnable = false;
}
}
// Reset and wait for a new command
command = 0;
}
// Exchanges one byte with the EEPROM and returns the byte it shifts out.
//
// The first byte after select is the command; read/write commands are
// followed by 1 address byte (size <= 512) or 2 (larger parts). Commands:
//   0x01 write status   0x05 read status
//   0x04 write disable  0x06 write enable
//   0x03 / 0x0b read    0x02 / 0x0a write (0x0a/0x0b address the upper half
//                                          of 512-byte parts)
// Written bytes are only buffered here; select() commits them.
uint8 EEPROM::transfer(uint8 data) {
  if(command == 0) {
    command = data;
    address = count = 0;
    addrCount = 0;
    //print("eeprom: cmd ",hex<2>(command),"\n");

    if(command == 0x02 || command == 0x0a) addrCount = 1 + (size > 512);
    if(command == 0x03 || command == 0x0b) addrCount = 1 + (size > 512);

    if(command == 0x0a && size == 512) address = 1;  // read/write 2nd page
    if(command == 0x0b && size == 512) address = 1;  // (512-byte only)

    if(command == 0x04) writeEnable = false;
    if(command == 0x06) writeEnable = true;
    return 0xff;
  }
  if(addrCount) {  // Input address
    addrCount--;
    address = address<<8 | data;
    page = address;
    return 0xff;
  }
  if(command == 0x01) {
    return 0xff;  // Write status
  }
  if(command == 0x05) {  // Read status
    return (size > 512? 0x00 : 0xf0) | writeEnable<<1; // | writing<<0;
  }
  if(command == 0x03 || command == 0x0b) {  // Read data
    //if(address == page) print("eeprom: read ",hex<6>(address),"\n");
    if(!size) return 0xff;
    // Use >= rather than ==: an address supplied past the end must wrap as
    // well, otherwise the read runs off the buffer.
    if(address >= size) address = 0;
    return this->data[address++];
  }
  if(command == 0x02 || command == 0x0a) {  // Write data
    //if(address == page) print("eeprom: write ",hex<6>(address),"\n");
    // Also cap at the buffer capacity in case pageSize was configured larger.
    if(count < pageSize && count < sizeof(buffer)) {
      if(!(address & pageSize-1)) address = page;
      buffer[count++] = data;
      // Shift out the previous contents, guarding the buffer bounds.
      uint8 previous = address < size? this->data[address] : 0xff;
      address++;
      return previous;
    }
  }
  return 0xff;
}

View File

@ -1,17 +0,0 @@
// SPI EEPROM save chip. Storage (data/size) comes from StaticMemory.
struct EEPROM : SPIDevice, StaticMemory {
~EEPROM();
EEPROM(uint32 esize, uint32 psize);
EEPROM(const stream& memory, uint32 esize, uint32 psize);
void power();
void select(bool state);
uint8 transfer(uint8 data);
uint8 command;                   // current command byte; 0 = waiting for one
uint16 address, page, pageSize;  // running address, address as received, page size
unsigned addrCount;              // address bytes still expected
unsigned count;                  // bytes collected in buffer for the pending write
uint8 buffer[256];               // pending write data, committed in select()
bool writeEnable;                // set by command 0x06, cleared by 0x04 / after a write
};

View File

@ -1,137 +0,0 @@
// Nothing to release here; the data buffer is freed by ~StaticMemory.
Flash::~Flash() { }
// Creates an erased (all 0xff) flash chip of `esize` bytes that reports `id`
// in response to the Read ID command.
Flash::Flash(uint32 esize, uint32 id) {
  size = esize;
  data = new uint8[size];
  memset(data, 0xff, size);
  this->id = id;
}
// Creates a flash chip of `esize` bytes preloaded from `memory`. Bytes the
// stream does not cover stay 0xff; excess stream data is ignored.
Flash::Flash(const stream& memory, uint32 esize, uint32 id) {
  size = esize;
  data = new uint8[size];
  memset(data, 0xff, size);

  auto available = min(memory.size(), size);
  memory.read(data, available);

  this->id = id;
}
// Puts the chip into its power-on state: awake, no command in progress,
// writes disabled.
void Flash::power() {
powered = true;
command = 0;
writeEnable = false;
}
void Flash::select(bool state) {
if(state) return;
//print("flash: deselect - cmd=",hex<2>(command)," wen=",writeEnable,"\n");
if(command && writeEnable) {
// Finish some previously submitted commands
if(command == 0x0a) {
// Read any remaining page into the buffer,
// so it's not lost during the erase cycle.
while(count < 256) {
buffer[count++] = data[address++];
if(!(address & 0xff)) address -= 0x100;
}
}
if(command == 0x0a || command == 0xdb || command == 0xd8) {
// Modify page / erase page / erase 64Kbyte sector
unsigned block = command == 0xd8? 0x10000 : 0x100;
address = page & ~(block-1);
// Erasing sets data bits to 1.
for(unsigned n = 0; n < block; n++)
data[address++] = 0xff;
writeEnable = false;
}
if(command == 0x0a || command == 0x02) {
// Modify page / write page
address = page;
//print("finishing write to ",hex<6>(page),"\n");
for(unsigned n = 0; n < 256; n++) {
// Writing can only clear bits.
data[address++] &= buffer[n];
if(!(address & 0xff)) address -= 0x100;
}
writeEnable = false;
}
}
// Reset and wait for a new command
command = 0;
}
// Exchanges one byte with the flash chip and returns the byte it shifts out.
//
// The first byte after select is the command. While powered down only 0xab
// (wake up) has an effect. Erase, write and read commands are followed by a
// 3-byte address; 0x0b additionally inserts one dummy byte before data.
// Written bytes are only buffered here; select() applies them.
uint8 Flash::transfer(uint8 data) {
if(command == 0) {
command = data;
count = 0;
address = 0;
addrCount = 0;
//print("flash: cmd ",hex<2>(command),"\n");
if(command == 0xab) powered = true; // Wake up
if(powered) {
if(command == 0x9f) count = 3; // Read ID
if(command == 0xb9) powered = false; // Power down
if(command == 0x06) writeEnable = true; // Write enable
if(command == 0x04) writeEnable = false; // Write disable
if(command == 0xdb || command == 0xd8) addrCount = 3; // Erase page / sector
if(command == 0x02 || command == 0x0a) addrCount = 3; // Write data
if(command == 0x03 || command == 0x0b) addrCount = 3; // Read data
}
return 0;
}
if(!powered)
return 0;
if(addrCount) {
// Input address
addrCount--;
address = address<<8 | data;
page = address;
if(command == 0x0b) count = 1; // Dummy byte between address and data
return 0;
}
if(command == 0x9f) {
// Read ID
//print("flash: read id\n");
// Shifts the id out most significant byte first; once count reaches 0 the
// lowest byte keeps being returned.
if(count) count--;
return id >> 8*count;
}
if(command == 0x05) {
// Read status
//print("flash: read status\n");
return writeEnable<<1; // | writing<<0;
}
if(command == 0x03 || command == 0x0b) {
// Read data
//if(address == page) print("flash: read ",hex<6>(address),"\n");
// Consume the pending dummy byte (0x0b only) before returning data.
if(count) { count--; return 0; }
if(address >= size) address = 0;
return this->data[address++];
}
if(command == 0x02 || command == 0x0a) {
// Write page / modify page
//if(address == page) print("flash: write ",hex<6>(address),"\n");
// At most 256 bytes are buffered; the address wraps within its 256-byte
// page. The previous contents of each location are returned.
if(count < 0x100) {
if(address >= size) address = 0;
buffer[count++] = data;
uint8 r = this->data[address++];
if(!(address & 0xff)) address -= 0x100;
return r;
}
return 0;
}
return 0;
}

View File

@ -1,20 +0,0 @@
// SPI flash save chip. Storage (data/size) comes from StaticMemory.
struct Flash : SPIDevice, StaticMemory {
~Flash();
// NOTE: the default constructor leaves all members below uninitialised.
Flash() {}
Flash(uint32 esize, uint32 id);
Flash(const stream& memory, uint32 esize, uint32 id);
void power();
void select(bool state);
uint8 transfer(uint8 data);
uint24 id;             // value returned by the Read ID command (0x9f)
uint8 command;         // current command byte; 0 = waiting for one
uint24 address, page;  // running address, and address as received
unsigned addrCount;    // address bytes still expected
unsigned count;        // buffered bytes, or remaining id/dummy bytes on reads
uint8 buffer[256];     // pending page data, applied in select()
bool powered;          // false after Power down (0xb9) until Wake up (0xab)
bool writeEnable;      // set by 0x06, cleared by 0x04 / after erase or write
};

View File

@ -1,64 +0,0 @@
// Nothing to release here; the data buffer is freed by ~StaticMemory.
FRAM::~FRAM() { }
// Creates a blank FRAM of `esize` bytes, every byte set to 0xff.
FRAM::FRAM(uint32 esize) {
  size = esize;
  data = new uint8[size];
  memset(data, 0xff, size);
}
// Creates a FRAM of `esize` bytes preloaded from `memory`. Bytes the stream
// does not cover stay 0xff; excess stream data is ignored.
FRAM::FRAM(const stream& memory, uint32 esize) {
  size = esize;
  data = new uint8[size];
  memset(data, 0xff, size);

  auto available = min(memory.size(), size);
  memory.read(data, available);
}
// Resets the command state and clears the write-enable latch.
void FRAM::power() {
command = 0;
writeEnable = false;
}
// Handles a chip-select change. When `state` is false the current command
// ends: a write command (0x02) clears the write-enable latch, and the chip
// waits for a new command. Nothing happens when `state` is true.
void FRAM::select(bool state) {
  if(!state) {
    const bool wasWriting = command == 0x02;
    if(wasWriting) writeEnable = false;
    command = 0;
  }
}
// Exchanges one byte with the FRAM and returns the byte it shifts out.
//
// The first byte after select is the command; read (0x03) and write (0x02)
// are followed by a 2-byte address. Writes take effect immediately, byte by
// byte. Other commands: 0x01 write status, 0x05 read status, 0x04 write
// disable, 0x06 write enable.
// NOTE(review): writes are accepted regardless of writeEnable — confirm
// whether that is intended.
uint8 FRAM::transfer(uint8 data) {
  if(command == 0) {
    command = data;
    address = addrCount = 0;

    if(command == 0x02) addrCount = 2;
    if(command == 0x03) addrCount = 2;
    if(command == 0x04) writeEnable = false;
    if(command == 0x06) writeEnable = true;
    return 0xff;
  }
  if(addrCount) {  // Input address
    addrCount--;
    address = address<<8 | data;
    return 0xff;
  }
  if(command == 0x01) {
    return 0xff;  // Write status
  }
  if(command == 0x05) {  // Read status
    return writeEnable<<1; // | writing<<0;
  }
  if(command == 0x03 || command == 0x02) {
    if(!size) return 0xff;
    // Use >= rather than ==: an address supplied past the end must wrap as
    // well, otherwise the access runs off the buffer.
    if(address >= size) address = 0;

    if(command == 0x03) return this->data[address++];         // Read data
    return this->data[address++] = data;                      // Write data
  }
  return 0xff;
}

View File

@ -1,16 +0,0 @@
// SPI FRAM save chip. Storage (data/size) comes from StaticMemory.
struct FRAM : SPIDevice, StaticMemory {
~FRAM();
FRAM(uint32 esize);
FRAM(const stream& memory, uint32 esize);
void power();
void select(bool state);
uint8 transfer(uint8 data);
uint8 command;                   // current command byte; 0 = waiting for one
// NOTE(review): only `address` is referenced by the FRAM code in fram.cpp;
// `page`, `pageSize` and `buffer` look unused there — confirm before removing.
uint16 address, page, pageSize;
unsigned addrCount;              // address bytes still expected
uint8 buffer[256];
bool writeEnable;                // set by command 0x06, cleared by 0x04 / after a write
};

View File

@ -1,127 +0,0 @@
#include <nds/nds.hpp>
namespace NintendoDS {
// Out-of-line definition of the virtual destructor declared in Memory.
Memory::~Memory() {}
// Memory that reads as zero and ignores writes.
struct UnmappedMemory : Memory {
uint32 read(uint32 addr, uint32 size) { return 0u; }
void write(uint32 addr, uint32 size, uint32 word) {}
};
// File-local instance.
static UnmappedMemory unmappedMemory;
// Starts out empty; the owner assigns `data` (allocated with new[]) and `size`.
StaticMemory::StaticMemory() : data(nullptr), size(0u) {}

// Releases the buffer (delete[] on a null pointer is a no-op).
StaticMemory::~StaticMemory() { delete[] data; }

// Unchecked byte access.
uint8& StaticMemory::operator[](uint32 addr) {
  return data[addr];
}
// Reads a little-endian value of `size` bits (Byte, Half or Word). Half and
// Word accesses are aligned down to their natural boundary. Any other size
// reads as 0 (previously control fell off the end of the function, which is
// undefined behaviour).
uint32 StaticMemory::read(uint32 addr, uint32 size) {
  if(size == Byte) return (*this)[addr];

  if(size == Half) {
    addr &= ~1;
    return uint32((*this)[addr]) | uint32((*this)[addr+1])<<8;
  }
  if(size == Word) {
    addr &= ~3;
    return uint32((*this)[addr])
         | uint32((*this)[addr+1])<<8
         | uint32((*this)[addr+2])<<16
         | uint32((*this)[addr+3])<<24;
  }
  return 0u;
}
// Stores the low `size` bits of `word` in little-endian order. Half and Word
// accesses are aligned down to their natural boundary.
void StaticMemory::write(uint32 addr, uint32 size, uint32 word) {
  if(size == Half) addr &= ~1;
  if(size == Word) addr &= ~3;

  // 1 byte by default, 2 from Half upwards, 4 from Word upwards.
  unsigned bytes = 1;
  if(size >= Half) bytes = 2;
  if(size >= Word) bytes = 4;

  for(unsigned n = 0; n < bytes; n++)
    (*this)[addr+n] = word >> 8*n;
}
// Starts out empty; the owner assigns `data` (allocated with new[]) and `size`.
ByteMemory::ByteMemory() : data(nullptr), size(0u) {}

// Releases the buffer (delete[] on a null pointer is a no-op).
ByteMemory::~ByteMemory() { delete[] data; }

// Unchecked byte access.
uint8& ByteMemory::operator[](uint32 addr) {
  return data[addr];
}
// Reads the byte at `addr` and replicates it into all four byte lanes of the
// result, whatever `size` was requested.
uint32 ByteMemory::read(uint32 addr, uint32 size) {
  // Multiply as uint32: in the original the uint8 operand was promoted to
  // int, and bytes >= 0x80 overflowed the signed multiplication.
  uint32 byte = (*this)[addr];
  return byte * 0x01010101u;
}
// Stores the low 8 bits of `word` at `addr`; `size` is ignored.
void ByteMemory::write(uint32 addr, uint32 size, uint32 word) {
(*this)[addr] = word;
}
// Starts out empty; the owner assigns `data` (allocated with new[]) and `size`.
HalfMemory::HalfMemory() : data(nullptr), size(0u) {}

// Releases the buffer (delete[] on a null pointer is a no-op).
HalfMemory::~HalfMemory() { delete[] data; }

// Unchecked access to the halfword containing byte address `addr`.
uint16& HalfMemory::operator[](uint32 addr) {
  return data[addr>>1];
}
// Word reads return the two halfwords of the aligned word. Any other size
// returns the addressed halfword replicated into both halves of the result.
uint32 HalfMemory::read(uint32 addr, uint32 size) {
  if(size == Word) {
    addr &= ~3;
    uint32 lo = (*this)[addr];
    uint32 hi = (*this)[addr+2];
    return lo + (hi << 16);
  }
  // Multiply as uint32: in the original the uint16 operand was promoted to
  // int, and values >= 0x8000 overflowed the signed multiplication.
  uint32 half = (*this)[addr];
  return half * 0x00010001u;
}
// Word writes store both halfwords of the aligned word; any other size
// stores the low 16 bits of `word` into the addressed halfword.
void HalfMemory::write(uint32 addr, uint32 size, uint32 word) {
  if(size == Word) {
    addr &= ~3;
    (*this)[addr]   = word;
    (*this)[addr+2] = word>>16;
  }
  else {
    (*this)[addr] = word;
  }
}
// Starts out empty; the owner assigns `data` (allocated with new[]) and `size`.
WordMemory::WordMemory() : data(nullptr), size(0u) {}

// Releases the buffer (delete[] on a null pointer is a no-op).
WordMemory::~WordMemory() { delete[] data; }

// Unchecked access to the word containing byte address `addr`.
uint32& WordMemory::operator[](uint32 addr) {
  return data[addr>>2];
}
// Returns the whole word containing `addr`, whatever `size` was requested.
uint32 WordMemory::read(uint32 addr, uint32 size) {
return (*this)[addr];
}
// Word writes replace the whole word. Any other size replaces only the
// halfword selected by bit 1 of `addr`, taking the new bits from the same
// lane of `word`.
void WordMemory::write(uint32 addr, uint32 size, uint32 word) {
  uint32 &cell = (*this)[addr];

  if(size == Word) {
    cell = word;
    return;
  }
  // Merge: change only the bits under the halfword mask.
  cell ^= (cell ^ word) & 0xffff << 8*(addr & 2);
}
// Starts out empty; the owner assigns `data` (allocated with new[]) and `size`.
SDRAM::SDRAM() : data(nullptr), size(0u) {}

// Releases the buffer (delete[] on a null pointer is a no-op).
SDRAM::~SDRAM() { delete[] data; }

// Unchecked access to the halfword containing byte address `addr`.
uint16& SDRAM::operator[](uint32 addr) {
  return data[addr>>1];
}
// Word reads return the two halfwords of the aligned word. Any other size
// returns the addressed halfword replicated into both halves of the result.
uint32 SDRAM::read(uint32 addr, uint32 size) {
  if(size == Word) {
    addr &= ~3;
    uint32 lo = (*this)[addr];
    uint32 hi = (*this)[addr+2];
    return lo + (hi << 16);
  }
  // Multiply as uint32: in the original the uint16 operand was promoted to
  // int, and values >= 0x8000 overflowed the signed multiplication.
  uint32 half = (*this)[addr];
  return half * 0x00010001u;
}
// Stores `word` according to the access size:
//   Word - both halfwords of the aligned word
//   Half - the addressed halfword
//   Byte - only the byte lane selected by bit 0 of `addr`, taking the new
//          bits from the same lane of `word`
// Other sizes are ignored.
void SDRAM::write(uint32 addr, uint32 size, uint32 word) {
  switch(size) {
  case Word:
    addr &= ~3;
    (*this)[addr+0] = word;
    (*this)[addr+2] = word>>16;
    break;

  case Half:
    (*this)[addr] = word;
    break;

  case Byte: {
    uint16 &cell = (*this)[addr];
    cell ^= (cell ^ word) & 0xff << 8*(addr & 1);
    break;
  }
  }
}
// Starts out empty; the owner assigns `data` (allocated with new[]) and `size`.
SRAM::SRAM() : data(nullptr), size(0u) {}

// Releases the buffer (delete[] on a null pointer is a no-op).
SRAM::~SRAM() { delete[] data; }

// Unchecked access to the word containing byte address `addr`.
uint32& SRAM::operator[](uint32 addr) {
  return data[addr>>2];
}
// Returns the whole word containing `addr`, whatever `size` was requested.
uint32 SRAM::read(uint32 addr, uint32 size) {
return (*this)[addr];
}
// Stores `word` according to the access size:
//   Word - the whole word
//   Half - only the halfword lane selected by bit 1 of `addr`
//   Byte - only the byte lane selected by bits 1..0 of `addr`
// For partial writes the new bits are taken from the same lane of `word`.
// Other sizes are ignored.
void SRAM::write(uint32 addr, uint32 size, uint32 word) {
  uint32 &cell = (*this)[addr];

  switch(size) {
  case Word:
    cell = word;
    break;
  case Half:
    cell ^= (cell ^ word) & 0xffff << 8*(addr & 2);
    break;
  case Byte:
    cell ^= (cell ^ word) & 0xff << 8*(addr & 3);
    break;
  }
}
#include "eeprom.cpp"
#include "fram.cpp"
#include "flash.cpp"
}

View File

@ -1,76 +0,0 @@
// Abstract memory interface. `size` is the access width in bits
// (Byte, Half or Word); `addr` is a byte address.
struct Memory {
virtual ~Memory();
virtual uint32 read(uint32 addr, uint32 size) = 0;
virtual void write(uint32 addr, uint32 size, uint32 word) = 0;
};
// Byte-array memory accepting all access sizes, little-endian.
// Owns `data`: the destructor releases it with delete[], so it must be
// allocated with new[]. NOTE: the type is copyable but has no copy handling;
// copying an instance would lead to a double free.
struct StaticMemory : Memory {
uint8_t *data;
unsigned size;   // capacity in bytes
uint8& operator[](uint32 addr);
uint32 read(uint32 addr, uint32 size);
void write(uint32 addr, uint32 size, uint32 word);
StaticMemory();
~StaticMemory();
};
// Slot 2 SRAM - 8-bit; bytes only
// Reads return the addressed byte replicated across the result; writes store
// the low byte. Owns `data` (released with delete[]).
struct ByteMemory : Memory {
uint8_t *data;
unsigned size;
uint8& operator[](uint32 addr);
uint32 read(uint32 addr, uint32 size);
void write(uint32 addr, uint32 size, uint32 word);
ByteMemory();
~ByteMemory();
};
// VRAM, Palettes - 16-bit; halves and words
// operator[] takes a byte address and returns the halfword containing it.
// Owns `data` (released with delete[]).
struct HalfMemory : Memory {
uint16_t *data;
unsigned size;
uint16& operator[](uint32 addr);
uint32 read(uint32 addr, uint32 size);
void write(uint32 addr, uint32 size, uint32 word);
HalfMemory();
~HalfMemory();
};
// OAM, BIOS - 32-bit; halves and words
// operator[] takes a byte address and returns the word containing it.
// Owns `data` (released with delete[]).
struct WordMemory : Memory {
uint32_t *data;
unsigned size;
uint32& operator[](uint32 addr);
uint32 read(uint32 addr, uint32 size);
void write(uint32 addr, uint32 size, uint32 word);
WordMemory();
~WordMemory();
};
// EWRAM - 16-bit; all sizes
// operator[] takes a byte address and returns the halfword containing it;
// byte writes are merged into that halfword. Owns `data` (released with
// delete[]).
struct SDRAM : Memory {
uint16_t *data;
unsigned size;
uint16& operator[](uint32 addr);
uint32 read(uint32 addr, uint32 size);
void write(uint32 addr, uint32 size, uint32 word);
SDRAM();
~SDRAM();
};
// TCM, IWRAM - 32-bit; all sizes
// operator[] takes a byte address and returns the word containing it; byte
// and halfword writes are merged into that word. Owns `data` (released with
// delete[]).
struct SRAM : Memory {
uint32_t *data;
unsigned size;
uint32& operator[](uint32 addr);
uint32 read(uint32 addr, uint32 size);
void write(uint32 addr, uint32 size, uint32 word);
SRAM();
~SRAM();
};

View File

@ -1,29 +0,0 @@
#ifndef NDS_HPP
#define NDS_HPP
// dasShiny - Nintendo DS emulator
// Copyright (c) 2012 Cydrak
// License: GPLv3
#include <emulator/emulator.hpp>
#include <libco/libco.h>
namespace NintendoDS {
// Access widths in bits, passed as the `size` argument of Memory::read/write.
enum : unsigned {
Byte = 8, Half = 16, Word = 32
};
#include <nds/interface/interface.hpp>
#include <nds/memory/memory.hpp>
#include <nds/system/system.hpp>
#include <nds/cpu/cpu.hpp>
#include <nds/apu/apu.hpp>
#include <nds/ppu/ppu.hpp>
#include <nds/gpu/gpu.hpp>
#include <nds/video/video.hpp>
#include <nds/slot1/slot1.hpp>
#include <nds/slot2/slot2.hpp>
#include <nds/wifi/wifi.hpp>
}
#endif

View File

@ -1,251 +0,0 @@
void PPU::renderBgs(unsigned y) {
unsigned bpp[4] = {};
unsigned mapw[4], maph[4]; // tiled dimensions
unsigned affw[4], affh[4]; // Affine dimensions
unsigned bitw[4], bith[4]; // Bitmap dimensions
for(unsigned n = 0; n < 4; n++) {
// Tiled dimensions
mapw[n] = bg[n].size & 1? 64 : 32;
maph[n] = bg[n].size & 2? 64 : 32;
// Affine settings
affw[n] = 16 << bg[n].size;
affh[n] = 16 << bg[n].size;
bitw[n] = 128 << (bg[n].size - (bg[n].size >= 3)); // 128,256,512,512
bith[n] = 128 << (bg[n].size - (bg[n].size >= 2)); // 128,256,256,512
bpp[n] = 8 << (bg[n].tiles & 1);
}
if(video.line == 0) {
bg[2].linex = bg[2].originx;
bg[2].liney = bg[2].originy;
bg[3].linex = bg[3].originx;
bg[3].liney = bg[3].originy;
}
if(bgMode == 6) {
// Large 512 x 1024 x 8bpp bitmap - uses all 512K BG
// Supposedly 3D is still available on BG0, though
// you'd have no VRAM leftover for textures...
bitw[2] = 512 << (bg[2].size & 1);
bith[2] = 1024 >> (bg[2].size & 1);
renderBitmapBg(2, 8, bitw[2], bith[2], y);
}
else {
// BG0 is either tiled or 3D render output
// BG1 is always tiled
if(bg0FromGPU == 0) renderTiledBg (0, mapw[0], maph[0], y);
renderTiledBg (1, mapw[1], maph[1], y);
// BG2-3 are a selectable mix of:
// - Tiled: 16-bit screen blocks with tile attributes
// - GBA: 8-bit affine maps, only tile number
// - NDS: 16-bit affine maps with tile attributes;
// these also work as 8/16-bpp bitmaps
//
if(bgMode == 0) { // 2 x tiled
renderTiledBg (2, mapw[2], maph[2], y);
renderTiledBg (3, mapw[3], maph[3], y);
}
if(bgMode == 1) { // Tiled + GBA
renderTiledBg (2, mapw[2], maph[2], y);
renderAffineBg(3, 8, affw[3], affh[3], y);
}
if(bgMode == 2) { // 2 x GBA
renderAffineBg(2, 8, affw[2], affh[2], y);
renderAffineBg(3, 8, affw[3], affh[3], y);
}
if(bgMode == 3) { // Tiled + NDS
renderTiledBg (2, mapw[2], maph[2], y);
if(bg[3].depth == 0) renderAffineBg(3, 16, affw[3], affh[3], y);
if(bg[3].depth == 1) renderBitmapBg(3, bpp[3], bitw[3], bith[3], y);
}
if(bgMode == 4) { // GBA + NDS
renderAffineBg(2, 8, affw[2], affh[2], y);
if(bg[3].depth == 0) renderAffineBg(3, 16, affw[3], affh[3], y);
if(bg[3].depth == 1) renderBitmapBg(3, bpp[3], bitw[3], bith[3], y);
}
if(bgMode == 5) { // 2 x NDS
if(bg[2].depth == 0) renderAffineBg(2, 16, affw[2], affh[2], y);
if(bg[3].depth == 0) renderAffineBg(3, 16, affw[3], affh[3], y);
if(bg[2].depth == 1) renderBitmapBg(2, bpp[2], bitw[2], bith[2], y);
if(bg[3].depth == 1) renderBitmapBg(3, bpp[3], bitw[3], bith[3], y);
}
}
bg[2].linex += bg[2].transform.dx_dv;
bg[2].liney += bg[2].transform.dy_dv;
bg[3].linex += bg[3].transform.dx_dv;
bg[3].liney += bg[3].transform.dy_dv;
}
// Draws one scanline of a tiled background into the above/below line buffers.
//   no         - background index; also selects the layer id and window bit
//   mapw, maph - map size in tiles (renderBgs passes 32 or 64)
//   y          - scanline; added to the vertical scroll offset
void PPU::renderTiledBg(unsigned no, unsigned mapw, unsigned maph, unsigned y) {
auto &bg = this->bg[no];
if(bg.enable == false)
return;
// Sort key for this layer: priority in the top bits, then the layer id
uint32 flags = pxPriority*bg.priority + pxLayer*(2u+no);
unsigned line = bg.voffs + y;
unsigned finex = bg.hoffs % 8;
// Bias the buffer pointers by the fine scroll so whole tiles can be drawn;
// x == finex then lands on buffer index 8, where visible pixels start
auto above = &this->above[8 - finex];
auto below = &this->below[8 - finex];
if(blendAbove & 1<<no) flags |= pxBlendAbove;
if(blendBelow & 1<<no) flags |= pxBlendBelow;
unsigned mapx = bg.hoffs / 8;
unsigned mapy = line / 8;
// maddr counts 16-bit map entries (it is doubled when used as an address);
// each screen block holds 32x32 entries
unsigned maddr = 0x8000*bgMapBase + 32*32*bg.map + 32*(mapy%32) + (mapx%32);
// Step into the neighbouring screen block for 64-tile-wide/tall maps
if(maph > 32 && (mapy & 32)) maddr += 32*mapw;
if(mapw > 32 && (mapx & 32)) maddr += 32*32;
bool ext = bg.depth && bgLargePal;
unsigned depth = bg.depth? 0x100 : 0x10;
unsigned imask = depth-1;
unsigned extpalbase = 0x1000*no;
if(bg.affineWrap) // special case for BG0/BG1 - this bit
extpalbase |= 0x2000; // makes them share BG2/BG3's palettes
// 33 tiles: one more than the screen width to cover the fine-scroll overhang
for(unsigned x = 0; x < 256+8; ) {
uint16 attr = system.vmap.bg(which, 2*maddr>>14)[2*maddr];
int pal = (attr>>12)*depth + extpalbase;
uint1 vflip = attr>>11;
uint1 hflip = attr>>10;
uint10 tile = attr;
int row = line%8;
if(vflip) row ^= 7;
uint32 taddr = 0x10000*bgTileBase + 0x4000*bg.tiles + ((32*tile + 4*row) << bg.depth);
auto tref = &system.vmap.bg(which, taddr>>14)[taddr];
// Gather four 16-bit units of the tile row into one 64-bit value,
// lowest-indexed unit in the low bits
uint64 slice = 0;
for(int n = 3; n >= 0; --n)
slice = slice<<16 | tref[n];
// A flipped tile is drawn right-to-left: start 7 pixels in and step back
int dir = +1;
if(hflip) dir = -1, x += 7;
for(unsigned n = 0; n < 8; n++, x += dir) {
if(window[x + 8 - finex] & 1<<no) {
// Colour index 0 is transparent
if(unsigned index = slice & imask) {
uint32 bgr = bgPal[pal+index & 0xff];
if(ext) {
// Extended palette: fetch a 15-bit colour and widen each channel to the
// 6-bit fields of the internal pixel format
bgr = system.vmap.bgpal[which][pal+index >> 13][pal+index << 1];
bgr = (bgr<<3 & 0760000) | (bgr<<2 & 0007600) | (bgr<<1 & 0000076);
}
// Keep the two front-most pixels seen so far (smaller key = in front)
if(flags < above[x]) below[x] = above[x], above[x] = flags + bgr;
else if(flags < below[x]) below[x] = flags + bgr;
}
}
slice >>= (4 << bg.depth);
}
// After a flipped tile x sits one before the tile start; move to the next tile
if(hflip) x += 9;
// Leaving the right edge of a 32-entry row: wrap within the screen block
if((++maddr & 31) == 0) {
maddr -= 32;
if(mapw > 32) // wrap to next screen
maddr += mapx & 32? -32*32 : +32*32;
}
}
}
// Draws one scanline of an affine (rotated/scaled) tile map.
//   no         - background index (renderBgs only passes 2 or 3)
//   mapDepth   - bits per map entry: 8 (tile number only) or 16 (with attributes)
//   mapW, mapH - map size in tiles
//   y          - unused here; the source position comes from bg.linex/liney
void PPU::renderAffineBg(unsigned no, unsigned mapDepth, unsigned mapW, unsigned mapH, unsigned y) {
auto &bg = this->bg[no];
if(bg.enable == false)
return;
bool ext = mapDepth==16 && bgLargePal;
// Sort key for this layer: priority in the top bits, then the layer id
uint32 flags = pxPriority*bg.priority + pxLayer*(2+no);
// Source coordinates for the line, with 8 fractional bits (see the >>8 below)
int32 fx = bg.linex;
int32 fy = bg.liney;
if(blendAbove & 1<<no) flags |= pxBlendAbove;
if(blendBelow & 1<<no) flags |= pxBlendBelow;
// Visible pixels occupy buffer indices 8..263
for(unsigned x = 8; x < 256+8; x++) {
if(window[x] & 1<<no) {
unsigned mx = fx >> 8;
unsigned my = fy >> 8;
if(bg.affineWrap)
mx &= 8*mapW-1, my &= 8*mapH-1;
// Negative coordinates become huge unsigned values and fail this test too
if(mx < 8*mapW && my < 8*mapH) {
uint32 addr = 0x10000*bgMapBase + 2*32*32*bg.map + ((mapW*(my/8) + (mx/8)) << (mapDepth == 16));
uint16 attr = system.vmap.bg(which, addr>>14)[addr];
if(mapDepth == 8) {
// no special handling - pal, vflip, hflip will all be 0
attr = attr >> 8*(mx/8 & 1) & 0xff;
}
int pal = (attr>>12)*0x100 + no*0x1000;
uint1 vflip = attr>>11;
uint1 hflip = attr>>10;
uint10 tile = attr;
int row = my%8 ^ 7*vflip;
int col = mx%8 ^ 7*hflip;
// 64 bytes per tile: one byte per pixel
uint32 taddr = 0x10000*bgTileBase + 0x4000*bg.tiles + 64*tile + 8*row + col;
uint8 index = system.vmap.bg(which, taddr>>14)[taddr] >> 8*(col & 1);
// Colour index 0 is transparent
if(index) {
uint32 bgr = bgPal[pal+index & 0xff];
if(ext) {
// Extended palette: widen the 15-bit colour to the internal 6-bit fields
bgr = system.vmap.bgpal[which][pal>>13][pal+index << 1];
bgr = (bgr<<3 & 0760000) | (bgr<<2 & 0007600) | (bgr<<1 & 0000076);
}
// Keep the two front-most pixels seen so far (smaller key = in front)
if(flags < above[x]) below[x] = above[x], above[x] = flags + bgr;
else if(flags < below[x]) below[x] = flags + bgr;
}
}
}
// Advance even for window-masked pixels so the mapping stays aligned
fx += bg.transform.dx_dh;
fy += bg.transform.dy_dh;
}
}
// Draws one scanline of an affine-mapped bitmap background.
//   no         - background index
//   mapDepth   - bits per pixel: 8 (palette index) or 16 (direct colour)
//   mapW, mapH - bitmap size in pixels
//   y          - unused here; the source position comes from bg.linex/liney
void PPU::renderBitmapBg(unsigned no, unsigned mapDepth, unsigned mapW, unsigned mapH, unsigned y) {
auto &bg = this->bg[no];
if(bg.enable == false)
return;
// Sort key for this layer: priority in the top bits, then the layer id
uint32 flags = pxPriority*bg.priority + pxLayer*(2+no);
// Source coordinates for the line, with 8 fractional bits (see the >>8 below)
int32 fx = bg.linex;
int32 fy = bg.liney;
if(blendAbove & 1<<no) flags |= pxBlendAbove;
if(blendBelow & 1<<no) flags |= pxBlendBelow;
// Visible pixels occupy buffer indices 8..263
for(unsigned x = 8; x < 256+8; x++) {
if(window[x] & 1<<no) {
unsigned mx = fx>>8, my = fy>>8;
if(bg.affineWrap)
mx &= mapW-1, my &= mapH-1;
if(mx < mapW && my < mapH) {
uint32 addr = 0x4000*bg.map + ((mapW*my + mx) << (mapDepth == 16));
// For 8bpp the odd byte of the fetched 16-bit unit is shifted down
uint16 data = system.vmap.bg(which, addr>>14)[addr] >> 8*(addr & 1);
// NOTE(review): bgr stays unset if mapDepth is neither 8 nor 16;
// the callers visible in renderBgs only pass 8 or 16.
uint32 bgr;
// 8bpp: palette lookup; `data` is narrowed to the index, so 0 = transparent
if(mapDepth== 8) bgr = bgPal[data &= 0xff];
// 16bpp: widen the 15-bit colour; `data` is reduced to bit 15 (opaque flag)
if(mapDepth==16) bgr = (data<<3&62<<12)|(data<<2&62<<6)|(data<<1&62), data &= 0x8000;
if(data) {
// Keep the two front-most pixels seen so far (smaller key = in front)
if(flags < above[x]) below[x] = above[x], above[x] = flags + bgr;
else if(flags < below[x]) below[x] = flags + bgr;
}
}
}
// Advance even for window-masked pixels so the mapping stays aligned
fx += bg.transform.dx_dh;
fy += bg.transform.dy_dh;
}
}

View File

@ -1,159 +0,0 @@
void PPU::renderObjs(unsigned y) {
if(objEnable == false)
return;
for(unsigned n = 0; n < 128; n++) {
objinfo& obj = this->obj[n];
if(obj.renderMode == objinfo::hidden) continue;
uint8 width = 1 << obj.size;
uint8 height = 1 << obj.size;
uint8 row = y - obj.y;
bool wide = obj.shape == objinfo::wide;
bool tall = obj.shape == objinfo::tall;
bool affine = obj.renderMode & objinfo::affine;
bool size = obj.renderMode == objinfo::affineDouble;
if(obj.size <= 1) width <<= wide, height <<= tall;
if(obj.size >= 1) width >>= tall, height >>= wide;
if(wide && tall) width = 1, height = 1;
if(row >= 8*(height << size)) continue;
if(obj.x <= -8*(width << size)) continue;
if(obj.kind == objinfo::bitmap) renderBitmapObj(obj, width, height, row);
else renderTiledObj(obj, width, height, row);
}
}
// Renders one row of a tiled sprite into objLayer, or into the window buffer
// for window-kind sprites.
//   obj        - sprite attributes
//   objw, objh - sprite size in tiles
//   y          - row within the sprite's (possibly double-size) bounding box
void PPU::renderTiledObj(objinfo& obj, unsigned objw, unsigned objh, unsigned y) {
unsigned step = objTileMode? 0+objTileStep : 0;
// Tile-number stride between sprite rows: the sprite's own width in linear
// mode, otherwise a fixed 32 (the 2D sprite sheet)
unsigned srow = objTileMode? objw << obj.depth : 32;
unsigned tile = obj.index << step;
// zmask keeps only the priority/layer bits of an objLayer entry for comparison
uint32 zmask = pxPriority*3u + pxLayer*7u;
uint32 flags = pxPriority*obj.priority + pxLayer*1u;
bool affine = obj.renderMode & objinfo::affine;
bool size = obj.renderMode == objinfo::affineDouble;
bool hflip = obj.transform & 8;
bool vflip = obj.transform & 16;
bool ext = obj.depth && objLargePal;
bool win = obj.kind == objinfo::window;
unsigned pal = obj.depth? 256*obj.palette : 16*obj.palette;
unsigned mask = obj.depth? 0xff : 0x0f;
if(blendAbove & 1<<4) flags |= pxBlendAbove;
if(blendBelow & 1<<4) flags |= pxBlendBelow;
if(obj.kind == objinfo::blend) flags |= pxBlendForce;
// Identity transform (1.0 == 0x100); replaced or mirrored below
affineparam tf = { 0x100, 0, 0, 0x100 };
// cx,cy: sprite centre in texels; ox,oy: position relative to the centre of
// the bounding box; dw,dh: bounding box size in pixels
int32 cx = 8*objw/2, ox = 0 - (cx << size), dw = 8*objw << size;
int32 cy = 8*objh/2, oy = y - (cy << size), dh = 8*objh << size;
if(affine) {
tf = objTransform[obj.transform];
} else {
// Mirroring is a negated scale; the +1 offsets the centre by one texel
if(hflip) ox++, tf.dx_dh *= -1;
if(vflip) oy++, tf.dy_dv *= -1;
}
int32 fx = ox*tf.dx_dh + oy*tf.dx_dv;
int32 fy = ox*tf.dy_dh + oy*tf.dy_dv;
// Line buffers carry an 8-pixel left margin
int x = obj.x + 8;
for(unsigned t = 0; t < dw && x < 256+8; x++, t++) {
uint32 mx = (fx >> 8) + cx;
uint32 my = (fy >> 8) + cy;
// Unsigned compare also rejects negative texel coordinates
if(mx < 8*objw && my < 8*objh) {
unsigned taddr = tile;
taddr += (my/8)*srow;
taddr += (mx/8) << obj.depth; taddr <<= 3;
taddr += (my%8) << obj.depth; taddr <<= 2;
taddr += (mx%8) >>!obj.depth;
uint16 data = system.vmap.obj(which, taddr>>14)[taddr];
// Pick the 8-bit or 4-bit pixel out of the fetched 16-bit unit
if(obj.depth) data = data >> 8*(mx%2) & mask;
else data = data >> 4*(mx%4) & mask;
// x can still be negative while the sprite hangs off the left margin
if(x >= 0 && data) {
if(win) {
// NOTE(review): scanline() writes win0/win1 as 0x40+area and the outside
// value as 0xc0+area; 0x20+area sorts below both under min() - confirm the
// OBJ window is meant to take precedence over win0/win1 here.
window[x] = min(window[x], 0x20+winArea[2]);
}
else {
uint32 bgr = objPal[pal+data & 0xff];
if(ext) {
// Extended palette: widen the 15-bit colour to the internal 6-bit fields
bgr = system.vmap.objpal[which][pal>>13][pal+data << 1];
bgr = (bgr<<3 & 0760000) | (bgr<<2 & 0007600) | (bgr<<1 & 0000076);
}
// Only replace an existing sprite pixel when this one sorts strictly in front
if(flags < (objLayer[x] & zmask)) objLayer[x] = flags + bgr;
}
}
}
fx += tf.dx_dh;
fy += tf.dy_dh;
}
}
// Renders one row of a direct-colour (bitmap) sprite into objLayer.
//   obj        - sprite attributes; obj.palette is used as the alpha value
//   objw, objh - sprite size in tiles
//   y          - row within the sprite's (possibly double-size) bounding box
void PPU::renderBitmapObj(objinfo& obj, unsigned objw, unsigned objh, unsigned y) {
// Bytes per bitmap row: the sprite's own width in linear mode (modes 2..3),
// otherwise a 256- or 512-byte wide sheet
unsigned srow = objBitmapMode>1? 16*objw : 256<<objBitmapMode;
unsigned tile = obj.index;
if(objBitmapMode>1) tile <<= 7+objBitmapStep;
else tile = 8*srow*(obj.index>>5 & 31) + 16*(obj.index & 31);
// zmask keeps only the priority/layer bits of an objLayer entry for comparison
uint32 zmask = pxPriority*3u + pxLayer*7u;
uint32 flags = pxPriority*obj.priority + pxLayer*1u + pxAlpha*2u*(obj.palette+1);
bool affine = obj.renderMode & objinfo::affine;
bool size = obj.renderMode == objinfo::affineDouble;
bool hflip = obj.transform & 8;
bool vflip = obj.transform & 16;
unsigned alpha = obj.palette;
if(blendAbove & 1<<4) flags |= pxBlendAbove;
if(blendBelow & 1<<4) flags |= pxBlendBelow;
// NOTE(review): alpha bits are contributed twice - 2*(palette+1) above and
// 2*alpha+1 here - and the two are OR-ed together; confirm this is intended.
flags |= pxBlendForce + (2*alpha+1)*pxAlpha;
// Identity transform (1.0 == 0x100); replaced or mirrored below
affineparam tf = { 0x100, 0, 0, 0x100 };
// cx,cy: sprite centre in texels; ox,oy: position relative to the centre of
// the bounding box; dw,dh: bounding box size in pixels
int32 cx = 8*objw/2, ox = 0 - (cx << size), dw = 8*objw << size;
int32 cy = 8*objh/2, oy = y - (cy << size), dh = 8*objh << size;
if(affine) {
tf = objTransform[obj.transform];
} else {
// Mirroring is a negated scale; the +1 offsets the centre by one texel
if(hflip) ox++, tf.dx_dh *= -1;
if(vflip) oy++, tf.dy_dv *= -1;
}
int32 fx = ox*tf.dx_dh + oy*tf.dx_dv;
int32 fy = ox*tf.dy_dh + oy*tf.dy_dv;
// Line buffers carry an 8-pixel left margin
int x = obj.x + 8;
for(unsigned t = 0; t < dw && x < 256+8; x++, t++) {
uint32 mx = (fx >> 8) + cx;
uint32 my = (fy >> 8) + cy;
// Unsigned compare also rejects negative texel coordinates
if(mx < 8*objw && my < 8*objh) {
// Two bytes per pixel
unsigned taddr = tile + srow*my + 2*mx;
uint16 data = system.vmap.obj(which, taddr>>14)[taddr];
// Bit 15 marks an opaque pixel
if(x >= 0 && (data & 0x8000)) {
uint5 b = data>>10, g = data>>5, r = data>>0;
// Widen each 5-bit channel into the upper bits of its 6-bit internal field
if(flags < (objLayer[x] & zmask)) objLayer[x] = flags | b<<13 | g<<7 | r<<1;
}
}
fx += tf.dx_dh;
fy += tf.dy_dh;
}
}

View File

@ -1,414 +0,0 @@
#include <nds/nds.hpp>
namespace NintendoDS {
// The two PPU instances; scanline() only feeds GPU output to the one whose `which` is 0.
PPU ppu[2];
// Intentionally empty: register state is established by power().
PPU::PPU() {
}
// Puts every display register back to its power-on value: control bits,
// the four backgrounds, all sprites and their transforms, blending, windows.
void PPU::power() {
  // Display control
  forceBlank = false;
  bgLargePal = false;
  bg0FromGPU = false;
  bgMode     = 0;
  bgTileBase = 0;
  bgMapBase  = 0;

  // Backgrounds
  for(auto &layer : bg) {
    layer.enable     = false;
    layer.mosaic     = false;
    layer.affineWrap = false;
    layer.depth      = 0;
    layer.priority   = 0;
    layer.size       = 0;
    layer.map        = 0;
    layer.tiles      = 0;
    layer.palette    = 0;
    layer.hoffs      = 0;
    layer.voffs      = 0;
    layer.originx    = 0;
    layer.originy    = 0;
    layer.linex      = 0;
    layer.liney      = 0;
    layer.transform.dx_dh = 0;
    layer.transform.dy_dh = 0;
    layer.transform.dx_dv = 0;
    layer.transform.dy_dv = 0;
  }

  // Sprite control
  objEnable     = false;
  objInHBlank   = false;
  objLargePal   = false;
  objTileMode   = 0;
  objTileStep   = 0;
  objBitmapMode = 0;
  objBitmapStep = 0;

  // Sprite attributes
  for(auto &sprite : obj) {
    sprite.kind       = objinfo::none;
    sprite.renderMode = objinfo::normal;
    sprite.mosaic     = false;
    sprite.depth      = 0;
    sprite.size       = 0;
    sprite.shape      = objinfo::square;
    sprite.transform  = 0;
    sprite.x          = 0;
    sprite.y          = 0;
    sprite.index      = 0;
    sprite.priority   = 0;
    sprite.palette    = 0;
  }

  // Sprite affine parameters
  for(auto &tf : objTransform) {
    tf.dx_dh = 0;
    tf.dy_dh = 0;
    tf.dx_dv = 0;
    tf.dy_dv = 0;
  }

  // Blending
  blendMode  = 0;
  blendAbove = 0;
  blendBelow = 0;
  blendAf    = 0;
  blendBf    = 0;
  blendYf    = 0;

  // Windows: both rectangles span 0..0xff, every area mask cleared
  for(unsigned w = 0; w < 2; w++) {
    winX[w][0] = 0; winX[w][1] = 0xff;
    winY[w][0] = 0; winY[w][1] = 0xff;
  }
  for(unsigned w = 0; w < 3; w++) {
    winArea[w]   = 0;
    winEnable[w] = false;
  }
  winArea[3] = 0;
}
// Composes one visible scanline (video.line < 192) into output[]:
// backdrop -> window mask -> OBJ and BG layers -> optional 3D layer ->
// merge OBJ -> colour effects. Lines >= 192 produce nothing.
void PPU::scanline() {
if(video.line < 192) {
// Set up backdrop
for(unsigned x = 0; x < 256+16; x++) {
// Backdrop uses layer id 6 and the lowest priority; `below` and the OBJ
// layer start out as empty entries with layer id 7
above[x] = pxPriority*3u + pxLayer*6u + bgPal[0];
below[x] = pxPriority*3u + pxLayer*7u;
objLayer[x] = pxPriority*3u + pxLayer*7u;
if(blendAbove & 1<<5) above[x] |= pxBlendAbove; // can shade backdrop
if(blendBelow & 1<<5) above[x] |= pxBlendBelow; // can blend over backdrop
}
// Set up window buffer
bool useWindowing = winEnable[0] || winEnable[1] || winEnable[2];
if(useWindowing) memset(window, 0xc0+winArea[3], 256+16); // initialize with winout
else memset(window, 0x3f, 256+16); // draw everything
// Window 1 is filled first so that window 0 overwrites it where they overlap
for(int w = 1; w >= 0; w--)
if(winEnable[w] && winY[w][0] <= video.line && video.line <= winY[w][1] && winX[w][1] > winX[w][0])
memset(&window[winX[w][0] + 8], 0x40+winArea[w], winX[w][1] - winX[w][0]);
// Draw layers
// Sprites go first: window-kind sprites edit the window buffer the BGs test
if(objEnable)
renderObjs(video.line);
renderBgs(video.line);
// Feed in 3D layer, if requested
if(bg[0].enable && bg0FromGPU && which == 0) {
auto *gpuOut = &gpu.output[256*video.line];
uint32 depth = pxPriority*bg[0].priority + pxLayer*2;
uint32 flags = pxBlendForce;
if(blendAbove & 1<<0) flags |= pxBlendAbove;
if(blendBelow & 1<<0) flags |= pxBlendBelow;
for(unsigned x = 0; x < 256; x++) {
if(!(window[x+8] & 1<<0)) continue;
// Skip pixels whose value is below one alpha step
if(gpuOut[x] < 1*pxAlpha) continue;
if(depth < above[8+x]) {
below[8+x] = above[8+x];
above[8+x] = depth | flags | gpuOut[x];
}
else if(depth < below[8+x]) {
below[8+x] = depth | flags | gpuOut[x];
}
}
}
// Add OBJs, if enabled
if(objEnable) {
for(unsigned x = 0; x < 256; x++) {
if(!(window[x+8] & 1<<4)) continue;
auto &objPx = objLayer[x+8];
auto &abovePx = above[x+8];
auto &belowPx = below[x+8];
if(objPx < abovePx) belowPx = abovePx, abovePx = objPx;
else if(objPx < belowPx) belowPx = objPx;
}
}
// Colour effects. Factors are doubled and clamped to 32 (== 1.0).
unsigned yf = min(32, 2*blendYf);
// 0x10 in each spread channel: rounding term for the /32 below
uint64 round = 02000200020;
uint64 shade = round;
// Lighten adds white scaled by yf; darken adds nothing
if(blendMode == lighten)
shade += 07700770077ull*yf;
for(unsigned x = 0; x < 256; x++) {
auto &abovePx = above[x+8];
auto &belowPx = below[x+8];
unsigned af = min(32, 2*blendAf);
unsigned bf = min(32, 2*blendBf);
bool canShade = (abovePx & pxBlendAbove);
bool canBlend = (abovePx & (pxBlendAbove|pxBlendForce)) && (belowPx & pxBlendBelow);
bool forceAlpha = (abovePx & pxBlendForce ) && (belowPx & pxBlendBelow);
unsigned mode = forceAlpha? alphaBlend : blendMode;
// No effect selected, or the window disables effects here: pass through
if(!(mode && (window[x+8] & 1<<5))) {
output[x] = abovePx;
continue;
}
// Spread the three 6-bit channels 12 bits apart so that all of them can be
// scaled with a single multiply
uint64 a = (abovePx & 0777777)*01000001ull & 07700770077ull;
uint64 b = (belowPx & 0777777)*01000001ull & 07700770077ull;
// Forced-blend pixels carrying their own alpha override the global factors
if(forceAlpha && (abovePx & 31*pxAlpha))
af = (abovePx>>18 & 31) + 1, bf = 32-af;
if(canBlend && mode == alphaBlend) a = (a*af + b*bf + round)/32;
else if(canShade && mode > alphaBlend) a = (a*(32 - yf) + shade)/32;
// Saturate any channel that carried into bit 6 of its slot
if(uint32 oflow = a & 010001000100)
a |= oflow - (oflow>>6);
a &= 07700770077;
// Fold the spread channels back into an 18-bit colour
output[x] = (a | a>>18) & 0777777;
}
}
}
#include "bg.cpp"
#include "obj.cpp"
// Reads two adjacent palette entries as one 32-bit word of 15-bit colours.
// Offsets below 0x200 address the BG palette, the rest the OBJ palette.
uint32 PPU::readPalette(uint32 addr) {
  // Internal entries hold 6 bits per channel; keep the top 5 of each
  auto pack15 = [](uint32 px) -> uint32 {
    return (px>>13 & 31)<<10 | (px>>7 & 31)<<5 | (px>>1 & 31)<<0;
  };

  addr &= ~3;
  const unsigned first = addr>>1 & 0xfe;
  uint32* entry = addr < 0x200? &bgPal[first] : &objPal[first];
  return pack15(entry[0]) | pack15(entry[1])<<16;
}
// Stores one or two 15-bit colours into the internal palette format.
// Byte writes are handled like halfword writes; `data` is expected with the
// colour already in the lane that matches the address.
void PPU::writePalette(uint32 addr, uint32 size, uint32 data) {
  // Widen a 15-bit colour into the upper 5 bits of each 6-bit channel
  auto unpack18 = [](uint32 c) -> uint32 {
    return (c>>10 & 31)<<13 | (c>>5 & 31)<<7 | (c>>0 & 31)<<1;
  };

  uint32 lanes = 0xffffffff;
  if(size == Byte || size == Half) {
    addr &= ~1;
    lanes = 0xffff << 8*(addr & 3);
  }
  else if(size == Word) {
    addr &= ~3;
  }

  const unsigned first = addr>>1 & 0xfe;
  uint32* entry = addr < 0x200? &bgPal[first] : &objPal[first];

  if(lanes & 0x0000ffff) entry[0] = unpack18(data);
  if(lanes & 0xffff0000) entry[1] = unpack18(data>>16);
}
// Reads one 32-bit word of OAM, rebuilt from the decoded sprite state.
// The bit layout mirrors writeOam():
//   word 0: y[0-7] renderMode[8-9] kind[10-11] mosaic[12] depth[13]
//           shape[14-15] x[16-24] transform[25-29] size[30-31]
//   word 1: index[0-9] priority[10-11] palette[12-15] affine param[16-31]
uint32 PPU::readOam(uint32 addr) {
  addr &= ~3;
  objinfo& obj = this->obj[addr/8];
  affineparam& tf = this->objTransform[addr/32];

  if(addr & 4) {
    // The affine parameter lives in the upper halfword (offset 6); go through
    // uint16 so a negative value cannot sign-extend
    return uint32(obj.index)<<0 | uint32(obj.priority)<<10 | uint32(obj.palette)<<12
         | uint32(uint16(tf.m[addr>>3 & 3]))<<16;
  }

  // x is a signed 9-bit field: mask it so a negative value cannot spill into
  // the transform/size bits above it
  return uint32(obj.y)<<0      | uint32(obj.renderMode)<<8 | uint32(obj.kind)<<10
       | uint32(obj.mosaic)<<12 | uint32(obj.depth)<<13     | uint32(obj.shape)<<14
       | (uint32(obj.x) & 0x1ff)<<16 | uint32(obj.transform)<<25 | uint32(obj.size)<<30;
}
// Decodes a write to OAM into the sprite / affine-parameter state.
// Byte writes are ignored; word writes are split into two halfword writes.
void PPU::writeOam(uint32 addr, uint32 size, uint32 data) {
  if(size == Byte) return;

  if(size == Word) {
    const uint32 base = addr & ~3;
    writeOam(base+0, Half, data>>0);
    writeOam(base+2, Half, data>>16);
    return;
  }

  objinfo& sprite = this->obj[addr/8];
  const unsigned field = addr & 6;  // which halfword of the 8-byte entry

  if(field == 0) {
    sprite.y          = data>>0;
    sprite.renderMode = data>>8;
    sprite.kind       = data>>10;
    sprite.mosaic     = data>>12;
    sprite.depth      = data>>13;
    sprite.shape      = data>>14;
  }
  else if(field == 2) {
    sprite.x         = data>>0;
    sprite.transform = data>>9;
    sprite.size      = data>>14;
  }
  else if(field == 4) {
    sprite.index    = data>>0;
    sprite.priority = data>>10;
    sprite.palette  = data>>12;
  }
  else {
    // Fourth halfword: one of the four parameters of the shared transform
    this->objTransform[addr/32].m[addr>>3 & 3] = data;
  }
}
// Reassembles the display control register from the decoded fields.
uint32 PPU::regControl() {
  uint32 value = 0;

  // bits 0-7
  value |= bgMode<<0 | bg0FromGPU<<3 | objTileMode<<4 | objBitmapMode<<5 | forceBlank<<7;

  // bits 8-15: layer and window enables
  value |= bg[0].enable<<8 | bg[1].enable<<9 | bg[2].enable<<10 | bg[3].enable<<11;
  value |= objEnable<<12 | winEnable[0]<<13 | winEnable[1]<<14 | winEnable[2]<<15;

  // bits 20-23
  value |= objTileStep<<20 | objBitmapStep<<22 | objInHBlank<<23;

  // bits 24-31
  value |= bgTileBase<<24 | bgMapBase<<27 | bgLargePal<<30 | objLargePal<<31;

  return value;
}
// Writes the display control register. `mask` marks the bits being written;
// each group of fields is updated only when its part of the mask is touched.
void PPU::regControl(uint32 data, uint32 mask) {
  const bool modeBits   = mask & 0x000000ff;
  const bool enableBits = mask & 0x0000ff00;
  const bool objBits    = mask & 0x00f00000;
  const bool baseBits   = mask & 0xff000000;

  if(modeBits) {
    bgMode        = data>>0;
    bg0FromGPU    = data>>3;
    objTileMode   = data>>4;
    objBitmapMode = data>>5;
    forceBlank    = data>>7;
  }

  if(enableBits) {
    for(unsigned n = 0; n < 4; n++)
      bg[n].enable = data>>(8+n);
    objEnable = data>>12;
    for(unsigned w = 0; w < 3; w++)
      winEnable[w] = data>>(13+w);
  }

  if(objBits) {
    objTileStep   = data>>20;
    objBitmapStep = data>>22;
    objInHBlank   = data>>23;
  }

  if(baseBits) {
    bgTileBase  = data>>24;
    bgMapBase   = data>>27;
    bgLargePal  = data>>30;
    objLargePal = data>>31;
  }
}
// Reassembles the control register of background `no`.
uint32 PPU::regBg(unsigned no) {
  auto &layer = bg[no];
  uint32 value = 0;
  value |= layer.priority<<0 | layer.tiles<<2 | layer.mosaic<<6 | layer.depth<<7;
  value |= layer.map<<8 | layer.affineWrap<<13 | layer.size<<14;
  return value;
}
// Writes the control register of background `no`; each byte's fields are
// updated only when `mask` touches that byte.
//
// Sample values kept from the original notes:
//   bg0 = 17 00  map 23, size -, tiles --, pri 0, 3D
//   bg1 = f8 8b  map 24, size 3, tiles 02, pri 3, 256 color
//   bg2 = b0 01  map 16, size 2, tiles 00, pri 1, 16-bit affine
//   bg3 = b4 0a  map 20, size 2, tiles 02, pri 2, 16-bit affine
void PPU::regBg(unsigned no, uint32 data, uint32 mask) {
  auto &layer = bg[no];

  if(mask & 0x00ff) {
    layer.priority = data>>0;
    layer.tiles    = data>>2;
    layer.mosaic   = data>>6;
    layer.depth    = data>>7;
  }

  if(mask & 0xff00) {
    layer.map        = data>>8;
    layer.affineWrap = data>>13;
    layer.size       = data>>14;
  }
}
// Returns the scroll offsets of background `no`: horizontal in the low
// halfword, vertical in the high halfword.
uint32 PPU::regBgOffs(unsigned no) {
  auto &layer = bg[no];
  uint32 value = layer.hoffs;
  value |= layer.voffs<<16;
  return value;
}
void PPU::regBgOffs(unsigned no, uint32 data, uint32 mask) {
bg[no].hoffs ^= (bg[no].hoffs ^ data) & mask; data >>= 16; mask >>= 16;
bg[no].voffs ^= (bg[no].voffs ^ data) & mask;
}
void PPU::regBgAffine(unsigned no, unsigned index, uint32 data, uint32 mask) {
if(index < 2) {
bg[no].transform.m[2*index+0] ^= (bg[no].transform.m[2*index+0] ^ data) & mask; data >>= 16; mask >>= 16;
bg[no].transform.m[2*index+1] ^= (bg[no].transform.m[2*index+1] ^ data) & mask;
}
if(index == 2) {
bg[no].originx ^= (bg[no].originx ^ data) & mask;
bg[no].linex = bg[no].originx;
}
if(index == 3) {
bg[no].originy ^= (bg[no].originy ^ data) & mask;
bg[no].liney = bg[no].originy;
}
}
void PPU::regWinDims(unsigned index, uint32 data, uint32 mask) {
auto dims = index == 0? winX : winY;
if(mask & 0x000000ff) dims[0][1] = data>>0;
if(mask & 0x0000ff00) dims[0][0] = data>>8;
if(mask & 0x00ff0000) dims[1][1] = data>>16;
if(mask & 0xff000000) dims[1][0] = data>>24;
}
// Packs the four window area masks into one word, one per byte.
// Byte order is win0, win1, outside (winArea[3]), OBJ window (winArea[2]).
uint32 PPU::regWinArea() {
  static const unsigned slot[4] = { 0, 1, 3, 2 };

  uint32 value = 0;
  for(unsigned n = 0; n < 4; n++)
    value |= winArea[slot[n]] << 8*n;
  return value;
}
// Writes the window area masks (6 bits each, one per byte). A mask is updated
// only when `mask` touches its 6 bits. Byte order is win0, win1, outside
// (winArea[3]), OBJ window (winArea[2]).
void PPU::regWinArea(uint32 data, uint32 mask) {
  static const unsigned slot[4] = { 0, 1, 3, 2 };

  for(unsigned n = 0; n < 4; n++) {
    const unsigned shift = 8*n;
    if(mask & (0x3fu << shift))
      winArea[slot[n]] = data>>shift & 0x3f;
  }
}
// Reassembles the blend control register: first-target layers, effect mode,
// second-target layers. The coefficients are not part of the value returned.
uint32 PPU::regBlend() {
  uint32 value = blendAbove<<0;
  value |= blendMode<<6;
  value |= blendBelow<<8;
  return value;
}
void PPU::regBlend(unsigned index, uint32 data, uint32 mask) {
uint64 maskl = uint64(mask) << 32*index;
if(maskl & 0x000000003full) blendAbove = data>>0;
if(maskl & 0x00000000c0ull) blendMode = data>>6;
if(maskl & 0x0000003f00ull) blendBelow = data>>8;
if(maskl & 0x00001f0000ull) blendAf = data>>16;
if(maskl & 0x001f000000ull) blendBf = data>>24;
if(maskl & 0x1f00000000ull) blendYf = data>>0;
}
}

View File

@ -1,148 +0,0 @@
// 2D display engine: composes backgrounds, sprites, windows and colour
// effects into one 256-pixel line at a time (see scanline()).
struct PPU {
PPU();
void power(); // reset all registers to power-on values
void scanline(); // compose the current video line into output[]
struct objinfo;
// Per-line layer renderers; all write into the above/below/objLayer buffers
void renderBgs(unsigned y);
void renderTiledBg(unsigned no, unsigned mapW, unsigned mapH, unsigned y);
void renderAffineBg(unsigned no, unsigned mapDepth, unsigned mapW, unsigned mapH, unsigned y);
void renderBitmapBg(unsigned no, unsigned mapDepth, unsigned mapW, unsigned mapH, unsigned y);
void renderObjs(unsigned y);
void renderTiledObj (objinfo& obj, unsigned objw, unsigned objh, unsigned y);
void renderBitmapObj(objinfo& obj, unsigned objw, unsigned objh, unsigned y);
// Palette / OAM access; `size` is one of Byte, Half, Word
uint32 readPalette(uint32 addr);
void writePalette(uint32 addr, uint32 size, uint32 data);
uint32 readOam(uint32 addr);
void writeOam(uint32 addr, uint32 size, uint32 data);
// Register access; for writes, `mask` marks the bits being written
uint32 regControl();
void regControl(uint32 data, uint32 mask);
uint32 regBg(unsigned no);
void regBg(unsigned no, uint32 data, uint32 mask);
uint32 regBgOffs(unsigned no);
void regBgOffs(unsigned no, uint32 data, uint32 mask);
void regBgAffine(unsigned no, unsigned index, uint32 data, uint32 mask);
void regWinDims(unsigned index, uint32 data, uint32 mask);
uint32 regWinArea();
void regWinArea(uint32 data, uint32 mask);
uint32 regBlend();
void regBlend(unsigned index, uint32 data, uint32 mask);
// PPU #0:
//   Has access to VRAM banks 0..6
//   BG0 can source 3D GPU output
//   Can use frame- and render-buffers
//   Max 512K BG, 256K OBJ
// PPU #1:
//   More limited features
//   Has access to VRAM banks 2,3,7,8
//   Max 128K BG, 128K OBJ
int which;
uint1 powered;
// Internal pixel format: pplllMBAF aaaaa bbbbbb gggggg rrrrrr
// Entries are compared as plain integers, so a smaller value sorts in front.
enum {
pxPriority = 1<<30, // p - from BG control
pxLayer = 1<<27, // l - 1: OBJ, 2..5:BG, 6:backdrop
pxMosaic = 1<<26, // M
pxBlendBelow = 1<<25, // B
pxBlendAbove = 1<<24, // A
pxBlendForce = 1<<23, // F - for 3D/OBJ pixel
pxAlpha = 1<<18, // a - alpha (0=solid)
};
uint32 output[256]; // finished line
// Line buffers with an 8-pixel margin on each side; visible pixels are 8..263
uint32 above[256 + 16]; // front-most pixel per column
uint32 below[256 + 16]; // second front-most pixel per column
uint32 objLayer[256 + 16]; // sprite pixels, merged in by scanline()
// Palettes, stored in the internal 18-bit colour format
uint32 bgPal[256];
uint32 objPal[256];
// 2x2 transform; 0x100 represents 1.0
union affineparam {
struct {
int16 dx_dh, dx_dv; // A, B
int16 dy_dh, dy_dv; // C, D
};
int16 m[4];
};
struct bginfo {
uint1 enable;
uint1 mosaic, depth;
uint1 affineWrap;
uint2 priority;
uint2 size;
uint5 map; // 2K offset for maps / 16K for bitmaps
uint4 tiles; // 16K offset for tiles
uint2 palette; // 8K offset for large palette mode
uint9 hoffs, voffs; // Tiled scrolling
int32 originx, originy; // Affine settings
int32 linex, liney; // running origin for the current line
affineparam transform;
};
struct objinfo {
uint2 kind; enum { none=0, blend=1, window=2, bitmap=3 };
uint2 renderMode; enum { normal=0, affine=1, hidden=2, affineDouble=3 };
uint1 mosaic;
uint1 depth;
uint2 size;
uint2 shape; enum { square=0, wide=1, tall=2 };
// Index into objTransform for affine sprites; otherwise carries the flip bits
uint5 transform; enum { hflip=8, vflip=16 };
int9 x;
uint8 y;
uint10 index;
uint2 priority;
uint4 palette; // subpalette for tiled; alpha for bitmap sprites
};
uint1 forceBlank;
uint1 bgLargePal; // 256-color BGs use system.vmap.bgpal
uint1 bg0FromGPU; // Display 3D rendering on BG0
uint3 bgMode; // Determines affine vs. tiled layers
uint3 bgTileBase; // 64K starting base for tiles
uint3 bgMapBase; // 64K starting base for maps
uint4 bgMosaicX;
uint4 bgMosaicY;
bginfo bg[4];
uint1 objEnable;
uint1 objInHBlank;
uint1 objLargePal; // 256-color OBJs use system.vmap.objpal
uint1 objTileMode; // 0 = 256x256 px sprite sheet, 1 = linear
uint2 objBitmapMode; // 0 = 128x256, 1 = 256x256, 2..3 = linear
uint2 objTileStep; // tile index << in linear mode
uint1 objBitmapStep; // extra shift applied to the bitmap index in linear mode
uint4 objMosaicX;
uint4 objMosaicY;
objinfo obj[128];
affineparam objTransform[32];
uint1 winEnable[3]; // [win0, win1, winobj]
uint8 winArea[4]; // [win0, win1, winobj, winout]; bits: BLD, OBJ, BG3..BG0
uint8 winX[2][2]; // [window][0 = start, 1 = end]
uint8 winY[2][2]; // not uint9 - Nintendo's oversight
uint8 window[256+16]; // window no.<<6 | area
uint2 blendMode; enum { alphaBlend=1, lighten=2, darken=3 };
uint6 blendAbove; // BG0..BG3, OBJ, lighten/darken BD
uint6 blendBelow; // BG0..BG3, OBJ, blend over BD
uint5 blendAf, blendBf; // alphaBlend coefficients
uint5 blendYf; // lighten/darken coefficient
};
extern PPU ppu[2];

View File

@ -1,244 +0,0 @@
#include <nds/nds.hpp>
namespace NintendoDS {
// The single game card slot instance.
Slot1 slot1;
// Starts with an empty slot; the registers are set up by power().
Slot1::Slot1() : card(nullptr) {
}
// Powers up the inserted card (and its SPI device, if any), then resets all
// slot registers.
void Slot1::power() {
  if(card) {
    card->power();
    if(card->spi) card->spi->power();
  }

  enable = true;

  // ROM interface timing and obfuscation
  decryptLatency  = 0;
  responseLatency = 0;
  xorData         = 0;
  xorCmds         = 0;
  clock           = 0;
  secureMode      = 0;
  lfsr[0]         = 0;
  lfsr[1]         = 0;

  // Transfer state
  dataReady       = 0;
  blockSize       = 0;
  transferPending = 0;
  transferIrq     = 0;
  transferLength  = 0;
  command         = 0;

  // Serial interface
  spi.data   = 0;
  spi.baud   = 0;
  spi.hold   = 0;
  spi.busy   = 0;
  spi.enable = 0;
}
// Inserts `card` into the slot and powers up its SPI device, if it has one.
// Passing nullptr leaves the slot empty, matching the `if(card)` checks in
// power() and the transfer paths.
void Slot1::load(GameCard* card) {
  this->card = card;
  if(card && card->spi)
    card->spi->power();
}
// Empties the slot and hands the previously inserted card (or nullptr) back
// to the caller.
GameCard* Slot1::unload() {
  // Deselecting the SPI device is currently disabled:
  //if(card && card->spi)
  //  card->spi->select(false);
  GameCard* removed = card;
  card = nullptr;
  return removed;
}
// Begins a ROM transfer: works out the block length from blockSize, hands
// the latched command to the card, then immediately runs any DMA channel
// that is waiting on the card.
void Slot1::startRomTransfer() {
transferPending = true;
transferLength = 0;
// blockSize 7 is a single 4-byte transfer; 1..6 give 512 << (n-1) bytes;
// 0 leaves the length at zero
if(blockSize == 7) transferLength = 4;
else if(blockSize) transferLength = 512 << (blockSize-1);
if(transferLength) dataReady = true;
else transferPending = false;
if(card) card->command(command);
// Hack for now, until we implement timing. Should test whether the ARM9
// halts during this entire DMA - one complete word transfers every 40 or
// 64 clocks at 66MHz.
CPUCore *arm = arm9.slot1access? (CPUCore*)&arm9 : (CPUCore*)&arm7;
//while(transferLength > 0 && arm->dmaTrigger(0xf, 5)) /**/;
// Pick the highest-numbered enabled channel with trigger 5
int ch = -1;
for(unsigned n = 0; n < 4; n++)
if(arm->dma[n].enable && arm->dma[n].trigger == 5)
ch = n;
// NOTE(review): this only terminates if dmaTransfer() ends up lowering
// transferLength (presumably by reading through readRom()) - confirm.
while(ch >= 0 && transferLength > 0)
arm->dmaTransfer(ch);
}
// Returns the next byte of the current ROM transfer, or 0xff when no transfer
// is active. Finishing the block clears the ready/pending flags and, if
// enabled, raises the card-done interrupt on the CPU that owns the slot.
uint8 Slot1::readRom() {
  if(!transferLength)
    return 0xff;

  // Empty slot simply returns 0xff
  uint8 value = 0xff;
  if(card) value = card->read();

  transferLength -= 1;
  if(transferLength)
    return value;

  // That was the last byte of the block
  dataReady = false;
  transferPending = false;
  if(transferIrq) {
    if(arm9.slot1access == 0) arm7.interrupt.flags |= CPUCore::irqCardDone;
    if(arm9.slot1access == 1) arm9.interrupt.flags |= CPUCore::irqCardDone;
  }
  return value;
}
// Exchanges one byte with the card's SPI device. Returns 0xff when there is
// no card or the card has no SPI device. The device is selected for the
// transfer and afterwards stays selected only if spi.hold is set.
uint8 Slot1::spiTransfer(uint8 data) {
  auto device = card? card->spi : nullptr;
  if(!device)
    return 0xff;

  device->select(true);
  const uint8 reply = device->transfer(data);
  device->select(spi.hold);
  return reply;
}
// The card owns its SPI device and destroys it here (deleting nullptr is a no-op).
GameCard::~GameCard() {
delete spi;
}
// Creates a card with a blank 64K ROM (all 0xff), the given chip id and no
// SPI device.
GameCard::GameCard(uint32 id) {
  const unsigned blankSize = 0x10000;

  rom.size = blankSize;
  rom.data = new uint8[rom.size];
  memset(rom.data, 0xff, rom.size);

  size   = bit::round(rom.size);
  chipId = id;
  spi    = nullptr;
}
// Creates a card whose ROM is `esize` bytes, filled from `memory`. If the
// stream is shorter than `esize` the remainder stays 0xff. Also records the
// SHA-256 of the ROM buffer and the chip id; no SPI device is attached.
GameCard::GameCard(const stream& memory, uint32 esize, uint32 id) {
  rom.size = esize;
  rom.data = new uint8[rom.size];

  // Pre-fill so any bytes the stream cannot supply read back as 0xff
  memset(rom.data, 0xff, rom.size);
  memory.read(rom.data, min(memory.size(), rom.size));

  size   = bit::round(rom.size);
  sha256 = nall::sha256(rom.data, rom.size);
  chipId = id;
  spi    = nullptr;
}
// Returns the card to the idle state; read() yields 0xff until a command arrives.
void GameCard::power() {
state = idle;
}
// Decodes the command latched by the slot. The top byte is the opcode:
//   0xb7 - read data; the next four bytes give the ROM address
//   0xb8 - read chip id
// Any other opcode leaves the state untouched.
void GameCard::command(uint64 command) {
  const unsigned opcode = command>>56;

  switch(opcode) {
  case 0xb7:
    state  = readData;
    offset = command>>24 & 0x00000fff;
    block  = command>>24 & 0xfffff000;
    block &= size-1;
    // Once initialized, forbid reading the header and startup code.
    if(block < 0x8000) block += 0x8000;
    break;

  case 0xb8:
    state  = readId;
    offset = 0;
    break;
  }
}
// Returns the next byte of the response to the current command
// (0xff while idle).
uint8 GameCard::read() {
uint8 r = 0xff;
if(state == readData) {
// We remain in this state indefinitely, until the next command.
// However, reading from most (?) game cards wraps at 4K intervals.
uint32 addr = (block + offset++) & size-1;
offset &= 0xfff;
// Cards come in 2^n ROM sizes, however many images have the empty space
// trimmed off. Homebrew images aren't even padded out! Rather than waste
// memory, just check for this and return $ff.
if(addr < rom.size)
r = rom.data[addr];
}
if(state == readId) {
// Need to ensure the matching ID is in RAM or games won't run
// (where does the firmware put it?)
// Deliberate stub: the id byte is NOT returned (r is forced to 0). What
// follows the empty comment is a separate expression statement whose only
// effect is advancing `offset`.
r = 0;/**/chipId >> 8*offset++;
offset &= 3;
}
return r;
}
// Leaves passthrough mode, forgets any latched command and powers up the
// attached slave device, if there is one.
void IRPort::power() {
bypass = false;
command = 0;
if(slave)
slave->power();
}
// Chip-select handling. While in passthrough mode the select line is
// forwarded to the slave device; deselecting ends passthrough and clears the
// latched command.
void IRPort::select(bool state) {
  // transfer() can enable bypass even when no slave is attached (the default
  // constructor leaves slave null), so guard the forward like power() and
  // transfer() do.
  if(bypass && slave) {
    slave->select(state);
  }
  if(state == false) {
    bypass = false;
    command = 0;
  }
}
// Exchanges one byte with the IR port. The first byte after select is latched
// as the command; command 0x00 switches to passthrough so that later bytes go
// to the slave device. Bytes following any other command answer 0xff.
uint8 IRPort::transfer(uint8 data) {
  if(bypass) {
    // Pass transfers through to flash memory
    return slave? slave->transfer(data) : 0xff;
  }

  // A command is already latched: nothing further is emulated
  if(command != 0x00)
    return 0xff;

  command = data;
  switch(command) {
  case 0x00:
    // Engage passthrough mode - allows access to save data.
    bypass = true;
    return 0xff;

  case 0x01:
    // Receive? HG/SS expect an 8-bit packet size,
    // then 0..185 bytes, all XORed with $aa.
    return 0x00;

  case 0x02:
    // Write a packet, maybe to transmit buffer
    return 0xff;

  case 0x08:
    // ID of some kind? Is it even a valid command?
    // Returns $aa after powerup - haven't tested the Pokewalker with it.
    // Pokemon HG/SS frequently check this, even when not communicating.
    return 0xaa;
  }
  return 0xff;
}
}

View File

@ -1,113 +0,0 @@
struct GameCard;
struct SPIDevice;
// Pins 1-17: GND, Clock, -, /ROM, /RESET, /SPI, /IRQ, VCC, D0-D7, GND
//
// This is a hybrid serial-parallel bus, one byte transferred each clock.
// Both the cards and the slot support XOR obfuscation using a 39-bit LFSR,
// and some commands are further encrypted via a Blowfish variant.
//
// Transfer rates are 33MHz /5 or /8, which is 4-6MB/sec, and there's a
// 32-bit buffer making it possible to DMA straight to VRAM. For perspective,
// 6MB/s is enough to stream 256x192x16bpp video + audio at 60fps.
struct Slot1 {
Slot1();
void power(); // power up the inserted card and reset the slot registers
void load(GameCard* card); // insert a card
GameCard* unload(); // remove the card and return it to the caller
void startRomTransfer(); // issue the latched command and start a block transfer
uint8 readRom(); // next byte of the current transfer, 0xff if none
uint8 spiTransfer(uint8 data); // exchange one byte with the card's SPI device
GameCard *card; // inserted card, or nullptr when the slot is empty
uint1 enable;
// ROM interface
uint1 clock; // 33MHz / {5, 8}
uint13 decryptLatency; // clocks to wait for card to decrypt command
uint6 responseLatency; // clocks to wait for response (data buffering?)
uint2 xorData; // XOR data received (2 bits?)
uint1 xorCmds; // XOR commands sent
uint1 dataReady; // 32 bits buffered and ready in read port
uint3 blockSize; // {0, 512 bytes (usual), 1K-16K, 32 bits}
uint1 secureMode; // ?
uint1 transferIrq; // generate IRQ at end of block
uint1 transferPending; // still more bytes to transfer?
uint32 transferLength; // # bytes remaining
uint64 command; // latch holding 8 command bytes for next transfer
uint64 lfsr[2]; // registers used to obfuscate communication
// Serial interface for EEPROM, flash, and peripheral access.
// Each transfer writes and reads 8 bits, one bit per clock.
// Pins D6/D7 do double duty as data in/out respectively.
struct SPI {
uint8 data; // read from last transfer
uint2 baud; // 4MHz >> n
uint1 enable, busy, hold; // hold: keep the device selected after a transfer
} spi;
};
// A game card: ROM contents plus an optional SPI device.
// The virtual interface lets other card types override command handling.
struct GameCard {
virtual ~GameCard(); // destroys the SPI device
GameCard(uint32 id); // blank 64K card
GameCard(const stream& memory, uint32 size, uint32 id); // card loaded from a ROM image
virtual void power();
virtual void command(uint64 command); // decode the 8-byte command latched by the slot
virtual uint8 read(); // next response byte
int state; enum { idle, readData, readId };
uint32 block; // 4K block for reading
uint32 offset; // byte offset
uint32 size; // power of 2 rom size
uint64 lfsr[2]; // for obfuscation
uint32 chipId;
string sha256; // hash of the ROM buffer (set by the stream constructor only)
StaticMemory rom;
SPIDevice *spi; // owned; may be nullptr
};
// SPI device that sits in front of another SPI device (`slave`).
// NOTE(review): the constructors only initialise `slave`; `bypass` and
// `command` are left for power() to set - confirm in the implementation.
struct IRPort : SPIDevice {
IRPort() : slave(nullptr) { }
IRPort(SPIDevice *slave) : slave(slave) { }
void power();
void select(bool state);
uint8 transfer(uint8 data);
SPIDevice *slave; // downstream device, may be null
bool bypass;
uint8 command;
};
// Serial EEPROM on a game card: an SPI device backed by StaticMemory storage.
struct S1EEPROM : SPIDevice, StaticMemory {
void select(bool state);
uint8 transfer(uint8 data);
// Protocol state; the anonymous enum supplies the values.
int state; enum { idle, params, dataIo };
// The enumerators below are sequential (0..6). The trailing comments list
// what appear to be the matching wire opcodes - TODO confirm the mapping in
// transfer().
uint8 command; enum {
cmdWriteDisable, cmdWriteEnable, // 0x04, 0x06
cmdWriteStatus, cmdReadStatus, // 0x01, 0x05
cmdWrite, cmdRead, // 0x02, 0x03
cmdReadID, // 0x9f
};
uint8 numArgs;
uint32 offset;
// Status register
uint1 busy;
uint1 writeEnable;
uint2 writeProtect;
uint1 statusLock;
uint1 unlockable; // /W pin, presumably enabled on game cards
};
// Global Slot1 instance; the definition lives in another translation unit.
extern Slot1 slot1;

View File

@ -1,239 +0,0 @@
// Empty destructor: the body performs no cleanup of its own.
Clock::~Clock() { }
// Start with a zeroed "stopped at" timestamp; loadRTC()/freeze() fill in the
// real wall-clock time later.
Clock::Clock()
  : stoppedSec(0), stoppedUSec(0)
{
}
// Restore the RTC registers to their reset values: all settings cleared,
// date set to year 00, month 1, day 1, time 00:00:00, both alarms zeroed.
void Clock::reset() {
  // Control / scratch registers
  status1  = 0;
  status2  = 0;
  adjust   = 0;
  userByte = 0;

  // Calendar (BCD): month and day are 1-based, everything else starts at 0
  year    = 0;
  month   = 1;
  day     = 1;
  weekday = 0;

  // Time of day (BCD)
  hour   = 0;
  minute = 0;
  second = 0;

  // Alarm 1 and 2: weekday, hour, minute
  for(auto &slot : alarm) {
    for(auto &field : slot.setting) field = 0;
  }
}
// Power-on: clear the serial interface and interrupt state, then schedule the
// periodic tick on the ARM7 event queue.
void Clock::power() {
  // Seconds-tick bookkeeping
  hold = false;
  secondsTickPending = false;

  // Interrupt lines and tick delay
  lag = 0;
  intr[1] = 0;
  intr[0] = 0;

  // Serial pin history
  output  = false;
  lastCs  = false;
  lastClk = false;

  // Shift register state
  dataOut = 0;
  nbits   = 0;
  buffer  = 0;
  command = 0;

  // The clock is 32768 Hz but we need twice that, because serial
  // interrupts are edge sensitive.
  event.action = [&]() {
    tick();
    // Re-arm ourselves for the next half-period.
    arm7.event.queue.add(2*33513982/(2*32768), event);
  };
  arm7.event.queue.add(2*33513982/(2*32768), event);
}
// Bit-banged serial port of the RTC.
// pins: bit 0 = data in, bit 1 = CLK, bit 2 = CS.
// Returns the pin state driven back: 0xf when idle or while receiving,
// otherwise the output data bit in bit 0 with bits 1-3 set.
uint4 Clock::io(uint4 pins) {
uint1 dataIn = pins>>0;
uint1 clk = pins>>1;
uint1 cs = pins>>2;
// CS changed: remember the new level; on a falling edge (new level is 0)
// abort the current command.
if(cs != lastCs && !(lastCs = cs)) {
command = nbits = 0;
output = false;
return 0xf;
}
// Wait for falling CLK edge with CS high..
// (same idiom: update lastClk, and proceed only if the new level is 0)
if(clk != lastClk && !(lastClk = clk)) {
// Rotate data through the buffer
dataOut = buffer;
buffer = dataIn<<7 | buffer>>1;
// nbits is declared as uint3 in the Clock struct, so it wraps to 0 after
// every 8th bit.
if(++nbits == 0) {
// Have we got a full byte to act on?
// Accept a new command. They're MSB first, unlike data:
if(!command && (buffer & 0x0f) == 6) { // 0110
command = buffer & 0x7f; // ccc
output = buffer & 0x80; // D
index = 0;
// Prevent the time changing while read
if(command == 0x26 || command == 0x66)
hold = true;
// Write commands wait for the first data byte; read commands fall
// through so the first response byte is loaded into the buffer now.
if(output == false) return 0xf;
else /*fall through and latch output byte for reading*/;
}
// Dispatch on the latched command; each handler either loads `buffer`
// with the next byte to send or stores the byte just received.
if(command == 0x06) field(status1, resetBit|hourMode);
if(command == 0x46) field(status2, int1Mode|int2Mode|testMode);
if(command == 0x26) dateTime();
if(command == 0x66) dateTime();
if(command == 0x16) alarmTime(0);
if(command == 0x56) alarmTime(1);
if(command == 0x36) field(adjust, 0xff);
if(command == 0x76) field(userByte, 0xff);
}
}
return output? dataOut | 0xe : 0xf;
}
// Transfer one single-byte register.
// Reading (output set): the register value is loaded into the shift buffer.
// Writing: only the bits selected by `mask` are replaced with buffer bits.
// Writing the reset bit of status1 (command 0x06) resets the whole chip first.
void Clock::field(uint8& src, uint8 mask) {
  const bool writing = !output;

  if(writing && command == 0x06 && (buffer & resetBit)) {
    reset();
    buffer &= ~resetBit;  // the reset bit itself is never stored
  }

  if(!writing) {
    buffer = src;
    return;
  }
  src = (src & ~mask) | (buffer & mask);
}
// Transfer the next byte of the date/time block (time[0..6] = year, month,
// day, weekday, hour, minute, second). Command 0x66 skips straight to the
// hour. In 12-hour mode the hour byte is converted on the way in and out.
void Clock::dateTime() {
  if(index >= 7) return;

  // Time-only access starts from the hour field.
  if(command == 0x66 && index < 4) index = 4;

  const bool twelveHour = (status1 & hourMode) == 0 && index == 4;

  if(!output) {
    // Write path
    time[index] = buffer;  // Should correct these properly..
    if(twelveHour && (buffer & 0x40))
      time[index] += 0x12 - 0x40;  // Convert from 12-hour mode
    index++;
    return;
  }

  // Read path
  buffer = time[index];
  index++;
  if(twelveHour && buffer >= 0x12)
    buffer += 0x40 - 0x12;  // Convert back to 12-hour + AM/PM
}
void Clock::alarmTime(unsigned no) {
if(index >= 3) return;
if(output) buffer = alarm[no].setting[index++];
else alarm[no].setting[index++] = buffer;
}
// Increment a packed-BCD value in place, wrapping from `last` back to `first`.
// Returns true when the value ends up equal to `first`, which callers treat
// as a carry into the next field.
bool Clock::incBCD(uint8& val, uint8 first, uint8 last) {
  ++val;

  // Decimal-adjust: low digit 9 -> 0 with carry, then high digit likewise.
  if((val & 0x0f) == 0x0a) val += 0x06;
  if((val & 0xf0) == 0xa0) val += 0x60;

  if(val > last) val = first;

  const bool carry = (val == first);
  return carry;  // carry out
}
// Called from the event scheduled in power(), i.e. at twice 32768 Hz.
// Advances the sub-second counter, runs the seconds tick when due, updates
// both interrupt outputs, and flags an ARM7 interrupt on a rising edge of the
// combined interrupt line.
void Clock::tick() {
// Combined interrupt state before this tick, for edge detection below.
bool previously = intr[0] | intr[1];
// counter is a uint16: it wraps to 0 once per second at this tick rate.
if(++counter == 0)
secondsTickPending = true;
// Ticks can be held for up to 500 ms while reading the time.
if(secondsTickPending && (hold == false || counter >= 2*16384)) {
lag = counter;
tickSecond();
}
// NOTE(review): int2Mode is 0x40, so this expression is 0 or 0x40 rather
// than the 0..1 the inline comment suggests; only case 0 is distinguished.
switch(status2 & int2Mode /*0..1*/) {
case 0: intr[1] = 0; break;
default: break; // Alarm 2 - checked in tickSecond
}
switch(status2 & int1Mode /*0..15*/) {
case 0: intr[0] = 0; break;
case 4: break; // Alarm 1 - checked in tickSecond
case 1: /**/; // falls through to case 5
case 5: // Selectable frequency - often used as seconds interrupt.
intr[0] = 0; // /IRQ output is the AND of the selected divisors.
// Bits 0..4 of the alarm 1 minute register select which divisors apply.
for(unsigned divisor = 0; divisor <= 4; divisor++) // 1..16 Hz
intr[0] |= (~counter >> 15-divisor) & (alarm[0].minute >> divisor) & 1;
break;
case 2: /**/; // Start-of-minute interrupts (falls through to case 6)
case 6: intr[0] |= !second && counter-lag < 2*4u; break; // latched
case 3: intr[0] = second < 0x30; break; // 30 sec duty
// NOTE(review): `lag` appears on both sides of this comparison - confirm
// this is the intended window.
case 7: intr[0] = counter-lag < lag+2*128u; break; // 128 ticks duty
default: intr[0] = !(counter & 1<<0); break; // 32KHz clock
}
// Latch the status flags; nothing in this function clears them.
if(intr[0]) status1 |= int1Flag;
if(intr[1]) status1 |= int2Flag;
bool interrupt = intr[0] | intr[1];
// Raise the ARM7 interrupt only on a rising edge, and only if enabled.
if(arm7.sio.irq && interrupt && previously == false) {
//arm7.raiseIrq(CPUCore::irqClock);
arm7.interrupt.flags |= CPUCore::irqClock;
}
}
void Clock::tickSecond() {
secondsTickPending = false;
if( incBCD(second, 0, 0x59)
&& incBCD(minute, 0, 0x59)
&& incBCD(hour, 0, 0x23))
tickDay();
// Check alarms - not sure how this is supposed to work yet.
// The datasheet implies this is checked each minute, something like:
// d:hh:mm weekly d:**:mm hourly for one day a week?
// *:hh:mm daily d:**:** every minute for one day a week?
// *:**:mm hourly? d:hh:** every minute for one hour on one day?
// *:**:** every minute? *:hh:** every minute for one hour a day
if(second == 0x00) {
bool enable[2] = {
(status2 & int1Mode) == 4, // Alarm 1
(status2 & int2Mode) != 0, // Alarm 2
};
// Triggered at start of minute; cleared by disabling alarm
for(unsigned i = 0; i < 2; i++)
if(enable[i]
&& (weekday == (alarm[i].weekday & 0x07) || alarm[i].weekday < 0x80)
&& (hour == (alarm[i].hour & 0x1f) || alarm[i].hour < 0x80)
&& (minute == (alarm[i].minute & 0x3f) || alarm[i].minute < 0x80))
intr[i] = 1;
}
}
// Advance the calendar by one day: weekday cycles 0..6, and day/month/year
// roll over in BCD using a month-length table indexed by the BCD month value.
void Clock::tickDay() {
  ++weekday;
  if(weekday == 7) weekday = 0;

  // Decode the two BCD year digits; every 4th year has a 29-day February.
  unsigned y = 10*(year>>4) + (year & 15);
  unsigned february = (y % 4 == 0) ? 0x29 : 0x28;

  // Indexed by the raw BCD month byte, so 0x10..0x12 land at 16..18.
  unsigned daysIn[32] = { 0,          // (invalid)
    0x31, february,                   // Jan..Feb
    0x31,0x30,0x31,0x30,0x31,         // Mar..Jul
    0x31,0x30, 0,0,0,0,0,0,           // Aug..Sep (+ invalid months)
    0x31,0x30,0x31                    // Oct..Dec
  };

  if(incBCD(day, 1, daysIn[month & 31])) {
    if(incBCD(month, 1, 0x12)) {
      incBCD(year, 0, 0x99);
    }
  }
}
// Resume the RTC after the emulator was not running.
//
// curSec/curUSec: current wall-clock time (seconds and microseconds).
// The time elapsed since the last freeze()/thaw() is added to the emulated
// clock, and the current time is recorded as the new reference point.
// If the wall clock went backwards, nothing is advanced.
void Clock::thaw(int64 curSec, int64 curUSec) {
  // Calculate time elapsed while the emulation wasn't running..
  // Then advance RTC to make up for it.
  int64 secs = curSec - stoppedSec;
  int64 usecs = curUSec - stoppedUSec;
  if(usecs < 0) secs -= 1, usecs += 1000000;

  // `counter` is a 16-bit sub-second counter in units of 1/65536 s: tick()
  // runs at 2*32768 Hz and a second elapses each time the counter wraps.
  // Convert the microsecond remainder to the same unit and carry a whole
  // second when the sum overflows 16 bits.
  int64 ticks = counter + usecs * 65536/1000000;
  counter = ticks;  // keeps the low 16 bits
  if(ticks >= 65536) secs += 1;

  // Whole days first (calendar only), then the remainder second by second.
  while(secs > 86400) { secs -= 86400; tickDay(); }
  while(secs > 0) { secs -= 1; tickSecond(); }

  stoppedSec = curSec;
  stoppedUSec = curUSec;
}
// Record the wall-clock time at which emulation stopped, so that thaw() can
// later work out how much time went by.
void Clock::freeze(int64 curSec, int64 curUSec) {
  stoppedUSec = curUSec;
  stoppedSec  = curSec;
}

View File

@ -1,120 +0,0 @@
// One schedulable event: an absolute timestamp, its slot in the owning
// queue's heap (0 = not queued), and the action to run.
//
// Timestamps are 32-bit and wrap around, so ordering is decided by the sign
// of the wrapped difference rather than by direct comparison.
template<class T> struct Event_of {
  uint32 time;
  uint32 node;
  T action;

  Event_of() : time(0), node(0) {}
  Event_of(T action) : time(0), node(0), action(action) {}

  // True when this event is due strictly before `rhs`.
  inline bool operator<(const Event_of& rhs) const {
    return time - rhs.time >= 0x80000000u;
  }

  // True when this event is due strictly before the absolute time `rhs`.
  inline bool operator<(uint32 rhs) const {
    return time - rhs >= 0x80000000u;
  }
};
template<class T> struct EventQueue_of {
typedef Event_of<T> Elem;
uint32 time;
EventQueue_of(uint32 capacity) : size(0), time(0) {
//items.reserve(capacity+1); // to simplify math, items[0] is unused
for(unsigned n = 0; n < capacity+1; n++)
items[n] = nullptr;
}
void reset() {
clear();
time = 0;
}
void clear() {
for(unsigned n = 1; n <= size; n++) {
items[n]->node = 0;
items[n] = nullptr;
}
size = 0;
}
bool valid(uint32 n = 1) {
return (2*n+0 > size || *(items[n]) < items[2*n+0]->time+1 && valid(2*n+0))
&& (2*n+1 > size || *(items[n]) < items[2*n+1]->time+1 && valid(2*n+1));
}
void add(uint32 dt, Elem& e) {
if(e.node) {
remove(e);
return add(dt, e);
}
uint32 old = e.time;
e.time = time + dt;
e.node = ++size;
bubble(e);
if(valid() == false) asm volatile("\n\t int $3");
}
void remove(Elem& e) {
if(e.node == 0) return;
items[e.node] = items[size];
items[e.node]->node = e.node;
items[size--] = nullptr;
if(size > 1 && e.node <= size) {
Elem& m = *(items[e.node]);
if(m < e) {
// This can happen if removing something besides the min element.
bubble(m);
if(valid() == false) asm volatile("\n\t int $3");
}
else {
while(2*m.node <= size) {
uint32 l = 2*m.node;
uint32 r = 2*m.node + (l < size);
uint32 c = *(items[r]) < *(items[l])? r : l;
if(!(*(items[c]) < m)) break;
items[m.node] = items[c];
items[m.node]->node = m.node;
m.node = c;
}
items[m.node] = &m;
if(valid() == false) asm volatile("\n\t int $3");
}
}
e.node = 0;
}
inline void step(uint32 ticks) {
ticks += time; // relative -> absolute time
time = ticks; // default case for early out
// Caution! We're running under the assumption our queue is never empty.
// Fortunately, due to audio/video events, that's true in dasShiny.
// Gives around 16% speedup in some cases.
if(!(*(items[1]) < ticks)) return; // early out, nothing to do
auto &next = items[1];
do {
Elem& e = *next;
time = e.time; // update the time so add()
remove(e); // works during the callback.
e.action();
} while(/*size && */*next < ticks);
if(time - ticks >= 0x80000000u)
time = ticks;
}
//private:
//linear_vector<Elem*> items;
Elem* items[64];
uint32 size;
void bubble(Elem& e) {
while(1 < e.node && e < *(items[e.node/2])) {
items[e.node] = items[e.node/2];
items[e.node]->node = e.node;
e.node /= 2;
}
items[e.node] = &e;
}
EventQueue_of();
};

View File

@ -1,14 +0,0 @@
// Empty destructor: the body performs no cleanup of its own.
PowerMgr::~PowerMgr() { }
// Stub: power-on currently does nothing.
void PowerMgr::power() {
}
// Stub: chip-select changes are ignored.
void PowerMgr::select(bool state) {
}
// Stub: the byte written is discarded and every read returns 0.
uint8 PowerMgr::transfer(uint8 data) {
return 0;
}

View File

@ -1,547 +0,0 @@
// Platform glue for wall-clock time. Outside Windows the system headers are
// used directly. On Windows, timeval is replaced with a wider version and
// gettimeofday/timegm/gmtime are mapped onto what the runtime provides.
#if !defined(_WIN32)
  #include <nds/nds.hpp>
  #include <sys/time.h>
#else
  // Suppress mingw's timeval which uses 32-bit longs
  #define _TIMEVAL_DEFINED
  #include <stdint.h>
  #include <sys/time.h>
  #include <nds/nds.hpp>

  #if defined(__amd64__) || defined(_M_AMD64)
    struct timeval {
      int64_t tv_sec, tv_usec;
    };
    #define timegm(tm) _mkgmtime64(tm)
    #define gmtime(tv) _gmtime64(tv)
  #else
    //Windows 32-bit run-time doesn't have 64-bit time functions
    struct timeval {
      time_t tv_sec, tv_usec;
    };
    // Local time is used for both directions here, so the pair stays
    // consistent with itself.
    #define timegm(tm) mktime(tm)
    #define gmtime(tv) localtime(tv)
  #endif

  #define gettimeofday(tv,tz) gettimeofday64(tv,tz)

  // gettimeofday() replacement built on the Win32 system clock.
  // tv: receives seconds and microseconds since the UNIX epoch.
  // tz: ignored.
  // Returns 0; the original fell off the end of this int function without
  // returning a value.
  int gettimeofday64(struct timeval *tv, struct timezone *tz) {
    FILETIME ft;
    GetSystemTimeAsFileTime(&ft);  // UTC in 100ns units

    // UNIX epoch:    Jan 1 1970
    // Windows epoch: Jan 1 1601
    //
    // 1970 - 1601 = 369 years = 11636784000 sec
    // 89 leap years => +89 days = +7689600 sec
    int64_t diff = 11644473600LL * 10000000LL;
    // Despite the original variable name `ns`, this value counts 100ns units.
    int64_t hns = ft.dwLowDateTime + ft.dwHighDateTime * (1LL<<32) - diff;

    tv->tv_sec  = hns / 10000000;
    tv->tv_usec = hns % 10000000 / 10;
    return 0;
  }
#endif
namespace NintendoDS {
// The global System instance (other code refers to it as `system`).
System system;
// Out-of-line definition of the virtual destructor; the body is empty.
SPIDevice::~SPIDevice() { }
// An unmapped page: no backing memory and no dirty tracking.
VRAMMapping::VRAMMapping()
  : dirtyBits(nullptr)
{
  size = 0;
  data = nullptr;
}
// Map one 16K page of a VRAM bank.
//
// target:    the bank providing the backing memory
// dirtyBits: start of the bank's dirty-flag array (one byte per 256 bytes of
//            VRAM), or null for no dirty tracking
// pageno:    index of the 16K page within the bank
//
// Each 16K page owns 0x4000/256 = 64 dirty flags. The original expression
// `pageno*256/0x4000` had the operands swapped and evaluated to 0 for every
// realistic page number, so all pages of a bank shared the first page's
// dirty flags.
VRAMMapping::VRAMMapping(HalfMemory& target, uint8 *dirtyBits, unsigned pageno)
  : dirtyBits(dirtyBits ? dirtyBits + pageno*(0x4000/256) : nullptr)
{
  // data is addressed in 16-bit units, hence 0x4000/2 entries per page.
  data = target.data + 0x4000/2*pageno; size = 0;
}
// A mapping only borrows its memory, so destruction just forgets the
// pointers; nothing is freed here.
VRAMMapping::~VRAMMapping() {
  dirtyBits = nullptr;
  size = 0;
  data = nullptr;
}
// Direct halfword access within the 16K page. Unmapped pages are redirected
// to the shared System::unmappedVram scratch area.
uint16& VRAMMapping::operator[](uint32 addr) {
  const uint32 halfword = (addr >> 1) & 0x1fff;

  if(data) return data[halfword];
  return system.unmappedVram[halfword];
}
// Read from the page; the address wraps within 16K. Unmapped pages read as 0.
uint32 VRAMMapping::read(uint32 addr, uint32 size) {
  if(data) {
    return HalfMemory::read(addr & 0x3fff, size);
  }
  return 0;
}
// Write to the page (address wraps within 16K) and flag the touched
// 256-byte region as dirty. Writes to unmapped pages are dropped.
void VRAMMapping::write(uint32 addr, uint32 size, uint32 word) {
  if(!data) return;

  const uint32 offset = addr & 0x3fff;
  HalfMemory::write(offset, size, word);

  if(dirtyBits) {
    dirtyBits[offset/256] = 0xff;
  }
}
// Has the 256-byte region containing addr been flagged dirty?
// Always false when the page has no dirty tracking.
bool VRAMMapping::dirty(uint32 addr) {
  if(!dirtyBits) return false;

  const uint32 region = (addr & 0x3fff)/256;
  return dirtyBits[region];
}
// Allocate every memory the system owns and clear the thread bookkeeping.
// Only the firmware image is zeroed here.
System::System() {
  // Firmware flash image, value-initialised
  firmware.size = 0x080000;
  firmware.data = new uint8[firmware.size/1]();

  // Work RAM
  ewram.size = 0x400000;
  ewram.data = new uint16[ewram.size/2];
  iwram.size = 0x010000;
  iwram.data = new uint32[iwram.size/4];
  for(unsigned n = 0; n < 2; n++) {
    swram[n].size = 0x004000;
    swram[n].data = new uint32[swram[n].size/4];
  }

  // Nine VRAM banks: four of 128K, then 64K, 16K, 16K, 32K, 16K
  static const unsigned vramBytes[9] = {
    0x020000, 0x020000, 0x020000, 0x020000,
    0x010000, 0x004000, 0x004000, 0x008000, 0x004000,
  };
  for(unsigned n = 0; n < 9; n++) {
    vram[n].size = vramBytes[n];
    vram[n].data = new uint16[vram[n].size/2];
  }

  // Wireless packet buffer
  wxram.size = 0x002000;
  wxram.data = new uint16[wxram.size/2];

  memset(firmware.data, 0, firmware.size);

  running = false;
  callerThread = nullptr;
  activeThread = nullptr;
}
// Fill npages consecutive mapping slots starting at loc with pages of `bank`.
// When more slots are requested than the bank has pages, the bank repeats.
void System::mapVram(VRAMMapping* loc, uint8 *dirtyBits, unsigned npages, HalfMemory& bank) {
  const unsigned bankPages = bank.size/0x4000;

  for(unsigned slot = 0; slot < npages; slot++) {
    loc[slot] = VRAMMapping{ bank, dirtyBits, slot % bankPages };
  }
}
// Reset every page slot on every bus to the unmapped state.
void System::clearVmap() {
  // ARM9-visible buses
  for(auto &bus : vmap.arm9) {
    for(auto &page : bus) page = VRAMMapping{};
  }

  // ARM7 bus and GPU-only texture memory
  for(auto &page : vmap.arm7)   page = VRAMMapping{};
  for(auto &page : vmap.tex)    page = VRAMMapping{};
  for(auto &page : vmap.texpal) page = VRAMMapping{};

  // Extended palettes of both PPUs
  for(unsigned which = 0; which < 2; which++) {
    for(auto &page : vmap.bgpal[which])  page = VRAMMapping{};
    for(auto &page : vmap.objpal[which]) page = VRAMMapping{};
  }
}
// Rebuild every VRAM page mapping from the bank control registers
// (vmap.regs). All mappings are cleared first and the texture cache is
// reset, then each enabled bank is mapped onto the bus its register selects.
// Later passes overwrite earlier ones where they target the same slots.
void System::updateVmap() {
// Byte offset of each bank within the "display" address space; the final
// entry marks the end of the last bank. The same offsets index vmap.dirty
// (one dirty byte per 256 bytes).
static const uint32 offsets[] = {
0x00000, 0x20000, 0x40000, 0x60000, 0x80000, 0x90000, 0x94000, 0x98000, 0xa0000
};
// Placement options for the two 16K banks handled in the third pass.
static const uint32 altOffsets[] = {
0x00000, 0x04000, 0x10000, 0x14000
};
clearVmap();
gpu.texCache.reset();
auto bank = vmap.regs;
// Pass 1: display mode for the first seven banks, at their fixed offsets.
// (The last two banks' display mode is handled in the PPU1 pass below.)
for(unsigned n = 0; n < 7; n++) {
if(!bank[n].enable) continue;
unsigned offset = offsets[n] >> 14;
unsigned npages = vram[n].size >> 14;
int sel = bank[n].bus;
// Restrict MST to valid options (some are mirrored)
if(n < 2 || n < 4 && sel > 4 || sel > 5) sel &= 3;
if(sel == VRAMConfig::display)
mapVram(&vmap.arm9[4][offset], &vmap.dirty[offsets[n]/256], npages, vram[n]);
}
// Banks 0-3, all are 128K, mapped at increments of the same size.
for(unsigned n = 0; n < 4; n++) {
if(!bank[n].enable) continue;
unsigned offset = offsets[bank[n].offset] >> 14;
unsigned npages = vram[n].size >> 14;
uint8* dirty = &vmap.dirty[offsets[n]/256];
uint8* dirtyEnd = &vmap.dirty[offsets[n+1]/256];
VRAMMapping *bus = nullptr;
int sel = bank[n].bus;
if(n < 2 || sel > 4) sel &= 3;
// Several VRAMConfig enumerators share a value; which one applies depends
// on the bank, hence the extra conditions on n.
if( sel == VRAMConfig::bg0) bus = &vmap.bg(0, offset);
if(n < 2 && sel == VRAMConfig::obj0) bus = &vmap.obj(0, offset);
if(n >= 2 && sel == VRAMConfig::arm7) bus = &vmap.arm7[offset];
if(n == 2 && sel == VRAMConfig::ppu1) bus = &vmap.bg(1, 0);
if(n == 3 && sel == VRAMConfig::ppu1) bus = &vmap.obj(1, 0);
if(sel == VRAMConfig::tex) {
bus = &vmap.tex[offset];
// Discard dirty textures, then mark the locked pages clean. Lines
// 191..213 are available for texture updates; games wishing to do so
// will perform at least one {unlock, DMA, lock} cycle in that time.
gpu.texCache.flushDirty(n);
memset(dirty, 0, dirtyEnd-dirty);
}
if(bus) mapVram(bus, dirty, npages, vram[n]);
}
// Banks 4-6 are 32K + 16K + 16K and carry over from the GBA.
// They always map into the first 128K of address space.
// NOTE(review): System::System() allocates bank 4 as 0x010000 (64K), not
// 32K - confirm which figure is intended.
for(unsigned n = 4; n < 7; n++) {
if(!bank[n].enable) continue;
uint8* dirty = &vmap.dirty[offsets[n]/256];
uint8* dirtyEnd = &vmap.dirty[offsets[n+1]/256];
// Bank 4 is always placed at page 0; the other two use altOffsets.
unsigned offset = n > 4? altOffsets[bank[n].offset] >> 14 : 0;
unsigned npages = vram[n].size >> 14;
VRAMMapping *bus = nullptr;
int sel = bank[n].bus;
if(sel > 5) sel &= 3;
if(sel == VRAMConfig::bg0) bus = &vmap.bg(0, offset);
if(sel == VRAMConfig::obj0) bus = &vmap.obj(0, offset);
if(sel == VRAMConfig::bgpal0) bus = &vmap.bgpal[0][offset];
if(sel == VRAMConfig::objpal0) bus = &vmap.objpal[0][offset];
if(sel == VRAMConfig::texpal) {
bus = &vmap.texpal[offset];
// The current implementation caches textures in ABGR format,
// so we need to check for dirty palettes, too.
gpu.texCache.flushDirty(n);
memset(dirty, 0, dirtyEnd-dirty);
}
if(bus) {
// Banks 5 and 6 (x) have offset 0 [x x ..............]
// mirroring that allows them to 1 [ x x ..128K banks..]
// map adjacent to larger banks. 2 [ x x ..............]
// Bank 4 (f) is fixed: 3 [ffff x x..............]
mapVram(bus + 0, dirty, npages, vram[n]);
if(n > 4) mapVram(bus + 2, dirty, npages, vram[n]);
}
}
// These two banks are only used with PPU1.
// Register indices 8 and 9 control them, but their memory is vram[7] and
// vram[8] (register 7 is the SWRAM control), hence the n-1 below.
for(unsigned n = 8; n < 10; n++) {
if(!bank[n].enable) continue;
uint8* dirty = &vmap.dirty[offsets[n-1]/256];
int sel = bank[n].bus & 3;
if(sel == VRAMConfig::bg1) {
// These map in an odd sequence [hh hh ]
// where the Is mirror in pairs: [ ii ii]
mapVram(&vmap.bg(1, n==9? 2 : 0), dirty, 2, vram[n-1]);
mapVram(&vmap.bg(1, n==9? 6 : 4), dirty, 2, vram[n-1]);
}
if(n == 9 && sel == VRAMConfig::obj1) mapVram(&vmap.obj(1, 0), dirty, 8, vram[n-1]);
if(n == 8 && sel == VRAMConfig::bgpal1) mapVram(&vmap.bgpal[1][0], dirty, 8, vram[n-1]);
if(n == 9 && sel == VRAMConfig::objpal1) mapVram(&vmap.objpal[1][0], dirty, 8, vram[n-1]);
// Display mode: pages 38 and 40 correspond to offsets[7] and offsets[8].
if(n == 8 && sel == VRAMConfig::display) mapVram(&vmap.arm9[4][38], dirty, 2, vram[n-1]);
if(n == 9 && sel == VRAMConfig::display) mapVram(&vmap.arm9[4][40], dirty, 1, vram[n-1]);
}
// Still unimplemented: bank[7] for SWRAM
// (currently both ARM7 and ARM9 have access)
// Apply mirroring to address spaces smaller than 512K
// Only needed in cases where it's possible to read out of bounds.
for(unsigned n = 0; n < 8; n++) {
// OBJ0 - 256K
vmap.obj(0, n+16) = vmap.obj(0, n+0);
vmap.obj(0, n+24) = vmap.obj(0, n+8);
// BG1, OBJ1 - 128K
vmap.bg(1, n+ 8) = vmap.bg(1, n);
vmap.bg(1, n+16) = vmap.bg(1, n);
vmap.bg(1, n+24) = vmap.bg(1, n);
vmap.obj(1, n+ 8) = vmap.obj(1, n);
vmap.obj(1, n+16) = vmap.obj(1, n);
vmap.obj(1, n+24) = vmap.obj(1, n);
}
// Finally mirror the lower 32 pages of the four BG/OBJ buses into their
// upper 32 pages.
for(unsigned i = 0; i < 4; i++)
for(unsigned n = 0; n < 32; n++)
vmap.arm9[i][n+32] = vmap.arm9[i][n+0];
}
// Read the VRAM bank control registers as 32-bit words.
// index 0 packs registers 0-3, index 1 packs registers 4-7, and index 2
// packs registers 8-9 into the low 16 bits. Any other index reads as 0
// (the original fell off the end without returning a value).
uint32 System::regVmap(unsigned index) {
  switch(index) {
  case 0: return vmap.regs[3]<<24 | vmap.regs[2]<<16 | vmap.regs[1]<<8 | vmap.regs[0]<<0;
  case 1: return vmap.regs[7]<<24 | vmap.regs[6]<<16 | vmap.regs[5]<<8 | vmap.regs[4]<<0;
  case 2: return vmap.regs[9]<<8 | vmap.regs[8]<<0;
  }
  return 0;
}
// Write the VRAM bank control registers as 32-bit words and rebuild the
// mappings.
// index: word number (0..2), laid out as in the read overload
// data:  value written
// mask:  which byte lanes of data are being written
// Only ten registers exist, so the upper two lanes of word 2 are ignored and
// indices beyond 2 are rejected (they previously indexed past vmap.regs).
void System::regVmap(unsigned index, uint32 data, uint32 mask) {
  if(index > 2) return;

  if(mask & 0x000000ff) vmap.regs[4*index + 0] = data>>0;
  if(mask & 0x0000ff00) vmap.regs[4*index + 1] = data>>8;
  if(mask & 0x00ff0000 && index < 2) vmap.regs[4*index + 2] = data>>16;
  if(mask & 0xff000000 && index < 2) vmap.regs[4*index + 3] = data>>24;

  updateVmap();
}
// Replace the ARM7 BIOS image with the contents of `stream`, read as
// little-endian 32-bit words.
void System::loadArm7Bios(const stream& stream) {
  auto &bios = arm7.bios;

  delete[] bios.data;
  bios.size = stream.size();
  bios.data = new uint32[(bios.size + 3)/4];  // round up to whole words

  unsigned offset = 0;
  while(offset < bios.size) {
    bios.write(offset, Word, stream.readl(4));
    offset += 4;
  }
}
// Replace the ARM9 BIOS image with the contents of `stream`, read as
// little-endian 32-bit words.
void System::loadArm9Bios(const stream& stream) {
  auto &bios = arm9.bios;

  delete[] bios.data;
  bios.size = stream.size();
  bios.data = new uint32[(bios.size + 3)/4];  // round up to whole words

  unsigned offset = 0;
  while(offset < bios.size) {
    bios.write(offset, Word, stream.readl(4));
    offset += 4;
  }
}
// Replace the firmware image with the contents of `stream`, byte by byte.
void System::loadFirmware(const stream& stream) {
  delete[] firmware.data;
  firmware.size = stream.size();
  firmware.data = new uint8[firmware.size];

  unsigned offset = 0;
  while(offset < firmware.size) {
    firmware.write(offset, Byte, stream.read());
    offset++;
  }
}
// Write the whole firmware image to `stream`, one byte at a time.
void System::saveFirmware(const stream& stream) {
  unsigned offset = 0;
  while(offset < firmware.size) {
    stream.write(firmware.read(offset, Byte));
    offset++;
  }
}
// Restore the real-time clock from an XML document in `stream`.
// The clock is reset first and stamped with the current wall-clock time, so
// an empty stream or a document without an <rtc> node leaves a freshly reset
// clock. Otherwise the <settings>, <clock>, <alarm1>/<alarm2> and <saved>
// nodes are applied if present. With a valid <saved> timestamp, the clock is
// advanced by the wall-clock time elapsed since then.
void System::loadRTC(const stream& stream) {
struct timeval cur = {};
gettimeofday(&cur, 0);
clock.reset();
clock.stoppedSec = cur.tv_sec;
clock.stoppedUSec = cur.tv_usec;
if(!stream.size()) return;
// Copy the stream into a NUL-terminated buffer for the XML parser.
uint8* xml = new uint8[stream.size() + 1];
stream.read(xml, stream.size());
xml[stream.size()] = 0;
XML::Document document((const char*)xml);
delete[] xml;
if(document["rtc"].exists() == false)
return;
auto &rtc = document["rtc"];
//string model = rtc["model"].data;
if(rtc["settings"].exists()) {
clock.status1 = hex(rtc["settings"]["status"].data);
clock.status2 = hex(rtc["settings"]["mode"].data);
clock.adjust = hex(rtc["settings"]["adjust"].data);
clock.userByte = hex(rtc["settings"]["scratch"].data);
}
// Emulated date and time. The fields are parsed with hex() because the
// clock keeps them in BCD.
if(rtc["clock"].exists()) {
lstring date = rtc["clock"]["date"].data.split("/");
lstring time = rtc["clock"]["time"].data.split(":");
string wday = rtc["clock"]["day"].data;
if(date.size() >= 3 && time.size() >= 3) {
// The seconds field may carry a ".xxxx" suffix holding the sub-second
// counter.
string sec = time[2];
string frac = "0";
if(sec.position(".")) {
frac = sec.split(".")[1];
sec = sec.split(".")[0];
}
clock.weekday = hex(wday) & 0x07;
clock.year = hex(date[0]) & 0xff;
clock.month = hex(date[1]) & 0x1f;
clock.day = hex(date[2]) & 0x3f;
clock.hour = hex(time[0]) & 0x3f;
clock.minute = hex(time[1]) & 0x7f;
clock.second = hex(sec) & 0x7f;
clock.counter = hex(frac);
}
}
// Alarm nodes are named "alarm1" and "alarm2".
for(unsigned i = 0; i < 2; i++) {
string alarm = {"alarm", i+1};
if(rtc[alarm].exists() == false) continue;
string wday = rtc[alarm]["day"].data;
lstring time = rtc[alarm]["time"].data.split(":");
lstring mode = rtc[alarm]["mode"].data.split(",");
// setting[0..2] alias weekday, hour, minute.
clock.alarm[i].setting[0] = hex(wday) & 0x07;
if(time.size() >= 2) {
clock.alarm[i].setting[1] = hex(time[0]) & 0x7f;
clock.alarm[i].setting[2] = hex(time[1]) & 0x7f;
}
// Each listed mode sets the top bit of the matching field; tickSecond()
// only compares fields that have this bit set.
for(auto &select : mode) {
if(select.iequals("day")) clock.alarm[i].weekday |= 0x80;
if(select.iequals("hour")) clock.alarm[i].hour |= 0x80;
if(select.iequals("minute")) clock.alarm[i].minute |= 0x80;
}
}
// Wall-clock time at which the file was saved (decimal fields).
if(rtc["saved"].exists()) {
lstring savedOn = rtc["saved"]["on"].data.split("/");
lstring savedAt = rtc["saved"]["at"].data.split(":");
string savedDst = rtc["saved"]["dst"].data;
if(savedOn.size() >= 3 && savedAt.size() >= 3) {
struct tm last = {};
string sec = savedAt[2];
string usec = "0";
if(sec.position(".")) {
usec = sec.split(".")[1];
sec = sec.split(".")[0];
}
last.tm_isdst = savedDst != ""? integer(savedDst) : -1;
last.tm_yday = -1;
last.tm_year = decimal(savedOn[0]) - 1900;
last.tm_mon = decimal(savedOn[1]) - 1;
last.tm_mday = decimal(savedOn[2]);
last.tm_wday = -1;
last.tm_hour = decimal(savedAt[0]);
last.tm_min = decimal(savedAt[1]);
last.tm_sec = decimal(sec);
struct timeval now = {};
gettimeofday(&now, nullptr);
// Pretend the clock stopped at the saved time, then catch up to now.
clock.freeze(timegm(&last), decimal(usec));
clock.thaw(now.tv_sec, now.tv_usec);
}
}
}
// Serialise the real-time clock to `stream` as an XML document in the format
// loadRTC() reads. The clock is first brought up to the current wall-clock
// time, which is stored alongside it so that the next load can account for
// the time spent switched off.
void System::saveRTC(const stream& stream) {
  struct timeval tv = {};
  gettimeofday(&tv, nullptr);

  clock.thaw(tv.tv_sec, tv.tv_usec);
  clock.freeze(tv.tv_sec, tv.tv_usec);

  // Wall-clock timestamp of the save, in decimal.
  struct tm now = *gmtime(&tv.tv_sec);
  string saveDate = { decimal(1900 + now.tm_year), "/", decimal<2,'0'>(now.tm_mon+1), "/", decimal<2,'0'>(now.tm_mday) };
  string saveTime = { decimal<2,'0'>(now.tm_hour), ":", decimal<2,'0'>(now.tm_min), ":", decimal<2,'0'>(now.tm_sec), ".", decimal<6,'0'>(tv.tv_usec) };

  // Emulated date and time; BCD registers are printed as hex digits.
  string dateStr = { hex<4>(0x2000 + clock.year), "/", hex<2>(clock.month), "/", hex<2>(clock.day) };
  string timeStr = { hex<2>(clock.hour), ":", hex<2>(clock.minute), ":", hex<2>(clock.second), ".", hex<4>(clock.counter) };

  string alarmTime[2], alarmDay[2], alarmMode[2];
  for(unsigned i = 0; i < 2; i++) {
    // One entry per enabled comparison, joined with commas. The original
    // wrote to conditions[i], indexing the list by alarm number instead of
    // appending to it, which produced strings loadRTC() could not split
    // back into "day", "hour" and "minute".
    lstring conditions;
    if(clock.alarm[i].weekday & 0x80) conditions.append("day");
    if(clock.alarm[i].hour    & 0x80) conditions.append("hour");
    if(clock.alarm[i].minute  & 0x80) conditions.append("minute");

    alarmMode[i] = conditions.concatenate(",");
    alarmDay[i]  = hex<1>(clock.alarm[i].weekday & 7);
    alarmTime[i] = { hex<2>(clock.alarm[i].hour & 0x3f),
                ":", hex<2>(clock.alarm[i].minute & 0x7f) };
  }

  string xml{
    "<?xml version='1.0' encoding='UTF-8'?>\n",
    "<rtc>\n",
    " <saved on='", saveDate, "'",
    " at='", saveTime, "'",
    " dst='", now.tm_isdst, "'/>\n",
    "\n",
    " <settings status='", hex<2>(clock.status1), "'",
    " mode='", hex<2>(clock.status2), "'",
    " scratch='", hex<2>(clock.userByte), "'",
    " adjust='", hex<2>(clock.adjust), "'/>\n",
    "\n",
    " <clock date='", dateStr, "'",
    " day='", hex<1>(clock.weekday), "'",
    " time='", timeStr, "'/>\n",
    "\n",
    " <alarm1 time='", alarmTime[0], "' day='", alarmDay[0], "' mode='", alarmMode[0], "'/>\n",
    " <alarm2 time='", alarmTime[1], "' day='", alarmDay[1], "' mode='", alarmMode[1], "'/>\n",
    "</rtc>"
  };
  stream.write((uint8*)(const char*)xml, xml.length());
}
// Power-on reset of the whole system: clear RAM and VRAM, drop all VRAM
// mappings, wire the two CPUs to each other, power every component in turn,
// and set up the cooperative threads.
void System::power() {
// Clear memory
memset(ewram.data, 0, ewram.size);
memset(iwram.data, 0, iwram.size);
memset(swram[0].data, 0, swram[0].size);
memset(swram[1].data, 0, swram[1].size);
memset(unmappedVram, 0, sizeof unmappedVram);
for(unsigned n = 0; n < 9; n++)
memset(vram[n].data, 0, vram[n].size);
// NOTE(review): wxram and the firmware image are not cleared here.
// Reset VRAM mappings
for(auto &r : vmap.regs) r = 0x00;
clearVmap();
// Start with everything flagged dirty.
memset(vmap.dirty, 0xff, sizeof vmap.dirty);
// Tell each core which CPU it is and give it a pointer to the other one.
arm9.config.arm9 = true;
arm9.config.arm7 = false;
arm7.config.arm9 = false;
arm7.config.arm7 = true;
arm9.other = &arm7;
arm7.other = &arm9;
ppu[0].which = 0;
ppu[1].which = 1;
// Component power-on. clock.power() schedules its tick on the ARM7 event
// queue, so it runs after arm7.power().
arm7.power();
arm9.power();
clock.power();
powerMgr.power();
firmware.power();
touchscreen.power();
slot1.power();
video.power();
gpu.power();
ppu[0].power();
ppu[1].power();
apu.power();
wifi.power();
callerThread = co_active();
activeThread = arm7.thread; // Required since right now ARM7 loads the binaries..
}
void System::run() {
callerThread = co_active();
co_switch(activeThread);
}
// Called from within the emulation when a frame is complete: hand the
// picture to the frontend, then yield back to whoever called run().
void System::frame() {
  // Both 256x192 screens stacked vertically, 4 bytes per pixel.
  const int pitch  = 256*4;
  const int width  = 256;
  const int height = 384;
  interface->videoRefresh(video.output, pitch, width, height);

  // Remember where to resume on the next run().
  activeThread = co_active();
  co_switch(callerThread);
}
#include "clock.cpp"
#include "powermgr.cpp"
#include "touchscreen.cpp"
}

View File

@ -1,231 +0,0 @@
#include "eventqueue.hpp"
// Scheduler types used throughout the core: events carry a void() callback.
typedef function<void()> Action;
typedef EventQueue_of<Action> EventQueue;
typedef EventQueue_of<Action>::Elem Event;
// Logical inputs. Enumerators are numbered sequentially from 0, so the order
// below is significant: A..L are 0-9, X..Lid are 10-17, PenX..PenZ2 are 18-21.
enum class Input : unsigned {
// REG_KEYINPUT (10 bits)
A, B, Select, Start, Right, Left, Up, Down, R, L,
// REG_AUXINPUT (8 bits)
// - C, Z, W seem present, but unconnected
// - D is supposedly on debug units
X, Y, C, D, Z, W, Pen, Lid,
// Touchscreen
PenX, PenY, PenZ1, PenZ2,
};
// Abstract interface for anything attached to an SPI bus.
struct SPIDevice {
virtual ~SPIDevice();
// Power-on reset.
virtual void power() = 0;
// Chip-select line changed to `state`.
virtual void select(bool state) = 0;
// Exchange one byte: `data` is written to the device, the return value is
// the byte read back.
virtual uint8 transfer(uint8 data) = 0;
};
#include "../memory/eeprom.hpp"
#include "../memory/fram.hpp"
#include "../memory/flash.hpp"
// Power management chip on the SPI bus. It has no state of its own.
struct PowerMgr : SPIDevice {
~PowerMgr();
void power();
void select(bool state);
uint8 transfer(uint8 data);
};
// Touchscreen controller / ADC on the SPI bus.
// A command byte with bit 7 set starts a measurement; transfer() decodes it
// as input = bits 6-4, bitDepth = bit 3, refMode = bit 2, powerMode = bits 1-0.
struct Touchscreen : SPIDevice {
~Touchscreen();
void power();
void select(bool state);
uint8 transfer(uint8 data);
// Whether the pen is currently considered to be touching the screen.
bool penDown();
uint1 bitDepth; enum { read12, read8 };
uint1 refMode; enum { diff, single };
uint2 powerMode; enum { powerAuto, powerADC, powerRef, powerFull };
// ADC input channel selected by the last command.
uint3 input; enum {
temp0 = 0, temp1 = 7,
vbatt = 2, aux = 6,
ypos = 1, xpos = 5,
pressure0 = 3, pressure1 = 4,
};
uint32 adc; // shift register holding the response being clocked out
uint12 last; // most recent measurement
};
// Real-time clock chip: a serial-interface calendar clock with two alarms
// and an interrupt output.
struct Clock {
Clock();
~Clock();
void reset(); // restore register defaults
void power(); // clear interface state and schedule the periodic tick
uint4 io(uint4 pins); // bit-banged serial port (data, CLK, CS)
void dateTime(); // transfer next date/time byte
void alarmTime(unsigned no); // transfer next byte of alarm `no`
void field(uint8& src, uint8 mask); // transfer a single-byte register
void tick(); // sub-second tick, updates interrupts
void tickSecond();
void tickDay();
// Increment a BCD value, wrapping last -> first; returns true on wrap.
bool incBCD(uint8& val, uint8 first, uint8 last);
// Record / make up for wall-clock time while emulation is not running.
void freeze(int64 curSec, int64 curUSec);
void thaw(int64 curSec, int64 curUSec);
int64 stoppedSec, stoppedUSec; // wall-clock time of the last freeze/thaw
uint1 lastCs, lastClk; // previous pin levels, for edge detection
uint8 buffer, dataOut; // shift register and the byte being sent
uint3 nbits; // bit counter; wraps to 0 every 8 bits
uint8 command, index; // active command and byte position within it
bool output; // true while the chip is sending data
uint8 status1; enum {
resetBit = 0x01, hourMode = 0x02,
int1Flag = 0x10, int2Flag = 0x20,
powerLow = 0x40, powerLost = 0x80
};
uint8 status2; enum {
int1Mode = 0x0f,
int2Mode = 0x40, testMode = 0x80
};
uint8 adjust; // clock rate fine adjustment
uint8 userByte;
Event event; // periodic tick scheduled on the ARM7 event queue
uint16 counter; // 32KHz subsecond counter
uint1 intr[2]; // Interrupts (both go to IRQ pin)
uint1 hold; // Delays seconds tick while reading the time
uint16 lag; // How long the seconds tick was delayed
uint1 secondsTickPending;
// Date/time registers in BCD, also addressable as time[0..6] in this order.
union {
struct { uint8 year, month, day, weekday, hour, minute, second; };
struct { uint8 time[7]; };
};
// Alarm registers, also addressable as setting[0..2]. The top bit of each
// field enables its comparison in tickSecond().
union {
struct { uint8 weekday, hour, minute; };
struct { uint8 setting[3]; };
} alarm[2];
};
// One 16K page slot on a VRAM bus. It either points into a VRAM bank or is
// unmapped (data is null), and optionally tracks dirty 256-byte regions.
struct VRAMMapping : HalfMemory {
VRAMMapping(); // unmapped
VRAMMapping(HalfMemory& target, uint8* dirtyBits, unsigned pageno);
~VRAMMapping();
uint16& operator[](uint32 addr); // unmapped pages alias a scratch area
uint32 read(uint32 addr, uint32 size); // unmapped pages read as 0
void write(uint32 addr, uint32 size, uint32 word); // also marks dirty
bool dirty(uint32 addr);
// Declared here; its definition is not among the VRAMMapping methods
// defined next to System.
void clearDirty(uint32 addr, uint32 size);
uint8 *dirtyBits; // one flag byte per 256 bytes of the page, or null
};
// Top-level container for the console: shared memories, on-board SPI
// devices, the VRAM mapping tables, and the cooperative-thread bookkeeping
// used to enter and leave the emulation.
struct System {
cothread_t callerThread; // thread that called run(); frame() returns to it
cothread_t activeThread; // emulation thread to resume on the next run()
bool running;
Flash firmware;
Touchscreen touchscreen;
PowerMgr powerMgr;
Clock clock;
SDRAM ewram; // 4MB shared external WRAM
SRAM iwram; // 64KB 32-bit work RAM, private to ARM7
SRAM swram[2]; // 16KB x 2 banks switchable 32-bit WRAM
HalfMemory vram[9]; // 656KB in 9 banks of 16-bit VRAM
HalfMemory wxram; // 8KB wireless packet buffer
uint16 unmappedVram[0x2000]; // target for accesses to unmapped VRAM pages
// One VRAM bank control register.
// Byte layout: bit 7 = enable, bits 4-3 = offset, bits 2-0 = bus.
struct VRAMConfig {
uint1 enable;
uint2 offset;
uint3 bus; // aka. MST or "Magic Selector Thing"
// Meanings of `bus`. Values repeat because the meaning depends on the
// bank; the comments list the banks each meaning applies to.
enum {
display=0, // all banks
bg0=1, // A,B,C,D,E,F,G
obj0=2, // A,B, E,F,G
arm7=2, // C,D
tex=3, // A,B,C,D
texpal=3, // E,F,G
bgpal0=4, // E,F,G
objpal0=5, // E?,F,G
ppu1=4, // C,D
bg1=1, // H,I
obj1=2, // I
bgpal1=2, // H
objpal1=3, // I
};
// Pack the fields into the register byte.
operator uint8() {
return enable<<7 | offset<<3 | bus<<0;
}
// Unpack a register byte; each field keeps only its own bit width.
VRAMConfig& operator=(uint8 data) {
enable = data>>7;
offset = data>>3;
bus = data>>0;
return *this;
}
};
struct {
// Mappings for each graphics bus that can have VRAM banked onto it.
// Each page is 16K, the finest granularity.
VRAMMapping arm9[8][64]; // 8x1M arm9 map - 0x060,062,064,066,068,..
VRAMMapping arm7[16]; // 256K arm7 map - 0x060..
// Locked memory can be used by the GPU, but isn't mapped in CPU space.
VRAMMapping tex[32]; // 512K locked texture memory
VRAMMapping texpal[8]; // 128K locked texture palettes
VRAMMapping bgpal[2][8]; // 32K locked BG palettes
VRAMMapping objpal[2][8]; // 8K locked OBJ palettes
// Accessors into arm9[]: rows 0-1 are BG for PPU 0/1, rows 2-3 are OBJ for
// PPU 0/1, and row 4 is the display bus.
VRAMMapping &bg(int w, int i) { return arm9[0+w][i]; }
VRAMMapping &obj(int w, int i) { return arm9[2+w][i]; }
VRAMMapping &display(int i) { return arm9[4][i&31]; }
VRAMConfig regs[10]; // A,B,C,D, E,F,G,SWRAM, H,I
// One dirty flag per 256 bytes across all 656KB of VRAM.
uint8 dirty[(512 + 96 + 48)*1024 / 256];
} vmap;
void clearVmap(); // unmap every page
void updateVmap(); // rebuild all mappings from vmap.regs
void mapVram(VRAMMapping* loc, uint8 *dirtyBits, unsigned npages, HalfMemory& bank);
// Register access as 32-bit words (index 0..2).
uint32 regVmap(unsigned index);
void regVmap(unsigned index, uint32 data, uint32 mask);
void loadArm7Bios(const stream&);
void loadArm9Bios(const stream&);
void loadFirmware(const stream&);
void loadRTC(const stream&);
void saveFirmware(const stream&);
void saveRTC(const stream&);
void power();
void run(); // switch into the emulation
void frame(); // present a frame and switch back to the caller of run()
System();
};
// The single global System instance.
extern System system;

View File

@ -1,112 +0,0 @@
// Empty destructor: the body performs no cleanup of its own.
Touchscreen::~Touchscreen() { }
// Power-on: clear the last command's settings and any measurement state.
void Touchscreen::power() {
  // Command fields
  powerMode = 0;
  refMode   = 0;
  bitDepth  = 0;
  input     = 0;

  // Measurement state
  last = 0;
  adc  = 0;
}
// Chip-select changed: when called with a false state, any response still
// in the shift register is discarded.
void Touchscreen::select(bool state) {
  if(!state) {
    adc = 0;
  }
}
// Report whether the pen is touching the bottom screen.
// In power mode 3 this always reports true without polling the frontend.
bool Touchscreen::penDown() {
  if(powerMode == 3) return true;  // differs between NDS Lite and original?

  const signed none = -0x8000;  // value that means "no reading"

  // Poll all three axes up front, in this order.
  signed px       = interface->inputPoll(ID::Port::Touchpad, 0, ID::Touchpad::X);
  signed py       = interface->inputPoll(ID::Port::Touchpad, 0, ID::Touchpad::Y);
  signed pressure = interface->inputPoll(ID::Port::Touchpad, 0, ID::Touchpad::Pressure);
  signed digital  = 0;//interface->inputPoll(ID::Port::Touchpad, 0, ID::Touchpad::PressureD);

  // A negative Y counts as no touch, the same as a missing coordinate.
  if(px == none || py < 0) {
    return false;  // no touch input
  }

  if(pressure == none) {               // no analog pressure
    pressure = digital? 0 : -0x7fff;   // use digital
  }
  return pressure > -0x7ff0;
}
// Exchanges one byte with the touchscreen controller.
//
// A byte with bit 7 set is a command: it latches the channel (input), bit
// depth, reference mode and power mode, samples the selected channel into
// `last`, and ORs that into the `adc` shift register. Every call, command or
// not, then shifts `adc` left by 8 and returns adc >> 21 truncated to uint8.
uint8 Touchscreen::transfer(uint8 data) {
if(data & 0x80) {
// Start a new measurement
// Each field receives the command byte shifted so its own bits land at bit 0.
// Presumably the members are narrow bit-field types that drop the higher
// bits; their declarations are not visible here - TODO confirm.
input = data>>4;
bitDepth = data>>3;
refMode = data>>2;
powerMode = data>>0;
signed NONE = -0x8000;
signed x = interface->inputPoll(ID::Port::Touchpad, 0, ID::Touchpad::X);
signed y = interface->inputPoll(ID::Port::Touchpad, 0, ID::Touchpad::Y);
signed p = interface->inputPoll(ID::Port::Touchpad, 0, ID::Touchpad::Pressure);
signed d = 0;//interface->inputPoll(ID::Port::Touchpad, 0, ID::Touchpad::PressureD);
if(p==NONE) p = !d? -0x7fff : -0x3fff; // no analog? use digital pen
if(x==NONE || y==NONE) p = NONE; // check if pen offscreen
if(y < 0) p = NONE; // restrict to bottom screen
// Rebias x and p by +0x7fff and double y. After this p == NONE has become
// -1, so the p > 0 test below skips the position/pressure channels.
x += 0x7fff; y *= 2;
p += 0x7fff;
if(p > 0) {
signed z1 = 0x000, z2 = 0xfff, r = 0xffff - p;
if(x > 0) { // bleh, div-by-zero, just use medium pressure for now
z1 = 0xc00; //0xffffff / (0x1000 + 0x1000/x + 0x1000*(r - y)/x);
z2 = 0x400; //z1*(0x1000 + 0x1000*r/x) / 0x1000;
}
// Reduce the coordinates by a factor of 16 before they are reported.
x /= 16;
y /= 16;
// Without any pressure, there's no connection, and the input floats.
// The measurements seem to return previous values to some extent.
if(input == xpos) last = x; // y+ (along x-/x+ axis)
if(input == ypos) last = y; // x+ (along y-/y+ axis)
if(input == pressure0) last = z1; // x+ (along x-/y+ diagonal?)
if(input == pressure1) last = z2; // y- (along x-/y+ diagonal?)
}
// NOTE(review): if `signed` is 32 bits, k * 0x1000 (1220608 * 4096) and
// t1 * 0xfff (2457600 * 4095) both exceed INT_MAX, which is signed overflow.
// The scale of vref also looks different from that of t1/t2. Confirm the
// intended fixed-point units before relying on the temperature channels.
signed vref = 0x34cc; // reference = 3.3V
signed room = (273 + 25)*0x1000; // 25*C in kelvin
signed k = room;
signed t1 = 0x1000*600 - (k-room)*21/10; // t1 = 600mV @ room - 2.1mV/*K + error
signed t2 = t1 + k * 0x1000/0x292b; // t2 = t1 + k/2.573
if(input == temp0) last = t1 * 0xfff/vref; // temperature
if(input == temp1) last = t2 * 0xfff/vref; // temp (w/bias)
if(input == vbatt) last = 0x000; // grounded
if(input == aux) {
// This was split into two inputs - reference is always "single" mode
if(refMode==single) last = 0x800; // microphone
if(refMode==diff) last = 0xfff; // Vcc (NDS Lite)
}
// Merge the new sample into whatever is still left in the shift register.
adc |= last;
// 8-bit isn't any faster since you have to read two bytes over the serial
// interface either way. Bumping the clock to 4MHz may not provide enough
// time for the ADC to settle, producing less accurate results.
if(bitDepth == read8)
adc &= ~0xf;
}
// Response is 1 bit delay + 8 or 12 bits data.
// This causes some misalignment:
// -------- 0hhhhhhhlllll - after command
// 0hhhhhhh lllll000----- - after first read
// lllll000 [0hhhhhhhlllll] - after second read [+ command]
// [0hhhhhhh lllll000-----] - ...
// [lllll000 0hhhhhhhlllll] -
// response next data
//
// It's perfectly legit (and common) to send a new command while reading the
// low byte. By doing this, you receive a stream of readings with no gaps or
// command overhead in between.
// Comma expression: shift the register first, then return bits 21 and up
// of the shifted value (truncated to the uint8 return type).
return adc <<= 8, adc >> 21;
}

View File

@ -1,180 +0,0 @@
#include <nds/nds.hpp>
namespace NintendoDS {
Video video;
// Processes one scanline.
//
// Always steps the GPU and both PPUs. For the first 192 lines (tested via the
// member `line`, while `y` is used for all indexing) it additionally:
//  1. expands a framebuffer line from VRAM into frameBufData when a screen
//     displays it or the capture unit reads it,
//  2. blends source A and source B and writes the result back to VRAM when
//     `write` is set,
//  3. copies each screen's selected source into `output`,
//  4. applies the per-screen brightness fade.
void Video::scanline(unsigned y) {
gpu.scanline();
ppu[0].scanline();
ppu[1].scanline();
if(line < 192) {
bool display = source[0] == srcFrameBuffer;
bool read = write && sourceB == srcFrameBuffer;
// Line indices address 64-line units; the display path masks frameBuffer
// with 0xc, the capture-read path uses all four bits.
unsigned srcline = y + 64*(frameBuffer & (display? 0xc : 0xf));
unsigned destline = y + 64*targetBuffer;
// vmap.display() is indexed in 32-line pages, each line 512 uint16 entries apart.
uint16 *src = &system.vmap.display(srcline / 32)[512*(srcline % 32)];
uint16 *dest = &system.vmap.display(destline / 32)[512*(destline % 32)];
if(read || display) {
for(unsigned x = 0; x < 256; x++) {
uint16 c = src[x];
// Unpack the 5-bit channels and repack them one bit higher into 6-bit slots.
uint5 r = c>>0, g = c>>5, b = c>>10;
frameBufData[x] = b<<13 | g<<7 | r<<1;
}
}
if(write) {
uint32 *aData = sourceA == srcGPU? &gpu.output[256*line] : ppu[0].output;
uint32 *bData = sourceB == srcFifo? fifoData : frameBufData;
// Blend factors are capped at 16; targetSource 0 / 1 forces pure A / pure B.
unsigned af = min((unsigned)blendAf, 16u);
unsigned bf = min((unsigned)blendBf, 16u);
if(targetSource == 0) af = 16, bf = 0;
if(targetSource == 1) af = 0, bf = 16;
// Need to see what happens with alpha bit..
for(unsigned x = 0; x < 256; x++) {
uint32 a = aData[x], b = bData[x];
uint6 ar = a>>0, ag = a>>6, ab = a>>12;
uint6 br = b>>0, bg = b>>6, bb = b>>12;
// NOTE(review): with af == bf == 16 and bright inputs the result can exceed
// 31 and spill into the neighbouring 5-bit field below - confirm whether a
// clamp is expected here.
ar = (ar*af + br*bf + 8) / 32u;
ag = (ag*af + bg*bf + 8) / 32u;
ab = (ab*af + bb*bf + 8) / 32u;
dest[x] = 0x8000 | ab<<10 | ag<<5 | ar<<0;
}
}
// From here on the local array `line` shadows the member `line`.
// ppu0Screen selects which half of `output` (offset 0 or 192 lines) each
// entry points at.
uint32* line[2] = { &output[256*(y + 192*(ppu0Screen == 0))],
&output[256*(y + 192*(ppu0Screen == 1))] };
if(source[0] == srcNone) memcpy(line[0], blankData, 256*4);
if(source[0] == srcPPU) memcpy(line[0], ppu[0].output, 256*4);
if(source[0] == srcFrameBuffer) memcpy(line[0], frameBufData, 256*4);
if(source[0] == srcFifo) memcpy(line[0], fifoData, 256*4);
// Screen 1 shows blankData for both the framebuffer and FIFO sources.
if(source[1] == srcNone) memcpy(line[1], blankData, 256*4);
if(source[1] == srcPPU) memcpy(line[1], ppu[1].output, 256*4);
if(source[1] == srcFrameBuffer) memcpy(line[1], blankData, 256*4);
if(source[1] == srcFifo) memcpy(line[1], blankData, 256*4);
for(unsigned s = 0; s < 2; s++) {
if(!fade[s] || !blendYf[s]) continue;
// yf is the fade weight out of 32. The local `y` below shadows the
// parameter and holds a per-channel additive term: 16 (octal 020) in each
// 12-bit lane for rounding, plus 63*yf per lane when lightening.
uint64 yf = min(32, 2*blendYf[s]);
uint64 y = 02000200020;
if(fade[s] == lighten)
y += 07700770077*yf;
for(unsigned x = 0; x < 256; x++) {
// Multiplying by (1 + 2^18) duplicates the 18-bit pixel; the mask then
// keeps one 6-bit channel per 12-bit lane so each has 6 bits of headroom.
uint64 bgr = (line[s][x] & 0777777)*01000001ull & 07700770077ull;
bgr = (bgr*(32-yf) + y)/32;
// A set bit just above a lane means that channel overflowed: saturate it.
if(bgr & 010000000000) bgr |= 007700000000;
if(bgr & 000001000000) bgr |= 000000770000;
if(bgr & 000000000100) bgr |= 000000000077;
bgr &= 007700770077;
// Fold the upper lane back down to rebuild the packed 18-bit pixel.
line[s][x] = (bgr | bgr>>18) & 0777777;
}
}
}
}
// Reads back the capture control register, assembled from the fields that
// regCapture(data, mask) stores:
//   bits  0-4   blendAf
//   bits  8-12  blendBf
//   bits 16-17  targetBuffer bits 2-3
//   bits 18-19  targetBuffer bits 0-1
//   bits 20-21  targetSize
//   bit  24     set when sourceA is the GPU
//   bit  25     set when sourceB is the FIFO
//   bits 26-27  frameBuffer bits 0-1
//   bits 29-30  targetSource
//   bit  31     write (capture enabled)
uint32 Video::regCapture() {
  return blendAf<<0 | blendBf<<8
       | (targetBuffer<<14 & 3<<16)
       | (targetBuffer<<18 & 3<<18)
       | targetSize<<20                 // stored by the write side, so report it
       | (frameBuffer<<26 & 3<<26)
       | (sourceA == srcGPU?  1<<24 : 0)
       | (sourceB == srcFifo? 1<<25 : 0)
       | targetSource<<29
       | (uint32)write<<31;             // widen first: bool promotes to signed int
}
// Writes the capture control register. `mask` selects which byte lanes of
// `data` take effect; a lane whose mask bits are all clear is left untouched.
void Video::regCapture(uint32 data, uint32 mask) {
  // Byte 0: blend factor for source A.
  if(mask & 0x000000ff) blendAf = data>>0;

  // Byte 1: blend factor for source B.
  if(mask & 0x0000ff00) blendBf = data>>8;

  // Byte 2: destination selection and capture size.
  if(mask & 0x00ff0000) {
    const uint32 upper = (data>>14) & 0xc;  // register bits 16-17 -> targetBuffer bits 2-3
    const uint32 lower = (data>>18) & 0x3;  // register bits 18-19 -> targetBuffer bits 0-1
    targetBuffer = upper + lower;
    targetSize   = data>>20;
  }

  // Byte 3: sources, read offset and the enable bit.
  if(mask & 0xff000000) {
    // Only the low two bits of frameBuffer come from this register.
    frameBuffer  = (frameBuffer & 0xc) | ((data>>26) & 0x3);
    sourceA      = ((data>>24) & 1)? srcGPU  : srcPPU;
    sourceB      = ((data>>25) & 1)? srcFifo : srcFrameBuffer;
    targetSource = data>>29;
    write        = (data & 0x80000000u) != 0;
  }
}
// Write handler for the display FIFO register. The body is empty, so the
// written data is currently discarded.
void Video::regFifo(uint32 data) {
}
// Reads back the brightness register of screen `index`: the fade factor in
// the low bits and the fade mode starting at bit 14.
uint32 Video::regBrightness(unsigned index) {
  const uint32 factor = blendYf[index];
  const uint32 mode   = fade[index];
  return mode<<14 | factor<<0;
}
// Writes the brightness register of screen `index`. `mask` selects which
// fields of `data` take effect.
//   bits  0-4   fade factor (blendYf)
//   bits 14-15  fade mode   (fade: 0 off, lighten, darken)
void Video::regBrightness(unsigned index, uint32 data, uint32 mask) {
  if(mask & 0x001f) blendYf[index] = data>>0;
  // fade[] is a plain int, so mask explicitly. Without this, bits above 15
  // of a wider write would end up in the mode and in the value read back.
  if(mask & 0xc000) fade[index] = data>>14 & 3;
}
void Video::power() {
for(unsigned x = 0; x < 256; x++)
blankData[x] = 0x3ffff;
line = 262;
ppu0Screen = 0;
source[0] = srcNone;
source[1] = srcNone;
frameBuffer = 0;
fade[0] = 0;
fade[1] = 0;
blendYf[0] = 0;
blendYf[1] = 0;
write = false;
sourceA = srcNone;
sourceB = srcNone;
blendAf = blendBf = 0;
targetBuffer = 0;
hdrawEvent.action = [&]() {
if(++line == 263) {
line = 0;
}
if(line == 192) {
write = false;
system.frame();
}
scanline(line);
arm9.event.queue.add(12*256, hblankEvent);
arm7.hdraw();
arm9.hdraw();
};
hblankEvent.action = [&]() {
arm9.event.queue.add(12*99, hdrawEvent);
arm7.hblank();
arm9.hblank();
};
arm9.event.queue.add(0, hdrawEvent);
}
}

View File

@ -1,44 +0,0 @@
// Video output and capture unit: owns the combined output image for both
// screens, the per-line scratch buffers, and the display/capture register
// state.
struct Video {
void power();
void scanline(unsigned y);
// Capture control register: read, and masked write.
uint32 regCapture();
void regCapture(uint32 data, uint32 mask);
void regFifo(uint32 data);
// Per-screen brightness register: read, and masked write.
uint32 regBrightness(unsigned index);
void regBrightness(unsigned index, uint32 data, uint32 mask);
// Two 256x192 screens stacked vertically.
uint32 output[256*384];
// One-line scratch buffers (256 pixels each).
uint32 blankData[256];
uint32 frameBufData[256];
uint32 fifoData[256];
// Current scanline counter.
uint9 line;
Event hdrawEvent;
Event hblankEvent;
// Display and rendering on NDS are more loosely coupled, although
// the GPU and PPUs are still all locked to 60fps.
enum { srcNone, srcPPU, srcFrameBuffer, srcFifo, srcGPU };
// For display onscreen
uint1 screensPowered;
uint1 ppu0Screen; // 0 is bottom, 1 is top
int source[2];
int fade[2]; enum { lighten=1, darken=2 };
uint5 blendYf[2];
// For rendering to VRAM
bool write;
int sourceA; // PPU0 or GPU
int sourceB; // framebuffer or FIFO
uint5 blendAf, blendBf; // for blending A + B
uint2 targetSize;
uint2 targetSource;
uint4 targetBuffer;
uint4 frameBuffer;
};
extern Video video;

View File

@ -1,488 +0,0 @@
#include <nds/nds.hpp>
namespace NintendoDS {
WIFI wifi;
void WIFI::power() {
// This isn't really cleared but we have to be deterministic.
memset(system.wxram.data, 0, system.wxram.size);
powered = true;
reg004 = 0;
reg034 = 0;
swMode = 0;
wepMode = 0;
memset(macAddr, 0, sizeof macAddr);
memset(bssId, 0, sizeof bssId);
assocIdl = 0;
assocIdf = 0;
rxControl = 0;
wepControl = 0;
reg034 = 0;
baseBandPower = 0;
interrupt.flags = 0;
interrupt.enable = 0;
interrupt.counterFlags = 0;
interrupt.counterEnable = 0;
interrupt.oflowFlags = 0;
interrupt.oflowEnable = 0;
pm.txIdle = 0;
pm.wakeRequest = 0;
pm.wakePending = 0;
pm.sleeping = 0;
bb.event.action = [&]() { bbTransferBit(); };
bb.time = 0;
bb.busy = 0;
bb.read = 0;
bb.write = 0;
bb.header = 0;
bb.powerl = 0;
bb.powerh = 0;
bb.mode8 = 0;
bb.modeE = 0;
bb.clock = 0;
memset(bb.regs, 0, sizeof bb.regs);
bb.regs[0x00] = 0x6d;
bb.regs[0x4d] = 0xff;
bb.regs[0x5d] = 0x01;
bb.regs[0x64] = 0xff;
rf.event.action = [&]() { rfTransferBit(); };
rf.time = 0;
rf.busy = 0;
rf.data = 0;
rf.length = 24;
rf.type = 0;
rf.reserved = 0;
memset(rf.regs, 0, sizeof rf.regs);
rf.regs[0x00] = 0x00007;
rf.regs[0x01] = 0x09003;
rf.regs[0x02] = 0x00022;
rf.regs[0x03] = 0x1ff78;
rf.regs[0x04] = 0x09003;
rf.regs[0x05] = 0x01780;
rf.regs[0x06] = 0x00000;
rf.regs[0x07] = 0x14578;
rf.regs[0x08] = 0x1e742;
rf.regs[0x09] = 0x00120;
rf.regs[0x0a] = 0x00000;
rf.regs[0x0b] = 0x00000;
rf.regs[0x0c] = 0x00000;
rf.regs[0x1b] = 0x0000f;
rf.regs[0x0e] = 0x00022;
rf.regs[0x1f] = 0x00001;
txWritePos = 0; rxBufBegin = 0;
txWriteCounter = 0; rxBufEnd = 0;
txGapBegin = 0; rxWritePos = 0;
txGapSize = 0; rxWritePosLatch = 0;
txTimOffset = 0; rxReadPos = 0;
txBeacon = 0; rxSoftReadPos = 0;
txMultiCmd = 0; rxReadCounter = 0;
txSlots[0] = 0; rxGapBegin = 0;
txSlots[1] = 0; rxGapSize = 0;
txSlots[2] = 0; reg00a = 0;
txStatControl = 0;
txRetryLimit = 0;
reg1a0l = 0;
reg1a0m = 0;
reg1a0n = 0;
reg1a0h = 0;
reg1a2 = 0;
reg1a4 = 0;
listenCount = 0;
listenInterval = 0;
beaconInterval = 0;
preamble = 0;
random = 1;
timer.enable = false;
timer.enableIrq = false;
timer.enableTxMP = false;
timer.count = 0;
timer.compare = 0;
config120l = 0; config140 = 0; config154l = 0;
config120h = 0; config142 = 0; config154m = 0;
config122 = 0; config144 = 0; config154h = 0;
config124 = 0; config146 = 0;
config128 = 0; config148 = 0; config0d4 = 0;
config130 = 0; config14a = 0; config0d8 = 0;
config132l = 0; config14c = 0; config0da = 0;
config132h = 0; config150l = 0; config0ecl = 0;
configBeaconCount = 0; config150h = 0; config0ech = 0;
stats.reg1b0 = 0;
stats.reg1b2 = 0;
stats.reg1b4 = 0;
stats.reg1b6 = 0;
stats.reg1b8 = 0;
stats.reg1ba = 0;
stats.reg1bc = 0;
stats.reg1be = 0;
stats.txErrors = 0;
stats.rxPackets = 0;
memset(stats.mpErrors, 0, sizeof stats.mpErrors);
}
// Starts a baseband serial transfer: arms a 24-step countdown, clears the
// read latch and busy flag, and queues the first step on the ARM7 event queue.
void WIFI::bbTransfer() {
  // Assuming 22MHz (66MHz/3) clock here. Probably wrong.
  bb.time = 16 + 8;
  bb.read = 0;
  bb.busy = 0;
  arm7.event.queue.add(3, bb.event);
}
void WIFI::bbTransferBit() {
if(--bb.time) {
// Small delay before setting busy bit
if(bb.time == 20) bb.busy = 1;
arm7.event.queue.add(3, bb.event);
return;
}
// Finished, commit the read or write.
bb.busy = 0;
uint1 start = bb.header>>14;
uint1 rd = bb.header>>13;
uint1 wr = bb.header>>12;
uint8 addr = bb.header>>0;
if(start) {
if(rd) {
bb.read = addr < 0x69? bb.regs[addr] : 0;
//print("bb read ",hex<2>(addr)," = ",hex<2>(bb.read),"\n");
}
if(wr) {
if(0x0d <= addr && addr <= 0x12) return;
if(0x16 <= addr && addr <= 0x1a) return;
if(0x5d <= addr && addr <= 0x61) return;
if(0x00 == addr || 0x27 == addr) return;
if(0x4d == addr || 0x64 == addr) return;
if(0x66 == addr || 0x69 <= addr) return;
if(addr < 0x69) bb.regs[addr] = bb.write;
//print("bb write ",hex<2>(addr)," : ",hex<2>(bb.write),"\n");
}
}
}
// Starts an RF serial transfer: arms a 24-step countdown, raises the busy
// flag, and queues the first step on the ARM7 event queue.
void WIFI::rfTransfer() {
  rf.time = 18 + 6;
  rf.busy = 1;
  arm7.event.queue.add(3, rf.event);
}
void WIFI::rfTransferBit() {
if(--rf.time) {
arm7.event.queue.add(3, rf.event);
return;
}
//Finished, commit the read or write.
rf.busy = 0;
uint1 rd = rf.data>>23;
uint5 addr = rf.data>>18;
uint18 data = rf.data>>0;
if(rd) {
rf.data = rf.regs[addr];
//print("rf read ",hex<2>(addr)," = ",hex<5>(rf.data),"\n");
}
else {
rf.regs[addr] = data;
//print("rf write ",hex<2>(addr)," : ",hex<5>(data),"\n");
}
}
// Reads from the WiFi address space.
//
// Addresses with bit 14 set go to WiFi RAM at the requested size. Everything
// else is a 16-bit register access selected by (addr & 0xffe); Word reads are
// assembled from two Half reads. Registers that are not listed, and the
// 0x060 case, return 0.
uint32 WIFI::read(uint32 addr, uint32 size) {
if(addr & 0x4000)
return system.wxram.read(addr & 0x1fff, size);
// A Word is the low halfword (addr with bit 1 clear) plus the high one.
if(size==Word) { return read(addr&~2, Half)<<0
| read(addr| 2, Half)<<16; }
//if(addr != 0x480015e && addr != 0x4800180)
// print("wifi r ",hex<8>(addr),"\n");
switch(addr & 0xffe) {
case 0x000: return 0xc340; // chip ID
case 0x004: return reg004;
// NOTE(review): wepMode is read through the global `wifi` rather than
// `this` - equivalent only while this object is that global.
case 0x006: return swMode<<0 | wifi.wepMode<<3;
case 0x008: return txStatControl;
case 0x00a: return reg00a;
case 0x010: return interrupt.flags;
case 0x012: return interrupt.enable;
case 0x018: return macAddr[0] | macAddr[1]<<8;
case 0x01a: return macAddr[2] | macAddr[3]<<8;
case 0x01c: return macAddr[4] | macAddr[5]<<8;
case 0x020: return bssId[0] | bssId[1]<<8;
case 0x022: return bssId[2] | bssId[3]<<8;
case 0x024: return bssId[4] | bssId[5]<<8;
case 0x028: return assocIdl;
case 0x02a: return assocIdf;
case 0x02c: return txRetryLimit;
case 0x030: return rxControl;
case 0x032: return wepControl;
case 0x034: return reg034;
case 0x036: return bb.clock;
case 0x038: return pm.txIdle;
// NOTE(review): write() stores wakeRequest from data>>1, but here it is
// returned starting at bit 0 - confirm which position is intended.
case 0x03c: return pm.wakeRequest | pm.wakePending<<8 | pm.sleeping<<9;
case 0x040: return 0;//wifi.pm.;
case 0x044: {
// Returns the current 11-bit value, then advances it: rotate left by one
// and XOR in the old bit 0.
uint11 data = random;
random = random<<1 | random>>10;
random ^= data & 1;
return data;
}
case 0x050: return rxBufBegin;
case 0x052: return rxBufEnd;
case 0x054: return rxWritePos;
case 0x056: return rxWritePosLatch;
// Several positions below are shifted left by one on read; write() shifts
// the same fields right by one when storing.
case 0x058: return rxReadPos<<1;
case 0x05a: return rxSoftReadPos;
case 0x05c: return rxReadCounter;
case 0x060: break;//return rxBufRead();
case 0x062: return rxGapBegin<<1;
case 0x064: return rxGapSize;
case 0x068: return txWritePos<<1;
case 0x06c: return txWriteCounter;
case 0x074: return txGapBegin<<1;
case 0x076: return txGapSize;
case 0x080: return txBeacon;
case 0x084: return txTimOffset;
case 0x088: return listenCount;
case 0x08c: return beaconInterval;
case 0x08e: return listenInterval;
case 0x0bc: return preamble;
case 0x0d4: return config0d4;
case 0x0d8: return config0d8;
case 0x0da: return config0da;
case 0x0e8: return timer.enable;
case 0x0ea: return timer.enableIrq;
case 0x0ec: return config0ecl | config0ech<<8;
case 0x0ee: return timer.enableTxMP;
case 0x110: return preBeacon;
case 0x120: return config120l | config120h<<15;
case 0x122: return config122;
case 0x124: return config124;
case 0x128: return config128;
case 0x130: return config130;
case 0x132: return config132l | config132h<<15;
case 0x134: return configBeaconCount;
case 0x140: return config140;
case 0x142: return config142;
case 0x144: return config144;
case 0x146: return config146;
case 0x148: return config148;
case 0x14a: return config14a;
case 0x14c: return config14c;
case 0x150: return config150l | config150h<<8;
case 0x154: return config154l | config154m<<9 | config154h<<11;
// Baseband serial interface: last value read, and busy flag.
case 0x15c: return bb.read;
case 0x15e: return bb.busy;
case 0x160: return bb.mode8<<8 | bb.modeE<<14;
case 0x168: return bb.powerl<<0 | bb.powerh<<15;
// RF serial interface: data latch (high half, low half) and busy flag.
case 0x17c: return rf.data>>16;
case 0x17e: return rf.data>>0;
case 0x180: return rf.busy;
case 0x1a0: return reg1a0l | reg1a0m<<4 | reg1a0n<<8 | reg1a0h<<11;
case 0x1a2: return reg1a2;
case 0x1a4: return reg1a4;
// Fixed values.
case 0x244: return 0x0000;
case 0x254: return 0xeeee;
case 0x290: return 0xffff;
}
//print("r ",hex<8>(addr),": unimplemented\n");
return 0;
}
// Writes to the WiFi address space.
//
// Byte writes are ignored. Word writes are split into two Half writes (low
// halfword first). A Half write with address bit 14 set stores `data` into
// WiFi RAM; otherwise it updates the 16-bit register selected by
// (addr & 0xffe). Writes to unlisted registers are dropped.
void WIFI::write(uint32 addr, uint32 size, uint32 data) {
  if(size==Byte) return;
  if(size==Word) { write(addr&~2, Half, data & 0xffff);
                   write(addr| 2, Half, data >> 16); return; }

  // Store the written value. This call used to pass `size` as the value,
  // which put the access-size constant into RAM instead of the data.
  if(addr & 0x4000)
    return system.wxram.write(addr & 0x1fff, Half, data);

  //print("wifi w ",hex<8>(addr)," : ",hex<4>(data),"\n");
  switch(addr & 0xffe) {
  case 0x004:
    // Bit 0 changed: reg034 follows the new state.
    if((reg004 ^ data) & 1<<0) {
      if(data & 1) {
        reg034 = 0x2;
        //rf.pins = 0x46;
        //rf.status = 9;
        //reg27c = 5;
        //reg2a2 = ..;
      }
      else {
        reg034 = 0xa;
      }
    }
    if(data & 1<<13) {
      rxWritePosLatch = 0;
      //reg0c0 = 0;
      //reg0c4 = 0;
      //reg1a4 = 0;
      //reg278 = 0xf;
    }
    // Bit 14: reload a set of registers with fixed values.
    if(data & 1<<14) {
      swMode = 0;
      wepMode = 0;
      //txStatCnt = 0;
      //reg00a = 0;
      memset(macAddr, 0, sizeof macAddr);
      memset(bssId, 0, sizeof bssId);
      assocIdl = 0;
      assocIdf = 0;
      //txRetryLimit = 0;
      //reg02e = 0;
      rxBufBegin = 0x4000;
      rxBufEnd = 0x4800;
      txTimOffset = 0;
      //reg0bc = 1;
      //reg0d0 = 0x401;
      config0d4 = 1;
      //reg0e0 = 8;
      config0ecl = 3;
      config0ech = 0x3f;
      //reg194 = 0;
      //reg198 = 0;
      //reg1a2 = 1;
      //reg224 = 3;
      //reg230 = 0x47;
    }
    reg004 = data;
    return;

  case 0x006: swMode = data>>0; wepMode = data>>3; return;
  case 0x008: txStatControl = data; return;
  case 0x00a: reg00a = data; return;

  // 0x21c sets interrupt flags (except bit 10); 0x010 clears the written bits.
  case 0x21c: interrupt.flags |= data & ~0x400; return;
  case 0x010: interrupt.flags &= ~data; return;
  case 0x012: interrupt.enable = data; return;

  case 0x018: macAddr[0] = data; macAddr[1] = data>>8; return;
  case 0x01a: macAddr[2] = data; macAddr[3] = data>>8; return;
  case 0x01c: macAddr[4] = data; macAddr[5] = data>>8; return;
  case 0x020: bssId[0] = data; bssId[1] = data>>8; return;
  case 0x022: bssId[2] = data; bssId[3] = data>>8; return;
  case 0x024: bssId[4] = data; bssId[5] = data>>8; return;
  case 0x028: assocIdl = data; return;
  case 0x02a: assocIdf = data; return;
  case 0x02c: txRetryLimit = data; return;
  case 0x030: rxControl = data; return;
  case 0x032: wepControl = data & 0x8000; return;
  case 0x034: reg034 = data; return;
  case 0x036: bb.clock = data; return;
  case 0x038: pm.txIdle = data; return;
  case 0x03c: pm.wakeRequest = data>>1; return;
  case 0x040: if(data & 1<<15) {
      reg034 = 2;
      pm.sleeping = data>>0;
    }
    return;

  case 0x050: rxBufBegin = data; return;
  case 0x052: rxBufEnd = data; return;
  case 0x056: rxWritePosLatch = data; return;
  case 0x058: rxReadPos = data>>1; return;
  case 0x05a: rxSoftReadPos = data; return;
  case 0x05c: rxReadCounter = data; return;
  case 0x062: rxGapBegin = data>>1; return;
  case 0x064: rxGapSize = data; return;
  case 0x068: txWritePos = data>>1; return;
  case 0x06c: txWriteCounter = data; return;
  case 0x070: break;//return txBufWrite(data);
  case 0x074: txGapBegin = data>>1; return;
  case 0x076: txGapSize = data; return;
  case 0x080: txBeacon = data; return;
  case 0x084: txTimOffset = data; return;
  case 0x088: listenCount = data; return;
  case 0x08c: beaconInterval = data; return;
  case 0x08e: listenInterval = data; return;
  case 0x0b4: return; //txBufReset
  case 0x0bc: preamble = data; return;
  case 0x0d4: config0d4 = data; return;
  case 0x0d8: config0d8 = data; return;
  case 0x0da: config0da = data; return;
  case 0x0e8: timer.enable = data; return;
  case 0x0ec: config0ecl = data; config0ech = data>>8; return;
  case 0x0ea: timer.enableIrq = data; return;//if bit 1 trigger irq14
  case 0x0ee: timer.enableTxMP = data; return;
  case 0x110: preBeacon = data; return;
  case 0x120: config120l = data; config120h = data>>15; return;
  case 0x122: config122 = data; return;
  case 0x124: config124 = data; return;
  case 0x128: config128 = data; return;
  case 0x130: config130 = data; return;
  case 0x132: config132l = data; config132h = data>>15; return;
  case 0x134: configBeaconCount = data; return;
  case 0x140: config140 = data; return;
  case 0x142: config142 = data; return;
  case 0x144: config144 = data; return;
  case 0x146: config146 = data; return;
  case 0x148: config148 = data; return;
  case 0x14a: config14a = data; return;
  case 0x14c: config14c = data; return;
  case 0x150: config150l = data; config150h = data>>8; return;
  case 0x154: config154l = data; config154m = data>>9; config154h = data>>11; return;

  // Baseband serial interface: writing the header starts a transfer.
  case 0x158: bb.header = data; return wifi.bbTransfer();
  case 0x15a: bb.write = data; return;
  case 0x160: bb.mode8 = data>>8; bb.modeE = data>>14; return;
  case 0x168: bb.powerl = data>>0; bb.powerh = data>>15; return;

  // RF serial interface: writing the high half of the latch starts a transfer.
  case 0x17c: rf.data &= 0xffff; rf.data |= data<<16; return wifi.rfTransfer();
  case 0x17e: rf.data &= ~0xffff; rf.data |= data<<0; return;
  case 0x184: rf.length = data>>0;
              rf.type = data>>8;
              rf.reserved = data>>14; return;

  case 0x1a0: reg1a0l = data; reg1a0m = data>>4; reg1a0n = data>>8; reg1a0h = data>>11; return;
  case 0x1a2: reg1a2 = data; return;
  case 0x1a4: reg1a4 = data; return;

  // Accepted and ignored.
  case 0x244: return;
  case 0x254: return;
  case 0x290: return;
  }
  //print("w ",hex<8>(addr),": unimplemented\n");
}
}

View File

@ -1,153 +0,0 @@
// State of the WiFi unit: register-backed fields, interrupt and statistics
// blocks, power management, and the two serial interfaces (baseband and RF).
// Fields whose names end in l/m/n/h are separate bit ranges of one register;
// read() and write() pack and unpack them.
struct WIFI {
void power();
// Baseband serial transfer: start, and per-step event handler.
void bbTransfer();
void bbTransferBit();
// RF serial transfer: start, and per-step event handler.
void rfTransfer();
void rfTransferBit();
uint32 read(uint32 addr, uint32 size);
void write(uint32 addr, uint32 size, uint32 data);
uint1 powered;
uint16 reg004;
uint3 swMode, wepMode;
uint8 macAddr[6];
uint8 bssId[6];
uint4 assocIdl;
uint11 assocIdf;
uint16 rxControl, wepControl;
uint16 reg034, baseBandPower;
// Receive buffer bounds and positions.
uint16 rxBufBegin, rxBufEnd;
uint12 rxWritePos, rxWritePosLatch;
uint12 rxReadPos, rxSoftReadPos;
uint12 rxGapBegin, rxGapSize;
uint12 rxReadCounter;
uint16 reg00a;
// Transmit buffer positions and control.
uint12 txWritePos;
uint12 txGapBegin, txGapSize;
uint16 txBeacon;
uint16 txMultiCmd;
uint16 txSlots[3];
uint8 txTimOffset;
uint12 txWriteCounter;
uint16 txStatControl;
uint16 txRetryLimit;
uint8 listenCount;
uint8 listenInterval;
uint10 beaconInterval;
uint2 preamble;
uint16 preBeacon;
// 11-bit value returned and advanced by reads of register 0x044.
uint11 random;
struct {
uint1 enable;
uint1 enableIrq;
uint1 enableTxMP;
uint64 count;
uint64 compare;
} timer;
// Configuration registers, named after their register offset.
uint2 config0d4;
uint12 config0d8;
uint16 config0da;
uint5 config0ecl;
uint6 config0ech;
uint9 config120l;
uint1 config120h;
uint16 config122;
uint16 config124;
uint16 config128;
uint12 config130;
uint12 config132l;
uint1 config132h;
uint16 configBeaconCount;
uint16 config140;
uint16 config142;
uint8 config144;
uint8 config146;
uint8 config148;
uint8 config14a;
uint16 config14c;
uint6 config150l;
uint8 config150h;
uint7 config154l;
uint1 config154m;
uint4 config154h;
uint2 reg1a0l;
uint2 reg1a0m;
uint1 reg1a0n;
uint1 reg1a0h;
uint2 reg1a2;
uint16 reg1a4;
struct {
uint16 enable, counterEnable, oflowEnable;
uint16 flags, counterFlags, oflowFlags;
} interrupt;
struct {
uint8 reg1b0;
uint16 reg1b2;
uint16 reg1b4;
uint16 reg1b6;
uint8 reg1b8;
uint8 reg1ba;
uint16 reg1bc;
uint16 reg1be;
uint16 txErrors;
uint16 rxPackets;
uint8 mpErrors[16];
} stats;
// Power management state.
struct {
uint4 txIdle;
uint2 wakeRequest;
uint1 wakePending;
uint1 sleeping;
} pm;
// Baseband serial interface
struct {
uint16 header; // 01rw0000 aaaaaaaa (r/w, address)
uint8 read, write; // data sent and received
uint4 powerl;
uint1 powerh;
uint1 mode8, modeE;
uint1 busy;
uint1 clock;
uint8 time; // remaining steps of the transfer in progress
Event event;
uint8 regs[0x69];
} bb;
// RF serial interface
struct {
uint32 data; // r aaaaa dd dddddddd dddddddd (r/w, address, data)
uint6 length; // should be 24 bits
uint1 type; // should be 0
uint1 reserved;
uint1 busy;
uint8 time; // remaining steps of the transfer in progress
Event event;
uint18 regs[0x20];
} rf;
};
extern WIFI wifi;

View File

@ -1 +0,0 @@
*

View File

@ -1 +0,0 @@
*

View File

@ -1,31 +0,0 @@
<?xml version='1.0' encoding='UTF-8'?>
<system name="Nintendo DS">
<arm9>
<bios data="arm9 bios" sha256="1693983a7707ae394786fa526c0552457888a51d4e410d715ef07acd5a540555"/>
</arm9>
<arm7>
<bios data="arm7 bios" sha256="ba65f690eb04ec92db67c0e299e21ad71de087d6d5de8a9cb17a62eaab563c17"/>
</arm7>
<!-- DS
<flash model="ST M45PE20" data="flash" writeprotect="0-0x10000"/>
<pmc model="Mitsumi 3152A"/>
<tsc model="Texas Instruments TSC2046"/>
<rtc model="Seiko S35180"/>
<wifi>
<interface version="1"/>
<chipset model="Mitsumi MM3155;RF Micro Devices RF9008"/>
</wifi> -->
<!-- DS Lite -->
<flash model="ST M35PE20" data="firmware" writeprotect="0-0x3fa00"/>
<pmc model="Mitsumi 3205B"/>
<tsc model="Asahi Kasei Microsystems AK4148AVT"/>
<rtc model="Seiko S35180" data="clock.xml"/>
<wifi>
<interface version="2"/>
<chipset model="Mitsumi MM3218"/>
</wifi>
</system>

6
bsnes/Makefile → higan/Makefile Normal file → Executable file
View File

@ -4,7 +4,7 @@ fc := fc
sfc := sfc
gb := gb
gba := gba
nds := nds
# nds := nds
profile := accuracy
target := ethos
@ -105,7 +105,7 @@ sync:
rm -r phoenix/test
archive:
if [ -f bsnes.tar.xz ]; then rm bsnes.tar.xz; fi
tar -cJf bsnes.tar.xz `ls`
if [ -f higan.tar.xz ]; then rm higan.tar.xz; fi
tar -cJf higan.tar.xz `ls`
help:;

748
bsnes/data/cheats.xml → higan/data/cheats.xml Normal file → Executable file

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<assembly xmlns="urn:schemas-microsoft-com:asm.v1" manifestVersion="1.0">
<assemblyIdentity type="win32" name="bsnes" version="1.0.0.0" processorArchitecture="*"/>
<assemblyIdentity type="win32" name="higan" version="1.0.0.0" processorArchitecture="*"/>
<dependency>
<dependentAssembly>
<assemblyIdentity type="win32" name="Microsoft.Windows.Common-Controls" version="6.0.0.0" processorArchitecture="*" publicKeyToken="6595b64144ccf1df" language="*"/>

6
bsnes/data/bsnes.desktop → higan/data/higan.desktop Normal file → Executable file
View File

@ -1,8 +1,8 @@
[Desktop Entry]
Name=bsnes
Name=higan
Comment=SNES emulator
Exec=bsnes
Icon=bsnes
Exec=higan
Icon=higan
Terminal=false
Type=Application
Categories=Game;Emulator;

BIN
higan/data/higan.ico Executable file

Binary file not shown.

After

Width:  |  Height:  |  Size: 29 KiB

BIN
higan/data/higan.png Executable file

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.2 KiB

View File

View File

@ -2,7 +2,7 @@
#define EMULATOR_HPP
namespace Emulator {
static const char Name[] = "bsnes";
static const char Name[] = "higan";
static const char Version[] = "091";
static const char Author[] = "byuu";
static const char License[] = "GPLv3";

View File

0
bsnes/fc/Makefile → higan/fc/Makefile Normal file → Executable file
View File

0
bsnes/fc/apu/apu.cpp → higan/fc/apu/apu.cpp Normal file → Executable file
View File

0
bsnes/fc/apu/apu.hpp → higan/fc/apu/apu.hpp Normal file → Executable file
View File

0
bsnes/fc/apu/dmc.cpp → higan/fc/apu/dmc.cpp Normal file → Executable file
View File

0
bsnes/fc/apu/dmc.hpp → higan/fc/apu/dmc.hpp Normal file → Executable file
View File

View File

View File

0
bsnes/fc/apu/noise.cpp → higan/fc/apu/noise.cpp Normal file → Executable file
View File

0
bsnes/fc/apu/noise.hpp → higan/fc/apu/noise.hpp Normal file → Executable file
View File

0
bsnes/fc/apu/pulse.cpp → higan/fc/apu/pulse.cpp Normal file → Executable file
View File

0
bsnes/fc/apu/pulse.hpp → higan/fc/apu/pulse.hpp Normal file → Executable file
View File

View File

0
bsnes/fc/apu/sweep.cpp → higan/fc/apu/sweep.cpp Normal file → Executable file
View File

0
bsnes/fc/apu/sweep.hpp → higan/fc/apu/sweep.hpp Normal file → Executable file
View File

View File

View File

Some files were not shown because too many files have changed in this diff Show More