Update to v087r20 release.

byuu says:

Changelog:
- HALT waits 16 cycles before testing IRQs instead of 1 (probably less
  precise, but provides a massive speedup) [we will need to work on this
  later]
- MMIO regs for CPU/PPU simplified by combining array accesses
- custom VRAM/PRAM/OAM read/write functions that emulate 8->16-bit
  writes
- 16-bit PRAM data (decent speedup)
- emulated memory access speed (but doesn't handle non-sequential
  penalties or PPU access penalties yet) [amazingly, doesn't help speed
  at all]
- misc. code cleanups

For this WIP, FPS for Mr. Driller 2 went from 88fps to 172fps.
Compatibility should be unchanged. Timers are still an interesting
avenue to increase performance, but it will be very tough to handle the
16MHz timers with, e.g., a period of 65535 (overflow every single tick).
And that's basically the last major speed boost we'll be able to get.
Blending and windowing are going to hurt performance, but it remains to
be seen how much.
This commit is contained in:
Tim Allen 2012-04-09 16:41:27 +10:00
parent 17b5bae86a
commit 01b4cb9919
16 changed files with 375 additions and 400 deletions

View File

@ -1,7 +1,7 @@
#ifndef BASE_HPP
#define BASE_HPP
static const char Version[] = "087.19";
static const char Version[] = "087.20";
#include <nall/platform.hpp>
#include <nall/algorithm.hpp>

View File

@ -25,7 +25,7 @@ void CPU::enter() {
if(regs.mode == Registers::Mode::Halt) {
if((regs.irq.enable & regs.irq.flag) == 0) {
step(1);
step(16);
continue;
}
regs.mode = Registers::Mode::Normal;
@ -46,12 +46,12 @@ void CPU::step(unsigned clocks) {
}
uint32 CPU::bus_read(uint32 addr, uint32 size) {
step(1);
step(bus.speed(addr, size));
return bus.read(addr, size);
}
void CPU::bus_write(uint32 addr, uint32 size, uint32 word) {
step(1);
step(bus.speed(addr, size));
return bus.write(addr, size, word);
}
@ -67,7 +67,6 @@ void CPU::power() {
dma.target = 0;
dma.length = 0;
dma.control = 0;
dma.active = 0;
}
for(auto &timer : regs.timer) {
timer.counter = 0;

View File

@ -2,18 +2,7 @@ void CPU::dma_run() {
for(unsigned n = 0; n < 4; n++) {
auto &dma = regs.dma[n];
if(dma.control.enable == false) {
dma.active = false;
continue;
}
if(dma.active == false) {
dma.active = true;
dma.run.target = dma.target;
dma.run.source = dma.source;
dma.run.length = dma.length;
step(2);
}
if(dma.control.enable == false) continue;
switch(dma.control.timingmode) {
case 0: break;
@ -66,5 +55,5 @@ void CPU::dma_transfer(Registers::DMA &dma) {
if(dma.control.targetmode == 3) dma.run.target = dma.target;
if(dma.control.repeat == 1) dma.run.length = dma.length;
if(dma.control.repeat == 0) dma.active = false, dma.control.enable = false;
if(dma.control.repeat == 0) dma.control.enable = false;
}

View File

@ -91,100 +91,80 @@ void CPU::write(uint32 addr, uint8 byte) {
switch(addr) {
//DMA0SAD
case 0x040000b0: regs.dma[0].source = (regs.dma[0].source & 0xffffff00) | (byte << 0); return;
case 0x040000b1: regs.dma[0].source = (regs.dma[0].source & 0xffff00ff) | (byte << 8); return;
case 0x040000b2: regs.dma[0].source = (regs.dma[0].source & 0xff00ffff) | (byte << 16); return;
case 0x040000b3: regs.dma[0].source = (regs.dma[0].source & 0x00ffffff) | (byte << 24); return;
//DMA1SAD
//DMA2SAD
//DMA3SAD
case 0x040000b0: case 0x040000b1: case 0x040000b2: case 0x040000b3:
case 0x040000bc: case 0x040000bd: case 0x040000be: case 0x040000bf:
case 0x040000c8: case 0x040000c9: case 0x040000ca: case 0x040000cb:
case 0x040000d4: case 0x040000d5: case 0x040000d6: case 0x040000d7: {
auto &dma = regs.dma[(addr - 0x040000b0) / 12];
unsigned shift = (addr & 3) * 8;
dma.source = (dma.source & ~(255 << shift)) | (byte << shift);
return;
}
//DMA0DAD
case 0x040000b4: regs.dma[0].target = (regs.dma[0].target & 0xffffff00) | (byte << 0); return;
case 0x040000b5: regs.dma[0].target = (regs.dma[0].target & 0xffff00ff) | (byte << 8); return;
case 0x040000b6: regs.dma[0].target = (regs.dma[0].target & 0xff00ffff) | (byte << 16); return;
case 0x040000b7: regs.dma[0].target = (regs.dma[0].target & 0x00ffffff) | (byte << 24); return;
//DMA1DAD
//DMA2DAD
//DMA3DAD
case 0x040000b4: case 0x040000b5: case 0x040000b6: case 0x040000b7:
case 0x040000c0: case 0x040000c1: case 0x040000c2: case 0x040000c3:
case 0x040000cc: case 0x040000cd: case 0x040000ce: case 0x040000cf:
case 0x040000d8: case 0x040000d9: case 0x040000da: case 0x040000db: {
auto &dma = regs.dma[(addr - 0x040000b4) / 12];
unsigned shift = (addr & 3) * 8;
dma.target = (dma.target & ~(255 << shift)) | (byte << shift);
return;
}
//DMA0CNT_L
case 0x040000b8: regs.dma[0].length = (regs.dma[0].length & 0xff00) | (byte << 0); return;
case 0x040000b9: regs.dma[0].length = (regs.dma[0].length & 0x00ff) | (byte << 8); return;
//DMA1CNT_L
//DMA2CNT_L
//DMA3CNT_L
case 0x040000b8: case 0x040000b9:
case 0x040000c4: case 0x040000c5:
case 0x040000d0: case 0x040000d1:
case 0x040000dc: case 0x040000dd: {
auto &dma = regs.dma[(addr - 0x040000b8) / 12];
unsigned shift = (addr & 1) * 8;
dma.length = (dma.length & ~(255 << shift)) | (byte << shift);
return;
}
//DMA0CNT_H
case 0x040000ba: regs.dma[0].control = (regs.dma[0].control & 0xff00) | (byte << 0); return;
case 0x040000bb: regs.dma[0].control = (regs.dma[0].control & 0x00ff) | (byte << 8); return;
//DMA1SAD
case 0x040000bc: regs.dma[1].source = (regs.dma[1].source & 0xffffff00) | (byte << 0); return;
case 0x040000bd: regs.dma[1].source = (regs.dma[1].source & 0xffff00ff) | (byte << 8); return;
case 0x040000be: regs.dma[1].source = (regs.dma[1].source & 0xff00ffff) | (byte << 16); return;
case 0x040000bf: regs.dma[1].source = (regs.dma[1].source & 0x00ffffff) | (byte << 24); return;
//DMA1DAD
case 0x040000c0: regs.dma[1].target = (regs.dma[1].target & 0xffffff00) | (byte << 0); return;
case 0x040000c1: regs.dma[1].target = (regs.dma[1].target & 0xffff00ff) | (byte << 8); return;
case 0x040000c2: regs.dma[1].target = (regs.dma[1].target & 0xff00ffff) | (byte << 16); return;
case 0x040000c3: regs.dma[1].target = (regs.dma[1].target & 0x00ffffff) | (byte << 24); return;
//DMA1CNT_L
case 0x040000c4: regs.dma[1].length = (regs.dma[1].length & 0xff00) | (byte << 0); return;
case 0x040000c5: regs.dma[1].length = (regs.dma[1].length & 0x00ff) | (byte << 8); return;
//DMA1CNT_H
case 0x040000c6: regs.dma[1].control = (regs.dma[1].control & 0xff00) | (byte << 0); return;
case 0x040000c7: regs.dma[1].control = (regs.dma[1].control & 0x00ff) | (byte << 8); return;
//DMA2SAD
case 0x040000c8: regs.dma[2].source = (regs.dma[2].source & 0xffffff00) | (byte << 0); return;
case 0x040000c9: regs.dma[2].source = (regs.dma[2].source & 0xffff00ff) | (byte << 8); return;
case 0x040000ca: regs.dma[2].source = (regs.dma[2].source & 0xff00ffff) | (byte << 16); return;
case 0x040000cb: regs.dma[2].source = (regs.dma[2].source & 0x00ffffff) | (byte << 24); return;
//DMA2DAD
case 0x040000cc: regs.dma[2].target = (regs.dma[2].target & 0xffffff00) | (byte << 0); return;
case 0x040000cd: regs.dma[2].target = (regs.dma[2].target & 0xffff00ff) | (byte << 8); return;
case 0x040000ce: regs.dma[2].target = (regs.dma[2].target & 0xff00ffff) | (byte << 16); return;
case 0x040000cf: regs.dma[2].target = (regs.dma[2].target & 0x00ffffff) | (byte << 24); return;
//DMA2CNT_L
case 0x040000d0: regs.dma[2].length = (regs.dma[2].length & 0xff00) | (byte << 0); return;
case 0x040000d1: regs.dma[2].length = (regs.dma[2].length & 0x00ff) | (byte << 8); return;
//DMA2CNT_H
case 0x040000d2: regs.dma[2].control = (regs.dma[2].control & 0xff00) | (byte << 0); return;
case 0x040000d3: regs.dma[2].control = (regs.dma[2].control & 0x00ff) | (byte << 8); return;
//DMA3SAD
case 0x040000d4: regs.dma[3].source = (regs.dma[3].source & 0xffffff00) | (byte << 0); return;
case 0x040000d5: regs.dma[3].source = (regs.dma[3].source & 0xffff00ff) | (byte << 8); return;
case 0x040000d6: regs.dma[3].source = (regs.dma[3].source & 0xff00ffff) | (byte << 16); return;
case 0x040000d7: regs.dma[3].source = (regs.dma[3].source & 0x00ffffff) | (byte << 24); return;
//DMA3DAD
case 0x040000d8: regs.dma[3].target = (regs.dma[3].target & 0xffffff00) | (byte << 0); return;
case 0x040000d9: regs.dma[3].target = (regs.dma[3].target & 0xffff00ff) | (byte << 8); return;
case 0x040000da: regs.dma[3].target = (regs.dma[3].target & 0xff00ffff) | (byte << 16); return;
case 0x040000db: regs.dma[3].target = (regs.dma[3].target & 0x00ffffff) | (byte << 24); return;
//DMA3CNT_L
case 0x040000dc: regs.dma[3].length = (regs.dma[3].length & 0xff00) | (byte << 0); return;
case 0x040000dd: regs.dma[3].length = (regs.dma[3].length & 0x00ff) | (byte << 8); return;
//DMA3CNT_H
case 0x040000de: regs.dma[3].control = (regs.dma[3].control & 0xff00) | (byte << 0); return;
case 0x040000df: regs.dma[3].control = (regs.dma[3].control & 0x00ff) | (byte << 8); return;
case 0x040000ba: case 0x040000bb:
case 0x040000c6: case 0x040000c7:
case 0x040000d2: case 0x040000d3:
case 0x040000de: case 0x040000df: {
auto &dma = regs.dma[(addr - 0x040000ba) / 12];
unsigned shift = (addr & 1) * 8;
bool enable = dma.control.enable;
dma.control = (dma.control & ~(255 << shift)) | (byte << shift);
if(enable == 0 && dma.control.enable) {
dma.run.target = dma.target;
dma.run.source = dma.source;
dma.run.length = dma.length;
}
return;
}
//TM0CNT_L
case 0x04000100: regs.timer[0].reload = (regs.timer[0].reload & 0xff00) | (byte << 0); return;
case 0x04000101: regs.timer[0].reload = (regs.timer[0].reload & 0x00ff) | (byte << 8); return;
//TM1CNT_L
case 0x04000104: regs.timer[1].reload = (regs.timer[1].reload & 0xff00) | (byte << 0); return;
case 0x04000105: regs.timer[1].reload = (regs.timer[1].reload & 0x00ff) | (byte << 8); return;
//TM2CNT_L
case 0x04000108: regs.timer[2].reload = (regs.timer[2].reload & 0xff00) | (byte << 0); return;
case 0x04000109: regs.timer[2].reload = (regs.timer[2].reload & 0x00ff) | (byte << 8); return;
//TM3CNT_L
case 0x0400010c: regs.timer[3].reload = (regs.timer[3].reload & 0xff00) | (byte << 0); return;
case 0x0400010d: regs.timer[3].reload = (regs.timer[3].reload & 0x00ff) | (byte << 8); return;
case 0x04000100: case 0x04000101:
case 0x04000104: case 0x04000105:
case 0x04000108: case 0x04000109:
case 0x0400010c: case 0x0400010d: {
auto &timer = regs.timer[(addr >> 2) & 3];
unsigned shift = (addr & 1) * 8;
timer.reload = (timer.reload & ~(255 << shift)) | (byte << shift);
return;
}
//TM0CNT_H
//TM1CNT_H

View File

@ -21,13 +21,11 @@ struct Registers {
DMAControl control;
//internal
uint1 active;
struct Run {
uint32 target;
uint32 source;
uint16 length;
} run;
uint32 basetarget;
} dma[4];
struct TimerControl {

View File

@ -73,6 +73,23 @@ uint32 Bus::mirror(uint32 addr, uint32 size) {
return base;
}
//returns the number of clocks charged for one bus access of the given size.
//the cost depends only on the memory region, selected by address bits 24-27.
uint32 Bus::speed(uint32 addr, uint32 size) {
  //table index (address bits 24-27):
  //  0-1 BIOS, 2 ERAM, 3 IRAM, 4 MMIO, 5 PRAM, 6 VRAM, 7 OAM, 8-d ROM, e-f SRAM
  static const unsigned byte[16] = { 1, 1, 3, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5 };
  static const unsigned half[16] = { 1, 1, 3, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5 };
  static const unsigned word[16] = { 1, 1, 6, 1, 1, 2, 2, 1, 8, 8, 8, 8, 8, 8, 8, 8 };

  const unsigned region = (addr >> 24) & 15;

  switch(size) {
  case Byte: return byte[region];
  case Half: return half[region];
  case Word: return word[region];
  }

  //unrecognized size: charge a single clock instead of flowing off the end
  //of a non-void function (undefined behavior)
  return 1;
}
uint32 Bus::read(uint32 addr, uint32 size) {
if(addr & 0x08000000) return cartridge.read(addr, size);
@ -85,9 +102,9 @@ uint32 Bus::read(uint32 addr, uint32 size) {
if((addr & 0xfffffc00) == 0x04000000) return mmio[addr & 0x3ff]->read(addr, size);
if((addr & 0xff00ffff) == 0x04000800) return ((MMIO&)cpu).read(0x04000800 | (addr & 3), size);
return 0u;
case 0x05000000: return ppu.pram.read(addr & 0x3ff, size);
case 0x06000000: return ppu.vram.read(addr & 0x10000 ? (0x10000 + (addr & 0x7fff)) : (addr & 0xffff), size);
case 0x07000000: return ppu.oam_read(addr & 0x3ff, size);
case 0x05000000: return ppu.pram_read(addr, size);
case 0x06000000: return ppu.vram_read(addr, size);
case 0x07000000: return ppu.oam_read(addr, size);
}
}
@ -103,9 +120,9 @@ void Bus::write(uint32 addr, uint32 size, uint32 word) {
if((addr & 0xfffffc00) == 0x04000000) return mmio[addr & 0x3ff]->write(addr, size, word);
if((addr & 0xff00ffff) == 0x04000800) return ((MMIO&)cpu).write(0x04000800 | (addr & 3), size, word);
return;
case 0x05000000: return ppu.pram.write(addr & 0x3ff, size, word);
case 0x06000000: return ppu.vram.write(addr & 0x10000 ? (0x10000 + (addr & 0x7fff)) : (addr & 0xffff), size, word);
case 0x07000000: return ppu.oam_write(addr & 0x3ff, size, word);
case 0x05000000: return ppu.pram_write(addr, size, word);
case 0x06000000: return ppu.vram_write(addr, size, word);
case 0x07000000: return ppu.oam_write(addr, size, word);
}
}

View File

@ -25,6 +25,7 @@ struct Bus : Memory {
Memory *mmio[0x400];
static uint32 mirror(uint32 addr, uint32 size);
uint32 speed(uint32 addr, uint32 size);
uint32 read(uint32 addr, uint32 size);
void write(uint32 addr, uint32 size, uint32 word);
void power();

View File

@ -44,7 +44,7 @@ void PPU::render_background_linear(unsigned bgnumber) {
if(bg.control.screensize & 1) if(tx & 32) offset += 32 * 32;
if(bg.control.screensize & 2) if(ty & 32) offset += 32 * 32 * (1 + (bg.control.screensize & 1));
offset = basemap + offset * 2;
uint16 mapdata = vram.read(offset, Half);
uint16 mapdata = vram_read(offset, Half);
tile.character = mapdata >> 0;
tile.hflip = mapdata >> 10;
@ -53,12 +53,12 @@ void PPU::render_background_linear(unsigned bgnumber) {
if(bg.control.colormode == 0) {
offset = basechr + tile.character * 32 + (py ^ (tile.vflip ? 7 : 0)) * 4;
uint32 word = vram.read(offset, Word);
uint32 word = vram_read(offset, Word);
for(unsigned n = 0; n < 8; n++) data[n] = (word >> (n * 4)) & 15;
} else {
offset = basechr + tile.character * 64 + (py ^ (tile.vflip ? 7 : 0)) * 8;
uint32 wordlo = vram.read(offset + 0, Word);
uint32 wordhi = vram.read(offset + 4, Word);
uint32 wordlo = vram_read(offset + 0, Word);
uint32 wordhi = vram_read(offset + 4, Word);
for(unsigned n = 0; n < 4; n++) data[0 + n] = (wordlo >> (n * 8)) & 255;
for(unsigned n = 0; n < 4; n++) data[4 + n] = (wordhi >> (n * 8)) & 255;
}
@ -68,8 +68,8 @@ void PPU::render_background_linear(unsigned bgnumber) {
uint8 color = data[px++ ^ (tile.hflip ? 7 : 0)];
if(color) {
if(bg.control.colormode == 0) layer[bg.control.priority][x] = { true, palette(tile.palette * 16 + color) };
if(bg.control.colormode == 1) layer[bg.control.priority][x] = { true, palette(color) };
if(bg.control.colormode == 0) layer[bg.control.priority][x] = { true, pram[tile.palette * 16 + color] };
if(bg.control.colormode == 1) layer[bg.control.priority][x] = { true, pram[color] };
}
}
}
@ -93,7 +93,7 @@ void PPU::render_background_affine(unsigned bgnumber) {
if(tx < screensize && ty < screensize) {
uint8 character = vram[basemap + ty * screensize + tx];
uint8 color = vram[basechr + (character * 64) + py * 8 + px];
if(color) layer[bg.control.priority][x] = { true, palette(color) };
if(color) layer[bg.control.priority][x] = { true, pram[color] };
}
fx += bg.pa;
@ -124,10 +124,10 @@ void PPU::render_background_bitmap(unsigned bgnumber) {
if(px < width && py < height) {
unsigned offset = py * width + px;
unsigned color = vram.read(basemap + (offset << depth), size);
unsigned color = vram_read(basemap + (offset << depth), size);
if(depth || color) { //8bpp color 0 is transparent; 15bpp color is always opaque
if(depth == 0) color = palette(color);
if(depth == 0) color = pram[color];
if(depth == 1) color = color & 0x7fff;
layer[bg.control.priority][x] = { true, color };
}

169
bsnes/gba/ppu/memory.cpp Executable file
View File

@ -0,0 +1,169 @@
//reads a byte, halfword or word from VRAM; multi-byte values are little-endian.
//halfword and word reads are forced to their natural alignment.
uint32 PPU::vram_read(uint32 addr, uint32 size) {
  //addresses with bit 16 set wrap within 0x10000-0x17fff; all others within 0x00000-0x0ffff
  addr &= (addr & 0x10000) ? 0x17fff : 0x0ffff;

  switch(size) {
  case Word:
    addr &= ~3;
    //widen each byte before shifting: a uint8 promotes to signed int, and
    //shifting a value >= 0x80 left by 24 would overflow it
    return (uint32)vram[addr + 0] <<  0
         | (uint32)vram[addr + 1] <<  8
         | (uint32)vram[addr + 2] << 16
         | (uint32)vram[addr + 3] << 24;
  case Half:
    addr &= ~1;
    return (uint32)vram[addr + 0] << 0 | (uint32)vram[addr + 1] << 8;
  case Byte:
    return vram[addr];
  }

  //unrecognized size: return a defined value rather than flowing off the end
  return 0u;
}
//stores a byte, halfword or word into VRAM in little-endian order.
//a byte store is widened to a halfword: the same value lands in both bytes of
//the aligned halfword that contains the address.
//NOTE(review): hardware reportedly ignores 8-bit writes to the OBJ tile area of
//VRAM; that distinction is not made here -- confirm against GBATEK.
void PPU::vram_write(uint32 addr, uint32 size, uint32 word) {
  //addresses with bit 16 set wrap within 0x10000-0x17fff; all others within 0x00000-0x0ffff
  const uint32 mask = (addr & 0x10000) ? 0x17fff : 0x0ffff;
  addr &= mask;

  if(size == Word) {
    addr &= ~3;
    for(unsigned n = 0; n < 4; n++) vram[addr + n] = word >> (n * 8);
    return;
  }

  if(size == Half) {
    addr &= ~1;
    for(unsigned n = 0; n < 2; n++) vram[addr + n] = word >> (n * 8);
    return;
  }

  if(size == Byte) {
    addr &= ~1;
    vram[addr + 0] = word;
    vram[addr + 1] = word;
  }
}
//reads from palette RAM, which is stored as 512 16-bit entries.
//word reads combine the two halfwords of the aligned word; byte reads extract
//the addressed byte from its containing halfword.
uint32 PPU::pram_read(uint32 addr, uint32 size) {
  if(size == Word) return pram_read(addr & ~2, Half) << 0 | pram_read(addr | 2, Half) << 16;
  //mask to eight bits: without it, an even-address byte read would return the whole halfword
  if(size == Byte) return (pram_read(addr, Half) >> ((addr & 1) * 8)) & 0xff;
  return pram[addr >> 1 & 511];
}
//writes to palette RAM, which is stored as 512 16-bit entries.
//a word is split into two halfword stores; a byte is duplicated into both
//halves of the halfword it addresses. bit 15 of every entry is discarded.
void PPU::pram_write(uint32 addr, uint32 size, uint32 word) {
  if(size == Byte) {
    //8->16-bit widening: the byte value fills both halves
    word = word << 8 | word << 0;
  } else if(size == Word) {
    pram_write(addr & ~2, Half, word >> 0);
    pram_write(addr |  2, Half, word >> 16);
    return;
  }

  const unsigned index = addr >> 1 & 511;
  pram[index] = word & 0x7fff;
}
//reads from OAM by reassembling halfwords from the decoded object state.
//each object occupies 8 bytes: halfwords 0, 2 and 4 hold its attributes, and
//halfword 6 holds one affine parameter (pa, pb, pc or pd, chosen by addr bits 3-4)
//of the parameter set selected by addr bits 5-9.
uint32 PPU::oam_read(uint32 addr, uint32 size) {
  if(size == Word) return oam_read(addr & ~2, Half) << 0 | oam_read(addr | 2, Half) << 16;
  //mask to eight bits: without it, an even-address byte read would return the whole halfword
  if(size == Byte) return (oam_read(addr, Half) >> ((addr & 1) * 8)) & 0xff;

  auto &obj = object[addr >> 3 & 127];
  auto &par = objectparam[addr >> 5 & 31];

  switch(addr & 6) {

  case 0: return (
    (obj.y          <<  0)
  | (obj.affine     <<  8)
  | (obj.affinesize <<  9)
  | (obj.mode       << 10)
  | (obj.mosaic     << 12)
  | (obj.colors     << 13)
  | (obj.shape      << 14)
  );

  case 2: return (
    (obj.x           <<  0)
  | (obj.affineparam <<  9)
  | (obj.hflip       << 12)
  | (obj.vflip       << 13)
  | (obj.size        << 14)
  );

  case 4: return (
    (obj.character <<  0)
  | (obj.priority  << 10)
  | (obj.palette   << 12)
  );

  case 6:
    switch(addr >> 3 & 3) {
    case 0: return par.pa;
    case 1: return par.pb;
    case 2: return par.pc;
    case 3: return par.pd;
    }

  }

  //every case above returns; this only keeps the function from flowing off its end
  return 0u;
}
//writes to OAM by decoding halfwords into the object state.
//each object occupies 8 bytes: halfwords 0, 2 and 4 hold its attributes, and
//halfword 6 holds one affine parameter (pa, pb, pc or pd, chosen by addr bits 3-4)
//of the parameter set selected by addr bits 5-9.
void PPU::oam_write(uint32 addr, uint32 size, uint32 word) {
  if(size == Word) {
    oam_write(addr & ~2, Half, word >> 0);
    oam_write(addr |  2, Half, word >> 16);
    return;
  }

  //OAM sits on a 16-bit bus: 8-bit writes are ignored and leave the contents unchanged
  if(size == Byte) return;

  auto &obj = object[addr >> 3 & 127];
  auto &par = objectparam[addr >> 5 & 31];

  switch(addr & 6) {

  case 0:
    obj.y          = word >>  0;
    obj.affine     = word >>  8;
    obj.affinesize = word >>  9;
    obj.mode       = word >> 10;
    obj.mosaic     = word >> 12;
    obj.colors     = word >> 13;
    obj.shape      = word >> 14;
    break;

  case 2:
    obj.x           = word >>  0;
    obj.affineparam = word >>  9;
    obj.hflip       = word >> 12;
    obj.vflip       = word >> 13;
    obj.size        = word >> 14;
    break;

  case 4:
    obj.character = word >>  0;
    obj.priority  = word >> 10;
    obj.palette   = word >> 12;
    break;

  case 6:
    switch(addr >> 3 & 3) {
    case 0: par.pa = word; break;
    case 1: par.pb = word; break;
    case 2: par.pc = word; break;
    case 3: par.pd = word; break;
    }
    break;

  }

  //pixel dimensions indexed by shape * 4 + size; refreshed after every write
  //so obj.width and obj.height always match the stored shape and size
  static const unsigned widths[] = {
     8, 16, 32, 64,
    16, 32, 32, 64,
     8,  8, 16, 32,
     8,  8,  8,  8,  //invalid modes
  };

  static const unsigned heights[] = {
     8, 16, 32, 64,
     8,  8, 16, 32,
    16, 32, 32, 64,
     8,  8,  8,  8,  //invalid modes
  };

  obj.width  = widths [obj.shape * 4 + obj.size];
  obj.height = heights[obj.shape * 4 + obj.size];
}

View File

@ -17,21 +17,15 @@ uint8 PPU::read(uint32 addr) {
case 0x04000006: return regs.vcounter >> 0;
case 0x04000007: return regs.vcounter >> 8;
//BG0CNT
case 0x04000008: return regs.bg[0].control >> 0;
case 0x04000009: return regs.bg[0].control >> 8;
//BG1CNT
case 0x0400000a: return regs.bg[1].control >> 0;
case 0x0400000b: return regs.bg[1].control >> 8;
//BG2CNT
case 0x0400000c: return regs.bg[2].control >> 0;
case 0x0400000d: return regs.bg[2].control >> 8;
//BG3CNT
case 0x0400000e: return regs.bg[3].control >> 0;
case 0x0400000f: return regs.bg[3].control >> 8;
//BG0CNT,BG1CNT,BG2CNT,BG3CNT
case 0x04000008: case 0x04000009:
case 0x0400000a: case 0x0400000b:
case 0x0400000c: case 0x0400000d:
case 0x0400000e: case 0x0400000f: {
auto &bg = regs.bg[(addr >> 1) & 3];
unsigned shift = (addr & 1) * 8;
return bg.control >> shift;
}
//WININ
case 0x04000048: return regs.window[0].in;
@ -69,117 +63,92 @@ void PPU::write(uint32 addr, uint8 byte) {
regs.status.vcompare = byte;
return;
//BG0CNT
case 0x04000008: regs.bg[0].control = (regs.bg[0].control & 0xff00) | (byte << 0); return;
case 0x04000009: regs.bg[0].control = (regs.bg[0].control & 0x00ff) | (byte << 8); return;
//BG0CNT,BG1CNT,BG2CNT,BG3CNT
case 0x04000008: case 0x04000009:
case 0x0400000a: case 0x0400000b:
case 0x0400000c: case 0x0400000d:
case 0x0400000e: case 0x0400000f: {
auto &bg = regs.bg[(addr >> 1) & 3];
unsigned shift = (addr & 1) * 8;
bg.control = (bg.control & ~(255 << shift)) | (byte << shift);
return;
}
//BG1CNT
case 0x0400000a: regs.bg[1].control = (regs.bg[1].control & 0xff00) | (byte << 0); return;
case 0x0400000b: regs.bg[1].control = (regs.bg[1].control & 0x00ff) | (byte << 8); return;
//BG0HOFS,BG1HOFS,BG2HOFS,BG3HOFS
case 0x04000010: case 0x04000011:
case 0x04000014: case 0x04000015:
case 0x04000018: case 0x04000019:
case 0x0400001c: case 0x0400001d: {
auto &bg = regs.bg[(addr >> 2) & 3];
unsigned shift = (addr & 1) * 8;
bg.hoffset = (bg.hoffset & ~(255 << shift)) | (byte << shift);
return;
}
//BG2CNT
case 0x0400000c: regs.bg[2].control = (regs.bg[2].control & 0xff00) | (byte << 0); return;
case 0x0400000d: regs.bg[2].control = (regs.bg[2].control & 0x00ff) | (byte << 8); return;
//BG0VOFS,BG1VOFS,BG2VOFS,BG3VOFS
case 0x04000012: case 0x04000013:
case 0x04000016: case 0x04000017:
case 0x0400001a: case 0x0400001b:
case 0x0400001e: case 0x0400001f: {
auto &bg = regs.bg[(addr >> 2) & 3];
unsigned shift = (addr & 1) * 8;
bg.voffset = (bg.voffset & ~(255 << shift)) | (byte << shift);
return;
}
//BG3CNT
case 0x0400000e: regs.bg[3].control = (regs.bg[3].control & 0xff00) | (byte << 0); return;
case 0x0400000f: regs.bg[3].control = (regs.bg[3].control & 0x00ff) | (byte << 8); return;
//BG2PA,BG3PA
case 0x04000020: case 0x04000021:
case 0x04000030: case 0x04000031: {
auto &bg = regs.bg[(addr >> 4) & 3];
unsigned shift = (addr & 1) * 8;
bg.pa = (bg.pa & ~(255 << shift)) | (byte << shift);
return;
}
//BG0HOFS
case 0x04000010: regs.bg[0].hoffset = (regs.bg[0].hoffset & 0xff00) | (byte << 0); return;
case 0x04000011: regs.bg[0].hoffset = (regs.bg[0].hoffset & 0x00ff) | (byte << 8); return;
//BG2PB,BG3PB
case 0x04000022: case 0x04000023:
case 0x04000032: case 0x04000033: {
auto &bg = regs.bg[(addr >> 4) & 3];
unsigned shift = (addr & 1) * 8;
bg.pb = (bg.pb & ~(255 << shift)) | (byte << shift);
return;
}
//BG0VOFS
case 0x04000012: regs.bg[0].voffset = (regs.bg[0].voffset & 0xff00) | (byte << 0); return;
case 0x04000013: regs.bg[0].voffset = (regs.bg[0].voffset & 0x00ff) | (byte << 8); return;
//BG2PC,BG3PC
case 0x04000024: case 0x04000025:
case 0x04000034: case 0x04000035: {
auto &bg = regs.bg[(addr >> 4) & 3];
unsigned shift = (addr & 1) * 8;
bg.pc = (bg.pc & ~(255 << shift)) | (byte << shift);
return;
}
//BG1HOFS
case 0x04000014: regs.bg[1].hoffset = (regs.bg[1].hoffset & 0xff00) | (byte << 0); return;
case 0x04000015: regs.bg[1].hoffset = (regs.bg[1].hoffset & 0x00ff) | (byte << 8); return;
//BG2PD,BG3PD
case 0x04000026: case 0x04000027:
case 0x04000036: case 0x04000037: {
auto &bg = regs.bg[(addr >> 4) & 3];
unsigned shift = (addr & 1) * 8;
bg.pd = (bg.pd & ~(255 << shift)) | (byte << shift);
return;
}
//BG1VOFS
case 0x04000016: regs.bg[1].voffset = (regs.bg[1].voffset & 0xff00) | (byte << 0); return;
case 0x04000017: regs.bg[1].voffset = (regs.bg[1].voffset & 0x00ff) | (byte << 8); return;
//BG2X_L,BG2X_H,BG3X_L,BG3X_H
case 0x04000028: case 0x04000029: case 0x0400002a: case 0x0400002b:
case 0x04000038: case 0x04000039: case 0x0400003a: case 0x0400003b: {
auto &bg = regs.bg[(addr >> 4) & 3];
unsigned shift = (addr & 3) * 8;
bg.lx = bg.x = (bg.x & ~(255 << shift)) | (byte << shift);
return;
}
//BG2HOFS
case 0x04000018: regs.bg[2].hoffset = (regs.bg[2].hoffset & 0xff00) | (byte << 0); return;
case 0x04000019: regs.bg[2].hoffset = (regs.bg[2].hoffset & 0x00ff) | (byte << 8); return;
//BG2VOFS
case 0x0400001a: regs.bg[2].voffset = (regs.bg[2].voffset & 0xff00) | (byte << 0); return;
case 0x0400001b: regs.bg[2].voffset = (regs.bg[2].voffset & 0x00ff) | (byte << 8); return;
//BG3HOFS
case 0x0400001c: regs.bg[3].hoffset = (regs.bg[3].hoffset & 0xff00) | (byte << 0); return;
case 0x0400001d: regs.bg[3].hoffset = (regs.bg[3].hoffset & 0x00ff) | (byte << 8); return;
//BG3VOFS
case 0x0400001e: regs.bg[3].voffset = (regs.bg[3].voffset & 0xff00) | (byte << 0); return;
case 0x0400001f: regs.bg[3].voffset = (regs.bg[3].voffset & 0x00ff) | (byte << 8); return;
//BG2PA
case 0x04000020: regs.bg[2].pa = (regs.bg[2].pa & 0xff00) | (byte << 0); return;
case 0x04000021: regs.bg[2].pa = (regs.bg[2].pa & 0x00ff) | (byte << 8); return;
//BG2PB
case 0x04000022: regs.bg[2].pb = (regs.bg[2].pb & 0xff00) | (byte << 0); return;
case 0x04000023: regs.bg[2].pb = (regs.bg[2].pb & 0x00ff) | (byte << 8); return;
//BG2PC
case 0x04000024: regs.bg[2].pc = (regs.bg[2].pc & 0xff00) | (byte << 0); return;
case 0x04000025: regs.bg[2].pc = (regs.bg[2].pc & 0x00ff) | (byte << 8); return;
//BG2PD
case 0x04000026: regs.bg[2].pd = (regs.bg[2].pd & 0xff00) | (byte << 0); return;
case 0x04000027: regs.bg[2].pd = (regs.bg[2].pd & 0x00ff) | (byte << 8); return;
//BG2X_L
case 0x04000028: regs.bg[2].lx = regs.bg[2].x = (regs.bg[2].x & 0xffffff00) | (byte << 0); return;
case 0x04000029: regs.bg[2].lx = regs.bg[2].x = (regs.bg[2].x & 0xffff00ff) | (byte << 8); return;
//BG2X_H
case 0x0400002a: regs.bg[2].lx = regs.bg[2].x = (regs.bg[2].x & 0xff00ffff) | (byte << 16); return;
case 0x0400002b: regs.bg[2].lx = regs.bg[2].x = (regs.bg[2].x & 0x00ffffff) | (byte << 24); return;
//BG2Y_L
case 0x0400002c: regs.bg[2].ly = regs.bg[2].y = (regs.bg[2].y & 0xffffff00) | (byte << 0); return;
case 0x0400002d: regs.bg[2].ly = regs.bg[2].y = (regs.bg[2].y & 0xffff00ff) | (byte << 8); return;
//BG2Y_H
case 0x0400002e: regs.bg[2].ly = regs.bg[2].y = (regs.bg[2].y & 0xff00ffff) | (byte << 16); return;
case 0x0400002f: regs.bg[2].ly = regs.bg[2].y = (regs.bg[2].y & 0x00ffffff) | (byte << 24); return;
//BG3PA
case 0x04000030: regs.bg[3].pa = (regs.bg[3].pa & 0xff00) | (byte << 0); return;
case 0x04000031: regs.bg[3].pa = (regs.bg[3].pa & 0x00ff) | (byte << 8); return;
//BG3PB
case 0x04000032: regs.bg[3].pb = (regs.bg[3].pb & 0xff00) | (byte << 0); return;
case 0x04000033: regs.bg[3].pb = (regs.bg[3].pb & 0x00ff) | (byte << 8); return;
//BG3PC
case 0x04000034: regs.bg[3].pc = (regs.bg[3].pc & 0xff00) | (byte << 0); return;
case 0x04000035: regs.bg[3].pc = (regs.bg[3].pc & 0x00ff) | (byte << 8); return;
//BG3PD
case 0x04000036: regs.bg[3].pd = (regs.bg[3].pd & 0xff00) | (byte << 0); return;
case 0x04000037: regs.bg[3].pd = (regs.bg[3].pd & 0x00ff) | (byte << 8); return;
//BG3X_L
case 0x04000038: regs.bg[3].lx = regs.bg[3].x = (regs.bg[3].x & 0xffffff00) | (byte << 0); return;
case 0x04000039: regs.bg[3].lx = regs.bg[3].x = (regs.bg[3].x & 0xffff00ff) | (byte << 8); return;
//BG3X_H
case 0x0400003a: regs.bg[3].lx = regs.bg[3].x = (regs.bg[3].x & 0xff00ffff) | (byte << 16); return;
case 0x0400003b: regs.bg[3].lx = regs.bg[3].x = (regs.bg[3].x & 0x00ffffff) | (byte << 24); return;
//BG3Y_L
case 0x0400003c: regs.bg[3].ly = regs.bg[3].y = (regs.bg[3].y & 0xffffff00) | (byte << 0); return;
case 0x0400003d: regs.bg[3].ly = regs.bg[3].y = (regs.bg[3].y & 0xffff00ff) | (byte << 8); return;
//BG3Y_H
case 0x0400003e: regs.bg[3].ly = regs.bg[3].y = (regs.bg[3].y & 0xff00ffff) | (byte << 16); return;
case 0x0400003f: regs.bg[3].ly = regs.bg[3].y = (regs.bg[3].y & 0x00ffffff) | (byte << 24); return;
//BG2Y_L,BG2Y_H,BG3Y_L,BG3Y_H
case 0x0400002c: case 0x0400002d: case 0x0400002e: case 0x0400002f:
case 0x0400003c: case 0x0400003d: case 0x0400003e: case 0x0400003f: {
auto &bg = regs.bg[(addr >> 4) & 3];
unsigned shift = (addr & 3) * 8;
bg.ly = bg.y = (bg.y & ~(255 << shift)) | (byte << shift);
return;
}
//WIN0H
case 0x04000040: regs.window[0].x2 = byte; return;

View File

@ -33,8 +33,8 @@ void PPU::render_object_linear(Object &obj) {
if(obj.colors == 0) color = (px & 1) ? color >> 4 : color & 15;
if(color) {
if(obj.colors == 0) layer[obj.priority][sx] = { true, palette(256 + obj.palette * 16 + color) };
if(obj.colors == 1) layer[obj.priority][sx] = { true, palette(256 + color) };
if(obj.colors == 0) layer[obj.priority][sx] = { true, pram[256 + obj.palette * 16 + color] };
if(obj.colors == 1) layer[obj.priority][sx] = { true, pram[256 + color] };
}
}
}
@ -75,8 +75,8 @@ void PPU::render_object_affine(Object &obj) {
if(obj.colors == 0) color = (px & 1) ? color >> 4 : color & 15;
if(color) {
if(obj.colors == 0) layer[obj.priority][sx] = { true, palette(256 + obj.palette * 16 + color) };
if(obj.colors == 1) layer[obj.priority][sx] = { true, palette(256 + color) };
if(obj.colors == 0) layer[obj.priority][sx] = { true, pram[256 + obj.palette * 16 + color] };
if(obj.colors == 1) layer[obj.priority][sx] = { true, pram[256 + color] };
}
}
@ -84,142 +84,3 @@ void PPU::render_object_affine(Object &obj) {
fy += pc;
}
}
//sized OAM read assembled from single-byte reads, lowest byte first.
//word reads are aligned down to a multiple of four; halfword and byte reads
//start at the address as given.
uint32 PPU::oam_read(uint32 addr, uint32 size) {
  unsigned count = 0;
  if(size == Word) {
    addr &= ~3;
    count = 4;
  } else if(size == Half) {
    count = 2;
  } else if(size == Byte) {
    count = 1;
  }

  uint32 result = 0;
  for(unsigned n = 0; n < count; n++) result |= oam_read(addr + n) << (n * 8);
  return result;
}
//16-bit bus (8-bit writes are ignored)
//sized OAM write split into single-byte writes, lowest byte first;
//the address is aligned down to the access size before writing.
void PPU::oam_write(uint32 addr, uint32 size, uint32 word) {
  unsigned count;
  if(size == Word) {
    addr &= ~3;
    count = 4;
  } else if(size == Half) {
    addr &= ~1;
    count = 2;
  } else {
    return;  //any other size performs no write
  }

  for(unsigned n = 0; n < count; n++) oam_write(addr + n, word >> (n * 8));
}
//reads a single byte of OAM from the decoded object state.
//addr bits 0-2 select the byte within an object's 8-byte slot, addr bits 3-9
//select the object, and addr bits 5-9 select the affine parameter set.
uint8 PPU::oam_read(uint32 addr) {
auto &obj = object[(addr >> 3) & 127];
auto &par = objectparam[(addr >> 5) & 31];
switch(addr & 7) {
//bytes 0-5: object attributes, repacked from their individual fields
case 0: return (obj.y);
case 1: return (obj.affine << 0) + (obj.affinesize << 1) + (obj.mode << 2) + (obj.mosaic << 4) + (obj.colors << 5) + (obj.shape << 6);
case 2: return (obj.x >> 0);
case 3: return (obj.x >> 8) + (obj.affineparam << 1) + (obj.hflip << 4) + (obj.vflip << 5) + (obj.size << 6);
case 4: return (obj.character >> 0);
case 5: return (obj.character >> 8) + (obj.priority << 2) + (obj.palette << 4);
//byte 6: low byte of the affine parameter chosen by addr bits 3-4
//(every inner case returns, so control never reaches case 7 from here)
case 6:
switch((addr >> 3) & 3) {
case 0: return par.pa >> 0;
case 1: return par.pb >> 0;
case 2: return par.pc >> 0;
case 3: return par.pd >> 0;
}
//byte 7: high byte of the same affine parameter
case 7:
switch((addr >> 3) & 3) {
case 0: return par.pa >> 8;
case 1: return par.pb >> 8;
case 2: return par.pc >> 8;
case 3: return par.pd >> 8;
}
}
}
//writes a single byte of OAM into the decoded object state.
//addr bits 0-2 select the byte within an object's 8-byte slot, addr bits 3-9
//select the object, and addr bits 5-9 select the affine parameter set.
void PPU::oam_write(uint32 addr, uint8 byte) {
auto &obj = object[(addr >> 3) & 127];
auto &par = objectparam[(addr >> 5) & 31];
switch(addr & 7) {
//bytes 0-5: object attributes, unpacked into their individual fields
case 0:
obj.y = byte;
break;
case 1:
obj.affine = byte >> 0;
obj.affinesize = byte >> 1;
obj.mode = byte >> 2;
obj.mosaic = byte >> 4;
obj.colors = byte >> 5;
obj.shape = byte >> 6;
break;
case 2:
obj.x = (obj.x & 0xff00) | (byte << 0);
break;
case 3:
//this byte carries the upper part of x as well as the flags assigned below
obj.x = (obj.x & 0x00ff) | (byte << 8);
obj.affineparam = byte >> 1;
obj.hflip = byte >> 4;
obj.vflip = byte >> 5;
obj.size = byte >> 6;
break;
case 4:
obj.character = (obj.character & 0xff00) | (byte << 0);
break;
case 5:
//this byte carries the upper part of character as well as priority and palette
obj.character = (obj.character & 0x00ff) | (byte << 8);
obj.priority = byte >> 2;
obj.palette = byte >> 4;
break;
//byte 6: low byte of the affine parameter chosen by addr bits 3-4
case 6:
switch((addr >> 3) & 3) {
case 0: par.pa = (par.pa & 0xff00) | (byte << 0); break;
case 1: par.pb = (par.pb & 0xff00) | (byte << 0); break;
case 2: par.pc = (par.pc & 0xff00) | (byte << 0); break;
case 3: par.pd = (par.pd & 0xff00) | (byte << 0); break;
}
break;
//byte 7: high byte of the same affine parameter
case 7:
switch((addr >> 3) & 3) {
case 0: par.pa = (par.pa & 0x00ff) | (byte << 8); break;
case 1: par.pb = (par.pb & 0x00ff) | (byte << 8); break;
case 2: par.pc = (par.pc & 0x00ff) | (byte << 8); break;
case 3: par.pd = (par.pd & 0x00ff) | (byte << 8); break;
}
break;
}
//pixel dimensions indexed by shape * 4 + size; the last row (shape 3) is all zero
static unsigned widths[] = {
8, 16, 32, 64,
16, 32, 32, 64,
8, 8, 16, 32,
0, 0, 0, 0, //8?
};
static unsigned heights[] = {
8, 16, 32, 64,
8, 8, 16, 32,
16, 32, 32, 64,
0, 0, 0, 0, //8?
};
//refreshed after every byte write, whichever byte was written
obj.width = widths [obj.shape * 4 + obj.size];
obj.height = heights[obj.shape * 4 + obj.size];
}

View File

@ -17,6 +17,7 @@ namespace GBA {
#include "object.cpp"
#include "screen.cpp"
#include "mmio.cpp"
#include "memory.cpp"
PPU ppu;
void PPU::Enter() { ppu.enter(); }
@ -35,11 +36,11 @@ void PPU::step(unsigned clocks) {
void PPU::power() {
create(PPU::Enter, 16777216);
for(unsigned n = 0; n < vram.size; n++) vram.data[n] = 0;
for(unsigned n = 0; n < pram.size; n++) pram.data[n] = 0;
//for(unsigned n = 0; n < vram.size; n++) vram.data[n] = 0;
for(unsigned n = 0; n < 240 * 160; n++) output[n] = 0;
for(unsigned n = 0; n < 1024; n++) oam_write(n, 0);
for(unsigned n = 0; n < 1024; n += 2) pram_write(n, Half, 0x0000);
for(unsigned n = 0; n < 1024; n += 2) oam_write(n, Half, 0x0000);
regs.control = 0;
regs.greenswap = 0;
@ -138,8 +139,6 @@ void PPU::frame() {
}
PPU::PPU() {
vram.data = new uint8[vram.size = 96 * 1024];
pram.data = new uint8[pram.size = 1024];
output = new uint16[240 * 160];
}

View File

@ -1,6 +1,6 @@
struct PPU : Thread, MMIO {
StaticMemory vram;
StaticMemory pram;
uint8 vram[96 * 1024];
uint16 pram[512];
#include "registers.hpp"
#include "state.hpp"
uint16 *output;
@ -16,6 +16,15 @@ struct PPU : Thread, MMIO {
uint8 read(uint32 addr);
void write(uint32 addr, uint8 byte);
uint32 vram_read(uint32 addr, uint32 size);
void vram_write(uint32 addr, uint32 size, uint32 word);
uint32 pram_read(uint32 addr, uint32 size);
void pram_write(uint32 addr, uint32 size, uint32 word);
uint32 oam_read(uint32 addr, uint32 size);
void oam_write(uint32 addr, uint32 size, uint32 word);
void render_backgrounds();
void render_background_linear(unsigned bgnumber);
void render_background_affine(unsigned bgnumber);
@ -24,12 +33,7 @@ struct PPU : Thread, MMIO {
void render_objects();
void render_object_linear(Object&);
void render_object_affine(Object&);
uint32 oam_read(uint32 addr, uint32 size);
void oam_write(uint32 addr, uint32 size, uint32 word);
uint8 oam_read(uint32 addr);
void oam_write(uint32 addr, uint8 byte);
uint15 palette(uint9 index);
void render_forceblank();
void render_screen();

View File

@ -1,10 +1,3 @@
//fetches one palette entry from byte-addressed palette RAM:
//two consecutive bytes, low byte first, truncated to 15 bits by the return type.
uint15 PPU::palette(uint9 index) {
  const unsigned base = index * 2;
  uint15 color = pram[base + 0] << 0 | pram[base + 1] << 8;
  return color;
}
void PPU::render_forceblank() {
uint16 *line = output + regs.vcounter * 240;
for(unsigned x = 0; x < 240; x++) line[x] = 0x7fff;
@ -13,7 +6,7 @@ void PPU::render_forceblank() {
void PPU::render_screen() {
uint16 *line = output + regs.vcounter * 240;
for(unsigned x = 0; x < 240; x++) {
uint15 color = palette(0) & 0x7fff;
auto color = pram[0];
if(layer[3][x].exists) color = layer[3][x].color;
if(layer[2][x].exists) color = layer[2][x].color;
if(layer[1][x].exists) color = layer[1][x].color;

View File

@ -9,11 +9,9 @@ void ARM::arm_step() {
pipeline.fetch.instruction = read(pipeline.fetch.address, Word);
pipeline_step();
step(2);
}
pipeline_step();
step(2);
if(processor.irqline && cpsr().i == 0) {
vector(0x00000018, Processor::Mode::IRQ);

View File

@ -9,11 +9,9 @@ void ARM::thumb_step() {
pipeline.fetch.instruction = read(pipeline.fetch.address, Half);
pipeline_step();
step(1);
}
pipeline_step();
step(1);
if(processor.irqline && cpsr().i == 0) {
vector(0x00000018, Processor::Mode::IRQ);