From b08f02c4f3265c78f53ffd4597f9fcb4db5d85b2 Mon Sep 17 00:00:00 2001 From: Lior Halphon Date: Sat, 3 Mar 2018 15:47:36 +0200 Subject: [PATCH] Rewriting the PPU rendering: T-cycle accurate background rendering. DMG only, CGB completely broken --- Core/display.c | 209 +++++++++++++++++++++++++++++++++++----------- Core/gb.h | 20 +++++ Core/save_state.c | 12 +++ 3 files changed, 191 insertions(+), 50 deletions(-) diff --git a/Core/display.c b/Core/display.c index 56845668..df338908 100755 --- a/Core/display.c +++ b/Core/display.c @@ -4,6 +4,43 @@ #include #include "gb.h" +/* FIFO functions */ + +static inline unsigned fifo_size(GB_fifo_t *fifo) +{ + return (fifo->write_end - fifo->read_end) & 0xF; +} + +static void fifo_clear(GB_fifo_t *fifo) +{ + fifo->read_end = fifo->write_end = 0; +} + +static GB_fifo_item_t *fifo_pop(GB_fifo_t *fifo) +{ + GB_fifo_item_t *ret = &fifo->fifo[fifo->read_end]; + fifo->read_end++; + fifo->read_end &= 0xF; + return ret; +} + +static void fifo_push_bg_row(GB_fifo_t *fifo, uint8_t lower, uint8_t upper, uint8_t palette, bool bg_priority) +{ + for (unsigned i = 8; i--;) { + fifo->fifo[fifo->write_end] = (GB_fifo_item_t) { + (lower >> 7) | ((upper >> 7) << 1), + palette, + 0, + bg_priority, + }; + lower <<= 1; + upper <<= 1; + + fifo->write_end++; + fifo->write_end &= 0xF; + } +} + /* Each line is 456 cycles, approximately: Mode 2 - 80 cycles / OAM Transfer @@ -41,7 +78,8 @@ static bool window_enabled(GB_gameboy_t *gb) return (gb->io_registers[GB_IO_LCDC] & 0x20) && gb->io_registers[GB_IO_WX] < 167; } -static uint32_t get_pixel(GB_gameboy_t *gb, uint8_t x, uint8_t y) +/* Kept as a reference until I finish rewriting the PPU */ +static uint32_t __attribute__((unused)) get_pixel(GB_gameboy_t *gb, uint8_t x, uint8_t y) { /* Bit 7 - LCD Display Enable (0=Off, 1=On) @@ -337,12 +375,6 @@ void GB_STAT_update(GB_gameboy_t *gb) } } -static unsigned scx_delay(GB_gameboy_t *gb) -{ - return (gb->effective_scx & 7) + 0; - -} - void GB_lcd_off(GB_gameboy_t *gb) { gb->display_state = 0; @@ -354,7 +386,6 @@ void GB_lcd_off(GB_gameboy_t *gb) gb->io_registers[GB_IO_LY] = 0; gb->io_registers[GB_IO_STAT] &= ~3; gb->io_registers[GB_IO_STAT] |= 4; - gb->effective_scx = gb->io_registers[GB_IO_SCX]; if (gb->hdma_on_hblank) { gb->hdma_on_hblank = false; gb->hdma_on = false; @@ -376,6 +407,31 @@ void GB_lcd_off(GB_gameboy_t *gb) gb->ly_for_comparison = 0; } +static void render_pixel_if_possible(GB_gameboy_t *gb) +{ + /* Drop pixels for scrollings */ + if (gb->position_in_line >= 160 || gb->disable_rendering) { + gb->position_in_line++; + if (fifo_size(&gb->bg_fifo) != 0) { + fifo_pop(&gb->bg_fifo); + } + return; + } + +#ifndef NDEBUG + if (fifo_size(&gb->bg_fifo) == 0) { + GB_log(gb, "Defective BG FIFO!\n"); + return; + } +#endif + + GB_fifo_item_t *fifo_item = fifo_pop(&gb->bg_fifo); + + uint8_t pixel = ((gb->io_registers[GB_IO_BGP] >> (fifo_item->pixel << 1)) & 3); + gb->screen[gb->position_in_line + gb->current_line * WIDTH] = gb->background_palettes_rgb[pixel]; + gb->position_in_line++; +} + void GB_display_run(GB_gameboy_t *gb, uint8_t cycles) { @@ -397,14 +453,21 @@ void GB_display_run(GB_gameboy_t *gb, uint8_t cycles) GB_STATE(gb, display, 15); GB_STATE(gb, display, 16); GB_STATE(gb, display, 17); - GB_STATE(gb, display, 18); - GB_STATE(gb, display, 19); + + GB_STATE(gb, display, 21); + GB_STATE(gb, display, 22); + GB_STATE(gb, display, 23); + GB_STATE(gb, display, 24); + GB_STATE(gb, display, 25); + GB_STATE(gb, display, 26); + GB_STATE(gb, display, 27); + GB_STATE(gb, display, 28); } if (!(gb->io_registers[GB_IO_LCDC] & 0x80)) { while (true) { - GB_SLEEP(gb, display, 18, LCDC_PERIOD); + GB_SLEEP(gb, display, 1, LCDC_PERIOD); display_vblank(gb); } return; @@ -418,28 +481,30 @@ void GB_display_run(GB_gameboy_t *gb, uint8_t cycles) gb->vram_read_blocked = false; gb->oam_write_blocked = false; gb->vram_write_blocked = false; + gb->cycles_for_line = MODE2_LENGTH - 4; GB_STAT_update(gb); - GB_SLEEP(gb, display, 1, MODE2_LENGTH - 4); + GB_SLEEP(gb, display, 2, MODE2_LENGTH - 4); gb->io_registers[GB_IO_STAT] &= ~3; gb->io_registers[GB_IO_STAT] |= 3; - gb->effective_scx = gb->io_registers[GB_IO_SCX]; gb->oam_read_blocked = true; gb->vram_read_blocked = true; gb->oam_write_blocked = true; gb->vram_write_blocked = true; GB_STAT_update(gb); - GB_SLEEP(gb, display, 2, MODE3_LENGTH + scx_delay(gb) + 2); + gb->cycles_for_line += MODE3_LENGTH + (gb->io_registers[GB_IO_SCX] & 7) + 2; + GB_SLEEP(gb, display, 3, MODE3_LENGTH + (gb->io_registers[GB_IO_SCX] & 7) + 2); gb->io_registers[GB_IO_STAT] &= ~3; gb->oam_read_blocked = false; gb->vram_read_blocked = false; gb->oam_write_blocked = false; gb->vram_write_blocked = false; - GB_SLEEP(gb, display, 17, 1); + gb->cycles_for_line += 1; + GB_SLEEP(gb, display, 4, 1); GB_STAT_update(gb); /* Mode 0 is shorter in the very first line */ - GB_SLEEP(gb, display, 3, MODE0_LENGTH - scx_delay(gb) - 2 - 1 - 4); + GB_SLEEP(gb, display, 5, LINE_LENGTH - gb->cycles_for_line - 8); gb->current_line = 1; while (true) { @@ -450,7 +515,7 @@ void GB_display_run(GB_gameboy_t *gb, uint8_t cycles) gb->oam_write_blocked = false; gb->ly_for_comparison = gb->current_line? -1 : gb->current_line; GB_STAT_update(gb); - GB_SLEEP(gb, display, 4, 3); + GB_SLEEP(gb, display, 6, 3); /* The OAM STAT interrupt occurs 1 T-cycle before STAT actually changes, except on line 1. PPU glitch? */ @@ -460,55 +525,100 @@ void GB_display_run(GB_gameboy_t *gb, uint8_t cycles) GB_STAT_update(gb); gb->io_registers[GB_IO_STAT] &= ~3; } - GB_SLEEP(gb, display, 19, 1); + GB_SLEEP(gb, display, 7, 1); gb->io_registers[GB_IO_STAT] &= ~3; gb->io_registers[GB_IO_STAT] |= 2; gb->oam_write_blocked = true; gb->ly_for_comparison = gb->current_line; GB_STAT_update(gb); - GB_SLEEP(gb, display, 5, MODE2_LENGTH - 4); + GB_SLEEP(gb, display, 8, MODE2_LENGTH - 4); gb->vram_read_blocked = true; gb->vram_write_blocked = false; gb->oam_write_blocked = false; GB_STAT_update(gb); - GB_SLEEP(gb, display, 6, 4); + GB_SLEEP(gb, display, 9, 4); gb->io_registers[GB_IO_STAT] &= ~3; gb->io_registers[GB_IO_STAT] |= 3; - gb->effective_scx = gb->io_registers[GB_IO_SCX]; gb->vram_write_blocked = true; gb->oam_write_blocked = true; GB_STAT_update(gb); - for (gb->position_in_line = 0; gb->position_in_line < WIDTH + 7; gb->position_in_line++) { - if (!gb->disable_rendering) { - signed screen_pos = (signed) gb->position_in_line - (gb->effective_scx & 0x7); - if (((screen_pos + gb->effective_scx) & 7) == 0) { - gb->effective_scy = gb->io_registers[GB_IO_SCY]; - } - if (screen_pos >= 0 && screen_pos < WIDTH) { - gb->screen[gb->current_line * WIDTH + screen_pos] = get_pixel(gb, screen_pos, gb->current_line); + gb->cycles_for_line = MODE2_LENGTH + 4; + fifo_clear(&gb->bg_fifo); + gb->position_in_line = - (gb->io_registers[GB_IO_SCX] & 7) - 8; + gb->fetcher_x = ((gb->io_registers[GB_IO_SCX]) / 8) & 0x1f; + +#define RENDER_AND_SLEEP(label) \ +{ \ +render_pixel_if_possible(gb); \ +if (gb->position_in_line == 160) break; \ +else if (gb->position_in_line == 159) {\ + gb->io_registers[GB_IO_STAT] &= ~3; \ + gb->oam_read_blocked = false; \ + gb->vram_read_blocked = false; \ + gb->oam_write_blocked = false; \ + gb->vram_write_blocked = false; \ + if (gb->hdma_on_hblank) { \ + gb->hdma_on = true; \ + gb->hdma_cycles = 0; \ + } \ +} \ +gb->cycles_for_line++; \ +GB_SLEEP(gb, display, label, 1); \ +} + gb->cycles_for_line += 6; + GB_SLEEP(gb, display, 10, 6); + + /* The actual rendering cycle */ + while (true) { + // First read; the tile + RENDER_AND_SLEEP(21); + { + uint16_t map = 0x1800; + if (gb->io_registers[GB_IO_LCDC] & 0x08) { + map = 0x1C00; } + uint8_t y = gb->current_line + gb->io_registers[GB_IO_SCY]; + gb->current_tile = gb->vram[map + gb->fetcher_x + y / 8 * 32]; + gb->fetcher_x++; + gb->fetcher_x &= 0x1f; } - GB_SLEEP(gb, display, 15, 1); + RENDER_AND_SLEEP(22); + + // Second read, lower tile data + RENDER_AND_SLEEP(23); + { + if (gb->io_registers[GB_IO_LCDC] & 0x10) { + gb->current_tile_address = gb->current_tile * 0x10; + } + else { + gb->current_tile_address = (int8_t)gb->current_tile * 0x10 + 0x1000; + } + gb->current_tile_data[0] = + gb->vram[gb->current_tile_address + ((gb->current_line + gb->io_registers[GB_IO_SCY]) & 7) * 2]; + } + RENDER_AND_SLEEP(24); + + + // Third read, upper tile data + RENDER_AND_SLEEP(25); + gb->current_tile_data[1] = + gb->vram[gb->current_tile_address + ((gb->current_line + gb->io_registers[GB_IO_SCY]) & 7) * 2 + 1]; + RENDER_AND_SLEEP(26); + + + // The cycle ends with a fetcher sleep + RENDER_AND_SLEEP(27); + RENDER_AND_SLEEP(28); + + fifo_push_bg_row(&gb->bg_fifo, gb->current_tile_data[0], gb->current_tile_data[1], 0, false); } - GB_SLEEP(gb, display, 7, MODE3_LENGTH + scx_delay(gb) - WIDTH - 7); - - gb->io_registers[GB_IO_STAT] &= ~3; - gb->oam_read_blocked = false; - gb->vram_read_blocked = false; - gb->oam_write_blocked = false; - gb->vram_write_blocked = false; - if (gb->hdma_on_hblank) { - gb->hdma_on = true; - gb->hdma_cycles = 0; - } - GB_SLEEP(gb, display, 16, 1); GB_STAT_update(gb); - GB_SLEEP(gb, display, 8, MODE0_LENGTH - scx_delay(gb) - 4 - 1); + GB_SLEEP(gb, display, 11, LINE_LENGTH - gb->cycles_for_line); } /* Lines 144 - 152 */ @@ -516,7 +626,7 @@ void GB_display_run(GB_gameboy_t *gb, uint8_t cycles) gb->io_registers[GB_IO_LY] = gb->current_line; gb->ly_for_comparison = -1; GB_STAT_update(gb); - GB_SLEEP(gb, display, 9, 4); + GB_SLEEP(gb, display, 12, 4); gb->ly_for_comparison = gb->current_line; if (gb->current_line == LINES) { @@ -544,27 +654,27 @@ void GB_display_run(GB_gameboy_t *gb, uint8_t cycles) } GB_STAT_update(gb); - GB_SLEEP(gb, display, 10, LINE_LENGTH - 4); + GB_SLEEP(gb, display, 13, LINE_LENGTH - 4); } /* Lines 153 */ gb->io_registers[GB_IO_LY] = 153; gb->ly_for_comparison = -1; GB_STAT_update(gb); - GB_SLEEP(gb, display, 11, 4); + GB_SLEEP(gb, display, 14, 4); gb->io_registers[GB_IO_LY] = 0; gb->ly_for_comparison = 153; GB_STAT_update(gb); - GB_SLEEP(gb, display, 12, 4); + GB_SLEEP(gb, display, 15, 4); gb->ly_for_comparison = -1; GB_STAT_update(gb); - GB_SLEEP(gb, display, 13, 4); + GB_SLEEP(gb, display, 16, 4); gb->ly_for_comparison = 0; GB_STAT_update(gb); - GB_SLEEP(gb, display, 14, LINE_LENGTH - 12); + GB_SLEEP(gb, display, 17, LINE_LENGTH - 12); gb->io_registers[GB_IO_STAT] &= ~3; @@ -728,7 +838,6 @@ uint8_t GB_get_oam_info(GB_gameboy_t *gb, GB_oam_info_t *dest, uint8_t *sprite_h } } - for (unsigned i = 0; i < count; i++) { uint16_t vram_address = dest[i].tile * 0x10; uint8_t flags = dest[i].flags; diff --git a/Core/gb.h b/Core/gb.h index 9ff5853a..2ea65bde 100644 --- a/Core/gb.h +++ b/Core/gb.h @@ -189,6 +189,19 @@ typedef struct { struct GB_breakpoint_s; struct GB_watchpoint_s; +typedef struct { + uint8_t pixel; // Color, 0-3 + uint8_t palette; // Palette, 0 - 7 (CGB); 0-1 in DMG (or just 0 for BG) + uint8_t priority; // Sprite priority – 0 in DMG, OAM index in CGB + bool bg_priority; // For sprite FIFO – the BG priority bit. For the BG FIFO – the CGB attributes priority bit +} GB_fifo_item_t; + +typedef struct { + GB_fifo_item_t fifo[16]; + uint8_t read_end; + uint8_t write_end; +} GB_fifo_t; + /* When state saving, each section is dumped independently of other sections. This allows adding data to the end of the section without worrying about future compatibility. Some other changes might be "safe" as well. @@ -391,6 +404,13 @@ struct GB_gameboy_internal_s { uint8_t effective_scy; // SCY is latched when starting to draw a tile uint8_t current_line; uint16_t ly_for_comparison; + GB_fifo_t bg_fifo, oam_fifo; + uint8_t fetcher_x; + uint16_t cycles_for_line; + uint8_t current_tile; + uint16_t current_tile_address; // TODO: is this actually cached? If not, it could be used to "mix" two tiles + uint8_t current_tile_data[2]; + ); /* Unsaved data. This includes all pointers, as well as everything that shouldn't be on a save state */ diff --git a/Core/save_state.c b/Core/save_state.c index 933b4178..e060ae60 100644 --- a/Core/save_state.c +++ b/Core/save_state.c @@ -218,6 +218,12 @@ int GB_load_state(GB_gameboy_t *gb, const char *path) GB_palette_changed(gb, true, i * 2); } + gb->bg_fifo.read_end &= 0xF; + gb->bg_fifo.write_end &= 0xF; + gb->oam_fifo.read_end &= 0xF; + gb->oam_fifo.write_end &= 0xF; + gb->current_tile_address &= (gb->vram_size - 1); + error: fclose(f); return errno; @@ -308,6 +314,12 @@ int GB_load_state_from_buffer(GB_gameboy_t *gb, const uint8_t *buffer, size_t le GB_palette_changed(gb, true, i * 2); } + gb->bg_fifo.read_end &= 0xF; + gb->bg_fifo.write_end &= 0xF; + gb->oam_fifo.read_end &= 0xF; + gb->oam_fifo.write_end &= 0xF; + gb->current_tile_address &= (gb->vram_size - 1); + return 0; }