From 96d4b17056cb35ca26eaca95f1a37978ad17722a Mon Sep 17 00:00:00 2001 From: alyosha-tas Date: Tue, 31 Mar 2020 12:35:03 -0400 Subject: [PATCH] GBHawk: bug fixes for GBC --- libHawk/GBHawk/GBHawk/LR35902.h | 68 +++--- libHawk/GBHawk/GBHawk/PPU.h | 365 ++++++++++---------------------- 2 files changed, 142 insertions(+), 291 deletions(-) diff --git a/libHawk/GBHawk/GBHawk/LR35902.h b/libHawk/GBHawk/GBHawk/LR35902.h index ba5c64e2a0..f4347ea849 100644 --- a/libHawk/GBHawk/GBHawk/LR35902.h +++ b/libHawk/GBHawk/GBHawk/LR35902.h @@ -2156,7 +2156,7 @@ namespace GBHawk #pragma region Operations - void Read_Func(uint32_t dest, uint32_t src_l, uint32_t src_h) + inline void Read_Func(uint32_t dest, uint32_t src_l, uint32_t src_h) { uint32_t addr = (uint32_t)(Regs[src_l] | (Regs[src_h]) << 8); //if (CDLCallback != null) @@ -2168,24 +2168,24 @@ namespace GBHawk } // special read for POP AF that always clears the lower 4 bits of F - void Read_Func_F(uint32_t dest, uint32_t src_l, uint32_t src_h) + inline void Read_Func_F(uint32_t dest, uint32_t src_l, uint32_t src_h) { Regs[dest] = (ReadMemory((uint32_t)(Regs[src_l] | (Regs[src_h]) << 8)) & 0xF0); } - void Write_Func(uint32_t dest_l, uint32_t dest_h, uint32_t src) + inline void Write_Func(uint32_t dest_l, uint32_t dest_h, uint32_t src) { uint32_t addr = (uint32_t)(Regs[dest_l] | (Regs[dest_h]) << 8); //CDLCallback ? 
.Invoke(addr, eCDLogMemFlags.Write | eCDLogMemFlags.Data); WriteMemory(addr, Regs[src]); } - void TR_Func(uint32_t dest, uint32_t src) + inline void TR_Func(uint32_t dest, uint32_t src) { Regs[dest] = Regs[src]; } - void ADD16_Func(uint32_t dest_l, uint32_t dest_h, uint32_t src_l, uint32_t src_h) + inline void ADD16_Func(uint32_t dest_l, uint32_t dest_h, uint32_t src_l, uint32_t src_h) { Reg16_d = Regs[dest_l] | (Regs[dest_h] << 8); Reg16_s = Regs[src_l] | (Regs[src_h] << 8); @@ -2210,7 +2210,7 @@ namespace GBHawk Regs[dest_h] = (uint8_t)ans_h; } - void ADD8_Func(uint32_t dest, uint32_t src) + inline void ADD8_Func(uint32_t dest, uint32_t src) { Reg16_d = Regs[dest]; Reg16_d += Regs[src]; @@ -2231,7 +2231,7 @@ namespace GBHawk Regs[dest] = (uint8_t)ans; } - void SUB8_Func(uint32_t dest, uint32_t src) + inline void SUB8_Func(uint32_t dest, uint32_t src) { Reg16_d = Regs[dest]; Reg16_d -= Regs[src]; @@ -2251,29 +2251,29 @@ namespace GBHawk Regs[dest] = (uint8_t)ans; } - void BIT_Func(uint32_t bit, uint32_t src) + inline void BIT_Func(uint32_t bit, uint32_t src) { FlagZset(!((Regs[src] & (1 << bit)) > 0)); FlagHset(true); FlagNset(false); } - void SET_Func(uint32_t bit, uint32_t src) + inline void SET_Func(uint32_t bit, uint32_t src) { Regs[src] |= (uint8_t)(1 << bit); } - void RES_Func(uint32_t bit, uint32_t src) + inline void RES_Func(uint32_t bit, uint32_t src) { Regs[src] &= (uint8_t)(0xFF - (1 << bit)); } - void ASGN_Func(uint32_t src, uint32_t val) + inline void ASGN_Func(uint32_t src, uint32_t val) { Regs[src] = (uint8_t)val; } - void SWAP_Func(uint32_t src) + inline void SWAP_Func(uint32_t src) { temp = (uint32_t)((Regs[src] << 4) & 0xF0); Regs[src] = (uint8_t)(temp | (Regs[src] >> 4)); @@ -2284,7 +2284,7 @@ namespace GBHawk FlagCset(false); } - void SLA_Func(uint32_t src) + inline void SLA_Func(uint32_t src) { FlagCset((Regs[src] & 0x80) > 0); @@ -2295,7 +2295,7 @@ namespace GBHawk FlagNset(false); } - void SRA_Func(uint32_t src) + inline void 
SRA_Func(uint32_t src) { FlagCset((Regs[src] & 1) > 0); @@ -2308,7 +2308,7 @@ namespace GBHawk FlagNset(false); } - void SRL_Func(uint32_t src) + inline void SRL_Func(uint32_t src) { FlagCset((Regs[src] & 1) > 0); @@ -2319,7 +2319,7 @@ namespace GBHawk FlagNset(false); } - void CPL_Func(uint32_t src) + inline void CPL_Func(uint32_t src) { Regs[src] = (uint8_t)((~Regs[src]) & 0xFF); @@ -2327,21 +2327,21 @@ namespace GBHawk FlagNset(true); } - void CCF_Func(uint32_t src) + inline void CCF_Func(uint32_t src) { FlagCset(!FlagCget()); FlagHset(false); FlagNset(false); } - void SCF_Func(uint32_t src) + inline void SCF_Func(uint32_t src) { FlagCset(true); FlagHset(false); FlagNset(false); } - void AND8_Func(uint32_t dest, uint32_t src) + inline void AND8_Func(uint32_t dest, uint32_t src) { Regs[dest] = (uint8_t)(Regs[dest] & Regs[src]); @@ -2351,7 +2351,7 @@ namespace GBHawk FlagNset(false); } - void OR8_Func(uint32_t dest, uint32_t src) + inline void OR8_Func(uint32_t dest, uint32_t src) { Regs[dest] = (uint8_t)(Regs[dest] | Regs[src]); @@ -2361,7 +2361,7 @@ namespace GBHawk FlagNset(false); } - void XOR8_Func(uint32_t dest, uint32_t src) + inline void XOR8_Func(uint32_t dest, uint32_t src) { Regs[dest] = (uint8_t)(Regs[dest] ^ Regs[src]); @@ -2371,7 +2371,7 @@ namespace GBHawk FlagNset(false); } - void CP8_Func(uint32_t dest, uint32_t src) + inline void CP8_Func(uint32_t dest, uint32_t src) { Reg16_d = Regs[dest]; Reg16_d -= Regs[src]; @@ -2388,7 +2388,7 @@ namespace GBHawk FlagNset(true); } - void RRC_Func(uint32_t src) + inline void RRC_Func(uint32_t src) { imm = src == Aim; if (imm) { src = A; } @@ -2402,7 +2402,7 @@ namespace GBHawk FlagNset(false); } - void RR_Func(uint32_t src) + inline void RR_Func(uint32_t src) { imm = src == Aim; if (imm) { src = A; } @@ -2418,7 +2418,7 @@ namespace GBHawk FlagNset(false); } - void RLC_Func(uint32_t src) + inline void RLC_Func(uint32_t src) { imm = src == Aim; if (imm) { src = A; } @@ -2433,7 +2433,7 @@ namespace GBHawk 
FlagNset(false); } - void RL_Func(uint32_t src) + inline void RL_Func(uint32_t src) { imm = src == Aim; if (imm) { src = A; } @@ -2448,7 +2448,7 @@ namespace GBHawk FlagNset(false); } - void INC8_Func(uint32_t src) + inline void INC8_Func(uint32_t src) { Reg16_d = Regs[src]; Reg16_d += 1; @@ -2467,7 +2467,7 @@ namespace GBHawk Regs[src] = (uint8_t)ans; } - void DEC8_Func(uint32_t src) + inline void DEC8_Func(uint32_t src) { Reg16_d = Regs[src]; Reg16_d -= 1; @@ -2486,7 +2486,7 @@ namespace GBHawk Regs[src] = (uint8_t)ans; } - void INC16_Func(uint32_t src_l, uint32_t src_h) + inline void INC16_Func(uint32_t src_l, uint32_t src_h) { Reg16_d = Regs[src_l] | (Regs[src_h] << 8); @@ -2496,7 +2496,7 @@ namespace GBHawk Regs[src_h] = (uint8_t)((Reg16_d & 0xFF00) >> 8); } - void DEC16_Func(uint32_t src_l, uint32_t src_h) + inline void DEC16_Func(uint32_t src_l, uint32_t src_h) { Reg16_d = Regs[src_l] | (Regs[src_h] << 8); @@ -2506,7 +2506,7 @@ namespace GBHawk Regs[src_h] = (uint8_t)((Reg16_d & 0xFF00) >> 8); } - void ADC8_Func(uint32_t dest, uint32_t src) + inline void ADC8_Func(uint32_t dest, uint32_t src) { Reg16_d = Regs[dest]; c = FlagCget() ? 1 : 0; @@ -2528,7 +2528,7 @@ namespace GBHawk Regs[dest] = (uint8_t)ans; } - void SBC8_Func(uint32_t dest, uint32_t src) + inline void SBC8_Func(uint32_t dest, uint32_t src) { Reg16_d = Regs[dest]; c = FlagCget() ? 
1 : 0; @@ -2551,7 +2551,7 @@ namespace GBHawk } // DA code courtesy of AWJ: http://forums.nesdev.com/viewtopic.php?f=20&t=15944 - void DA_Func(uint32_t src) + inline void DA_Func(uint32_t src) { a_d = (uint8_t)Regs[src]; @@ -2575,7 +2575,7 @@ namespace GBHawk } // used for signed operations - void ADDS_Func(uint32_t dest_l, uint32_t dest_h, uint32_t src_l, uint32_t src_h) + inline void ADDS_Func(uint32_t dest_l, uint32_t dest_h, uint32_t src_l, uint32_t src_h) { Reg16_d = Regs[dest_l]; Reg16_s = Regs[src_l]; diff --git a/libHawk/GBHawk/GBHawk/PPU.h b/libHawk/GBHawk/GBHawk/PPU.h index fddd5bd0df..a3454464c1 100644 --- a/libHawk/GBHawk/GBHawk/PPU.h +++ b/libHawk/GBHawk/GBHawk/PPU.h @@ -1,8 +1,3 @@ -#include -#include -#include -#include - using namespace std; namespace GBHawk @@ -219,13 +214,6 @@ namespace GBHawk } - // normal DMA moves twice as fast in double speed mode on GBC - // So give it it's own function so we can seperate it from PPU tick - virtual void DMA_tick() - { - - } - virtual void OAM_scan(uint32_t OAM_cycle) { @@ -243,14 +231,95 @@ namespace GBHawk } - virtual void color_compute_BG() + void color_compute_BG() { + uint32_t R; + uint32_t G; + uint32_t B; + if ((BG_bytes_index % 2) == 0) + { + R = (uint32_t)(BG_bytes[BG_bytes_index] & 0x1F); + G = (uint32_t)(((BG_bytes[BG_bytes_index] & 0xE0) | ((BG_bytes[BG_bytes_index + 1] & 0x03) << 8)) >> 5); + B = (uint32_t)((BG_bytes[BG_bytes_index + 1] & 0x7C) >> 2); + } + else + { + R = (uint32_t)(BG_bytes[BG_bytes_index - 1] & 0x1F); + G = (uint32_t)(((BG_bytes[BG_bytes_index - 1] & 0xE0) | ((BG_bytes[BG_bytes_index] & 0x03) << 8)) >> 5); + B = (uint32_t)((BG_bytes[BG_bytes_index] & 0x7C) >> 2); + } + + uint32_t retR = ((R * 13 + G * 2 + B) >> 1) & 0xFF; + uint32_t retG = ((G * 3 + B) << 1) & 0xFF; + uint32_t retB = ((R * 3 + G * 2 + B * 11) >> 1) & 0xFF; + + BG_palette[BG_bytes_index >> 1] = (uint32_t)(0xFF000000 | (retR << 16) | (retG << 8) | retB); } void color_compute_OBJ() { + uint32_t R; + uint32_t G; + 
uint32_t B; + if ((OBJ_bytes_index % 2) == 0) + { + R = (uint32_t)(OBJ_bytes[OBJ_bytes_index] & 0x1F); + G = (uint32_t)(((OBJ_bytes[OBJ_bytes_index] & 0xE0) | ((OBJ_bytes[OBJ_bytes_index + 1] & 0x03) << 8)) >> 5); + B = (uint32_t)((OBJ_bytes[OBJ_bytes_index + 1] & 0x7C) >> 2); + } + else + { + R = (uint32_t)(OBJ_bytes[OBJ_bytes_index - 1] & 0x1F); + G = (uint32_t)(((OBJ_bytes[OBJ_bytes_index - 1] & 0xE0) | ((OBJ_bytes[OBJ_bytes_index] & 0x03) << 8)) >> 5); + B = (uint32_t)((OBJ_bytes[OBJ_bytes_index] & 0x7C) >> 2); + } + + uint32_t retR = ((R * 13 + G * 2 + B) >> 1) & 0xFF; + uint32_t retG = ((G * 3 + B) << 1) & 0xFF; + uint32_t retB = ((R * 3 + G * 2 + B * 11) >> 1) & 0xFF; + + OBJ_palette[OBJ_bytes_index >> 1] = (uint32_t)(0xFF000000 | (retR << 16) | (retG << 8) | retB); + } + + // normal DMA moves twice as fast in double speed mode on GBC + // So give it its own function so we can separate it from PPU tick + void DMA_tick() + { + // Note that DMA is halted when the CPU is halted + if (DMA_start && !cpu_halted[0]) + { + if (DMA_clock >= 4) + { + DMA_OAM_access = false; + if ((DMA_clock % 4) == 1) + { + // the cpu can't access memory during this time, but we still need the ppu to be able to. + DMA_start = false; + // Gekkio reports that A14 being high on DMA transfers always represent WRAM accesses + // So transfers nominally from higher memory areas are actually still from there (i.e. 
FF -> DF) + uint8_t DMA_actual = DMA_addr; + if (DMA_addr > 0xDF) { DMA_actual &= 0xDF; } + DMA_byte = ReadMemory(((uint32_t)(DMA_actual << 8) + DMA_inc)); + DMA_start = true; + } + else if ((DMA_clock % 4) == 3) + { + OAM[DMA_inc] = DMA_byte; + + if (DMA_inc < (0xA0 - 1)) { DMA_inc++; } + } + } + + DMA_clock++; + + if (DMA_clock == 648) + { + DMA_start = false; + DMA_OAM_access = true; + } + } } #pragma endregion @@ -924,7 +993,7 @@ namespace GBHawk // x-scroll is expected to be latched one cycle later // this is fine since nothing has started in the rendering until the second cycle // calculate the column number of the tile to start with - x_tile = (uint32_t)floor((float)(scroll_x) / 8.0); + x_tile = (uint32_t)(scroll_x >> 3); render_offset = scroll_x % 8; } @@ -986,7 +1055,7 @@ namespace GBHawk // x-scroll is expected to be latched one cycle later // this is fine since nothing has started in the rendering until the second cycle // calculate the column number of the tile to start with - x_tile = (uint32_t)floor((float)(scroll_x) / 8.0); + x_tile = (uint32_t)(scroll_x >> 3); render_offset = scroll_x % 8; } @@ -1162,7 +1231,8 @@ namespace GBHawk window_counter = 0; render_counter = 0; - window_x_tile = (uint32_t)floor((float)(pixel_counter - (window_x_latch - 7)) / 8.0); + // NOTE: pixel counter is >= window_x_latch - 7 here, so subtraction will result in a positive number + window_x_tile = (uint32_t)((pixel_counter - ((int32_t)window_x_latch - 7)) >> 3); window_tile_inc = 0; window_started = true; @@ -1281,7 +1351,7 @@ namespace GBHawk if ((internal_cycle % 2) == 1) { // calculate the row number of the tiles to be fetched - y_tile = ((uint32_t)floor(((float)((float)scroll_y + (float)LY)) / (float)8.0)) % 32; + y_tile = ((uint32_t)((uint32_t)scroll_y + (uint32_t)LY) >> 3) % 32; temp_fetch = y_tile * 32 + (x_tile + tile_inc) % 32; tile_byte = VRAM[0x1800 + (((LCDC & 0x8) > 0) ? 
1 : 0) * 0x400 + temp_fetch]; @@ -1562,7 +1632,7 @@ namespace GBHawk else if (((last_eval + render_offset) % 8) == 6) { sprite_fetch_counter += 0; } else if (((last_eval + render_offset) % 8) == 7) { sprite_fetch_counter += 0; } - consecutive_sprite = (uint32_t)floor((double)(last_eval + render_offset) / 8.0) * 8 + 8 - render_offset; + consecutive_sprite = (uint32_t)(((last_eval + render_offset) >> 3) << 3) + 8 - render_offset; // special case exists here for sprites at zero with non-zero x-scroll. Not sure exactly the reason for it. if (last_eval == 0 && render_offset != 0) @@ -1642,45 +1712,6 @@ namespace GBHawk } } - // normal DMA moves twice as fast in double speed mode on GBC - // So give it it's own function so we can seperate it from PPU tick - void DMA_tick() - { - // Note that DMA is halted when the CPU is halted - if (DMA_start && !cpu_halted[0]) - { - if (DMA_clock >= 4) - { - DMA_OAM_access = false; - if ((DMA_clock % 4) == 1) - { - // the cpu can't access memory during this time, but we still need the ppu to be able to. - DMA_start = false; - // Gekkio reports that A14 being high on DMA transfers always represent WRAM accesses - // So transfers nominally from higher memory areas are actually still from there (i.e. 
FF -> DF) - uint8_t DMA_actual = DMA_addr; - if (DMA_addr > 0xDF) { DMA_actual &= 0xDF; } - DMA_byte = ReadMemory(((uint32_t)(DMA_actual << 8) + DMA_inc)); - DMA_start = true; - } - else if ((DMA_clock % 4) == 3) - { - OAM[DMA_inc] = DMA_byte; - - if (DMA_inc < (0xA0 - 1)) { DMA_inc++; } - } - } - - DMA_clock++; - - if (DMA_clock == 648) - { - DMA_start = false; - DMA_OAM_access = true; - } - } - } - // order sprites according to x coordinate // note that for sprites of equal x coordinate, priority goes to first on the list void reorder_and_assemble_sprites() @@ -2106,8 +2137,8 @@ namespace GBHawk else { VRAM[(VRAM_Bank[0] * 0x2000) + cur_DMA_dest] = HDMA_byte; - cur_DMA_dest = (uint8_t)((cur_DMA_dest + 1) & 0x1FFF); - cur_DMA_src = (uint8_t)((cur_DMA_src + 1) & 0xFFFF); + cur_DMA_dest = (uint32_t)((cur_DMA_dest + 1) & 0x1FFF); + cur_DMA_src = (uint32_t)((cur_DMA_src + 1) & 0xFFFF); HDMA_length--; } @@ -2344,7 +2375,7 @@ namespace GBHawk // x-scroll is expected to be latched one cycle later // this is fine since nothing has started in the rendering until the second cycle // calculate the column number of the tile to start with - x_tile = (uint32_t)floor((float)(scroll_x) / 8.0); + x_tile = (uint32_t)(scroll_x >> 3); render_offset = scroll_x % 8; } @@ -2404,7 +2435,7 @@ namespace GBHawk // x-scroll is expected to be latched one cycle later // this is fine since nothing has started in the rendering until the second cycle // calculate the column number of the tile to start with - x_tile = (uint32_t)floor((float)(scroll_x) / 8.0); + x_tile = (uint32_t)(scroll_x >> 3); render_offset = scroll_x % 8; } @@ -2605,7 +2636,8 @@ namespace GBHawk window_counter = 0; render_counter = 0; - window_x_tile = (uint32_t)floor((float)(pixel_counter - (window_x_latch - 7)) / 8.0); + // NOTE: pixel counter is >= window_x_latch - 7 here, so subtraction will result in a positive number + window_x_tile = (uint32_t)((pixel_counter - ((int32_t)window_x_latch - 7)) >> 3); window_tile_inc = 0; 
window_started = true; @@ -2735,11 +2767,11 @@ namespace GBHawk if ((internal_cycle % 2) == 1) { // calculate the row number of the tiles to be fetched - y_tile = ((uint32_t)floor((float)((uint32_t)scroll_y + LY) / 8.0)) % 32; + y_tile = ((uint32_t)((uint32_t)scroll_y + (uint32_t)LY) >> 3) % 32; temp_fetch = y_tile * 32 + (x_tile + tile_inc) % 32; - tile_byte = VRAM[0x1800 + (((LCDC & 0x4) > 0) ? 1 : 0) * 0x400 + temp_fetch]; - tile_data[2] = VRAM[0x3800 + (((LCDC & 0x4) > 0) ? 1 : 0) * 0x400 + temp_fetch]; + tile_byte = VRAM[0x1800 + (((LCDC & 0x8) > 0) ? 1 : 0) * 0x400 + temp_fetch]; + tile_data[2] = VRAM[0x3800 + (((LCDC & 0x8) > 0) ? 1 : 0) * 0x400 + temp_fetch]; VRAM_sel = ((tile_data[2] & 0x8) > 0) ? 1 : 0; BG_V_flip = ((tile_data[2] & 0x40) > 0); @@ -3048,7 +3080,7 @@ namespace GBHawk else if (((last_eval + render_offset) % 8) == 6) { sprite_fetch_counter += 0; } else if (((last_eval + render_offset) % 8) == 7) { sprite_fetch_counter += 0; } - consecutive_sprite = (uint32_t)floor(((double)last_eval + render_offset) / 8.0) * 8 + 8 - render_offset; + consecutive_sprite = (uint32_t)(((last_eval + render_offset) >> 3) << 3) + 8 - render_offset; // special case exists here for sprites at zero with non-zero x-scroll. Not sure exactly the reason for it. if (last_eval == 0 && render_offset != 0) @@ -3130,45 +3162,6 @@ namespace GBHawk } } - // normal DMA moves twice as fast in double speed mode on GBC - // So give it it's own function so we can seperate it from PPU tick - void DMA_tick() - { - // Note that DMA is halted when the CPU is halted - if (DMA_start && !cpu_halted[0]) - { - if (DMA_clock >= 4) - { - DMA_OAM_access = false; - if ((DMA_clock % 4) == 1) - { - // the cpu can't access memory during this time, but we still need the ppu to be able to. - DMA_start = false; - // Gekkio reports that A14 being high on DMA transfers always represent WRAM accesses - // So transfers nominally from higher memory areas are actually still from there (i.e. 
FF -> DF) - uint8_t DMA_actual = DMA_addr; - if (DMA_addr > 0xDF) { DMA_actual &= 0xDF; } - DMA_byte = ReadMemory((uint32_t)((DMA_actual << 8) + DMA_inc)); - DMA_start = true; - } - else if ((DMA_clock % 4) == 3) - { - OAM[DMA_inc] = DMA_byte; - - if (DMA_inc < (0xA0 - 1)) { DMA_inc++; } - } - } - - DMA_clock++; - - if (DMA_clock == 648) - { - DMA_start = false; - DMA_OAM_access = true; - } - } - } - // order sprites according to x coordinate // note that for sprites of equal x coordinate, priority goes to first on the list void reorder_and_assemble_sprites() @@ -3302,58 +3295,6 @@ namespace GBHawk } } - void color_compute_BG() - { - uint32_t R; - uint32_t G; - uint32_t B; - - if ((BG_bytes_index % 2) == 0) - { - R = (uint32_t)(BG_bytes[BG_bytes_index] & 0x1F); - G = (uint32_t)(((BG_bytes[BG_bytes_index] & 0xE0) | ((BG_bytes[BG_bytes_index + 1] & 0x03) << 8)) >> 5); - B = (uint32_t)((BG_bytes[BG_bytes_index + 1] & 0x7C) >> 2); - } - else - { - R = (uint32_t)(BG_bytes[BG_bytes_index - 1] & 0x1F); - G = (uint32_t)(((BG_bytes[BG_bytes_index - 1] & 0xE0) | ((BG_bytes[BG_bytes_index] & 0x03) << 8)) >> 5); - B = (uint32_t)((BG_bytes[BG_bytes_index] & 0x7C) >> 2); - } - - uint32_t retR = ((R * 13 + G * 2 + B) >> 1) & 0xFF; - uint32_t retG = ((G * 3 + B) << 1) & 0xFF; - uint32_t retB = ((R * 3 + G * 2 + B * 11) >> 1) & 0xFF; - - BG_palette[BG_bytes_index >> 1] = (uint32_t)(0xFF000000 | (retR << 16) | (retG << 8) | retB); - } - - void color_compute_OBJ() - { - uint32_t R; - uint32_t G; - uint32_t B; - - if ((OBJ_bytes_index % 2) == 0) - { - R = (uint32_t)(OBJ_bytes[OBJ_bytes_index] & 0x1F); - G = (uint32_t)(((OBJ_bytes[OBJ_bytes_index] & 0xE0) | ((OBJ_bytes[OBJ_bytes_index + 1] & 0x03) << 8)) >> 5); - B = (uint32_t)((OBJ_bytes[OBJ_bytes_index + 1] & 0x7C) >> 2); - } - else - { - R = (uint32_t)(OBJ_bytes[OBJ_bytes_index - 1] & 0x1F); - G = (uint32_t)(((OBJ_bytes[OBJ_bytes_index - 1] & 0xE0) | ((OBJ_bytes[OBJ_bytes_index] & 0x03) << 8)) >> 5); - B = 
(uint32_t)((OBJ_bytes[OBJ_bytes_index] & 0x7C) >> 2); - } - - uint32_t retR = ((R * 13 + G * 2 + B) >> 1) & 0xFF; - uint32_t retG = ((G * 3 + B) << 1) & 0xFF; - uint32_t retB = ((R * 3 + G * 2 + B * 11) >> 1) & 0xFF; - - OBJ_palette[OBJ_bytes_index >> 1] = (uint32_t)(0xFF000000 | (retR << 16) | (retG << 8) | retB); - } - void Reset() { LCDC = 0; @@ -3668,8 +3609,8 @@ namespace GBHawk else { VRAM[(VRAM_Bank[0] * 0x2000) + cur_DMA_dest] = HDMA_byte; - cur_DMA_dest = (uint8_t)((cur_DMA_dest + 1) & 0x1FFF); - cur_DMA_src = (uint8_t)((cur_DMA_src + 1) & 0xFFFF); + cur_DMA_dest = (uint32_t)((cur_DMA_dest + 1) & 0x1FFF); + cur_DMA_src = (uint32_t)((cur_DMA_src + 1) & 0xFFFF); HDMA_length--; } @@ -3906,7 +3847,7 @@ namespace GBHawk // x-scroll is expected to be latched one cycle later // this is fine since nothing has started in the rendering until the second cycle // calculate the column number of the tile to start with - x_tile = (uint32_t)floor((float)(scroll_x) / 8.0); + x_tile = (uint32_t)(scroll_x >> 3); render_offset = scroll_x % 8; } @@ -3966,7 +3907,7 @@ namespace GBHawk // x-scroll is expected to be latched one cycle later // this is fine since nothing has started in the rendering until the second cycle // calculate the column number of the tile to start with - x_tile = (uint32_t)floor((float)(scroll_x) / 8.0); + x_tile = (uint32_t)(scroll_x >> 3); render_offset = scroll_x % 8; } @@ -4160,7 +4101,8 @@ namespace GBHawk window_counter = 0; render_counter = 0; - window_x_tile = (uint32_t)floor((float)(pixel_counter - (window_x_latch - 7)) / 8.0); + // NOTE: pixel counter is >= window_x_latch - 7 here, so subtraction will result in a positive number + window_x_tile = (uint32_t)((pixel_counter - ((int32_t)window_x_latch - 7)) >> 3); window_tile_inc = 0; window_started = true; @@ -4330,11 +4272,11 @@ namespace GBHawk if ((internal_cycle % 2) == 1) { // calculate the row number of the tiles to be fetched - y_tile = ((uint32_t)floor(((float)scroll_y + (float)LY) / 
(float)8.0)) % 32; + y_tile = ((uint32_t)((uint32_t)scroll_y + (uint32_t)LY) >> 3) % 32; temp_fetch = y_tile * 32 + (x_tile + tile_inc) % 32; - tile_byte = VRAM[0x1800 + (((LCDC & 0x4) > 0) ? 1 : 0) * 0x400 + temp_fetch]; - tile_data[2] = VRAM[0x3800 + (((LCDC & 0x4) > 0) ? 1 : 0) * 0x400 + temp_fetch]; + tile_byte = VRAM[0x1800 + (((LCDC & 0x8) > 0) ? 1 : 0) * 0x400 + temp_fetch]; + tile_data[2] = VRAM[0x3800 + (((LCDC & 0x8) > 0) ? 1 : 0) * 0x400 + temp_fetch]; VRAM_sel = ((tile_data[2] & 0x8) > 0) ? 1 : 0; BG_V_flip = ((tile_data[2] & 0x40) > 0) & GBC_compat[0]; @@ -4644,7 +4586,7 @@ namespace GBHawk else if (((last_eval + render_offset) % 8) == 7) { sprite_fetch_counter += 0; } - consecutive_sprite = (uint32_t)floor((((float)last_eval + (float)render_offset) / (float)8.0)) * 8 + 8 - render_offset; + consecutive_sprite = (uint32_t)(((last_eval + render_offset) >> 3) << 3) + 8 - render_offset; // special case exists here for sprites at zero with non-zero x-scroll. Not sure exactly the reason for it. if (last_eval == 0 && render_offset != 0) @@ -4726,45 +4668,6 @@ namespace GBHawk } } - // normal DMA moves twice as fast in double speed mode on GBC - // So give it it's own function so we can seperate it from PPU tick - void DMA_tick() - { - // Note that DMA is halted when the CPU is halted - if (DMA_start && !cpu_halted[0]) - { - if (DMA_clock >= 4) - { - DMA_OAM_access = false; - if ((DMA_clock % 4) == 1) - { - // the cpu can't access memory during this time, but we still need the ppu to be able to. - DMA_start = false; - // Gekkio reports that A14 being high on DMA transfers always represent WRAM accesses - // So transfers nominally from higher memory areas are actually still from there (i.e. 
FF -> DF) - uint8_t DMA_actual = DMA_addr; - if (DMA_addr > 0xDF) { DMA_actual &= 0xDF; } - DMA_byte = ReadMemory((uint32_t)((DMA_actual << 8) + DMA_inc)); - DMA_start = true; - } - else if ((DMA_clock % 4) == 3) - { - OAM[DMA_inc] = DMA_byte; - - if (DMA_inc < (0xA0 - 1)) { DMA_inc++; } - } - } - - DMA_clock++; - - if (DMA_clock == 648) - { - DMA_start = false; - DMA_OAM_access = true; - } - } - } - // order sprites according to x coordinate // note that for sprites of equal x coordinate, priority goes to first on the list void reorder_and_assemble_sprites() @@ -4921,58 +4824,6 @@ namespace GBHawk } } - void color_compute_BG() - { - uint32_t R; - uint32_t G; - uint32_t B; - - if ((BG_bytes_index % 2) == 0) - { - R = (uint32_t)(BG_bytes[BG_bytes_index] & 0x1F); - G = (uint32_t)(((BG_bytes[BG_bytes_index] & 0xE0) | ((BG_bytes[BG_bytes_index + 1] & 0x03) << 8)) >> 5); - B = (uint32_t)((BG_bytes[BG_bytes_index + 1] & 0x7C) >> 2); - } - else - { - R = (uint32_t)(BG_bytes[BG_bytes_index - 1] & 0x1F); - G = (uint32_t)(((BG_bytes[BG_bytes_index - 1] & 0xE0) | ((BG_bytes[BG_bytes_index] & 0x03) << 8)) >> 5); - B = (uint32_t)((BG_bytes[BG_bytes_index] & 0x7C) >> 2); - } - - uint32_t retR = ((R * 13 + G * 2 + B) >> 1) & 0xFF; - uint32_t retG = ((G * 3 + B) << 1) & 0xFF; - uint32_t retB = ((R * 3 + G * 2 + B * 11) >> 1) & 0xFF; - - BG_palette[BG_bytes_index >> 1] = (uint32_t)(0xFF000000 | (retR << 16) | (retG << 8) | retB); - } - - void color_compute_OBJ() - { - uint32_t R; - uint32_t G; - uint32_t B; - - if ((OBJ_bytes_index % 2) == 0) - { - R = (uint32_t)(OBJ_bytes[OBJ_bytes_index] & 0x1F); - G = (uint32_t)(((OBJ_bytes[OBJ_bytes_index] & 0xE0) | ((OBJ_bytes[OBJ_bytes_index + 1] & 0x03) << 8)) >> 5); - B = (uint32_t)((OBJ_bytes[OBJ_bytes_index + 1] & 0x7C) >> 2); - } - else - { - R = (uint32_t)(OBJ_bytes[OBJ_bytes_index - 1] & 0x1F); - G = (uint32_t)(((OBJ_bytes[OBJ_bytes_index - 1] & 0xE0) | ((OBJ_bytes[OBJ_bytes_index] & 0x03) << 8)) >> 5); - B = 
(uint32_t)((OBJ_bytes[OBJ_bytes_index] & 0x7C) >> 2); - } - - uint32_t retR = ((R * 13 + G * 2 + B) >> 1) & 0xFF; - uint32_t retG = ((G * 3 + B) << 1) & 0xFF; - uint32_t retB = ((R * 3 + G * 2 + B * 11) >> 1) & 0xFF; - - OBJ_palette[OBJ_bytes_index >> 1] = (uint32_t)(0xFF000000 | (retR << 16) | (retG << 8) | retB); - } - void Reset() { LCDC = 0;