diff --git a/Core/debugger.c b/Core/debugger.c index 4608f9f..b7829e1 100644 --- a/Core/debugger.c +++ b/Core/debugger.c @@ -1983,7 +1983,7 @@ static bool apu(GB_gameboy_t *gb, char *arguments, char *modifiers, const debugg GB_log(gb, " LFSR in %u-step mode, current value ", gb->apu.noise_channel.narrow? 7 : 15); - for (uint16_t lfsr = gb->apu.noise_channel.lfsr, i = 15; i--; lfsr <<= 1) { + nounroll for (uint16_t lfsr = gb->apu.noise_channel.lfsr, i = 15; i--; lfsr <<= 1) { GB_log(gb, "%u%s", (lfsr >> 14) & 1, i%4 ? "" : " "); } @@ -2166,9 +2166,9 @@ static void print_command_description(GB_gameboy_t *gb, const debugger_command_t const char *string = command->help_string; const unsigned width = 80 - 13; - while (strlen(string) > width) { + nounroll while (strlen(string) > width) { const char *space = string + width; - while (*space != ' ') { + nounroll while (*space != ' ') { space--; if (space == string) { // This help string has some extra long word? Abort line-breaking, it's going to break anyway. 
@@ -2202,7 +2202,7 @@ static bool help(GB_gameboy_t *gb, char *arguments, char *modifiers, const debug } return true; } - for (command = commands; command->command; command++) { + nounroll for (command = commands; command->command; command++) { if (command->help_string) { print_command_description(gb, command); } diff --git a/Core/defs.h b/Core/defs.h index 7e265b6..94ce819 100644 --- a/Core/defs.h +++ b/Core/defs.h @@ -15,10 +15,13 @@ #if __clang__ #define unrolled _Pragma("unroll") +#define nounroll _Pragma("clang loop unroll(disable)") /* nounroll: written in front of a loop statement to ask the compiler not to unroll it; expands to nothing when no suitable pragma is known for the compiler */ #elif __GNUC__ >= 8 #define unrolled _Pragma("GCC unroll 8") +#define nounroll _Pragma("GCC unroll 0") #else #define unrolled +#define nounroll #endif #define unreachable() __builtin_unreachable(); diff --git a/Core/display.c b/Core/display.c index 5db782e..7191f65 100644 --- a/Core/display.c +++ b/Core/display.c @@ -403,7 +403,7 @@ void GB_set_color_correction_mode(GB_gameboy_t *gb, GB_color_correction_mode_t m { gb->color_correction_mode = mode; if (GB_is_cgb(gb)) { - for (unsigned i = 0; i < 32; i++) { + nounroll for (unsigned i = 0; i < 32; i++) { GB_palette_changed(gb, false, i * 2); GB_palette_changed(gb, true, i * 2); } @@ -414,7 +414,7 @@ void GB_set_light_temperature(GB_gameboy_t *gb, double temperature) { gb->light_temperature = temperature; if (GB_is_cgb(gb)) { - for (unsigned i = 0; i < 32; i++) { + nounroll for (unsigned i = 0; i < 32; i++) { GB_palette_changed(gb, false, i * 2); GB_palette_changed(gb, true, i * 2); } @@ -1172,7 +1172,7 @@ object_buffer_pointer++\ data0 <<= fractional_scroll; data1 <<= fractional_scroll; bool check_window = gb->wy_triggered && (gb->io_registers[GB_IO_LCDC] & GB_LCDC_WIN_ENABLE); - for (unsigned i = fractional_scroll; i < 8; i++) { + nounroll for (unsigned i = fractional_scroll; i < 8; i++) { if (check_window && gb->io_registers[GB_IO_WX] == pixels + 7) { activate_window: check_window = false; @@ -1187,7 +1187,7 @@ activate_window: while (pixels < 160 - 8) { get_tile_data(gb, tile_x, 
y, map, &attributes, &data0, &data1); - for (unsigned i = 0; i < 8; i++) { + nounroll for (unsigned i = 0; i < 8; i++) { if (check_window && gb->io_registers[GB_IO_WX] == pixels + 7) { goto activate_window; } @@ -1313,7 +1313,7 @@ object_buffer_pointer++\ data0 <<= fractional_scroll; data1 <<= fractional_scroll; bool check_window = gb->wy_triggered && (gb->io_registers[GB_IO_LCDC] & GB_LCDC_WIN_ENABLE); - for (unsigned i = fractional_scroll; i < 8; i++) { + nounroll for (unsigned i = fractional_scroll; i < 8; i++) { if (check_window && gb->io_registers[GB_IO_WX] == pixels + 7) { activate_window: check_window = false; @@ -1328,7 +1328,7 @@ object_buffer_pointer++\ while (pixels < 160 - 8) { get_tile_data(gb, tile_x, y, map, &attributes, &data0, &data1); - for (unsigned i = 0; i < 8; i++) { + nounroll for (unsigned i = 0; i < 8; i++) { if (check_window && gb->io_registers[GB_IO_WX] == pixels + 7) { goto activate_window; } diff --git a/Core/gb.c b/Core/gb.c index 783f10b..01f004f 100644 --- a/Core/gb.c +++ b/Core/gb.c @@ -1515,7 +1515,7 @@ static void reset_ram(GB_gameboy_t *gb) case GB_MODEL_CGB_E: case GB_MODEL_AGB_A: case GB_MODEL_GBP_A: - for (unsigned i = 0; i < sizeof(gb->hram); i++) { + nounroll for (unsigned i = 0; i < sizeof(gb->hram); i++) { gb->hram[i] = GB_random(); } break; @@ -1528,7 +1528,7 @@ static void reset_ram(GB_gameboy_t *gb) case GB_MODEL_SGB_PAL_NO_SFC: /* Unverified */ case GB_MODEL_SGB2: case GB_MODEL_SGB2_NO_SFC: - for (unsigned i = 0; i < sizeof(gb->hram); i++) { + nounroll for (unsigned i = 0; i < sizeof(gb->hram); i++) { if (i & 1) { gb->hram[i] = GB_random() | GB_random() | GB_random(); } @@ -1568,7 +1568,7 @@ static void reset_ram(GB_gameboy_t *gb) gb->oam[i] = GB_random() | GB_random() | GB_random(); } } - for (unsigned i = 8; i < sizeof(gb->oam); i++) { + nounroll for (unsigned i = 8; i < sizeof(gb->oam); i++) { gb->oam[i] = gb->oam[i - 8]; } break; @@ -1587,7 +1587,7 @@ static void reset_ram(GB_gameboy_t *gb) /* Initialized by CGB-A 
and newer, 0s in CGB-0 */ break; case GB_MODEL_MGB: { - for (unsigned i = 0; i < GB_IO_WAV_END - GB_IO_WAV_START; i++) { + nounroll for (unsigned i = 0; i < GB_IO_WAV_END - GB_IO_WAV_START; i++) { if (i & 1) { gb->io_registers[GB_IO_WAV_START + i] = GB_random() & GB_random(); } @@ -1604,7 +1604,7 @@ static void reset_ram(GB_gameboy_t *gb) case GB_MODEL_SGB_PAL_NO_SFC: /* Unverified */ case GB_MODEL_SGB2: case GB_MODEL_SGB2_NO_SFC: { - for (unsigned i = 0; i < GB_IO_WAV_END - GB_IO_WAV_START; i++) { + nounroll for (unsigned i = 0; i < GB_IO_WAV_END - GB_IO_WAV_START; i++) { if (i & 1) { gb->io_registers[GB_IO_WAV_START + i] = GB_random() & GB_random() & GB_random(); } diff --git a/Core/sgb.c b/Core/sgb.c index 891a27d..8a29d50 100644 --- a/Core/sgb.c +++ b/Core/sgb.c @@ -168,7 +168,7 @@ static void command_ready(GB_gameboy_t *gb) if (gb->sgb->command[0] == 0xFB) { if (gb->sgb->received_header[0x42] != 3 || gb->sgb->received_header[0x47] != 0x33) { gb->sgb->disable_commands = true; - for (unsigned i = 0; i < sizeof(palette_assignments) / sizeof(palette_assignments[0]); i++) { + nounroll for (unsigned i = 0; i < sizeof(palette_assignments) / sizeof(palette_assignments[0]); i++) { if (memcmp(palette_assignments[i].name, &gb->sgb->received_header[0x30], sizeof(palette_assignments[i].name)) == 0) { gb->sgb->effective_palettes[0] = LE16(built_in_palettes[palette_assignments[i].palette_index * 4 - 4]); gb->sgb->effective_palettes[1] = LE16(built_in_palettes[palette_assignments[i].palette_index * 4 + 1 - 4]); diff --git a/Makefile b/Makefile index 077bad6..07354b6 100644 --- a/Makefile +++ b/Makefile @@ -216,6 +216,8 @@ ifeq ($(CONF),debug) CFLAGS += -g else ifeq ($(CONF), release) CFLAGS += -O3 -ffast-math -DNDEBUG +# The frontend code is not time-critical; prefer reducing its size for less memory use and better cache utilization. -Oz is rejected by GCC before version 12, so it is only used when $(CC) identifies itself as Clang; every other compiler gets -Os. +FRONTEND_CFLAGS += $(if $(findstring clang,$(shell $(CC) --version)),-Oz,-Os) STRIP := strip CODESIGN := true ifeq ($(PLATFORM),Darwin) @@ -312,25 +314,25 @@ $(OBJ)/Core/%.c.o: Core/%.c 
$(OBJ)/SDL/%.c.o: SDL/%.c -@$(MKDIR) -p $(dir $@) - $(CC) $(CFLAGS) $(FAT_FLAGS) $(SDL_CFLAGS) $(GL_CFLAGS) -c $< -o $@ + $(CC) $(CFLAGS) $(FRONTEND_CFLAGS) $(FAT_FLAGS) $(SDL_CFLAGS) $(GL_CFLAGS) -c $< -o $@ $(OBJ)/OpenDialog/%.c.o: OpenDialog/%.c -@$(MKDIR) -p $(dir $@) - $(CC) $(CFLAGS) $(FAT_FLAGS) $(SDL_CFLAGS) $(GL_CFLAGS) -c $< -o $@ + $(CC) $(CFLAGS) $(FRONTEND_CFLAGS) $(FAT_FLAGS) $(SDL_CFLAGS) $(GL_CFLAGS) -c $< -o $@ $(OBJ)/%.c.o: %.c -@$(MKDIR) -p $(dir $@) - $(CC) $(CFLAGS) $(FAT_FLAGS) -c $< -o $@ + $(CC) $(CFLAGS) $(FRONTEND_CFLAGS) $(FAT_FLAGS) -c $< -o $@ # HexFiend requires more flags $(OBJ)/HexFiend/%.m.o: HexFiend/%.m -@$(MKDIR) -p $(dir $@) - $(CC) $(CFLAGS) $(FAT_FLAGS) $(OCFLAGS) -c $< -o $@ -fno-objc-arc -include HexFiend/HexFiend_2_Framework_Prefix.pch + $(CC) $(CFLAGS) $(FRONTEND_CFLAGS) $(FAT_FLAGS) $(OCFLAGS) -c $< -o $@ -fno-objc-arc -include HexFiend/HexFiend_2_Framework_Prefix.pch $(OBJ)/%.m.o: %.m -@$(MKDIR) -p $(dir $@) - $(CC) $(CFLAGS) $(FAT_FLAGS) $(OCFLAGS) -c $< -o $@ + $(CC) $(CFLAGS) $(FRONTEND_CFLAGS) $(FAT_FLAGS) $(OCFLAGS) -c $< -o $@ # iOS Port