Use Oz for the frontend files, reduce some aggressive loop unrolling

This commit is contained in:
Lior Halphon 2023-05-11 00:08:19 +03:00
parent 96e337edac
commit 996ebaafa3
6 changed files with 26 additions and 21 deletions

View File

@ -1983,7 +1983,7 @@ static bool apu(GB_gameboy_t *gb, char *arguments, char *modifiers, const debugg
GB_log(gb, " LFSR in %u-step mode, current value ",
gb->apu.noise_channel.narrow? 7 : 15);
for (uint16_t lfsr = gb->apu.noise_channel.lfsr, i = 15; i--; lfsr <<= 1) {
nounroll for (uint16_t lfsr = gb->apu.noise_channel.lfsr, i = 15; i--; lfsr <<= 1) {
GB_log(gb, "%u%s", (lfsr >> 14) & 1, i%4 ? "" : " ");
}
@ -2166,9 +2166,9 @@ static void print_command_description(GB_gameboy_t *gb, const debugger_command_t
const char *string = command->help_string;
const unsigned width = 80 - 13;
while (strlen(string) > width) {
nounroll while (strlen(string) > width) {
const char *space = string + width;
while (*space != ' ') {
nounroll while (*space != ' ') {
space--;
if (space == string) {
// This help string has some extra long word? Abort line-breaking, it's going to break anyway.
@ -2202,7 +2202,7 @@ static bool help(GB_gameboy_t *gb, char *arguments, char *modifiers, const debug
}
return true;
}
for (command = commands; command->command; command++) {
nounroll for (command = commands; command->command; command++) {
if (command->help_string) {
print_command_description(gb, command);
}

View File

@ -15,10 +15,13 @@
/* Loop-unrolling hint macros. Write `unrolled` or `nounroll` directly in front
   of a loop statement. Each expands to a compiler-specific _Pragma, or to
   nothing when the compiler is not one of those recognized below. */
/* NOTE(review): the clang test is kept first; clang is documented to define
   __GNUC__ as well, so reordering the branches would likely pick the GCC
   pragmas under clang — confirm before changing the order. */
#if __clang__
/* clang: request unrolling of the following loop */
#define unrolled _Pragma("unroll")
/* clang: disable unrolling of the following loop */
#define nounroll _Pragma("clang loop unroll(disable)")
#elif __GNUC__ >= 8
/* GCC 8 and newer: request an unroll factor of 8 for the following loop */
#define unrolled _Pragma("GCC unroll 8")
/* GCC 8 and newer: an unroll factor of 0 asks GCC not to unroll the loop */
#define nounroll _Pragma("GCC unroll 0")
#else
/* Any other compiler: both hints expand to nothing */
#define unrolled
#define nounroll
#endif
/* Note that the expansion already ends with a semicolon. */
#define unreachable() __builtin_unreachable();

View File

@ -403,7 +403,7 @@ void GB_set_color_correction_mode(GB_gameboy_t *gb, GB_color_correction_mode_t m
{
gb->color_correction_mode = mode;
if (GB_is_cgb(gb)) {
for (unsigned i = 0; i < 32; i++) {
nounroll for (unsigned i = 0; i < 32; i++) {
GB_palette_changed(gb, false, i * 2);
GB_palette_changed(gb, true, i * 2);
}
@ -414,7 +414,7 @@ void GB_set_light_temperature(GB_gameboy_t *gb, double temperature)
{
gb->light_temperature = temperature;
if (GB_is_cgb(gb)) {
for (unsigned i = 0; i < 32; i++) {
nounroll for (unsigned i = 0; i < 32; i++) {
GB_palette_changed(gb, false, i * 2);
GB_palette_changed(gb, true, i * 2);
}
@ -1172,7 +1172,7 @@ object_buffer_pointer++\
data0 <<= fractional_scroll;
data1 <<= fractional_scroll;
bool check_window = gb->wy_triggered && (gb->io_registers[GB_IO_LCDC] & GB_LCDC_WIN_ENABLE);
for (unsigned i = fractional_scroll; i < 8; i++) {
nounroll for (unsigned i = fractional_scroll; i < 8; i++) {
if (check_window && gb->io_registers[GB_IO_WX] == pixels + 7) {
activate_window:
check_window = false;
@ -1187,7 +1187,7 @@ activate_window:
while (pixels < 160 - 8) {
get_tile_data(gb, tile_x, y, map, &attributes, &data0, &data1);
for (unsigned i = 0; i < 8; i++) {
nounroll for (unsigned i = 0; i < 8; i++) {
if (check_window && gb->io_registers[GB_IO_WX] == pixels + 7) {
goto activate_window;
}
@ -1313,7 +1313,7 @@ object_buffer_pointer++\
data0 <<= fractional_scroll;
data1 <<= fractional_scroll;
bool check_window = gb->wy_triggered && (gb->io_registers[GB_IO_LCDC] & GB_LCDC_WIN_ENABLE);
for (unsigned i = fractional_scroll; i < 8; i++) {
nounroll for (unsigned i = fractional_scroll; i < 8; i++) {
if (check_window && gb->io_registers[GB_IO_WX] == pixels + 7) {
activate_window:
check_window = false;
@ -1328,7 +1328,7 @@ object_buffer_pointer++\
while (pixels < 160 - 8) {
get_tile_data(gb, tile_x, y, map, &attributes, &data0, &data1);
for (unsigned i = 0; i < 8; i++) {
nounroll for (unsigned i = 0; i < 8; i++) {
if (check_window && gb->io_registers[GB_IO_WX] == pixels + 7) {
goto activate_window;
}

View File

@ -1515,7 +1515,7 @@ static void reset_ram(GB_gameboy_t *gb)
case GB_MODEL_CGB_E:
case GB_MODEL_AGB_A:
case GB_MODEL_GBP_A:
for (unsigned i = 0; i < sizeof(gb->hram); i++) {
nounroll for (unsigned i = 0; i < sizeof(gb->hram); i++) {
gb->hram[i] = GB_random();
}
break;
@ -1528,7 +1528,7 @@ static void reset_ram(GB_gameboy_t *gb)
case GB_MODEL_SGB_PAL_NO_SFC: /* Unverified */
case GB_MODEL_SGB2:
case GB_MODEL_SGB2_NO_SFC:
for (unsigned i = 0; i < sizeof(gb->hram); i++) {
nounroll for (unsigned i = 0; i < sizeof(gb->hram); i++) {
if (i & 1) {
gb->hram[i] = GB_random() | GB_random() | GB_random();
}
@ -1568,7 +1568,7 @@ static void reset_ram(GB_gameboy_t *gb)
gb->oam[i] = GB_random() | GB_random() | GB_random();
}
}
for (unsigned i = 8; i < sizeof(gb->oam); i++) {
nounroll for (unsigned i = 8; i < sizeof(gb->oam); i++) {
gb->oam[i] = gb->oam[i - 8];
}
break;
@ -1587,7 +1587,7 @@ static void reset_ram(GB_gameboy_t *gb)
/* Initialized by CGB-A and newer, 0s in CGB-0 */
break;
case GB_MODEL_MGB: {
for (unsigned i = 0; i < GB_IO_WAV_END - GB_IO_WAV_START; i++) {
nounroll for (unsigned i = 0; i < GB_IO_WAV_END - GB_IO_WAV_START; i++) {
if (i & 1) {
gb->io_registers[GB_IO_WAV_START + i] = GB_random() & GB_random();
}
@ -1604,7 +1604,7 @@ static void reset_ram(GB_gameboy_t *gb)
case GB_MODEL_SGB_PAL_NO_SFC: /* Unverified */
case GB_MODEL_SGB2:
case GB_MODEL_SGB2_NO_SFC: {
for (unsigned i = 0; i < GB_IO_WAV_END - GB_IO_WAV_START; i++) {
nounroll for (unsigned i = 0; i < GB_IO_WAV_END - GB_IO_WAV_START; i++) {
if (i & 1) {
gb->io_registers[GB_IO_WAV_START + i] = GB_random() & GB_random() & GB_random();
}

View File

@ -168,7 +168,7 @@ static void command_ready(GB_gameboy_t *gb)
if (gb->sgb->command[0] == 0xFB) {
if (gb->sgb->received_header[0x42] != 3 || gb->sgb->received_header[0x47] != 0x33) {
gb->sgb->disable_commands = true;
for (unsigned i = 0; i < sizeof(palette_assignments) / sizeof(palette_assignments[0]); i++) {
nounroll for (unsigned i = 0; i < sizeof(palette_assignments) / sizeof(palette_assignments[0]); i++) {
if (memcmp(palette_assignments[i].name, &gb->sgb->received_header[0x30], sizeof(palette_assignments[i].name)) == 0) {
gb->sgb->effective_palettes[0] = LE16(built_in_palettes[palette_assignments[i].palette_index * 4 - 4]);
gb->sgb->effective_palettes[1] = LE16(built_in_palettes[palette_assignments[i].palette_index * 4 + 1 - 4]);

View File

@ -216,6 +216,8 @@ ifeq ($(CONF),debug)
CFLAGS += -g
else ifeq ($(CONF), release)
CFLAGS += -O3 -ffast-math -DNDEBUG
# The frontend code is not time-critical; prefer a smaller code size for lower memory use and better cache utilization
FRONTEND_CFLAGS += -Oz
STRIP := strip
CODESIGN := true
ifeq ($(PLATFORM),Darwin)
@ -312,25 +314,25 @@ $(OBJ)/Core/%.c.o: Core/%.c
$(OBJ)/SDL/%.c.o: SDL/%.c
-@$(MKDIR) -p $(dir $@)
$(CC) $(CFLAGS) $(FAT_FLAGS) $(SDL_CFLAGS) $(GL_CFLAGS) -c $< -o $@
$(CC) $(CFLAGS) $(FRONTEND_CFLAGS) $(FAT_FLAGS) $(SDL_CFLAGS) $(GL_CFLAGS) -c $< -o $@
$(OBJ)/OpenDialog/%.c.o: OpenDialog/%.c
-@$(MKDIR) -p $(dir $@)
$(CC) $(CFLAGS) $(FAT_FLAGS) $(SDL_CFLAGS) $(GL_CFLAGS) -c $< -o $@
$(CC) $(CFLAGS) $(FRONTEND_CFLAGS) $(FAT_FLAGS) $(SDL_CFLAGS) $(GL_CFLAGS) -c $< -o $@
$(OBJ)/%.c.o: %.c
-@$(MKDIR) -p $(dir $@)
$(CC) $(CFLAGS) $(FAT_FLAGS) -c $< -o $@
$(CC) $(CFLAGS) $(FRONTEND_CFLAGS) $(FAT_FLAGS) -c $< -o $@
# HexFiend requires more flags
$(OBJ)/HexFiend/%.m.o: HexFiend/%.m
-@$(MKDIR) -p $(dir $@)
$(CC) $(CFLAGS) $(FAT_FLAGS) $(OCFLAGS) -c $< -o $@ -fno-objc-arc -include HexFiend/HexFiend_2_Framework_Prefix.pch
$(CC) $(CFLAGS) $(FRONTEND_CFLAGS) $(FAT_FLAGS) $(OCFLAGS) -c $< -o $@ -fno-objc-arc -include HexFiend/HexFiend_2_Framework_Prefix.pch
$(OBJ)/%.m.o: %.m
-@$(MKDIR) -p $(dir $@)
$(CC) $(CFLAGS) $(FAT_FLAGS) $(OCFLAGS) -c $< -o $@
$(CC) $(CFLAGS) $(FRONTEND_CFLAGS) $(FAT_FLAGS) $(OCFLAGS) -c $< -o $@
# iOS Port