diff --git a/Core/gb.h b/Core/gb.h
index 3ad0cf07..53b5dea8 100644
--- a/Core/gb.h
+++ b/Core/gb.h
@@ -207,7 +207,7 @@ typedef struct GB_gameboy_s {
         /* Registers */
         uint16_t pc;
         uint16_t registers[GB_REGISTERS_16_BIT];
-        bool ime;
+        uint8_t ime;
         uint8_t interrupt_enable;
         uint8_t cgb_ram_bank;
 
@@ -218,6 +218,7 @@ typedef struct GB_gameboy_s {
         bool halted;
         bool stopped;
         bool boot_rom_finished;
+        bool ime_toggle; /* ei (and di in CGB) have delayed effects.*/
 
         /* Misc state*/
         /* IR */
diff --git a/Core/memory.c b/Core/memory.c
index d81c386b..c8b5e685 100644
--- a/Core/memory.c
+++ b/Core/memory.c
@@ -129,7 +129,21 @@ static uint8_t read_high_memory(GB_gameboy_t *gb, uint16_t addr)
             case GB_IO_STAT:
                 return gb->io_registers[GB_IO_STAT] | 0x80;
             case GB_IO_DMG_EMULATION_INDICATION:
+                if (!gb->is_cgb) {
+                    return 0xFF;
+                }
                 return gb->io_registers[GB_IO_DMG_EMULATION_INDICATION] | 0xFE;
+
+            case GB_IO_HDMA1:
+            case GB_IO_HDMA2:
+            case GB_IO_HDMA3:
+            case GB_IO_HDMA4:
+            case GB_IO_PCM_12:
+            case GB_IO_PCM_34:
+                if (!gb->is_cgb) {
+                    return 0xFF;
+                }
+                /* Fall through */
             case GB_IO_JOYP:
             case GB_IO_DIV:
             case GB_IO_TIMA:
@@ -144,15 +158,12 @@ static uint8_t read_high_memory(GB_gameboy_t *gb, uint16_t addr)
             case GB_IO_OBP1:
             case GB_IO_WY:
             case GB_IO_WX:
-            case GB_IO_HDMA1:
-            case GB_IO_HDMA2:
-            case GB_IO_HDMA3:
-            case GB_IO_HDMA4:
-            case GB_IO_PCM_12:
-            case GB_IO_PCM_34:
             case GB_IO_SB:
                 return gb->io_registers[addr & 0xFF];
             case GB_IO_HDMA5:
+                if (!gb->is_cgb) {
+                    return 0xFF;
+                }
                 return (gb->io_registers[GB_IO_HDMA5] & 0x80) | ((gb->hdma_steps_left - 1) & 0x7F);
             case GB_IO_SVBK:
                 if (!gb->cgb_mode) {
@@ -380,6 +391,7 @@ static void write_high_memory(GB_gameboy_t *gb, uint16_t addr, uint8_t value)
                 return;
 
             case GB_IO_DIV:
+                gb->div_cycles = 0;
                 gb->io_registers[GB_IO_DIV] = 0;
                 return;
 
@@ -507,7 +519,7 @@ static void write_high_memory(GB_gameboy_t *gb, uint16_t addr, uint8_t value)
 
     if (addr == 0xFFFF) {
         /* Interrupt mask */
-        gb->interrupt_enable = value & 0x1F;
+        gb->interrupt_enable = value;
         return;
     }
     
@@ -539,7 +551,9 @@ void GB_write_memory(GB_gameboy_t *gb, uint16_t addr, uint8_t value)
 
 void GB_dma_run(GB_gameboy_t *gb)
 {
-    while (gb->dma_cycles >= 4 && gb->dma_steps_left) {
+    /* + 1 to compensate for the fact that DMA is never started in the first internal cycle of an opcode,
+       and SameBoy isn't sub-cycle accurate (yet?). */
+    while (gb->dma_cycles >= 4 + 1 && gb->dma_steps_left) {
         /* Todo: measure this value */
         gb->dma_cycles -= 4;
         gb->dma_steps_left--;
@@ -552,7 +566,9 @@ void GB_dma_run(GB_gameboy_t *gb)
 void GB_hdma_run(GB_gameboy_t *gb)
 {
     if (!gb->hdma_on) return;
-    while (gb->hdma_cycles >= 8) {
+    /* + 1 to compensate for the fact that HDMA is never started in the first internal cycle of an opcode,
+     and SameBoy isn't sub-cycle accurate (yet?). */
+    while (gb->hdma_cycles >= 8 + 1) {
         gb->hdma_cycles -= 8;
         // The CGB boot rom uses the dest in "absolute" space, while some games use it relative to VRAM.
         // This "normalizes" the dest to the CGB address space.
diff --git a/Core/z80_cpu.c b/Core/z80_cpu.c
index b57e979d..35d3cd5b 100644
--- a/Core/z80_cpu.c
+++ b/Core/z80_cpu.c
@@ -61,9 +61,11 @@ static void ld_rr_d16(GB_gameboy_t *gb, uint8_t opcode)
 static void ld_drr_a(GB_gameboy_t *gb, uint8_t opcode)
 {
     uint8_t register_id;
-    GB_advance_cycles(gb, 8);
-    register_id = (GB_read_memory(gb, gb->pc++) >> 4) + 1;
+    GB_advance_cycles(gb, 4); /* First 4 of the 8 cycles; the rest are advanced after the write. */
+    register_id = (opcode >> 4) + 1; /* Decode from the opcode argument instead of re-reading memory at pc. */
+    gb->pc++;
     GB_write_memory(gb, gb->registers[register_id], gb->registers[GB_REGISTER_AF] >> 8);
+    GB_advance_cycles(gb, 4); /* Remaining 4 cycles, so the total stays 8. */
 }
 
 static void inc_rr(GB_gameboy_t *gb, uint8_t opcode)
@@ -183,10 +185,12 @@ static void add_hl_rr(GB_gameboy_t *gb, uint8_t opcode)
 static void ld_a_drr(GB_gameboy_t *gb, uint8_t opcode)
 {
     uint8_t register_id;
-    GB_advance_cycles(gb, 8);
-    register_id = (GB_read_memory(gb, gb->pc++) >> 4) + 1;
+    register_id = (opcode >> 4) + 1; /* Decode from the opcode argument instead of re-reading memory at pc. */
+    GB_advance_cycles(gb, 4); /* First 4 of the 8 cycles, advanced before the memory read below. */
+    gb->pc++;
     gb->registers[GB_REGISTER_AF] &= 0xFF;
     gb->registers[GB_REGISTER_AF] |= GB_read_memory(gb, gb->registers[register_id]) << 8;
+    GB_advance_cycles(gb, 4); /* Remaining 4 cycles, so the total stays 8. */
 }
 
 static void dec_rr(GB_gameboy_t *gb, uint8_t opcode)
@@ -713,10 +717,13 @@ static void ret_cc(GB_gameboy_t *gb, uint8_t opcode)
 static void pop_rr(GB_gameboy_t *gb, uint8_t opcode)
 {
     uint8_t register_id;
-    GB_advance_cycles(gb, 12);
-    register_id = ((GB_read_memory(gb, gb->pc++) >> 4) + 1) & 3;
-    gb->registers[register_id] = GB_read_memory(gb, gb->registers[GB_REGISTER_SP]) |
-    (GB_read_memory(gb, gb->registers[GB_REGISTER_SP] + 1) << 8);
+    GB_advance_cycles(gb, 4); /* The 12 cycles are now split into 4 + 4 + 4 around the two reads. */
+    register_id = ((opcode >> 4) + 1) & 3; /* Decode from the opcode argument instead of re-reading memory at pc. */
+    gb->pc++;
+    GB_advance_cycles(gb, 4);
+    gb->registers[register_id] = GB_read_memory(gb, gb->registers[GB_REGISTER_SP]); /* Low byte, read from SP. */
+    GB_advance_cycles(gb, 4);
+    gb->registers[register_id] |= GB_read_memory(gb, gb->registers[GB_REGISTER_SP] + 1) << 8; /* High byte, read from SP + 1. */
     gb->registers[GB_REGISTER_AF] &= 0xFFF0; // Make sure we don't set impossible flags on F! See Blargg's PUSH AF test.
     gb->registers[GB_REGISTER_SP] += 2;
 }
@@ -725,8 +732,13 @@ static void jp_cc_a16(GB_gameboy_t *gb, uint8_t opcode)
 {
     gb->pc++;
     if (condition_code(gb, opcode)) {
-        GB_advance_cycles(gb, 16);
-        gb->pc = GB_read_memory(gb, gb->pc) | (GB_read_memory(gb, gb->pc + 1) << 8);
+        GB_advance_cycles(gb, 4);
+        uint16_t addr = GB_read_memory(gb, gb->pc);
+        GB_advance_cycles(gb, 4);
+        addr |= (GB_read_memory(gb, gb->pc + 1) << 8);
+        GB_advance_cycles(gb, 8);
+        gb->pc = addr;
+
     }
     else {
         GB_advance_cycles(gb, 12);
@@ -736,20 +748,30 @@ static void jp_cc_a16(GB_gameboy_t *gb, uint8_t opcode)
 
 static void jp_a16(GB_gameboy_t *gb, uint8_t opcode)
 {
-    GB_advance_cycles(gb, 16);
     gb->pc++;
-    gb->pc = GB_read_memory(gb, gb->pc) | (GB_read_memory(gb, gb->pc + 1) << 8);
-}
+    GB_advance_cycles(gb, 4); /* The 16 cycles are now split into 4 + 4 + 8 around the two operand reads. */
+    uint16_t addr = GB_read_memory(gb, gb->pc); /* Low byte of the target address. */
+    GB_advance_cycles(gb, 4);
+    addr |= (GB_read_memory(gb, gb->pc + 1) << 8); /* High byte of the target address. */
+    GB_advance_cycles(gb, 8);
+    gb->pc = addr; /* pc is only replaced after both operand bytes were read through it. */
+}
 
 static void call_cc_a16(GB_gameboy_t *gb, uint8_t opcode)
 {
     gb->pc++;
     if (condition_code(gb, opcode)) {
-        GB_advance_cycles(gb, 24);
+        GB_advance_cycles(gb, 4); /* The 24 cycles are now split into 4 + 4 + 8 + 4 + 4 around the accesses. */
         gb->registers[GB_REGISTER_SP] -= 2;
-        GB_write_memory(gb, gb->registers[GB_REGISTER_SP], (gb->pc + 2) & 0xFF);
+        uint16_t addr = GB_read_memory(gb, gb->pc); /* Low byte of the target address. */
+        GB_advance_cycles(gb, 4);
+        addr |= (GB_read_memory(gb, gb->pc + 1) << 8); /* High byte of the target address. */
+        GB_advance_cycles(gb, 8);
         GB_write_memory(gb, gb->registers[GB_REGISTER_SP] + 1, (gb->pc + 2) >> 8);
-        gb->pc = GB_read_memory(gb, gb->pc) | (GB_read_memory(gb, gb->pc + 1) << 8);
+        GB_advance_cycles(gb, 4);
+        GB_write_memory(gb, gb->registers[GB_REGISTER_SP], (gb->pc + 2) & 0xFF); /* Low byte is now written after the high byte. */
+        GB_advance_cycles(gb, 4);
+        gb->pc = addr; /* pc is only replaced after pc + 2 has been pushed. */
         GB_debugger_call_hook(gb);
     }
     else {
@@ -761,12 +783,14 @@ static void call_cc_a16(GB_gameboy_t *gb, uint8_t opcode)
 static void push_rr(GB_gameboy_t *gb, uint8_t opcode)
 {
     uint8_t register_id;
-    GB_advance_cycles(gb, 16);
+    GB_advance_cycles(gb, 8); /* The 16 cycles are now split into 8 + 4 + 4 around the two writes. */
     gb->pc++;
     register_id = ((opcode >> 4) + 1) & 3;
     gb->registers[GB_REGISTER_SP] -= 2;
-    GB_write_memory(gb, gb->registers[GB_REGISTER_SP], (gb->registers[register_id]) & 0xFF);
     GB_write_memory(gb, gb->registers[GB_REGISTER_SP] + 1, (gb->registers[register_id]) >> 8);
+    GB_advance_cycles(gb, 4);
+    GB_write_memory(gb, gb->registers[GB_REGISTER_SP], (gb->registers[register_id]) & 0xFF); /* Low byte is now written after the high byte. */
+    GB_advance_cycles(gb, 4);
 }
 
 static void add_a_d8(GB_gameboy_t *gb, uint8_t opcode)
@@ -910,10 +934,12 @@ static void cp_a_d8(GB_gameboy_t *gb, uint8_t opcode)
 
 static void rst(GB_gameboy_t *gb, uint8_t opcode)
 {
-    GB_advance_cycles(gb, 16);
+    GB_advance_cycles(gb, 8); /* The 16 cycles are now split into 8 + 4 + 4 around the two writes. */
     gb->registers[GB_REGISTER_SP] -= 2;
-    GB_write_memory(gb, gb->registers[GB_REGISTER_SP], (gb->pc + 1) & 0xFF);
     GB_write_memory(gb, gb->registers[GB_REGISTER_SP] + 1, (gb->pc + 1) >> 8);
+    GB_advance_cycles(gb, 4);
+    GB_write_memory(gb, gb->registers[GB_REGISTER_SP], (gb->pc + 1) & 0xFF); /* Low byte of pc + 1 is now written after the high byte. */
+    GB_advance_cycles(gb, 4);
     gb->pc = opcode ^ 0xC7;
     GB_debugger_call_hook(gb);
 }
@@ -921,9 +947,11 @@ static void rst(GB_gameboy_t *gb, uint8_t opcode)
 static void ret(GB_gameboy_t *gb, uint8_t opcode)
 {
     GB_debugger_ret_hook(gb);
-    GB_advance_cycles(gb, 16);
-    gb->pc = GB_read_memory(gb, gb->registers[GB_REGISTER_SP]) |
-    (GB_read_memory(gb, gb->registers[GB_REGISTER_SP] + 1) << 8);
+    GB_advance_cycles(gb, 4); /* The 16 cycles are now split into 4 + 4 + 8 around the two reads. */
+    gb->pc = GB_read_memory(gb, gb->registers[GB_REGISTER_SP]); /* Low byte of the new pc, read from SP. */
+    GB_advance_cycles(gb, 4);
+    gb->pc |= GB_read_memory(gb, gb->registers[GB_REGISTER_SP] + 1) << 8; /* High byte of the new pc, read from SP + 1. */
+    GB_advance_cycles(gb, 8);
     gb->registers[GB_REGISTER_SP] += 2;
 }
 
@@ -935,12 +963,18 @@ static void reti(GB_gameboy_t *gb, uint8_t opcode)
 
 static void call_a16(GB_gameboy_t *gb, uint8_t opcode)
 {
-    GB_advance_cycles(gb, 24);
     gb->pc++;
+    GB_advance_cycles(gb, 4); /* The 24 cycles are now split into 4 + 4 + 8 + 4 + 4 around the accesses. */
     gb->registers[GB_REGISTER_SP] -= 2;
-    GB_write_memory(gb, gb->registers[GB_REGISTER_SP], (gb->pc + 2) & 0xFF);
+    uint16_t addr = GB_read_memory(gb, gb->pc); /* Low byte of the target address. */
+    GB_advance_cycles(gb, 4);
+    addr |= (GB_read_memory(gb, gb->pc + 1) << 8); /* High byte of the target address. */
+    GB_advance_cycles(gb, 8);
     GB_write_memory(gb, gb->registers[GB_REGISTER_SP] + 1, (gb->pc + 2) >> 8);
-    gb->pc = GB_read_memory(gb, gb->pc) | (GB_read_memory(gb, gb->pc + 1) << 8);
+    GB_advance_cycles(gb, 4);
+    GB_write_memory(gb, gb->registers[GB_REGISTER_SP], (gb->pc + 2) & 0xFF); /* Low byte is now written after the high byte. */
+    GB_advance_cycles(gb, 4);
+    gb->pc = addr; /* pc is only replaced after pc + 2 has been pushed. */
     GB_debugger_call_hook(gb);
 }
 
@@ -982,9 +1016,10 @@ static void add_sp_r8(GB_gameboy_t *gb, uint8_t opcode)
 {
     int16_t offset;
     uint16_t sp = gb->registers[GB_REGISTER_SP];
-    GB_advance_cycles(gb, 16);
+    GB_advance_cycles(gb, 4);
     gb->pc++;
     offset = (int8_t) GB_read_memory(gb, gb->pc++);
+    GB_advance_cycles(gb, 12);
     gb->registers[GB_REGISTER_SP] += offset;
 
     gb->registers[GB_REGISTER_AF] &= 0xFF00;
@@ -1030,23 +1065,34 @@ static void di(GB_gameboy_t *gb, uint8_t opcode)
 {
     GB_advance_cycles(gb, 4);
     gb->pc++;
-    gb->ime = false;
+
+    /* di clears IME immediately on DMG. On CGB it is delayed: schedule a toggle that GB_cpu_run applies
+       on its next call. It is only scheduled while IME is set, so the toggle can only ever clear it. */
+    if (!gb->is_cgb) {
+        gb->ime = false;
+    }
+    else if (gb->ime) {
+        gb->ime_toggle = true;
+    }
 }
 
 static void ei(GB_gameboy_t *gb, uint8_t opcode)
 {
+    /* ei is actually "disable interrupts for one instruction, then enable them". */
     GB_advance_cycles(gb, 4);
     gb->pc++;
-    gb->ime = true;
+    gb->ime = false;
+    gb->ime_toggle = true; /* GB_cpu_run flips IME back to true on its next call. */
 }
 
 static void ld_hl_sp_r8(GB_gameboy_t *gb, uint8_t opcode)
 {
     int16_t offset;
-    GB_advance_cycles(gb, 12);
+    GB_advance_cycles(gb, 4);
     gb->pc++;
     gb->registers[GB_REGISTER_AF] &= 0xFF00;
     offset = (int8_t) GB_read_memory(gb, gb->pc++);
+    GB_advance_cycles(gb, 8);
     gb->registers[GB_REGISTER_HL] = gb->registers[GB_REGISTER_SP] + offset;
 
     if ((gb->registers[GB_REGISTER_SP] & 0xF) + (offset & 0xF) > 0xF) {
@@ -1321,6 +1367,10 @@ void GB_cpu_run(GB_gameboy_t *gb)
     }
 
     if (gb->ime && interrupt) {
+        if (gb->ime_toggle) {
+            gb->ime = !gb->ime;
+            gb->ime_toggle = false;
+        }
         uint8_t interrupt_bit = 0;
         uint8_t interrupt_queue = gb->interrupt_enable & gb->io_registers[GB_IO_IF];
         while (!(interrupt_queue & 1)) {
@@ -1329,12 +1379,17 @@ void GB_cpu_run(GB_gameboy_t *gb)
         }
         gb->io_registers[GB_IO_IF] &= ~(1 << interrupt_bit);
         gb->ime = false;
+        gb->ime_toggle = false;
         nop(gb, 0);
         gb->pc -= 2;
         /* Run pseudo instructions rst 40-60*/
         rst(gb, 0x87 + interrupt_bit * 8);
     }
     else if(!gb->halted && !gb->stopped) {
+        if (gb->ime_toggle) { /* Apply the IME change scheduled by the previous ei/di before running the next opcode. */
+            gb->ime = !gb->ime;
+            gb->ime_toggle = false;
+        }
         uint8_t opcode = GB_read_memory(gb, gb->pc);
         opcodes[opcode](gb, opcode);
     }