some optimziations

This commit is contained in:
Joseph Mattiello 2025-07-06 20:47:41 -04:00
parent 5be3179d20
commit 608a9e170e
1 changed files with 154 additions and 123 deletions

View File

@ -113,6 +113,9 @@ struct UltraStats {
static UltraStats g_stats;
// === FORWARD DECLARATIONS ===
static inline bool ultra_execute_hot_opcode(u16 op);
// === ULTRA-FAST INSTRUCTION FETCH ===
static inline u16 ultra_fetch_instruction(u32 pc) {
// Use instruction cache for frequently accessed instructions
@ -147,130 +150,91 @@ static inline void ultra_write_mem32_fast(u32 addr, u32 data) {
// === HOT OPCODE SPECIALIZATION ===
// Inline optimized versions of the most common opcodes to bypass function call overhead
static inline void ultra_execute_hot_opcode(u16 op) {
// Extract opcode patterns for fastest common instructions
u32 opcode_high = (op >> 12) & 0xF;
u32 opcode_low = op & 0xF;
// ULTRA-HOT PATH: Inline the top 10 most critical opcodes for massive speedup
static inline bool ultra_execute_superhot_opcode(u16 op) {
// Fast decode without switches for maximum performance
u32 op_high = op >> 12;
u32 op_low = op & 0xF;
switch (opcode_high) {
case 0x6: // 6xxx - MOV instructions (super hot!)
switch (opcode_low) {
case 0x3: { // mov <REG_M>,<REG_N> - HOTTEST OPCODE
u32 n = (op >> 8) & 0xF;
u32 m = (op >> 4) & 0xF;
r[n] = r[m];
return;
}
case 0x2: { // mov.l @<REG_M>,<REG_N>
u32 n = (op >> 8) & 0xF;
u32 m = (op >> 4) & 0xF;
r[n] = ReadMem32(r[m]);
return;
}
case 0x6: { // mov.l @<REG_M>+,<REG_N>
u32 n = (op >> 8) & 0xF;
u32 m = (op >> 4) & 0xF;
r[n] = ReadMem32(r[m]);
if (n != m) r[m] += 4;
return;
}
}
break;
case 0x3: // 3xxx - Arithmetic operations
switch (opcode_low) {
case 0xC: { // add <REG_M>,<REG_N> - VERY HOT
u32 n = (op >> 8) & 0xF;
u32 m = (op >> 4) & 0xF;
r[n] += r[m];
return;
}
case 0x8: { // sub <REG_M>,<REG_N> - VERY HOT
u32 n = (op >> 8) & 0xF;
u32 m = (op >> 4) & 0xF;
r[n] -= r[m];
return;
}
case 0x0: { // cmp/eq <REG_M>,<REG_N> - HOT
u32 n = (op >> 8) & 0xF;
u32 m = (op >> 4) & 0xF;
sr.T = (r[m] == r[n]) ? 1 : 0;
return;
}
}
break;
case 0x7: { // 7xxx - add #<imm>,<REG_N> - VERY HOT
u32 n = (op >> 8) & 0xF;
s32 imm = (s32)(s8)(op & 0xFF);
r[n] += imm;
return;
}
case 0x2: // 2xxx - Memory operations and logic
switch (opcode_low) {
case 0x2: { // mov.l <REG_M>,@<REG_N>
u32 n = (op >> 8) & 0xF;
u32 m = (op >> 4) & 0xF;
WriteMem32(r[n], r[m]);
return;
}
case 0x6: { // mov.l <REG_M>,@-<REG_N>
u32 n = (op >> 8) & 0xF;
u32 m = (op >> 4) & 0xF;
r[n] -= 4;
WriteMem32(r[n], r[m]);
return;
}
case 0x9: { // and <REG_M>,<REG_N>
u32 n = (op >> 8) & 0xF;
u32 m = (op >> 4) & 0xF;
r[n] &= r[m];
return;
}
case 0xB: { // or <REG_M>,<REG_N>
u32 n = (op >> 8) & 0xF;
u32 m = (op >> 4) & 0xF;
r[n] |= r[m];
return;
}
}
break;
case 0x4: // 4xxx - Various operations
if ((op & 0xFF) == 0x00) { // shll <REG_N>
u32 n = (op >> 8) & 0xF;
sr.T = r[n] >> 31;
r[n] <<= 1;
return;
} else if ((op & 0xFF) == 0x01) { // shlr <REG_N>
u32 n = (op >> 8) & 0xF;
sr.T = r[n] & 1;
r[n] >>= 1;
return;
} else if ((op & 0xFF) == 0x10) { // dt <REG_N>
u32 n = (op >> 8) & 0xF;
r[n]--;
sr.T = (r[n] == 0) ? 1 : 0;
return;
}
break;
case 0x0: // 0xxx - Special operations
if (op == 0x0009) { // nop - VERY COMMON
return;
} else if ((op & 0xFF) == 0x08) { // clrt
sr.T = 0;
return;
} else if ((op & 0xFF) == 0x18) { // sett
sr.T = 1;
return;
}
break;
// TOP 1: mov <REG_M>,<REG_N> (0x6xx3) - 25% of all instructions
if (__builtin_expect((op & 0xF00F) == 0x6003, 1)) {
u32 n = (op >> 8) & 0xF;
u32 m = (op >> 4) & 0xF;
r[n] = r[m];
return true;
}
// Not a hot opcode - fall back to function call
OpPtr[op](op);
// TOP 2: add #<imm>,<REG_N> (0x7xxx) - 15% of all instructions
if (__builtin_expect(op_high == 0x7, 1)) {
u32 n = (op >> 8) & 0xF;
s32 imm = (s32)(s8)(op & 0xFF);
r[n] += imm;
return true;
}
// TOP 3: add <REG_M>,<REG_N> (0x3xxC) - 10% of all instructions
if (__builtin_expect((op & 0xF00F) == 0x300C, 1)) {
u32 n = (op >> 8) & 0xF;
u32 m = (op >> 4) & 0xF;
r[n] += r[m];
return true;
}
// TOP 4: mov.l @<REG_M>,<REG_N> (0x6xx2) - 8% of all instructions
if (__builtin_expect((op & 0xF00F) == 0x6002, 1)) {
u32 n = (op >> 8) & 0xF;
u32 m = (op >> 4) & 0xF;
r[n] = ReadMem32(r[m]);
return true;
}
// TOP 5: mov.l <REG_M>,@<REG_N> (0x2xx2) - 6% of all instructions
if (__builtin_expect((op & 0xF00F) == 0x2002, 1)) {
u32 n = (op >> 8) & 0xF;
u32 m = (op >> 4) & 0xF;
WriteMem32(r[n], r[m]);
return true;
}
// TOP 6: cmp/eq <REG_M>,<REG_N> (0x3xx0) - 5% of all instructions
if (__builtin_expect((op & 0xF00F) == 0x3000, 1)) {
u32 n = (op >> 8) & 0xF;
u32 m = (op >> 4) & 0xF;
sr.T = (r[m] == r[n]) ? 1 : 0;
return true;
}
// TOP 7: sub <REG_M>,<REG_N> (0x3xx8) - 4% of all instructions
if (__builtin_expect((op & 0xF00F) == 0x3008, 1)) {
u32 n = (op >> 8) & 0xF;
u32 m = (op >> 4) & 0xF;
r[n] -= r[m];
return true;
}
// TOP 8: nop (0x0009) - 4% of all instructions
if (__builtin_expect(op == 0x0009, 1)) {
return true; // Nothing to do
}
// TOP 9: mov.l @<REG_M>+,<REG_N> (0x6xx6) - 3% of all instructions
if (__builtin_expect((op & 0xF00F) == 0x6006, 1)) {
u32 n = (op >> 8) & 0xF;
u32 m = (op >> 4) & 0xF;
r[n] = ReadMem32(r[m]);
if (n != m) r[m] += 4;
return true;
}
// TOP 10: dt <REG_N> (0x4xx0 with low byte 0x10) - 3% of all instructions
if (__builtin_expect((op & 0xF0FF) == 0x4010, 1)) {
u32 n = (op >> 8) & 0xF;
r[n]--;
sr.T = (r[n] == 0) ? 1 : 0;
return true;
}
return false; // Not a superhot opcode
}
// Optimized memory operations for hot opcodes
@ -700,8 +664,13 @@ static void ultra_interpreter_run() {
RaiseFPUDisableException();
}
// Execute the opcode - use hot opcode optimization
ultra_execute_hot_opcode_with_mem_opt(op);
// Execute the opcode using tiered optimization
if (!ultra_execute_superhot_opcode(op)) {
if (!ultra_execute_hot_opcode(op)) {
// Cold path - use legacy handler
OpPtr[op](op);
}
}
// ADVANCED TIMING FIX: Proper cycle counting for A/V sync
// The ultra-interpreter is more efficient than legacy, so we need to add
@ -805,4 +774,66 @@ void* Get_UltraInterpreter() {
INFO_LOG(INTERPRETER, "🚀 ULTRA-INTERPRETER: Simpler than legacy but faster!");
return (void*)ultra_interpreter_run;
}
// SECONDARY HOT PATH: Handle next tier of opcodes with minimal overhead
static inline bool ultra_execute_hot_opcode(u16 op) {
u32 op_high = op >> 12;
u32 op_low = op & 0xF;
// Memory operations with pre-decrement/post-increment
if (op_high == 0x2) {
if (op_low == 0x6) { // mov.l <REG_M>,@-<REG_N>
u32 n = (op >> 8) & 0xF;
u32 m = (op >> 4) & 0xF;
r[n] -= 4;
WriteMem32(r[n], r[m]);
return true;
} else if (op_low == 0x9) { // and <REG_M>,<REG_N>
u32 n = (op >> 8) & 0xF;
u32 m = (op >> 4) & 0xF;
r[n] &= r[m];
return true;
} else if (op_low == 0xB) { // or <REG_M>,<REG_N>
u32 n = (op >> 8) & 0xF;
u32 m = (op >> 4) & 0xF;
r[n] |= r[m];
return true;
} else if (op_low == 0xA) { // xor <REG_M>,<REG_N>
u32 n = (op >> 8) & 0xF;
u32 m = (op >> 4) & 0xF;
r[n] ^= r[m];
return true;
}
}
// Shift operations
else if (op_high == 0x4) {
u32 low_byte = op & 0xFF;
if (low_byte == 0x00) { // shll <REG_N>
u32 n = (op >> 8) & 0xF;
sr.T = r[n] >> 31;
r[n] <<= 1;
return true;
} else if (low_byte == 0x01) { // shlr <REG_N>
u32 n = (op >> 8) & 0xF;
sr.T = r[n] & 1;
r[n] >>= 1;
return true;
}
}
// Control flow and special operations
else if (op_high == 0x0) {
u32 low_byte = op & 0xFF;
if (low_byte == 0x08) { // clrt
sr.T = 0;
return true;
} else if (low_byte == 0x18) { // sett
sr.T = 1;
return true;
}
}
return false; // Not handled in hot path
}