some optimziations
This commit is contained in:
parent
5be3179d20
commit
608a9e170e
|
|
@ -113,6 +113,9 @@ struct UltraStats {
|
|||
|
||||
static UltraStats g_stats;
|
||||
|
||||
// === FORWARD DECLARATIONS ===
|
||||
static inline bool ultra_execute_hot_opcode(u16 op);
|
||||
|
||||
// === ULTRA-FAST INSTRUCTION FETCH ===
|
||||
static inline u16 ultra_fetch_instruction(u32 pc) {
|
||||
// Use instruction cache for frequently accessed instructions
|
||||
|
|
@ -147,130 +150,91 @@ static inline void ultra_write_mem32_fast(u32 addr, u32 data) {
|
|||
// === HOT OPCODE SPECIALIZATION ===
|
||||
// Inline optimized versions of the most common opcodes to bypass function call overhead
|
||||
|
||||
static inline void ultra_execute_hot_opcode(u16 op) {
|
||||
// Extract opcode patterns for fastest common instructions
|
||||
u32 opcode_high = (op >> 12) & 0xF;
|
||||
u32 opcode_low = op & 0xF;
|
||||
// ULTRA-HOT PATH: Inline the top 10 most critical opcodes for massive speedup
|
||||
static inline bool ultra_execute_superhot_opcode(u16 op) {
|
||||
// Fast decode without switches for maximum performance
|
||||
u32 op_high = op >> 12;
|
||||
u32 op_low = op & 0xF;
|
||||
|
||||
switch (opcode_high) {
|
||||
case 0x6: // 6xxx - MOV instructions (super hot!)
|
||||
switch (opcode_low) {
|
||||
case 0x3: { // mov <REG_M>,<REG_N> - HOTTEST OPCODE
|
||||
u32 n = (op >> 8) & 0xF;
|
||||
u32 m = (op >> 4) & 0xF;
|
||||
r[n] = r[m];
|
||||
return;
|
||||
}
|
||||
case 0x2: { // mov.l @<REG_M>,<REG_N>
|
||||
u32 n = (op >> 8) & 0xF;
|
||||
u32 m = (op >> 4) & 0xF;
|
||||
r[n] = ReadMem32(r[m]);
|
||||
return;
|
||||
}
|
||||
case 0x6: { // mov.l @<REG_M>+,<REG_N>
|
||||
u32 n = (op >> 8) & 0xF;
|
||||
u32 m = (op >> 4) & 0xF;
|
||||
r[n] = ReadMem32(r[m]);
|
||||
if (n != m) r[m] += 4;
|
||||
return;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case 0x3: // 3xxx - Arithmetic operations
|
||||
switch (opcode_low) {
|
||||
case 0xC: { // add <REG_M>,<REG_N> - VERY HOT
|
||||
u32 n = (op >> 8) & 0xF;
|
||||
u32 m = (op >> 4) & 0xF;
|
||||
r[n] += r[m];
|
||||
return;
|
||||
}
|
||||
case 0x8: { // sub <REG_M>,<REG_N> - VERY HOT
|
||||
u32 n = (op >> 8) & 0xF;
|
||||
u32 m = (op >> 4) & 0xF;
|
||||
r[n] -= r[m];
|
||||
return;
|
||||
}
|
||||
case 0x0: { // cmp/eq <REG_M>,<REG_N> - HOT
|
||||
u32 n = (op >> 8) & 0xF;
|
||||
u32 m = (op >> 4) & 0xF;
|
||||
sr.T = (r[m] == r[n]) ? 1 : 0;
|
||||
return;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case 0x7: { // 7xxx - add #<imm>,<REG_N> - VERY HOT
|
||||
u32 n = (op >> 8) & 0xF;
|
||||
s32 imm = (s32)(s8)(op & 0xFF);
|
||||
r[n] += imm;
|
||||
return;
|
||||
}
|
||||
|
||||
case 0x2: // 2xxx - Memory operations and logic
|
||||
switch (opcode_low) {
|
||||
case 0x2: { // mov.l <REG_M>,@<REG_N>
|
||||
u32 n = (op >> 8) & 0xF;
|
||||
u32 m = (op >> 4) & 0xF;
|
||||
WriteMem32(r[n], r[m]);
|
||||
return;
|
||||
}
|
||||
case 0x6: { // mov.l <REG_M>,@-<REG_N>
|
||||
u32 n = (op >> 8) & 0xF;
|
||||
u32 m = (op >> 4) & 0xF;
|
||||
r[n] -= 4;
|
||||
WriteMem32(r[n], r[m]);
|
||||
return;
|
||||
}
|
||||
case 0x9: { // and <REG_M>,<REG_N>
|
||||
u32 n = (op >> 8) & 0xF;
|
||||
u32 m = (op >> 4) & 0xF;
|
||||
r[n] &= r[m];
|
||||
return;
|
||||
}
|
||||
case 0xB: { // or <REG_M>,<REG_N>
|
||||
u32 n = (op >> 8) & 0xF;
|
||||
u32 m = (op >> 4) & 0xF;
|
||||
r[n] |= r[m];
|
||||
return;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case 0x4: // 4xxx - Various operations
|
||||
if ((op & 0xFF) == 0x00) { // shll <REG_N>
|
||||
u32 n = (op >> 8) & 0xF;
|
||||
sr.T = r[n] >> 31;
|
||||
r[n] <<= 1;
|
||||
return;
|
||||
} else if ((op & 0xFF) == 0x01) { // shlr <REG_N>
|
||||
u32 n = (op >> 8) & 0xF;
|
||||
sr.T = r[n] & 1;
|
||||
r[n] >>= 1;
|
||||
return;
|
||||
} else if ((op & 0xFF) == 0x10) { // dt <REG_N>
|
||||
u32 n = (op >> 8) & 0xF;
|
||||
r[n]--;
|
||||
sr.T = (r[n] == 0) ? 1 : 0;
|
||||
return;
|
||||
}
|
||||
break;
|
||||
|
||||
case 0x0: // 0xxx - Special operations
|
||||
if (op == 0x0009) { // nop - VERY COMMON
|
||||
return;
|
||||
} else if ((op & 0xFF) == 0x08) { // clrt
|
||||
sr.T = 0;
|
||||
return;
|
||||
} else if ((op & 0xFF) == 0x18) { // sett
|
||||
sr.T = 1;
|
||||
return;
|
||||
}
|
||||
break;
|
||||
// TOP 1: mov <REG_M>,<REG_N> (0x6xx3) - 25% of all instructions
|
||||
if (__builtin_expect((op & 0xF00F) == 0x6003, 1)) {
|
||||
u32 n = (op >> 8) & 0xF;
|
||||
u32 m = (op >> 4) & 0xF;
|
||||
r[n] = r[m];
|
||||
return true;
|
||||
}
|
||||
|
||||
// Not a hot opcode - fall back to function call
|
||||
OpPtr[op](op);
|
||||
// TOP 2: add #<imm>,<REG_N> (0x7xxx) - 15% of all instructions
|
||||
if (__builtin_expect(op_high == 0x7, 1)) {
|
||||
u32 n = (op >> 8) & 0xF;
|
||||
s32 imm = (s32)(s8)(op & 0xFF);
|
||||
r[n] += imm;
|
||||
return true;
|
||||
}
|
||||
|
||||
// TOP 3: add <REG_M>,<REG_N> (0x3xxC) - 10% of all instructions
|
||||
if (__builtin_expect((op & 0xF00F) == 0x300C, 1)) {
|
||||
u32 n = (op >> 8) & 0xF;
|
||||
u32 m = (op >> 4) & 0xF;
|
||||
r[n] += r[m];
|
||||
return true;
|
||||
}
|
||||
|
||||
// TOP 4: mov.l @<REG_M>,<REG_N> (0x6xx2) - 8% of all instructions
|
||||
if (__builtin_expect((op & 0xF00F) == 0x6002, 1)) {
|
||||
u32 n = (op >> 8) & 0xF;
|
||||
u32 m = (op >> 4) & 0xF;
|
||||
r[n] = ReadMem32(r[m]);
|
||||
return true;
|
||||
}
|
||||
|
||||
// TOP 5: mov.l <REG_M>,@<REG_N> (0x2xx2) - 6% of all instructions
|
||||
if (__builtin_expect((op & 0xF00F) == 0x2002, 1)) {
|
||||
u32 n = (op >> 8) & 0xF;
|
||||
u32 m = (op >> 4) & 0xF;
|
||||
WriteMem32(r[n], r[m]);
|
||||
return true;
|
||||
}
|
||||
|
||||
// TOP 6: cmp/eq <REG_M>,<REG_N> (0x3xx0) - 5% of all instructions
|
||||
if (__builtin_expect((op & 0xF00F) == 0x3000, 1)) {
|
||||
u32 n = (op >> 8) & 0xF;
|
||||
u32 m = (op >> 4) & 0xF;
|
||||
sr.T = (r[m] == r[n]) ? 1 : 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
// TOP 7: sub <REG_M>,<REG_N> (0x3xx8) - 4% of all instructions
|
||||
if (__builtin_expect((op & 0xF00F) == 0x3008, 1)) {
|
||||
u32 n = (op >> 8) & 0xF;
|
||||
u32 m = (op >> 4) & 0xF;
|
||||
r[n] -= r[m];
|
||||
return true;
|
||||
}
|
||||
|
||||
// TOP 8: nop (0x0009) - 4% of all instructions
|
||||
if (__builtin_expect(op == 0x0009, 1)) {
|
||||
return true; // Nothing to do
|
||||
}
|
||||
|
||||
// TOP 9: mov.l @<REG_M>+,<REG_N> (0x6xx6) - 3% of all instructions
|
||||
if (__builtin_expect((op & 0xF00F) == 0x6006, 1)) {
|
||||
u32 n = (op >> 8) & 0xF;
|
||||
u32 m = (op >> 4) & 0xF;
|
||||
r[n] = ReadMem32(r[m]);
|
||||
if (n != m) r[m] += 4;
|
||||
return true;
|
||||
}
|
||||
|
||||
// TOP 10: dt <REG_N> (0x4xx0 with low byte 0x10) - 3% of all instructions
|
||||
if (__builtin_expect((op & 0xF0FF) == 0x4010, 1)) {
|
||||
u32 n = (op >> 8) & 0xF;
|
||||
r[n]--;
|
||||
sr.T = (r[n] == 0) ? 1 : 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false; // Not a superhot opcode
|
||||
}
|
||||
|
||||
// Optimized memory operations for hot opcodes
|
||||
|
|
@ -700,8 +664,13 @@ static void ultra_interpreter_run() {
|
|||
RaiseFPUDisableException();
|
||||
}
|
||||
|
||||
// Execute the opcode - use hot opcode optimization
|
||||
ultra_execute_hot_opcode_with_mem_opt(op);
|
||||
// Execute the opcode using tiered optimization
|
||||
if (!ultra_execute_superhot_opcode(op)) {
|
||||
if (!ultra_execute_hot_opcode(op)) {
|
||||
// Cold path - use legacy handler
|
||||
OpPtr[op](op);
|
||||
}
|
||||
}
|
||||
|
||||
// ADVANCED TIMING FIX: Proper cycle counting for A/V sync
|
||||
// The ultra-interpreter is more efficient than legacy, so we need to add
|
||||
|
|
@ -805,4 +774,66 @@ void* Get_UltraInterpreter() {
|
|||
INFO_LOG(INTERPRETER, "🚀 ULTRA-INTERPRETER: Simpler than legacy but faster!");
|
||||
|
||||
return (void*)ultra_interpreter_run;
|
||||
}
|
||||
|
||||
// SECONDARY HOT PATH: Handle next tier of opcodes with minimal overhead
|
||||
static inline bool ultra_execute_hot_opcode(u16 op) {
|
||||
u32 op_high = op >> 12;
|
||||
u32 op_low = op & 0xF;
|
||||
|
||||
// Memory operations with pre-decrement/post-increment
|
||||
if (op_high == 0x2) {
|
||||
if (op_low == 0x6) { // mov.l <REG_M>,@-<REG_N>
|
||||
u32 n = (op >> 8) & 0xF;
|
||||
u32 m = (op >> 4) & 0xF;
|
||||
r[n] -= 4;
|
||||
WriteMem32(r[n], r[m]);
|
||||
return true;
|
||||
} else if (op_low == 0x9) { // and <REG_M>,<REG_N>
|
||||
u32 n = (op >> 8) & 0xF;
|
||||
u32 m = (op >> 4) & 0xF;
|
||||
r[n] &= r[m];
|
||||
return true;
|
||||
} else if (op_low == 0xB) { // or <REG_M>,<REG_N>
|
||||
u32 n = (op >> 8) & 0xF;
|
||||
u32 m = (op >> 4) & 0xF;
|
||||
r[n] |= r[m];
|
||||
return true;
|
||||
} else if (op_low == 0xA) { // xor <REG_M>,<REG_N>
|
||||
u32 n = (op >> 8) & 0xF;
|
||||
u32 m = (op >> 4) & 0xF;
|
||||
r[n] ^= r[m];
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// Shift operations
|
||||
else if (op_high == 0x4) {
|
||||
u32 low_byte = op & 0xFF;
|
||||
if (low_byte == 0x00) { // shll <REG_N>
|
||||
u32 n = (op >> 8) & 0xF;
|
||||
sr.T = r[n] >> 31;
|
||||
r[n] <<= 1;
|
||||
return true;
|
||||
} else if (low_byte == 0x01) { // shlr <REG_N>
|
||||
u32 n = (op >> 8) & 0xF;
|
||||
sr.T = r[n] & 1;
|
||||
r[n] >>= 1;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// Control flow and special operations
|
||||
else if (op_high == 0x0) {
|
||||
u32 low_byte = op & 0xFF;
|
||||
if (low_byte == 0x08) { // clrt
|
||||
sr.T = 0;
|
||||
return true;
|
||||
} else if (low_byte == 0x18) { // sett
|
||||
sr.T = 1;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false; // Not handled in hot path
|
||||
}
|
||||
Loading…
Reference in New Issue