flycast/tools/gen_sh4_ir_tables.py

178 lines
6.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
gen_sh4_ir_tables.py -- quick-and-dirty generator for Flycast's IR interpreter.
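Reads core/hw/sh4/sh4_opcode_list.cpp (legacy table) and core/hw/sh4/ir/ir_defs.h
(hand-written Op enumerators) and emits four C++ include fragments:
• ir_emitter_table.inc // opcode -> Op enum name (or ILLEGAL)
• ir_executor_table.inc // opcode -> Exec helper function/lambda name
• ir_opnames.inc // kOpNames[] string table: manual names, then generated names
• ir_defs_auto.inc // generated Op enumerators, written as "NAME = index,"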
The legacy file already defines an OpCallFP* array `OpPtr` and an `OpDesc`
lookup with metadata (mnemonic, flags). We mine these for the mnemonic so we
can map them directly to IR Op enum names following a simple rule:
mnemonic "MOV.L" -> enum string "MOV_L"
mnemonic "ADD" -> "ADD"
mnemonic with slash ("CMP/HS") has the slash replaced by an underscore -> "CMP_HS"
Anything we cannot match goes to ILLEGAL and will therefore fall back to the
legacy interpreter.
This is NOT a perfect parser -- it relies on the fairly consistent formatting
of sh4_opcode_list.cpp. Good enough to bootstrap; we can hand-fix corner
cases.
"""
import re
from pathlib import Path
import argparse
import sys
# All paths below are resolved relative to ROOT, which is two path components
# up from this script file (the directory containing tools/).
ROOT = Path(__file__).resolve().parents[1] # project root (flycast-jitless)
# Input: legacy opcode table, scanned line by line in parse_legacy().
LEGACY_SRC = ROOT / 'core/hw/sh4/sh4_opcode_list.cpp'
# Outputs: the two 64K-entry opcode -> Op tables written by write_inc().
EMITTER_INC = ROOT / 'core/hw/sh4/ir/ir_emitter_table.inc'
EXECUTOR_INC = ROOT / 'core/hw/sh4/ir/ir_executor_table.inc'
# Input: header holding the `enum class Op` body read by parse_manual_names().
OPDEFS_SRC = ROOT / 'core/hw/sh4/ir/ir_defs.h'
# Output: kOpNames[] string table written by write_opnames_inc().
OPNAMES_INC = ROOT / 'core/hw/sh4/ir/ir_opnames.inc'
# Output: generated "NAME = index," enumerators written by write_enum_inc().
ENUM_INC = ROOT / 'core/hw/sh4/ir/ir_defs_auto.inc'
# Match the initialization array entries:
# {0 , i0000..., Mask_n , 0x200C , Normal , "cmp/str <REG_M>,<REG_N>" , ...}
# Group 1 captures the four hex digits of the first `0x....` field that is
# followed by a comma; group 2 captures the contents of the first
# double-quoted string after it. Neither part may cross a closing brace, and
# the pattern is applied to one source line at a time.
entry_re = re.compile(r'\{[^\}]*?,\s*0x([0-9A-Fa-f]{4})\s*,[^\}]*?"([^"]+)"')
def sanitize_mnemonic(text: str) -> str:
    """Convert a legacy disassembly string into an Op enum style identifier.

    Only the first whitespace-delimited word of *text* is used; operands
    after it are ignored.  In that word every ``.`` and ``/`` becomes ``_``
    and every ``#`` becomes ``IMM``, then the result is upper-cased.  The
    size suffix is therefore kept, e.g. ``"mov.l @<REG_M>,<REG_N>"`` gives
    ``"MOV_L"`` and ``"cmp/str ..."`` gives ``"CMP_STR"``.

    Raises:
        IndexError: if *text* is empty or contains only whitespace.
    """
    # Single-pass character substitution; none of the replacement strings
    # contain a character that is itself substituted.
    substitutions = str.maketrans({'.': '_', '/': '_', '#': 'IMM'})
    first_word = text.split()[0]
    return first_word.translate(substitutions).upper()
def parse_legacy() -> dict:
    """Scan the legacy opcode list and map opcode keys to sanitized mnemonics.

    ``LEGACY_SRC`` is read one line at a time and each line is searched with
    ``entry_re``; lines that do not match are skipped.  For a match, the
    captured hex field becomes the integer key and the captured string is
    passed through ``sanitize_mnemonic`` to produce the value.

    Only the literal key value found in the source is recorded.  When the
    same key appears more than once, the first entry encountered is kept.

    Returns:
        dict mapping ``int`` opcode key -> ``str`` sanitized mnemonic.
    """
    opcode_names = {}
    with open(LEGACY_SRC, 'r', encoding='utf-8') as src:
        for row in src:
            hit = entry_re.search(row)
            if hit is None:
                continue
            key_hex, disasm = hit.groups()
            name = sanitize_mnemonic(disasm)
            # setdefault leaves an existing entry untouched, so the first
            # occurrence of a key wins over later duplicates.
            opcode_names.setdefault(int(key_hex, 16), name)
    return opcode_names
def parse_manual_names(path=None) -> list:
    """Return the hand-written enumerator names of ``enum class Op``.

    The file is scanned for the first line containing ``enum class Op``;
    every following line up to (not including) the first line containing
    ``};`` is treated as part of the enum body.

    Args:
        path: header file to read.  Defaults to ``OPDEFS_SRC`` when omitted
            or ``None``.

    Returns:
        Enumerator names in source order.  Names that appear after the
        ``ir_defs_auto.inc`` include are collected too, so that callers can
        exclude every hand-written name from the generated list.

    Per body line:
      * a trailing ``//`` comment is removed before tokenizing, so
        ``FOO, // note`` yields ``FOO`` rather than ``FOO, // note``;
      * preprocessor lines (including the ``ir_defs_auto.inc`` include) are
        skipped instead of being recorded as names;
      * several comma-separated enumerators on one line are recorded
        individually, each with any ``= value`` initializer removed;
      * anything that is not a plain identifier (e.g. a lone ``{``) is
        ignored.
    """
    source = OPDEFS_SRC if path is None else path
    names = []
    inside = False
    with open(source, 'r', encoding='utf-8') as f:
        for raw in f:
            if not inside:
                # The line carrying the enum header itself is never parsed
                # for enumerators.
                inside = 'enum class Op' in raw
                continue
            # Strip a trailing line comment first so that comment text can
            # neither end the scan nor leak into a name.
            code = raw.split('//', 1)[0].strip()
            if '};' in code:
                break
            # Preprocessor directives carry no enumerators; this also skips
            # the include of the auto-generated fragment.
            if not code or code.startswith('#'):
                continue
            # An opening brace may sit on its own line or precede the first
            # enumerator.
            code = code.lstrip('{')
            for item in code.split(','):
                token = item.split('=')[0].strip()
                if token.isidentifier():
                    names.append(token)
    return names
def gather_helper_names(manual_names: list, mapping: dict) -> list:
    """Return a sorted list of opcode names that should be auto-generated.

    The candidates are:
    1. All sanitized mnemonics from the SH4 legacy opcode list (the values
       of ``mapping``).
    2. Internal helper opcodes required by the IR that do not come from the
       legacy table (e.g. ILLEGAL, MOV_REG, etc.).

    Every name already present in ``manual_names`` is then removed from the
    combined set.  This applies to the internal helper names as well:
    emitting a name that is also hand-written in the enum would produce a
    duplicate enumerator and a duplicate ``kOpNames`` entry.  ``NUM_OPS`` is
    never generated.

    Args:
        manual_names: enumerator names that are hand-written in the Op enum.
        mapping: opcode -> sanitized mnemonic, as built from the legacy table.

    Returns:
        De-duplicated names, sorted to get deterministic output.
    """
    # Build the exclusion set once instead of scanning the list per name.
    manual = set(manual_names)
    legacy_names = set(mapping.values())
    # Extra internal IR opcodes not present in the SH4 legacy table
    extra_internal = {
        'ILLEGAL',
        'STSL_PR_PREDEC',
        'STSL_MACL_PREDEC',
        'MOV_REG', 'MOV_IMM', 'ADD_REG', 'ADD_IMM',
        'LOAD16_IMM', 'LOAD32_IMM', 'LOAD32_PC', 'LOAD16_PC',
        'JSR', 'RTS', 'BRA',
        'FMOV_LOAD_R0', 'FMOV_STORE_R0',
        # helper-only R0 offset variants
        'LOAD8_R0', 'STORE8_R0', 'STORE16_R0', 'STORE32_R0',
        'STORE8_R0_REG', 'STORE16_R0_REG', 'STORE32_R0_REG',
        'STORE8_Rm_R0RN', 'STORE16_Rm_R0RN', 'STORE32_Rm_R0RN',
        'LOAD16_R0', 'LOAD32_R0',
        'FMOV_STORE_PREDEC',
    }
    return sorted((legacy_names | extra_internal) - manual - {'NUM_OPS'})
def write_opnames_inc(path: Path, names: list):
    """Write the ``kOpNames`` C string table to *path*.

    The file is overwritten.  It contains a generated-file banner,
    ``#pragma once``, and a ``static const char* kOpNames[]`` initializer
    holding one quoted entry per element of *names*, in the given order.
    """
    preamble = (
        '// AUTO-GENERATED by tools/gen_sh4_ir_tables.py DO NOT EDIT\n',
        '#pragma once\n',
        'static const char* kOpNames[] = {\n',
    )
    with open(path, 'w', encoding='utf-8') as out:
        out.writelines(preamble)
        out.writelines(f' "{name}",\n' for name in names)
        out.write('};\n')
def write_inc(path: Path, mapping: dict, kind: str):
    """Write a full 16-bit opcode lookup table to *path*.

    Args:
        path: output file; overwritten.
        mapping: opcode (int) -> Op enumerator name.  Opcodes that are not
            keys of *mapping* are emitted as ``ILLEGAL``.
        kind: 'EMIT' or 'EXEC'; used as the suffix of the array name
            ``kSh4IrTbl_<kind>``.

    The array has 0x10000 entries, one per opcode value in ascending order,
    each preceded by its index as a hex comment.  A newline is emitted after
    every 16th entry.
    """
    def entries():
        # One formatted cell per opcode; the separator ends the row after
        # opcodes whose low four bits are all set.
        for code in range(0x10000):
            separator = '\n' if code % 16 == 15 else ' '
            yield ' /*%04X*/ Op::%s,%s' % (code, mapping.get(code, 'ILLEGAL'), separator)

    with open(path, 'w', encoding='utf-8') as out:
        out.write('// AUTO-GENERATED by tools/gen_sh4_ir_tables.py DO NOT EDIT\n')
        out.write('#pragma once\n')
        out.write('// Mapping table: index = 16-bit opcode\n')
        out.write('static const Op kSh4IrTbl_%s[0x10000] = {\n' % kind)
        out.writelines(entries())
        out.write('};\n')
# -----------------------------------------------------------------------------
# Generate enum value include so Op enum stays in sync with tables.
def write_enum_inc(path: Path, names: list, start_index: int = 0):
    """Write ``NAME = index,`` enumerator lines for *names* to *path*.

    The file is overwritten and begins with a generated-file banner and
    ``#pragma once``.  Indices are assigned consecutively in the order of
    *names*, the first name receiving *start_index*.
    """
    numbered = enumerate(names, start=start_index)
    with open(path, 'w', encoding='utf-8') as out:
        out.write('// AUTO-GENERATED by tools/gen_sh4_ir_tables.py DO NOT EDIT\n')
        out.write('#pragma once\n')
        out.writelines(f'{name} = {value},\n' for value, name in numbered)
def main():
    """Regenerate every include fragment from the legacy table and the enum.

    Steps, in order:
      1. Parse the legacy opcode list and write the emitter and executor
         tables; both are produced from the same opcode mapping.
      2. Read the hand-written Op enumerator names, compute the names to
         generate, and write the op-name table (manual names followed by
         generated names) and the generated enumerator fragment, whose
         indices start at the number of manual names.

    Command line: ``--quiet`` suppresses the final status message.

    NOTE(review): the generated indices and the kOpNames order both assume
    the generated enumerators come after all manual ones; if ir_defs.h has
    manual enumerators after the auto include, confirm their values still
    line up with kOpNames.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--quiet', action='store_true')
    options = parser.parse_args()

    opcode_map = parse_legacy()
    for target, kind in ((EMITTER_INC, 'EMIT'), (EXECUTOR_INC, 'EXEC')):
        write_inc(target, opcode_map, kind)

    manual = parse_manual_names()
    # helper names computed fresh each run
    generated = gather_helper_names(manual, opcode_map)
    write_opnames_inc(OPNAMES_INC, manual + generated)
    write_enum_inc(ENUM_INC, generated, start_index=len(manual))

    if options.quiet:
        return
    print('Generated emitter/executor tables and op names inc file')


if __name__ == '__main__':
    main()