#!/usr/bin/env python3
"""
|
||
gen_sh4_ir_tables.py – quick-and-dirty generator for Flycast's IR interpreter.
|
||
|
||
Reads core/hw/sh4/sh4_opcode_list.cpp (legacy table) and emits two C++ include
|
||
fragments:
|
||
• ir_emitter_table.inc // opcode -> Op enum name (or ILLEGAL)
|
||
• ir_executor_table.inc // opcode -> Exec helper function/lambda name
|
||
|
||
The legacy file already defines an OpCallFP* array `OpPtr` and an `OpDesc`
|
||
lookup with metadata (mnemonic, flags). We mine these for the mnemonic so we
|
||
can map them directly to IR Op enum names following a simple rule:
|
||
|
||
mnemonic "MOV.L" -> enum string "MOV_L"
|
||
mnemonic "ADD" -> "ADD"
|
||
mnemonic with slash ("CMP/HS") drops the slash -> "CMP_HS"
|
||
|
||
Anything we cannot match goes to ILLEGAL and will therefore fall back to the
|
||
legacy interpreter.
|
||
|
||
This is NOT a perfect parser – it relies on the fairly consistent formatting
|
||
of sh4_opcode_list.cpp. Good enough to bootstrap – we can hand-fix corner
|
||
cases.
|
||
"""
|
||
import re
|
||
from pathlib import Path
|
||
import argparse
|
||
import sys
|
||
|
||
# Project root: parents[1] of the resolved script path, i.e. the parent of the
# directory that contains this script.
ROOT = Path(__file__).resolve().parents[1]  # project root (flycast-jitless)

# --- Inputs -----------------------------------------------------------------
# Legacy SH4 opcode table the mnemonics are mined from.
LEGACY_SRC = ROOT / 'core/hw/sh4/sh4_opcode_list.cpp'
# Hand-written IR definitions; scanned for the `enum class Op` body.
OPDEFS_SRC = ROOT / 'core/hw/sh4/ir/ir_defs.h'

# --- Outputs (generated C++ include fragments) --------------------------------
EMITTER_INC = ROOT / 'core/hw/sh4/ir/ir_emitter_table.inc'
EXECUTOR_INC = ROOT / 'core/hw/sh4/ir/ir_executor_table.inc'
OPNAMES_INC = ROOT / 'core/hw/sh4/ir/ir_opnames.inc'
ENUM_INC = ROOT / 'core/hw/sh4/ir/ir_defs_auto.inc'
|
||
|
||
# Matches one initializer entry of the legacy opcode table, e.g.
#   {0 , i0000..., Mask_n , 0x200C , Normal , "cmp/str <REG_M>,<REG_N>" , ...}
#
#   group(1) – the four hex digits of the first `0xNNNN` field that is
#              preceded and followed by a comma (without the "0x" prefix)
#   group(2) – the contents of the first double-quoted string after it
#              (the disassembly text, mnemonic first)
#
# `[^\}]` keeps a match from running past the closing brace of the entry.
entry_re = re.compile(r'\{[^\}]*?,\s*0x([0-9A-Fa-f]{4})\s*,[^\}]*?"([^"]+)"')
|
||
|
||
def sanitize_mnemonic(text: str) -> str:
    """Turn a legacy disassembly string into an IR ``Op`` enum identifier.

    Only the first whitespace-delimited word of *text* (the mnemonic) is
    used; operands are ignored.  Within that word ``.`` and ``/`` become
    ``_`` and ``#`` becomes ``IMM``; the result is upper-cased.  Note that
    the size suffix is kept, not dropped: ``"mov.l ..."`` -> ``"MOV_L"``.

    Args:
        text: Disassembly text such as ``"cmp/hs R1,R2"``.

    Returns:
        The sanitized, upper-case mnemonic, or ``'ILLEGAL'`` when *text* is
        empty or whitespace-only (there is no mnemonic to map).
    """
    words = text.split()
    if not words:
        # No mnemonic at all: use the generator's "cannot match" bucket
        # instead of crashing with IndexError.
        return 'ILLEGAL'
    mnem = words[0]
    mnem = mnem.replace('.', '_').replace('/', '_').replace('#', 'IMM')
    return mnem.upper()
|
||
|
||
|
||
def parse_legacy() -> dict:
    """Build the opcode -> enum-name table from the legacy opcode list.

    Scans ``LEGACY_SRC`` line by line for table entries matching
    ``entry_re`` and maps each entry's 16-bit opcode literal to the
    sanitized mnemonic taken from its disassembly string.

    NOTE(review): the key is the literal ``0xNNNN`` value of the entry only;
    no expansion by the entry's mask field is done here - confirm that this
    is what the consumers of the table expect.

    Returns:
        dict mapping ``int`` opcode -> ``str`` enum name.  When the same
        opcode value appears more than once, the first occurrence wins.
    """
    tbl = {}
    with open(LEGACY_SRC, 'r', encoding='utf-8') as f:
        for line in f:
            # finditer (not search) so that several entries sharing one
            # source line are all picked up, not just the first.
            for m in entry_re.finditer(line):
                opcode = int(m.group(1), 16)
                mnemonic = sanitize_mnemonic(m.group(2))
                # first occurrence wins; ignore duplicates with a different
                # mnemonic but the same opcode value
                if opcode not in tbl:
                    tbl[opcode] = mnemonic
    return tbl
|
||
|
||
|
||
def parse_manual_names(path=None) -> list:
    """Collect the hand-written enumerator names of ``enum class Op``.

    The header is scanned from the ``enum class Op`` line up to the closing
    brace of the enum.  Parsing deliberately continues past the
    ``#include "ir_defs_auto.inc"`` line so that any manually-defined
    opcodes *after* the auto-generated include (e.g. the R0-offset
    LOAD/STORE helpers) are treated as manual and therefore excluded from
    the auto list.

    Per line, preprocessor directives are skipped, ``/* ... */`` (single
    line) and ``//`` comments are removed, the remainder is split on commas,
    any ``= value`` initializer is dropped, and only tokens that are valid
    C identifiers are kept.  Multi-line block comments are not handled.

    Args:
        path: Header to scan; defaults to ``OPDEFS_SRC`` when ``None``.

    Returns:
        Enumerator names in source order.
    """
    source = OPDEFS_SRC if path is None else path

    header = re.compile(r'\benum\s+class\s+Op\b')
    identifier = re.compile(r'[A-Za-z_]\w*')
    block_comment = re.compile(r'/\*.*?\*/')

    names = []
    inside = False   # the `enum class Op` header has been seen
    opened = False   # the enum's opening '{' has been seen
    with open(source, 'r', encoding='utf-8') as f:
        for line in f:
            if not inside:
                found = header.search(line)
                if found is None:
                    continue
                inside = True
                # Only what follows the header can belong to the enum body.
                line = line[found.end():]

            # Preprocessor lines (including the ir_defs_auto.inc include)
            # never carry enumerators; skip them but keep parsing.
            if line.lstrip().startswith('#'):
                continue

            # Strip comments so "NAME,  // text" yields just NAME.
            line = block_comment.sub(' ', line).split('//', 1)[0]

            if not opened:
                # Discard everything up to the opening brace (underlying
                # type, or a brace that sits on its own line).
                _, brace, line = line.partition('{')
                if not brace:
                    continue
                opened = True

            body, closing, _ = line.partition('}')
            for piece in body.split(','):
                token = piece.split('=')[0].strip()
                if identifier.fullmatch(token):
                    names.append(token)
            if closing:
                break
    return names
|
||
|
||
|
||
def gather_helper_names(manual_names: list, mapping: dict) -> list:
    """Return a sorted list of opcode names that should be auto-generated.

    This consists of:
      1. All sanitized mnemonics from the SH4 legacy opcode list
         (the values of ``mapping``).
      2. Internal helper opcodes required by the IR that do not come from the
         legacy table (e.g. ILLEGAL, MOV_REG, etc.).

    Every name already present in ``manual_names`` is removed from *both*
    groups, so an opcode that is defined by hand is never emitted a second
    time by the generator.  ``NUM_OPS`` is never auto-generated.

    The resulting list is de-duplicated and sorted to get deterministic
    output.

    Args:
        manual_names: Enumerator names defined by hand in the Op enum.
        mapping: opcode -> sanitized mnemonic, as built by the legacy parser.

    Returns:
        Sorted list of unique names to auto-generate.
    """
    # Set for O(1) membership and set arithmetic below.
    manual = set(manual_names)

    # Extra internal IR opcodes not present in the SH4 legacy table
    extra_internal = {
        'ILLEGAL',
        'STSL_PR_PREDEC',
        'STSL_MACL_PREDEC',
        'MOV_REG', 'MOV_IMM', 'ADD_REG', 'ADD_IMM',
        'LOAD16_IMM', 'LOAD32_IMM', 'LOAD32_PC', 'LOAD16_PC',
        'JSR', 'RTS', 'BRA',
        'FMOV_LOAD_R0', 'FMOV_STORE_R0',
        # helper-only R0 offset variants
        'LOAD8_R0', 'STORE8_R0', 'STORE16_R0', 'STORE32_R0',
        'STORE8_R0_REG', 'STORE16_R0_REG', 'STORE32_R0_REG',
        'STORE8_Rm_R0RN', 'STORE16_Rm_R0RN', 'STORE32_Rm_R0RN',
        'LOAD16_R0', 'LOAD32_R0',
        'FMOV_STORE_PREDEC',
    }

    candidates = set(mapping.values()) | extra_internal
    # Drop hand-written names from the whole candidate set (legacy-derived
    # *and* internal helpers) to avoid duplicate enumerators.
    return sorted(candidates - manual - {'NUM_OPS'})
|
||
|
||
|
||
def write_opnames_inc(path: Path, names: list):
    """Write the ``kOpNames`` C string table to *path*.

    The file consists of the auto-generated banner, ``#pragma once`` and a
    ``static const char* kOpNames[]`` initializer holding one quoted entry
    per element of *names*, in the given order.  An existing file is
    overwritten.

    Args:
        path: Destination of the include fragment.
        names: Names to emit; written verbatim between double quotes.
    """
    # Assemble the complete text first so the file is written in one go.
    lines = [
        '// AUTO-GENERATED by tools/gen_sh4_ir_tables.py – DO NOT EDIT\n',
        '#pragma once\n',
        'static const char* kOpNames[] = {\n',
    ]
    lines.extend(f' "{n}",\n' for n in names)
    lines.append('};\n')

    with open(path, 'w', encoding='utf-8') as f:
        f.writelines(lines)
|
||
|
||
|
||
def write_inc(path: Path, mapping: dict, kind: str):
    """Write a full 65536-entry opcode lookup table to *path*.

    The table is a C array ``static const Op kSh4IrTbl_<kind>[0x10000]``
    indexed by the 16-bit opcode.  Opcodes missing from *mapping* are emitted
    as ``Op::ILLEGAL``.  Sixteen entries are placed on each output line, each
    prefixed with its opcode as a ``/*XXXX*/`` comment.

    *kind* only selects the array name; the element type and the contents
    are produced the same way for every kind.

    Args:
        path: Destination of the include fragment (overwritten).
        mapping: opcode (int) -> Op enumerator name (str).
        kind: Table suffix, 'EMIT' or 'EXEC'.
    """
    def entries():
        # One formatted cell per opcode; a newline closes every 16th cell.
        for op in range(0x10000):
            mnem = mapping.get(op, 'ILLEGAL')
            sep = '\n' if (op & 15) == 15 else ' '
            yield ' /*%04X*/ Op::%s,%s' % (op, mnem, sep)

    with open(path, 'w', encoding='utf-8') as f:
        f.write('// AUTO-GENERATED by tools/gen_sh4_ir_tables.py – DO NOT EDIT\n')
        f.write('#pragma once\n')
        f.write('// Mapping table: index = 16-bit opcode\n')
        f.write('static const Op kSh4IrTbl_%s[0x10000] = {\n' % kind)
        # writelines consumes the generator without 65536 separate calls
        # from this function.
        f.writelines(entries())
        f.write('};\n')
|
||
|
||
|
||
# -----------------------------------------------------------------------------
|
||
# Generate enum value include so Op enum stays in sync with tables.
|
||
|
||
def write_enum_inc(path: Path, names: list, start_index: int = 0):
    """Write explicit ``NAME = value,`` enumerator lines to *path*.

    After the auto-generated banner and ``#pragma once``, one line is written
    per element of *names*; values count up from *start_index* in list order.
    An existing file is overwritten.

    Args:
        path: Destination of the include fragment.
        names: Enumerator names to emit, in order.
        start_index: Value assigned to the first name.
    """
    with open(path, 'w', encoding='utf-8') as f:
        f.write('// AUTO-GENERATED by tools/gen_sh4_ir_tables.py – DO NOT EDIT\n')
        f.write('#pragma once\n')
        f.writelines(
            f'{n} = {idx},\n'
            for idx, n in enumerate(names, start=start_index)
        )
|
||
|
||
|
||
def main():
    """Regenerate every include fragment from the current sources.

    Steps:
      1. Parse the legacy opcode list into an opcode -> name mapping and
         write the emitter and executor tables from it.
      2. Read the hand-written Op enumerators, compute the names that must
         be auto-generated, and write the op-name table (manual names first,
         then generated ones) and the enum fragment, whose values start
         right after the manual names.

    Command line:
      --quiet   suppress the final status message.
    """
    ap = argparse.ArgumentParser(
        description='Generate the SH4 IR emitter/executor tables, op-name '
                    'table and Op enum fragment.')
    ap.add_argument('--quiet', action='store_true',
                    help='do not print the completion message')
    args = ap.parse_args()

    mapping = parse_legacy()
    write_inc(EMITTER_INC, mapping, 'EMIT')
    write_inc(EXECUTOR_INC, mapping, 'EXEC')

    manual_names = parse_manual_names()
    # helper names computed fresh each run
    helper_names = gather_helper_names(manual_names, mapping)
    write_opnames_inc(OPNAMES_INC, manual_names + helper_names)
    # Generated enumerators are numbered after the manual ones so that their
    # values line up with their positions in the op-name table.
    write_enum_inc(ENUM_INC, helper_names, start_index=len(manual_names))

    if not args.quiet:
        print('Generated emitter/executor tables and op names inc file')
|
||
|
||
# Run the generator only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|