convert back to using backpatches and trampolines vs recompiling for fastmem

Anthony Pesch 2017-08-16 11:26:45 -04:00
parent 6b77942ee4
commit 4321b43ccc
13 changed files with 214 additions and 265 deletions

View File

@@ -360,6 +360,17 @@ else()
set(REDREAM_FLAGS ${RELIB_FLAGS})
endif()
# fastmem can be troublesome when running under gdb or lldb. when debugging,
# the debugger's SIGSEGV handling can be disabled entirely with:
# handle SIGSEGV nostop noprint pass
# however, legitimate SIGSEGVs will then also go unhandled by the debugger.
# as of this writing, there is no way to configure the debugger to ignore the
# signal initially, let us try to handle it, and then step in only when we do
# not (e.g. because it was not a fastmem-related segfault). because of this,
# fastmem is disabled by default for debug builds to avoid headaches
list(APPEND REDREAM_DEFS $<$<NOT:$<CONFIG:Debug>>:HAVE_FASTMEM>)
source_group_by_dir(REDREAM_SOURCES)
if(BUILD_LIBRETRO OR ANDROID)
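
To illustrate the interaction described in the comment above, here is a minimal POSIX sketch (not redream's exception code) of claiming SIGSEGV, handling only faults inside a hypothetical fastmem region, and re-raising everything else with the default disposition so a debugger still sees legitimate crashes:

/* region bounds and names are hypothetical */
#include <signal.h>
#include <stdint.h>
#include <string.h>

static uint8_t *fastmem_base;                    /* hypothetical guest region */
static const uint64_t fastmem_size = UINT64_C(1) << 32;

static void segv_handler(int sig, siginfo_t *info, void *ctx) {
  (void)ctx;
  uint8_t *fault = (uint8_t *)info->si_addr;
  if (fault >= fastmem_base && fault < fastmem_base + fastmem_size) {
    /* fastmem fault: this is where the jit would patch the access before
       returning, so the faulting thread can resume */
    return;
  }
  /* not ours: fall back to the default action and re-raise */
  signal(sig, SIG_DFL);
  raise(sig);
}

static void install_segv_handler(void) {
  struct sigaction sa;
  memset(&sa, 0, sizeof(sa));
  sigemptyset(&sa.sa_mask);
  sa.sa_sigaction = segv_handler;
  sa.sa_flags = SA_SIGINFO;
  sigaction(SIGSEGV, &sa, NULL);
}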

View File

@@ -2194,6 +2194,38 @@ public:
}
#endif
void nop(int size = 1)
{
/*
AMD and Intel both agree on the same multibyte nops for up to 9 bytes:
https://support.amd.com/TechDocs/55723_SOG_Fam_17h_Processors_3.00.pdf
https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
*/
static const uint8_t nopSeq[9][9] = {
{0x90},
{0x66, 0x90},
{0x0F, 0x1F, 0x00},
{0x0F, 0x1F, 0x40, 0x00},
{0x0F, 0x1F, 0x44, 0x00, 0x00},
{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
};
static const int numSeq = sizeof(nopSeq) / sizeof(nopSeq[0]);
while (size) {
int len = size > numSeq ? numSeq : size;
const uint8_t *seq = nopSeq[len - 1];
for (int i = 0; i < len; i++) {
db(seq[i]);
}
size -= len;
}
}
#ifndef XBYAK_DONT_READ_LIST
#include "xbyak_mnemonic.h"
void align(int x = 16)
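
The size parameter exists so callers can pad emitted code out to a fixed-length patch slot in a single call, which is how the fastmem emitters below use it. A hypothetical standalone example of the overload (not part of the commit):

#include "xbyak/xbyak.h"

/* pad a 2-byte instruction out to a 6-byte slot: nop(4) emits the single
   recommended 4-byte sequence 0F 1F 40 00 instead of four 0x90 bytes */
static void emit_padded(Xbyak::CodeGenerator &e) {
  uint8_t *begin = e.getCurr<uint8_t *>();
  e.mov(e.eax, e.ebx);
  int padding = 6 - (int)(e.getCurr<uint8_t *>() - begin);
  e.nop(padding);
}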

View File

@@ -401,7 +401,6 @@ void cwd() { db(0x66); db(0x99); }
void cwde() { db(0x98); }
void lahf() { db(0x9F); }
void lock() { db(0xF0); }
void nop() { db(0x90); }
void sahf() { db(0x9E); }
void stc() { db(0xF9); }
void std() { db(0xFD); }

View File

@@ -296,91 +296,6 @@ void x64_backend_block_label(char *name, size_t size, struct ir_block *block) {
snprintf(name, size, ".%p", block);
}
static void x64_backend_emit_thunks(struct x64_backend *backend) {
auto &e = *backend->codegen;
{
for (int i = 0; i < 16; i++) {
Xbyak::Reg64 dst(i);
e.align(32);
backend->load_thunk[i] = e.getCurr<void (*)()>();
/* save caller-saved registers and offset the stack an extra 8 bytes to
align it */
#if PLATFORM_WINDOWS
e.push(e.rsi);
e.push(e.rdi);
#else
e.push(e.r8);
e.push(e.r9);
#endif
e.push(e.r10);
e.push(e.r11);
e.sub(e.rsp, X64_STACK_SHADOW_SPACE + 8);
/* call the mmio handler */
e.call(e.rax);
/* restore caller-saved registers */
e.add(e.rsp, X64_STACK_SHADOW_SPACE + 8);
e.pop(e.r11);
e.pop(e.r10);
#if PLATFORM_WINDOWS
e.pop(e.rdi);
e.pop(e.rsi);
#else
e.pop(e.r9);
e.pop(e.r8);
#endif
/* save mmio handler result */
e.mov(dst, e.rax);
/* return to jit code */
e.ret();
}
}
{
e.align(32);
backend->store_thunk = e.getCurr<void (*)()>();
/* save caller-saved registers and offset the stack an extra 8 bytes to
align it */
#if PLATFORM_WINDOWS
e.push(e.rsi);
e.push(e.rdi);
#else
e.push(e.r8);
e.push(e.r9);
#endif
e.push(e.r10);
e.push(e.r11);
e.sub(e.rsp, X64_STACK_SHADOW_SPACE + 8);
/* call the mmio handler */
e.call(e.rax);
/* restore caller-saved registers */
e.add(e.rsp, X64_STACK_SHADOW_SPACE + 8);
e.pop(e.r11);
e.pop(e.r10);
#if PLATFORM_WINDOWS
e.pop(e.rdi);
e.pop(e.rsi);
#else
e.pop(e.r9);
e.pop(e.r8);
#endif
/* return to jit code */
e.ret();
}
}
static void x64_backend_emit_constants(struct x64_backend *backend) {
auto &e = *backend->codegen;
@@ -409,94 +324,152 @@ static void x64_backend_emit_constants(struct x64_backend *backend) {
e.dq(INT64_C(0x8000000000000000));
}
static void x64_backend_push_caller_saved(struct x64_backend *backend) {
auto &e = *backend->codegen;
#if PLATFORM_WINDOWS
e.push(e.rsi);
e.push(e.rdi);
#else
e.push(e.r8);
e.push(e.r9);
#endif
e.push(e.r10);
e.push(e.r11);
}
static void x64_backend_pop_caller_saved(struct x64_backend *backend) {
auto &e = *backend->codegen;
e.pop(e.r11);
e.pop(e.r10);
#if PLATFORM_WINDOWS
e.pop(e.rdi);
e.pop(e.rsi);
#else
e.pop(e.r9);
e.pop(e.r8);
#endif
}
static int x64_backend_handle_exception(struct jit_backend *base,
struct exception_state *ex) {
struct x64_backend *backend = container_of(base, struct x64_backend, base);
struct jit_guest *guest = backend->base.guest;
const uint8_t *data = (const uint8_t *)ex->thread_state.rip;
/* figure out the guest address that was being accessed */
const uint8_t *fault_addr = (const uint8_t *)ex->fault_addr;
const uint8_t *protected_start = (const uint8_t *)ex->thread_state.r15;
uint32_t guest_addr = (uint32_t)(fault_addr - protected_start);
/* ensure it was an mmio address that caused the exception */
void *ptr;
guest->lookup(guest->space, guest_addr, &ptr, NULL, NULL, NULL, NULL);
if (ptr) {
return 0;
}
/* it's assumed a mov has triggered the exception */
struct x64_mov mov;
if (!x64_decode_mov(data, &mov)) {
if (!x64_decode_mov((const uint8_t *)ex->pc, &mov)) {
return 0;
}
/* instead of handling the mmio callback from inside of the exception
handler, force rip to the beginning of a thunk which will invoke the
callback once the exception handler has exited. this frees the callbacks
from any restrictions imposed by an exception handler, and also prevents
a possible recursive exception
/* ensure it was an mmio address that caused the exception */
uint32_t guest_addr = ex->thread_state.r[mov.base];
void *host_addr = NULL;
guest->lookup(guest->space, guest_addr, &host_addr, NULL, NULL, NULL, NULL);
if (host_addr) {
return 0;
}
push the return address (the next instruction after the current mov) to
the stack. each thunk will be responsible for pushing / popping caller-
saved registers */
ex->thread_state.rsp -= 8;
*(uint64_t *)(ex->thread_state.rsp) = ex->thread_state.rip + mov.length;
CHECK(ex->thread_state.rsp % 16 == 8);
/* create trampoline to handle the mmio access */
uint8_t *tramp = NULL;
if (mov.is_load) {
/* prep argument registers (memory object, guest_addr) for read function */
ex->thread_state.r[x64_arg0_idx] = (uint64_t)guest->space;
ex->thread_state.r[x64_arg1_idx] = (uint64_t)guest_addr;
try {
auto &e = *backend->codegen;
tramp = e.getCurr<uint8_t *>();
/* prep function call address for thunk */
switch (mov.operand_size) {
case 1:
ex->thread_state.rax = (uint64_t)guest->r8;
break;
case 2:
ex->thread_state.rax = (uint64_t)guest->r16;
break;
case 4:
ex->thread_state.rax = (uint64_t)guest->r32;
break;
case 8:
ex->thread_state.rax = (uint64_t)guest->r64;
break;
if (mov.is_load) {
/* setup stack frame */
x64_backend_push_caller_saved(backend);
if (X64_STACK_SHADOW_SPACE) {
e.sub(e.rsp, X64_STACK_SHADOW_SPACE);
}
/* call the mmio handler */
uint64_t handler = 0;
switch (mov.operand_size) {
case 1:
handler = (uint64_t)guest->r8;
break;
case 2:
handler = (uint64_t)guest->r16;
break;
case 4:
handler = (uint64_t)guest->r32;
break;
case 8:
handler = (uint64_t)guest->r64;
break;
}
e.mov(arg0, (uint64_t)guest->space);
e.mov(arg1, Xbyak::Reg64(mov.base));
e.mov(e.rax, handler);
e.call(e.rax);
/* destroy stack frame */
if (X64_STACK_SHADOW_SPACE) {
e.add(e.rsp, X64_STACK_SHADOW_SPACE);
}
x64_backend_pop_caller_saved(backend);
/* save mmio handler result */
e.mov(Xbyak::Reg64(mov.reg), e.rax);
} else {
/* setup stack frame */
x64_backend_push_caller_saved(backend);
if (X64_STACK_SHADOW_SPACE) {
e.sub(e.rsp, X64_STACK_SHADOW_SPACE);
}
/* call the mmio handler */
uint64_t handler = 0;
switch (mov.operand_size) {
case 1:
handler = (uint64_t)guest->w8;
break;
case 2:
handler = (uint64_t)guest->w16;
break;
case 4:
handler = (uint64_t)guest->w32;
break;
case 8:
handler = (uint64_t)guest->w64;
break;
}
e.mov(arg0, (uint64_t)guest->space);
e.mov(arg1, Xbyak::Reg64(mov.base));
if (mov.has_imm) {
e.mov(arg2, mov.imm);
} else {
e.mov(arg2, Xbyak::Reg64(mov.reg));
}
e.mov(e.rax, handler);
e.call(e.rax);
/* destroy stack frame */
if (X64_STACK_SHADOW_SPACE) {
e.add(e.rsp, X64_STACK_SHADOW_SPACE);
}
x64_backend_pop_caller_saved(backend);
}
/* resume execution in the thunk once the exception handler exits */
ex->thread_state.rip = (uint64_t)backend->load_thunk[mov.reg];
} else {
/* prep argument registers (memory object, guest_addr, value) for write
function */
ex->thread_state.r[x64_arg0_idx] = (uint64_t)guest->space;
ex->thread_state.r[x64_arg1_idx] = (uint64_t)guest_addr;
ex->thread_state.r[x64_arg2_idx] =
mov.has_imm ? mov.imm : ex->thread_state.r[mov.reg];
/* return back to the next instruction after the mmio access */
e.jmp((void *)(ex->pc + X64_SLOWMEM_PATCH_SIZE));
} catch (const Xbyak::Error &e) {
LOG_FATAL("x64 codegen failure: %s", e.what());
}
/* prep function call address for thunk */
switch (mov.operand_size) {
case 1:
ex->thread_state.rax = (uint64_t)guest->w8;
break;
case 2:
ex->thread_state.rax = (uint64_t)guest->w16;
break;
case 4:
ex->thread_state.rax = (uint64_t)guest->w32;
break;
case 8:
ex->thread_state.rax = (uint64_t)guest->w64;
break;
}
/* backpatch the call site to jump to the new trampoline */
try {
Xbyak::CodeGenerator e(X64_SLOWMEM_PATCH_SIZE, (void *)ex->pc);
uint8_t *begin = e.getCurr<uint8_t*>();
e.jmp(tramp);
/* resume execution in the thunk once the exception handler exits */
ex->thread_state.rip = (uint64_t)backend->store_thunk;
int padding = X64_SLOWMEM_PATCH_SIZE - (e.getCurr<uint8_t *>() - begin);
CHECK_GE(padding, 0);
e.nop(padding);
} catch (const Xbyak::Error &e) {
LOG_FATAL("x64 codegen failure: %s", e.what());
}
return 1;
@@ -677,11 +650,13 @@ static int x64_backend_assemble_code(struct jit_backend *base, struct ir *ir,
x64_backend_emit(backend, ir, emit_cb, emit_data);
} catch (const Xbyak::Error &e) {
if (e != Xbyak::ERR_CODE_IS_TOO_BIG) {
LOG_FATAL("x64 codegen failure, %s", e.what());
LOG_FATAL("x64 codegen failure: %s", e.what());
}
res = 0;
}
/* TODO return 0 if there's not enough extra room for a few slowmem patches */
/* return code address */
*addr = code;
*size = (int)(e.getCurr<uint8_t *>() - code);
@@ -752,7 +727,6 @@ struct jit_backend *x64_backend_create(struct jit_guest *guest, void *code,
/* emit initial thunks */
x64_dispatch_init(backend);
x64_dispatch_emit_thunks(backend);
x64_backend_emit_thunks(backend);
x64_backend_emit_constants(backend);
CHECK_LT(backend->codegen->getSize(), X64_THUNK_SIZE);
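
The backpatch at the bottom of the exception handler reduces to a small reusable pattern: construct an Xbyak::CodeGenerator directly over the patch slot and overwrite it in place. A sketch of that pattern in isolation (hypothetical helper; in practice the slot lives in the jit's code buffer, which must be writable as well as executable):

#include <stdint.h>
#include "xbyak/xbyak.h"

/* overwrite a 6-byte patch slot at `site` with a jmp to `trampoline`,
   padding any remaining bytes with nop (mirrors the handler above) */
static void patch_site(uint8_t *site, void *trampoline) {
  Xbyak::CodeGenerator e(6, site);  /* emit in place, at most 6 bytes */
  uint8_t *begin = e.getCurr<uint8_t *>();
  e.jmp(trampoline);                /* rel32 (or shorter) jmp */
  e.nop(6 - (int)(e.getCurr<uint8_t *>() - begin));
}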

View File

@@ -98,7 +98,18 @@ EMITTER(STORE_HOST, CONSTRAINTS(NONE, REG_I64, VAL_ALL)) {
x64_backend_store_mem(backend, dst, data);
}
EMITTER(LOAD_GUEST, CONSTRAINTS(REG_ALL, REG_I64 | IMM_I32)) {
EMITTER(LOAD_GUEST, CONSTRAINTS(REG_ALL, REG_I64)) {
#ifdef HAVE_FASTMEM
struct ir_value *dst = RES;
Xbyak::Reg addr = ARG0_REG;
uint8_t *begin = e.getCurr<uint8_t *>();
x64_backend_load_mem(backend, dst, addr.cvt64() + guestmem);
int padding = X64_SLOWMEM_PATCH_SIZE - (e.getCurr<uint8_t *>() - begin);
CHECK_GE(padding, 0);
e.nop(padding);
#else
struct jit_guest *guest = backend->base.guest;
Xbyak::Reg dst = RES_REG;
struct ir_value *addr = ARG0;
@@ -153,9 +164,21 @@ EMITTER(LOAD_GUEST, CONSTRAINTS(REG_ALL, REG_I64 | IMM_I32)) {
e.call((void *)fn);
e.mov(dst, e.rax);
}
#endif
}
EMITTER(STORE_GUEST, CONSTRAINTS(NONE, REG_I64 | IMM_I32, VAL_ALL)) {
EMITTER(STORE_GUEST, CONSTRAINTS(NONE, REG_I64, REG_ALL)) {
#ifdef HAVE_FASTMEM
Xbyak::Reg addr = ARG0_REG;
struct ir_value *data = ARG1;
uint8_t *begin = e.getCurr<uint8_t *>();
x64_backend_store_mem(backend, addr.cvt64() + guestmem, data);
int padding = X64_SLOWMEM_PATCH_SIZE - (e.getCurr<uint8_t *>() - begin);
CHECK_GE(padding, 0);
e.nop(padding);
#else
struct jit_guest *guest = backend->base.guest;
struct ir_value *addr = ARG0;
struct ir_value *data = ARG1;
@@ -210,20 +233,7 @@ EMITTER(STORE_GUEST, CONSTRAINTS(NONE, REG_I64 | IMM_I32, VAL_ALL)) {
x64_backend_mov_value(backend, arg2, data);
e.call((void *)fn);
}
}
EMITTER(LOAD_FAST, CONSTRAINTS(REG_ALL, REG_I64)) {
struct ir_value *dst = RES;
Xbyak::Reg addr = ARG0_REG;
x64_backend_load_mem(backend, dst, addr.cvt64() + guestmem);
}
EMITTER(STORE_FAST, CONSTRAINTS(NONE, REG_I64, VAL_ALL)) {
Xbyak::Reg addr = ARG0_REG;
struct ir_value *data = ARG1;
x64_backend_store_mem(backend, addr.cvt64() + guestmem, data);
#endif
}
EMITTER(LOAD_CONTEXT, CONSTRAINTS(REG_ALL, IMM_I32)) {
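
Both fastmem emitters form the host address as addr + guestmem, a base register pointing at a host reservation of the guest's 4 GB address space: RAM-backed pages hit memory directly, anything else faults into the exception handler above. A hedged sketch of how such a region is commonly set up on a POSIX host (illustrative layout and names, not redream's actual memory code):

#include <stdint.h>
#include <sys/mman.h>

/* reserve the full 32-bit guest address space with no permissions, then map
   real RAM into the range that backs it; accesses anywhere else (i.e. mmio)
   raise SIGSEGV. error checking omitted. */
static uint8_t *reserve_guest_space(size_t ram_offset, size_t ram_size) {
  uint8_t *base = (uint8_t *)mmap(NULL, UINT64_C(1) << 32, PROT_NONE,
                                  MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE,
                                  -1, 0);
  mmap(base + ram_offset, ram_size, PROT_READ | PROT_WRITE,
       MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
  return base;  /* the jit keeps this pointer in the guestmem register */
}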

View File

@@ -39,8 +39,6 @@ struct x64_backend {
void *dispatch_interrupt;
void (*dispatch_enter)(int32_t);
void *dispatch_exit;
void (*load_thunk[16])();
void (*store_thunk)();
/* debug stats */
csh capstone_handle;
@@ -57,9 +55,10 @@ struct x64_backend {
#else
#define X64_STACK_SHADOW_SPACE 0
#endif
#define X64_STACK_LOCALS (X64_STACK_SHADOW_SPACE + 8)
#define X64_SLOWMEM_PATCH_SIZE 6
#define X64_USE_AVX backend->use_avx
struct ir_value;
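
X64_SLOWMEM_PATCH_SIZE is the shared byte budget: the fastmem emitters pad each guest access up to it with nops, and the exception handler later overwrites it with a jmp to the trampoline. A rel32 jmp (E9 plus a 4-byte displacement) is 5 bytes, and the base+index mov forms emitted for fastmem appear to fit in 6, so the budget works out. A hypothetical sanity check for the common 32-bit load (not in the commit):

#include <assert.h>
#include "xbyak/xbyak.h"

/* r15/rsi stand in for the guestmem base and the guest address register */
static void check_patch_budget() {
  Xbyak::CodeGenerator e;
  size_t before = e.getSize();
  e.mov(e.eax, e.dword[e.r15 + e.rsi]);  /* 32-bit fastmem-style load */
  assert(e.getSize() - before <= 6);
}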

View File

@@ -447,23 +447,6 @@ void ir_store_guest(struct ir *ir, struct ir_value *addr, struct ir_value *v) {
ir_set_arg1(ir, instr, v);
}
struct ir_value *ir_load_fast(struct ir *ir, struct ir_value *addr,
enum ir_type type) {
CHECK_EQ(VALUE_I32, addr->type);
struct ir_instr *instr = ir_append_instr(ir, OP_LOAD_FAST, type);
ir_set_arg0(ir, instr, addr);
return instr->result;
}
void ir_store_fast(struct ir *ir, struct ir_value *addr, struct ir_value *v) {
CHECK_EQ(VALUE_I32, addr->type);
struct ir_instr *instr = ir_append_instr(ir, OP_STORE_FAST, VALUE_V);
ir_set_arg0(ir, instr, addr);
ir_set_arg1(ir, instr, v);
}
struct ir_value *ir_load_context(struct ir *ir, size_t offset,
enum ir_type type) {
CHECK_LE(offset + ir_type_size(type), IR_MAX_CONTEXT);

View File

@@ -291,10 +291,6 @@ struct ir_value *ir_load_guest(struct ir *ir, struct ir_value *addr,
enum ir_type type);
void ir_store_guest(struct ir *ir, struct ir_value *addr, struct ir_value *v);
struct ir_value *ir_load_fast(struct ir *ir, struct ir_value *addr,
enum ir_type type);
void ir_store_fast(struct ir *ir, struct ir_value *addr, struct ir_value *v);
/* context operations */
struct ir_value *ir_load_context(struct ir *ir, size_t offset,
enum ir_type type);

View File

@@ -2,10 +2,13 @@ IR_OP(SOURCE_INFO, 0)
IR_OP(FALLBACK, IR_FLAG_CALL)
IR_OP(LOAD_HOST, 0)
IR_OP(STORE_HOST, 0)
#ifdef HAVE_FASTMEM
IR_OP(LOAD_GUEST, 0)
IR_OP(STORE_GUEST, 0)
#else
IR_OP(LOAD_GUEST, IR_FLAG_CALL)
IR_OP(STORE_GUEST, IR_FLAG_CALL)
IR_OP(LOAD_FAST, 0)
IR_OP(STORE_FAST, 0)
#endif
IR_OP(LOAD_CONTEXT, 0)
IR_OP(STORE_CONTEXT, 0)
IR_OP(LOAD_LOCAL, 0)
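
This op list is consumed x-macro style, which is why the HAVE_FASTMEM switch only changes the flags attached to LOAD_GUEST / STORE_GUEST while the OP_* identifiers used elsewhere (e.g. OP_LOAD_GUEST in jit.c) stay the same. A sketch of the usual expansion pattern (the enum shape and include path are illustrative, not copied from redream):

enum ir_op {
#define IR_OP(name, flags) OP_##name,
#include "ir_ops.h" /* illustrative path */
#undef IR_OP
  NUM_OPS,
};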

View File

@@ -139,11 +139,8 @@ static void jit_restore_edges(struct jit *jit, struct jit_block *block) {
PROF_LEAVE();
}
static void jit_invalidate_block(struct jit *jit, struct jit_block *block,
int fastmem) {
/* blocks that are invalidated due to a fastmem exception aren't invalid at
the guest level, they just need to be recompiled with different options */
block->state = fastmem ? JIT_STATE_RECOMPILE : JIT_STATE_INVALID;
static void jit_invalidate_block(struct jit *jit, struct jit_block *block) {
block->state = JIT_STATE_INVALID;
jit->backend->invalidate_code(jit->backend, block->guest_addr);
@@ -170,7 +167,7 @@ static void jit_cache_block(struct jit *jit, struct jit_block *block) {
}
static void jit_free_block(struct jit *jit, struct jit_block *block) {
jit_invalidate_block(jit, block, 0);
jit_invalidate_block(jit, block);
free(block->source_map);
free(block->fastmem);
@@ -202,22 +199,6 @@ static struct jit_block *jit_alloc_block(struct jit *jit, uint32_t guest_addr,
/* allocate meta data structs for the original guest code */
block->source_map = calloc(block->guest_size, sizeof(void *));
block->fastmem = calloc(block->guest_size, sizeof(int8_t));
/* for debug builds, fastmem can be troublesome when running under gdb or
lldb. when debugging, the debugger's SIGSEGV handling can be disabled
entirely with:
handle SIGSEGV nostop noprint pass
however, legitimate SIGSEGVs will then also go unhandled by the debugger.
as of this writing, there is no way to configure the debugger to ignore the
signal initially, let us try to handle it, and then step in only when we do
not (e.g. because it was not a fastmem-related segfault). because of this,
fastmem is disabled by default for debug builds to avoid headaches */
#ifdef NDEBUG
for (int i = 0; i < block->guest_size; i++) {
block->fastmem[i] = 1;
}
#endif
return block;
}
@@ -249,7 +230,7 @@ void jit_invalidate_code(struct jit *jit) {
struct rb_node *next = rb_next(it);
struct jit_block *block = container_of(it, struct jit_block, it);
jit_invalidate_block(jit, block, 0);
jit_invalidate_block(jit, block);
it = next;
}
@@ -318,25 +299,6 @@ static void jit_emit_callback(struct jit *jit, int type, uint32_t guest_addr,
}
}
static void jit_promote_fastmem(struct jit *jit, struct jit_block *block,
struct ir *ir) {
uint32_t last_addr = block->guest_addr;
list_for_each_entry(blk, &ir->blocks, struct ir_block, it) {
list_for_each_entry_safe(instr, &blk->instrs, struct ir_instr, it) {
int fastmem = block->fastmem[last_addr - block->guest_addr];
if (instr->op == OP_SOURCE_INFO) {
last_addr = instr->arg[0]->i32;
} else if (instr->op == OP_LOAD_GUEST && fastmem) {
instr->op = OP_LOAD_FAST;
} else if (instr->op == OP_STORE_GUEST && fastmem) {
instr->op = OP_STORE_FAST;
}
}
}
}
void jit_compile_code(struct jit *jit, uint32_t guest_addr) {
PROF_ENTER("cpu", "jit_compile_block");
@@ -379,7 +341,6 @@ void jit_compile_code(struct jit *jit, uint32_t guest_addr) {
}
/* run optimization passes */
jit_promote_fastmem(jit, block, &ir);
cfa_run(jit->cfa, &ir);
lse_run(jit->lse, &ir);
cprop_run(jit->cprop, &ir);
@@ -434,23 +395,6 @@ static int jit_handle_exception(void *data, struct exception_state *ex) {
return 0;
}
/* disable fastmem optimizations for it on future compiles */
int found = 0;
for (int i = 0; i < block->guest_size; i++) {
/* ignore empty entries */
if (!block->source_map[i]) {
continue;
}
if ((uintptr_t)block->source_map[i] > ex->pc) {
break;
}
found = i;
}
block->fastmem[found] = 0;
/* invalidate the block so it's recompiled on the next access */
jit_invalidate_block(jit, block, 1);
return 1;
}

View File

@@ -17,7 +17,6 @@ struct val;
enum {
JIT_STATE_VALID,
JIT_STATE_INVALID,
JIT_STATE_RECOMPILE,
};
struct jit_block {

View File

@@ -74,7 +74,6 @@ static void cprop_run_block(struct cprop *cprop, struct ir *ir,
/* filter the load instructions out of the "could optimize" stats */
case OP_LOAD_HOST:
case OP_LOAD_GUEST:
case OP_LOAD_FAST:
case OP_LOAD_CONTEXT:
case OP_LOAD_LOCAL:
break;

View File

@@ -10,7 +10,7 @@ static void cve_run_block(struct ir *ir, struct ir_block *block) {
list_for_each_entry_safe(instr, &block->instrs, struct ir_instr, it) {
/* eliminate unnecessary sext / zext operations */
if (instr->op == OP_LOAD_HOST || instr->op == OP_LOAD_GUEST ||
instr->op == OP_LOAD_FAST || instr->op == OP_LOAD_CONTEXT) {
instr->op == OP_LOAD_CONTEXT) {
enum ir_type memory_type = VALUE_V;
int same_type = 1;
int all_sext = 1;
@@ -49,7 +49,7 @@
STAT_zext_removed++;
}
} else if (instr->op == OP_STORE_HOST || instr->op == OP_STORE_GUEST ||
instr->op == OP_STORE_FAST || instr->op == OP_STORE_CONTEXT) {
instr->op == OP_STORE_CONTEXT) {
struct ir_value *store_value = instr->arg[1];
if (store_value->def && store_value->def->op == OP_TRUNC) {