Separated the T and S bits of the SH4 status register into their own context members, sr_t and sr_s, reducing the amount of generated code by around 15%.

This commit is contained in:
Anthony Pesch 2016-12-30 15:41:29 -08:00
parent d658bb2bfe
commit b2fd497cda
6 changed files with 143 additions and 95 deletions

View File

@ -63,11 +63,12 @@ void sh4_sr_updated(void *data, uint32_t old_sr) {
prof_counter_add(COUNTER_sh4_sr_updates, 1); prof_counter_add(COUNTER_sh4_sr_updates, 1);
if ((ctx->sr & RB) != (old_sr & RB)) { if ((ctx->sr & RB_MASK) != (old_sr & RB_MASK)) {
sh4_swap_gpr_bank(sh4); sh4_swap_gpr_bank(sh4);
} }
if ((ctx->sr & I) != (old_sr & I) || (ctx->sr & BL) != (old_sr & BL)) { if ((ctx->sr & I_MASK) != (old_sr & I_MASK) ||
(ctx->sr & BL_MASK) != (old_sr & BL_MASK)) {
sh4_intc_update_pending(sh4); sh4_intc_update_pending(sh4);
} }
} }
@ -205,6 +206,16 @@ static void sh4_translate(void *data, uint32_t addr, struct ir *ir, int fastmem,
*size = as.size; *size = as.size;
} }
/* Fold the separately tracked T and S flags (ctx.sr_t / ctx.sr_s) back into
   their bit positions in the packed status register ctx.sr. All other bits of
   ctx.sr are left as they were; sr_t and sr_s themselves are not modified. */
void sh4_implode_sr(struct sh4 *sh4) {
  /* the packed register with the T and S positions cleared out */
  uint32_t rest = sh4->ctx.sr & ~(S_MASK | T_MASK);
  /* the standalone flags shifted to where they live inside sr */
  uint32_t flags = (sh4->ctx.sr_t << T_BIT) | (sh4->ctx.sr_s << S_BIT);

  sh4->ctx.sr = rest | flags;
}
/* Split the T and S bits out of the packed status register ctx.sr into the
   standalone context members ctx.sr_t and ctx.sr_s. Each member receives the
   bit shifted down to bit 0; ctx.sr itself is not modified. */
void sh4_explode_sr(struct sh4 *sh4) {
  uint32_t packed = sh4->ctx.sr;

  sh4->ctx.sr_s = (packed & S_MASK) >> S_BIT;
  sh4->ctx.sr_t = (packed & T_MASK) >> T_BIT;
}
void sh4_clear_interrupt(struct sh4 *sh4, enum sh4_interrupt intr) { void sh4_clear_interrupt(struct sh4 *sh4, enum sh4_interrupt intr) {
sh4->requested_interrupts &= ~sh4->sort_id[intr]; sh4->requested_interrupts &= ~sh4->sort_id[intr];
sh4_intc_update_pending(sh4); sh4_intc_update_pending(sh4);

View File

@ -83,6 +83,8 @@ void sh4_destroy(struct sh4 *sh);
void sh4_reset(struct sh4 *sh4, uint32_t pc); void sh4_reset(struct sh4 *sh4, uint32_t pc);
void sh4_raise_interrupt(struct sh4 *sh, enum sh4_interrupt intr); void sh4_raise_interrupt(struct sh4 *sh, enum sh4_interrupt intr);
void sh4_clear_interrupt(struct sh4 *sh, enum sh4_interrupt intr); void sh4_clear_interrupt(struct sh4 *sh, enum sh4_interrupt intr);
/* copies the T and S bits of ctx.sr out into ctx.sr_t and ctx.sr_s */
void sh4_explode_sr(struct sh4 *sh4);
/* writes ctx.sr_t and ctx.sr_s back into the T and S bits of ctx.sr */
void sh4_implode_sr(struct sh4 *sh4);
void sh4_sr_updated(void *data, uint32_t old_sr); void sh4_sr_updated(void *data, uint32_t old_sr);
void sh4_fpscr_updated(void *data, uint32_t old_fpscr); void sh4_fpscr_updated(void *data, uint32_t old_fpscr);

View File

@ -12,9 +12,9 @@ static struct sh4_interrupt_info sh4_interrupts[NUM_SH_INTERRUPTS] = {
}; };
void sh4_intc_update_pending(struct sh4 *sh4) { void sh4_intc_update_pending(struct sh4 *sh4) {
int min_priority = (sh4->ctx.sr & I) >> 4; int min_priority = (sh4->ctx.sr & I_MASK) >> I_BIT;
uint64_t priority_mask = uint64_t priority_mask =
(sh4->ctx.sr & BL) ? 0 : ~sh4->priority_mask[min_priority]; (sh4->ctx.sr & BL_MASK) ? 0 : ~sh4->priority_mask[min_priority];
sh4->ctx.pending_interrupts = sh4->requested_interrupts & priority_mask; sh4->ctx.pending_interrupts = sh4->requested_interrupts & priority_mask;
} }
@ -28,11 +28,14 @@ int sh4_intc_check_pending(struct sh4 *sh4) {
enum sh4_interrupt intr = sh4->sorted_interrupts[n]; enum sh4_interrupt intr = sh4->sorted_interrupts[n];
struct sh4_interrupt_info *int_info = &sh4_interrupts[intr]; struct sh4_interrupt_info *int_info = &sh4_interrupts[intr];
/* ensure sr is up to date */
sh4_implode_sr(sh4);
*sh4->INTEVT = int_info->intevt; *sh4->INTEVT = int_info->intevt;
sh4->ctx.ssr = sh4->ctx.sr; sh4->ctx.ssr = sh4->ctx.sr;
sh4->ctx.spc = sh4->ctx.pc; sh4->ctx.spc = sh4->ctx.pc;
sh4->ctx.sgr = sh4->ctx.r[15]; sh4->ctx.sgr = sh4->ctx.r[15];
sh4->ctx.sr |= (BL | MD | RB); sh4->ctx.sr |= (BL_MASK | MD_MASK | RB_MASK);
sh4->ctx.pc = sh4->ctx.vbr + 0x600; sh4->ctx.pc = sh4->ctx.vbr + 0x600;
sh4_sr_updated(sh4, sh4->ctx.ssr); sh4_sr_updated(sh4, sh4->ctx.ssr);

View File

@ -4,29 +4,35 @@
#include <stdint.h> #include <stdint.h>
/* SR bits */ /* SR bits */
enum {
/* true / false condition or carry/borrow bit */ /* true / false condition or carry/borrow bit */
T = 0x00000001, #define T_BIT 0
/* specifies a saturation operation for a MAC instruction */ /* specifies a saturation operation for a MAC instruction */
S = 0x00000002, #define S_BIT 1
/* interrupt mask level */ /* interrupt mask level */
I = 0x000000f0, #define I_BIT 4
/* used by the DIV0S, DIV0U, and DIV1 instructions */ /* used by the DIV0S, DIV0U, and DIV1 instructions */
Q = 0x00000100, #define Q_BIT 8
/* used by the DIV0S, DIV0U, and DIV1 instructions */ #define M_BIT 9
M = 0x00000200, /* an FPU instr causes a general FPU disable exception */
/* an FPU instr causes a general FPU disable exception */ #define FD_BIT 15
FD = 0x00008000, /* interrupt requests are masked */
/* interrupt requests are masked */ #define BL_BIT 28
BL = 0x10000000, /* general register bank specifier in privileged mode (set
/* to 1 by a reset, exception, or interrupt) */
* general register bank specifier in privileged mode (set #define RB_BIT 29
* to 1 by a reset, exception, or interrupt) /* processor mode (0 is user mode, 1 is privileged mode) */
*/ #define MD_BIT 30
RB = 0x20000000,
/* processor mode (0 is user mode, 1 is privileged mode) */ #define T_MASK (1u << T_BIT)
MD = 0x40000000 #define S_MASK (1u << S_BIT)
}; #define I_MASK 0xf0
#define Q_MASK (1u << Q_BIT)
#define M_MASK (1u << M_BIT)
#define FD_MASK (1u << FD_BIT)
#define BL_MASK (1u << BL_BIT)
#define RB_MASK (1u << RB_BIT)
#define MD_MASK (1u << MD_BIT)
/* FPSCR bits */ /* FPSCR bits */
enum { enum {
@ -38,41 +44,37 @@ enum {
}; };
struct sh4_ctx { struct sh4_ctx {
/* /* there are 24 32-bit general registers, r0_bank0-r7_bank0, r0_bank1-r7_bank1
* there are 24 32-bit general registers, r0_bank0-r7_bank0, r0_bank1-r7_bank1 and r8-r15. r contains the active bank's r0-r7 as well as r8-r15. ralt
* and r8-r15. r contains the active bank's r0-r7 as well as r8-r15. ralt contains the inactive bank's r0-r7 and is swapped in when the processor
* contains the inactive bank's r0-r7 and is swapped in when the processor mode changes */
* mode changes
*/
uint32_t r[16], ralt[8]; uint32_t r[16], ralt[8];
/* /* there are 32 32-bit floating point registers, fr0-fr15 and xf0-xf15. these
* there are 32 32-bit floating point registers, fr0-fr15 and xf0-xf15. these registers are banked, and swapped with each other when the bank bit of
* registers are banked, and swapped with each other when the bank bit of FPSCR changes. in addition, fr0-fr15 can be used as the eight registers
* FPSCR changes. in addition, fr0-fr15 can be used as the eight registers dr0/2/4/6/8/10/12/14 (double-precision, or pair registers) or the four
* dr0/2/4/6/8/10/12/14 (double-precision, or pair registers) or the four registers fv0/4/8/12 (vector registers). while xf0-xf15 can be used as
* registers fv0/4/8/12 (vector registers). while xf0-xf15 can be used as the eight registers xd0/2/4/6/8/10/12/14 (pair registers) or register
* the eight registers xd0/2/4/6/8/10/12/14 (pair registers) or register matrix XMTRX
* matrix XMTRX
*
* note, the sh4 does not support endian conversion for 64-bit data.
* therefore, if 64-bit floating point access is performed in little endian
* mode, the upper and lower 32 bits will be reversed. for example, dr2
* aliases fr2 and fr3, but fr3 is actually the low-order word
*
* in order to avoid swapping the words in every double-precision opcode, the
* mapping for each pair of single-precision registers is instead swapped by
* XOR'ing the actual index with 1. for example, fr2 becomes fr[3] and fr3
* becomes fr[2], enabling dr2 to perfectly alias fr[2]
* note note, this incorrectly causes fv registers to be swizzled. fv0 should note, the sh4 does not support endian conversion for 64-bit data.
* be loaded as {fr0, fr1, fr2, fr3} but it's actually loaded as therefore, if 64-bit floating point access is performed in little endian
* {fr1, fr0, fr3, fr2}. however, due to the way the FV registers are mode, the upper and lower 32 bits will be reversed. for example, dr2
* used (FIPR and FTRV) this doesn't actually affect the results aliases fr2 and fr3, but fr3 is actually the low-order word
*/
in order to avoid swapping the words in every double-precision opcode, the
mapping for each pair of single-precision registers is instead swapped by
XOR'ing the actual index with 1. for example, fr2 becomes fr[3] and fr3
becomes fr[2], enabling dr2 to perfectly alias fr[2]
note note, this incorrectly causes fv registers to be swizzled. fv0 should
be loaded as {fr0, fr1, fr2, fr3} but it's actually loaded as
{fr1, fr0, fr3, fr2}. however, due to the way the FV registers are
used (FIPR and FTRV) this doesn't actually affect the results */
uint32_t fr[16], xf[16]; uint32_t fr[16], xf[16];
uint32_t pc, pr, sr, sr_qm, fpscr; uint32_t pc, pr, sr, sr_t, sr_s, sr_qm, fpscr;
uint32_t dbr, gbr, vbr; uint32_t dbr, gbr, vbr;
uint32_t fpul, mach, macl; uint32_t fpul, mach, macl;
uint32_t sgr, spc, ssr; uint32_t sgr, spc, ssr;

View File

@ -53,9 +53,11 @@ static emit_cb emit_callbacks[NUM_SH4_OPS] = {
#define load_xfr(n, type) ir_load_xfr(ir, n, type) #define load_xfr(n, type) ir_load_xfr(ir, n, type)
#define store_xfr(n, v) ir_store_xfr(ir, n, v) #define store_xfr(n, v) ir_store_xfr(ir, n, v)
#define load_sr() ir_load_sr(ir) #define load_sr() ir_load_sr(ir)
#define store_sr(v, update) ir_store_sr(frontend, ir, v, update) #define store_sr(v) ir_store_sr(frontend, ir, v)
#define load_t() ir_load_t(ir) #define load_t() ir_load_t(ir)
#define store_t(v) ir_store_t(frontend, ir, v) #define store_t(v) ir_store_t(frontend, ir, v)
#define load_s() ir_load_s(ir)
#define store_s(v) ir_store_s(frontend, ir, v)
#define load_gbr() ir_load_gbr(ir) #define load_gbr() ir_load_gbr(ir)
#define store_gbr(v) ir_store_gbr(ir, v) #define store_gbr(v) ir_store_gbr(ir, v)
#define load_fpscr() ir_load_fpscr(ir) #define load_fpscr() ir_load_fpscr(ir)
@ -120,41 +122,63 @@ static void ir_store_xfr(struct ir *ir, int n, struct ir_value *v) {
} }
static struct ir_value *ir_load_sr(struct ir *ir) { static struct ir_value *ir_load_sr(struct ir *ir) {
return ir_load_context(ir, offsetof(struct sh4_ctx, sr), VALUE_I32); struct ir_value *sr =
ir_load_context(ir, offsetof(struct sh4_ctx, sr), VALUE_I32);
/* inlined version of sh4_implode_sr */
struct ir_value *sr_t =
ir_load_context(ir, offsetof(struct sh4_ctx, sr_t), VALUE_I32);
struct ir_value *sr_s =
ir_load_context(ir, offsetof(struct sh4_ctx, sr_s), VALUE_I32);
sr = ir_and(ir, sr, ir_alloc_i32(ir, ~(S_MASK | T_MASK)));
sr = ir_or(ir, sr, sr_t);
sr = ir_or(ir, sr, ir_shli(ir, sr_s, S_BIT));
return sr;
} }
static void ir_store_sr(struct sh4_frontend *frontend, struct ir *ir, static void ir_store_sr(struct sh4_frontend *frontend, struct ir *ir,
struct ir_value *v, int update) { struct ir_value *sr) {
CHECK_EQ(v->type, VALUE_I32); CHECK_EQ(sr->type, VALUE_I32);
struct ir_value *sr_updated = NULL; struct ir_value *sr_updated = ir_alloc_ptr(ir, frontend->sr_updated);
struct ir_value *data = NULL; struct ir_value *data = ir_alloc_ptr(ir, frontend->data);
struct ir_value *old_sr = NULL; struct ir_value *old_sr = load_sr();
if (update) { ir_store_context(ir, offsetof(struct sh4_ctx, sr), sr);
sr_updated = ir_alloc_ptr(ir, frontend->sr_updated);
data = ir_alloc_ptr(ir, frontend->data);
old_sr = load_sr();
}
ir_store_context(ir, offsetof(struct sh4_ctx, sr), v); /* inline version of sh4_explode_sr */
struct ir_value *sr_t = ir_and(ir, sr, ir_alloc_i32(ir, T_MASK));
struct ir_value *sr_s =
ir_lshri(ir, ir_and(ir, sr, ir_alloc_i32(ir, S_MASK)), S_BIT);
ir_store_context(ir, offsetof(struct sh4_ctx, sr_t), sr_t);
ir_store_context(ir, offsetof(struct sh4_ctx, sr_s), sr_s);
if (update) { /* TODO inline the check to see if RB, I or BL bits changed */
ir_call_2(ir, sr_updated, data, old_sr); ir_call_2(ir, sr_updated, data, old_sr);
}
} }
static struct ir_value *ir_load_t(struct ir *ir) { static struct ir_value *ir_load_t(struct ir *ir) {
return ir_and(ir, load_sr(), ir_alloc_i32(ir, T)); return ir_load_context(ir, offsetof(struct sh4_ctx, sr_t), VALUE_I32);
} }
static void ir_store_t(struct sh4_frontend *frontend, struct ir *ir, static void ir_store_t(struct sh4_frontend *frontend, struct ir *ir,
struct ir_value *v) { struct ir_value *v) {
struct ir_value *sr = load_sr(); /* zext the results of ir_cmp_* */
struct ir_value *sr_t = ir_or(ir, sr, ir_alloc_i32(ir, T)); if (v->type != VALUE_I32) {
struct ir_value *sr_not = ir_and(ir, sr, ir_alloc_i32(ir, ~T)); v = ir_zext(ir, v, VALUE_I32);
struct ir_value *res = ir_select(ir, v, sr_t, sr_not); }
store_sr(res, 0); ir_store_context(ir, offsetof(struct sh4_ctx, sr_t), v);
}
/* Emit a load of the standalone S flag (the sr_s member of struct sh4_ctx)
   from the guest context as a 32-bit integer value and return it. */
static struct ir_value *ir_load_s(struct ir *ir) {
  struct ir_value *s =
      ir_load_context(ir, offsetof(struct sh4_ctx, sr_s), VALUE_I32);
  return s;
}
static void ir_store_s(struct sh4_frontend *frontend, struct ir *ir,
struct ir_value *v) {
CHECK_EQ(v->type, VALUE_I32);
ir_store_context(ir, offsetof(struct sh4_ctx, sr_s), v);
} }
static struct ir_value *ir_load_gbr(struct ir *ir) { static struct ir_value *ir_load_gbr(struct ir *ir) {
@ -609,6 +633,7 @@ EMITTER(ADDC) {
struct ir_value *not_v = ir_not(ir, v); struct ir_value *not_v = ir_not(ir, v);
struct ir_value *carry = ir_and(ir, or_rnrm, not_v); struct ir_value *carry = ir_and(ir, or_rnrm, not_v);
carry = ir_or(ir, and_rnrm, carry); carry = ir_or(ir, and_rnrm, carry);
carry = ir_lshri(ir, carry, 31);
store_t(carry); store_t(carry);
} }
@ -624,7 +649,8 @@ EMITTER(ADDV) {
/* compute overflow flag, taken from Hacker's Delight */ /* compute overflow flag, taken from Hacker's Delight */
struct ir_value *xor_vrn = ir_xor(ir, v, rn); struct ir_value *xor_vrn = ir_xor(ir, v, rn);
struct ir_value *xor_vrm = ir_xor(ir, v, rm); struct ir_value *xor_vrm = ir_xor(ir, v, rm);
struct ir_value *overflow = ir_lshri(ir, ir_and(ir, xor_vrn, xor_vrm), 31); struct ir_value *overflow = ir_and(ir, xor_vrn, xor_vrm);
overflow = ir_lshri(ir, overflow, 31);
store_t(overflow); store_t(overflow);
} }
@ -731,7 +757,8 @@ EMITTER(DIV0S) {
ir_store_context(ir, offsetof(struct sh4_ctx, sr_qm), ir_not(ir, qm)); ir_store_context(ir, offsetof(struct sh4_ctx, sr_qm), ir_not(ir, qm));
/* msb of Q ^ M -> T */ /* msb of Q ^ M -> T */
store_t(ir_lshri(ir, qm, 31)); struct ir_value *t = ir_lshri(ir, qm, 31);
store_t(t);
} }
// code cycles t-bit // code cycles t-bit
@ -740,8 +767,7 @@ EMITTER(DIV0S) {
EMITTER(DIV0U) { EMITTER(DIV0U) {
ir_store_context(ir, offsetof(struct sh4_ctx, sr_qm), ir_store_context(ir, offsetof(struct sh4_ctx, sr_qm),
ir_alloc_i32(ir, 0x80000000)); ir_alloc_i32(ir, 0x80000000));
store_t(ir_alloc_i32(ir, 0));
store_sr(ir_and(ir, load_sr(), ir_alloc_i32(ir, ~T)), 0);
} }
// code cycles t-bit // code cycles t-bit
@ -780,7 +806,8 @@ EMITTER(DIV1) {
ir_store_context(ir, offsetof(struct sh4_ctx, sr_qm), qm); ir_store_context(ir, offsetof(struct sh4_ctx, sr_qm), qm);
/* set T to output bit (which happens to be Q == M) */ /* set T to output bit (which happens to be Q == M) */
store_t(ir_lshri(ir, qm, 31)); struct ir_value *t = ir_lshri(ir, qm, 31);
store_t(t);
} }
// DMULS.L Rm,Rn // DMULS.L Rm,Rn
@ -893,6 +920,7 @@ EMITTER(NEGC) {
struct ir_value *v = ir_sub(ir, ir_neg(ir, rm), t); struct ir_value *v = ir_sub(ir, ir_neg(ir, rm), t);
store_gpr(i->Rn, v); store_gpr(i->Rn, v);
struct ir_value *carry = ir_or(ir, t, rm); struct ir_value *carry = ir_or(ir, t, rm);
carry = ir_lshri(ir, carry, 31);
store_t(carry); store_t(carry);
} }
@ -916,6 +944,7 @@ EMITTER(SUBC) {
struct ir_value *l = ir_and(ir, ir_not(ir, rn), rm); struct ir_value *l = ir_and(ir, ir_not(ir, rn), rm);
struct ir_value *r = ir_and(ir, ir_or(ir, ir_not(ir, rn), rm), v); struct ir_value *r = ir_and(ir, ir_or(ir, ir_not(ir, rn), rm), v);
struct ir_value *carry = ir_or(ir, l, r); struct ir_value *carry = ir_or(ir, l, r);
carry = ir_lshri(ir, carry, 31);
store_t(carry); store_t(carry);
} }
@ -929,7 +958,8 @@ EMITTER(SUBV) {
// compute overflow flag, taken from Hacker's Delight // compute overflow flag, taken from Hacker's Delight
struct ir_value *xor_rnrm = ir_xor(ir, rn, rm); struct ir_value *xor_rnrm = ir_xor(ir, rn, rm);
struct ir_value *xor_vrn = ir_xor(ir, v, rn); struct ir_value *xor_vrn = ir_xor(ir, v, rn);
struct ir_value *overflow = ir_lshri(ir, ir_and(ir, xor_rnrm, xor_vrn), 31); struct ir_value *overflow = ir_and(ir, xor_rnrm, xor_vrn);
overflow = ir_lshri(ir, overflow, 31);
store_t(overflow); store_t(overflow);
} }
@ -1319,9 +1349,7 @@ EMITTER(CLRMAC) {
} }
EMITTER(CLRS) { EMITTER(CLRS) {
struct ir_value *sr = load_sr(); store_s(ir_alloc_i32(ir, 0));
sr = ir_and(ir, sr, ir_alloc_i32(ir, ~S));
store_sr(sr, 1);
} }
// code cycles t-bit // code cycles t-bit
@ -1334,7 +1362,7 @@ EMITTER(CLRT) {
// LDC Rm,SR // LDC Rm,SR
EMITTER(LDCSR) { EMITTER(LDCSR) {
struct ir_value *rm = load_gpr(i->Rm, VALUE_I32); struct ir_value *rm = load_gpr(i->Rm, VALUE_I32);
store_sr(rm, 1); store_sr(rm);
} }
// LDC Rm,GBR // LDC Rm,GBR
@ -1378,7 +1406,7 @@ EMITTER(LDCRBANK) {
EMITTER(LDCMSR) { EMITTER(LDCMSR) {
struct ir_value *addr = load_gpr(i->Rm, VALUE_I32); struct ir_value *addr = load_gpr(i->Rm, VALUE_I32);
struct ir_value *v = load_guest(addr, VALUE_I32); struct ir_value *v = load_guest(addr, VALUE_I32);
store_sr(v, 1); store_sr(v);
/* reload Rm, sr store could have swapped banks */ /* reload Rm, sr store could have swapped banks */
addr = load_gpr(i->Rm, VALUE_I32); addr = load_gpr(i->Rm, VALUE_I32);
addr = ir_add(ir, addr, ir_alloc_i32(ir, 4)); addr = ir_add(ir, addr, ir_alloc_i32(ir, 4));
@ -1524,15 +1552,14 @@ EMITTER(RTE) {
ir_load_context(ir, offsetof(struct sh4_ctx, spc), VALUE_I32); ir_load_context(ir, offsetof(struct sh4_ctx, spc), VALUE_I32);
struct ir_value *ssr = struct ir_value *ssr =
ir_load_context(ir, offsetof(struct sh4_ctx, ssr), VALUE_I32); ir_load_context(ir, offsetof(struct sh4_ctx, ssr), VALUE_I32);
store_sr(ssr, 1); store_sr(ssr);
emit_delay_instr(); emit_delay_instr();
branch(spc); branch(spc);
} }
// SETS // SETS
EMITTER(SETS) { EMITTER(SETS) {
struct ir_value *sr = ir_or(ir, load_sr(), ir_alloc_i32(ir, S)); store_s(ir_alloc_i32(ir, 1));
store_sr(sr, 1);
} }
// SETT // SETT

View File

@ -107,6 +107,9 @@ static void run_sh4_test(struct dreamcast *dc, const struct sh4_test *test) {
dc_tick(dc, 1); dc_tick(dc, 1);
} }
/* ensure sh4 sr is up to date before testing against it */
sh4_implode_sr(dc->sh4);
/* validate out registers */ /* validate out registers */
for (int i = 0; i < sh4_num_test_regs; i++) { for (int i = 0; i < sh4_num_test_regs; i++) {
struct sh4_test_reg *reg = &sh4_test_regs[i]; struct sh4_test_reg *reg = &sh4_test_regs[i];
@ -138,7 +141,7 @@ TEST(sh4_x64) {
{0}, \ {0}, \
{fr1, fr0, fr3, fr2, fr5, fr4, fr7, fr6, fr9, fr8, fr11, fr10, fr13, fr12, fr15, fr14}, \ {fr1, fr0, fr3, fr2, fr5, fr4, fr7, fr6, fr9, fr8, fr11, fr10, fr13, fr12, fr15, fr14}, \
{xf1, xf0, xf3, xf2, xf5, xf4, xf7, xf6, xf9, xf8, xf11, xf10, xf13, xf12, xf15, xf14}, \ {xf1, xf0, xf3, xf2, xf5, xf4, xf7, xf6, xf9, xf8, xf11, xf10, xf13, xf12, xf15, xf14}, \
0, 0, 0, 0, fpscr, \ 0, 0, 0, 0, 0, 0, fpscr, \
0, 0, 0, \ 0, 0, 0, \
0, 0, 0, \ 0, 0, 0, \
0, 0, 0, \ 0, 0, 0, \