Compiles and links for Linux x86. Does not work yet.

Woohoo, a new gcc bug!
The x86 Linux dynarec just got a couple of months further away thanks to gcc handling some things very badly (namely, fastcalls). And yes, reicast depends on fastcalls for a lot of things, and no, I don't care if you don't like fastcalls.
This commit is contained in:
Stefanos Kornilios Mitsis Poiitidis 2014-04-25 12:52:08 +03:00
parent ad054f1b96
commit 3eb27fc109
11 changed files with 2689 additions and 30 deletions

View File

@ -20,7 +20,7 @@ ifndef NOT_ARM
endif
ifdef X86_REC
RZDCY_MODULES += rec-x86/
RZDCY_MODULES += rec-x86/ emitter/
endif
ifndef NO_REND

View File

@ -3,6 +3,7 @@
#pragma warning(disable:4244)
#pragma warning(disable:4245)
#include "../types.h"
#include "x86_emitter.h"
bool IsS8(u32 value)
{
@ -183,6 +184,8 @@ x86_block_externs* x86_block::GetExterns()
return rv;
}
#if 0
#include "windows.h"
/*void x86_block::CopyTo(void* to)
{
@ -194,6 +197,8 @@ x86_block_externs* x86_block::GetExterns()
}
*/
#endif
//wut ?
void x86_block::ApplyPatches(u8* base)
{
@ -409,7 +414,10 @@ void x86_block::Emit(x86_opcode_class op,x86_reg reg1,x86_reg reg2,u32 imm)
//reg,mrm,imm, reg1 is written
void x86_block::Emit(x86_opcode_class op,x86_reg reg,x86_ptr mem,u32 imm)
{
ME_op_3_imm(op,reg,c_mrm(mem),imm);
//GCC bitches about using this directly. It doesn't complain for the other uses though
//go figure ....
x86_mrm_t mrm = c_mrm(mem);
ME_op_3_imm(op,reg,mrm,imm);
}
//reg,mrm,imm, reg1 is written
@ -449,19 +457,19 @@ u8 EncodeDisp(u32 disp,x86_mrm_t* to,u8 flags)
verify(false);
return 0;
}
__declspec(dllexport) x86_mrm_t x86_mrm(x86_reg base)
/*__declspec(dllexport) */x86_mrm_t x86_mrm(x86_reg base)
{
return x86_mrm(base,NO_REG,sib_scale_1,0);
}
__declspec(dllexport) x86_mrm_t x86_mrm(x86_reg base,x86_ptr disp)
/*__declspec(dllexport) */x86_mrm_t x86_mrm(x86_reg base,x86_ptr disp)
{
return x86_mrm(base,NO_REG,sib_scale_1,disp);
}
__declspec(dllexport) x86_mrm_t x86_mrm(x86_reg index,x86_sib_scale scale,x86_ptr disp)
/*__declspec(dllexport) */x86_mrm_t x86_mrm(x86_reg index,x86_sib_scale scale,x86_ptr disp)
{
return x86_mrm(NO_REG,index,scale,disp);
}
__declspec(dllexport) x86_mrm_t x86_mrm(x86_reg base,x86_reg index)
/*__declspec(dllexport) */x86_mrm_t x86_mrm(x86_reg base,x86_reg index)
{
return x86_mrm(base,index,sib_scale_1,0);
}

View File

@ -303,9 +303,9 @@ public:
~x86_block();
void x86_buffer_ensure(u32 size);
void x86_block::write8(u32 value);
void x86_block::write16(u32 value);
void x86_block::write32(u32 value);
void write8(u32 value);
void write16(u32 value);
void write32(u32 value);
//init things
void Init(dyna_reallocFP* ral,dyna_finalizeFP* alf);
@ -332,45 +332,45 @@ public:
//opcode Emitters
//no param
void x86_block::Emit(x86_opcode_class op);
void Emit(x86_opcode_class op);
//1 param
//reg
void x86_block::Emit(x86_opcode_class op,x86_reg reg);
void Emit(x86_opcode_class op,x86_reg reg);
//smrm
void x86_block::Emit(x86_opcode_class op,x86_ptr mem);
void Emit(x86_opcode_class op,x86_ptr mem);
//mrm
void x86_block::Emit(x86_opcode_class op,x86_mrm_t mrm);
void Emit(x86_opcode_class op,x86_mrm_t mrm);
//imm
void x86_block::Emit(x86_opcode_class op,u32 imm);
void Emit(x86_opcode_class op,u32 imm);
//ptr_imm
void x86_block::Emit(x86_opcode_class op,x86_ptr_imm disp);
void Emit(x86_opcode_class op,x86_ptr_imm disp);
//lbl
void x86_block::Emit(x86_opcode_class op,x86_Label* lbl);
void Emit(x86_opcode_class op,x86_Label* lbl);
//2 param
//reg,reg, reg1 is written
void x86_block::Emit(x86_opcode_class op,x86_reg reg1,x86_reg reg2);
void Emit(x86_opcode_class op,x86_reg reg1,x86_reg reg2);
//reg,smrm, reg is written
void x86_block::Emit(x86_opcode_class op,x86_reg reg,x86_ptr mem);
void Emit(x86_opcode_class op,x86_reg reg,x86_ptr mem);
//reg,mrm, reg is written
void x86_block::Emit(x86_opcode_class op,x86_reg reg1,x86_mrm_t mrm);
void Emit(x86_opcode_class op,x86_reg reg1,x86_mrm_t mrm);
//reg,imm, reg is written
void x86_block::Emit(x86_opcode_class op,x86_reg reg,u32 imm);
void Emit(x86_opcode_class op,x86_reg reg,u32 imm);
//smrm,reg, mem is written
void x86_block::Emit(x86_opcode_class op,x86_ptr mem,x86_reg reg);
void Emit(x86_opcode_class op,x86_ptr mem,x86_reg reg);
//smrm,imm, mem is written
void x86_block::Emit(x86_opcode_class op,x86_ptr mem,u32 imm);
void Emit(x86_opcode_class op,x86_ptr mem,u32 imm);
//mrm,reg, mrm is written
void x86_block::Emit(x86_opcode_class op,x86_mrm_t mrm,x86_reg reg);
void Emit(x86_opcode_class op,x86_mrm_t mrm,x86_reg reg);
//mrm,imm, mrm is written
void x86_block::Emit(x86_opcode_class op,x86_mrm_t mrm,u32 imm);
void Emit(x86_opcode_class op,x86_mrm_t mrm,u32 imm);
//3 param
//reg,reg,imm, reg1 is written
void x86_block::Emit(x86_opcode_class op,x86_reg reg1,x86_reg reg2,u32 imm);
void Emit(x86_opcode_class op,x86_reg reg1,x86_reg reg2,u32 imm);
//reg,mrm,imm, reg1 is written
void x86_block::Emit(x86_opcode_class op,x86_reg reg,x86_ptr mem,u32 imm);
void Emit(x86_opcode_class op,x86_reg reg,x86_ptr mem,u32 imm);
//reg,mrm,imm, reg1 is written
void x86_block::Emit(x86_opcode_class op,x86_reg reg,x86_mrm_t mrm,u32 imm);
void Emit(x86_opcode_class op,x86_reg reg,x86_mrm_t mrm,u32 imm);
};

View File

@ -58,6 +58,11 @@
}
*/
#include "build.h"
#if BUILD_COMPILER == COMPILER_GCC
#define __fastcall
#endif
enum enc_param

148
core/rec-x86/lin86_asm.S Normal file
View File

@ -0,0 +1,148 @@
.section .text
.intel_syntax noprefix
.extern rdv_LinkBlock
.globl p_sh4rcb
.globl gas_offs
.globl rdv_FailedToFindBlock
.globl cycle_counter
.globl loop_no_update
.globl intc_sched
.globl bm_GetCode
.globl cycle_counter
.globl UpdateSystem
.globl rdv_DoInterrupts
.globl rdv_BlockCheckFail
###########################################
.globl ngen_LinkBlock_Shared_stub
.type ngen_LinkBlock_Shared_stub, @function
ngen_LinkBlock_Shared_stub:
pop ecx
sub ecx,5
call rdv_LinkBlock
jmp eax
###########################################
.globl ngen_LinkBlock_cond_Next_stub
.type ngen_LinkBlock_cond_Next_stub, @function
ngen_LinkBlock_cond_Next_stub:
mov edx,0
jmp ngen_LinkBlock_Shared_stub
###########################################
.globl ngen_LinkBlock_cond_Branch_stub
.type ngen_LinkBlock_cond_Branch_stub, @function
ngen_LinkBlock_cond_Branch_stub:
mov edx,1
jmp ngen_LinkBlock_Shared_stub
###########################################
.globl ngen_LinkBlock_Generic_stub
.type ngen_LinkBlock_Generic_stub, @function
# u32 gas_offs=offsetof(Sh4RCB,cntx.jdyn);
ngen_LinkBlock_Generic_stub:
mov edx,p_sh4rcb
add edx,gas_offs
mov edx,[edx]
jmp ngen_LinkBlock_Shared_stub
###########################################
.globl ngen_FailedToFindBlock_
.type ngen_FailedToFindBlock_, @function
ngen_FailedToFindBlock_:
mov ecx,esi
call rdv_FailedToFindBlock
jmp eax
###########################################
#define SH4_TIMESLICE 448
.globl ngen_mainloop
.type ngen_mainloop, @function
ngen_mainloop:
push esi
push edi
push ebp
push ebx
mov ecx,0xA0000000
mov dword ptr cycle_counter, 448 #SH4_TIMESLICE
mov dword ptr loop_no_update,offset no_update
mov dword ptr intc_sched,offset intc_sched_offs
mov eax,0
# next_pc _MUST_ be on ecx
no_update:
mov esi,ecx
call _Z10bm_GetCodej #bm_GetCode
jmp eax
intc_sched_offs:
add dword ptr cycle_counter, 448 #SH4_TIMESLICE
call UpdateSystem
cmp eax,0
jnz do_iter
ret
do_iter:
pop ecx
call rdv_DoInterrupts
mov ecx,eax
# cmp byte ptr [sh4_int_bCpuRun],0
# jz cleanup
jmp no_update
cleanup:
pop ebx
pop ebp
pop edi
pop esi
ret
###########################################
.globl ngen_blockcheckfail
.type ngen_blockcheckfail, @function
ngen_blockcheckfail:
call rdv_BlockCheckFail
jmp eax
###########################################
.globl ngen_blockcheckfail2
.type ngen_blockcheckfail2, @function
ngen_blockcheckfail2:
int 3
call rdv_BlockCheckFail
jmp eax

View File

@ -0,0 +1,781 @@
#include "types.h"
#ifndef HOST_NO_REC
#include "win86_ngen.h"
struct DynaRBI: RuntimeBlockInfo
{
x86_block_externs* reloc_info;
virtual ~DynaRBI() { if (reloc_info) reloc_info->Free(); }
virtual u32 Relink();
virtual void Relocate(void* dst)
{
reloc_info->Apply(dst);
}
};
x86_block* x86e;
u32 cycle_counter;
void* loop_no_update;
void* intc_sched;
bool sse_1=true;
bool sse_2=true;
bool sse_3=true;
bool ssse_3=true;
bool mmx=true;
void DetectCpuFeatures()
{
static bool detected=false;
if (detected) return;
detected=true;
#if HOST_OS==OS_WINDOWS
__try
{
__asm addps xmm0,xmm0
}
__except(1)
{
sse_1=false;
}
__try
{
__asm addpd xmm0,xmm0
}
__except(1)
{
sse_2=false;
}
__try
{
__asm addsubpd xmm0,xmm0
}
__except(1)
{
sse_3=false;
}
__try
{
__asm phaddw xmm0,xmm0
}
__except(1)
{
ssse_3=false;
}
__try
{
__asm paddd mm0,mm1
__asm emms;
}
__except(1)
{
mmx=false;
}
#endif
}
#define CSC_SIZE 64
struct csc_et
{
u32 pc;
void* code;
};
csc_et csc[CSC_SIZE<32?32:CSC_SIZE];
#define CSC_SHIFT 1
u32 csc_hash(u32 addr)
{
return (addr>>CSC_SHIFT)&(CSC_SIZE-1);
}
u32 csc_mode=0;
u32 csc_sidx=1;
x86_reg alloc_regs[]={EBX,EBP,ESI,EDI,NO_REG};
x86_reg xmm_alloc_regs[]={XMM7,XMM6,XMM5,XMM4,NO_REG};
f32 ALIGN(16) thaw_regs[4];
void x86_reg_alloc::Preload(u32 reg,x86_reg nreg)
{
x86e->Emit(op_mov32,nreg,GetRegPtr(reg));
}
void x86_reg_alloc::Writeback(u32 reg,x86_reg nreg)
{
x86e->Emit(op_mov32,GetRegPtr(reg),nreg);
}
void x86_reg_alloc::Preload_FPU(u32 reg,x86_reg nreg)
{
x86e->Emit(op_movss,nreg,GetRegPtr(reg));
}
void x86_reg_alloc::Writeback_FPU(u32 reg,x86_reg nreg)
{
x86e->Emit(op_movss,GetRegPtr(reg),nreg);
}
#ifdef PROF2
extern u32 flsh;
#endif
void x86_reg_alloc::FreezeXMM()
{
x86_reg* fpreg=xmm_alloc_regs;
f32* slpc=thaw_regs;
while(*fpreg!=-1)
{
if (SpanNRegfIntr(current_opid,*fpreg))
x86e->Emit(op_movss,slpc++,*fpreg);
fpreg++;
}
#ifdef PROF2
x86e->Emit(op_add32,&flsh,1);
#endif
}
void x86_reg_alloc::ThawXMM()
{
x86_reg* fpreg=xmm_alloc_regs;
f32* slpc=thaw_regs;
while(*fpreg!=-1)
{
if (SpanNRegfIntr(current_opid,*fpreg))
x86e->Emit(op_movss,*fpreg,slpc++);
fpreg++;
}
}
x86_reg_alloc reg;
u32 ret_hit,ret_all,ret_stc;
void csc_push(RuntimeBlockInfo* block)
{
if (csc_mode==0)
{
x86e->Emit(op_mov32,&csc[csc_hash(block->NextBlock)].pc,block->NextBlock);
}
else if (csc_mode==1)
{
//x86e->Emit(op_int3);
x86e->Emit(op_ror32,&csc_sidx,1);
x86e->Emit(op_bsr32,EAX,&csc_sidx);
x86e->Emit(op_mov32,x86_mrm(EAX,sib_scale_8,x86_ptr(csc)),block->NextBlock);
}
}
void DYNACALL csc_fail(u32 addr,u32 addy)
{
if (csc_mode==0)
{
//too bad ?
}
else if (csc_mode==1)
{
u32 fail_idx=(csc_sidx>>1)|(csc_sidx<<31);
printf("Ret Mismatch: %08X instead of %08X!\n",addr,addy);
}
}
void csc_pop(RuntimeBlockInfo* block)
{
x86_Label* end=x86e->CreateLabel(false,8);
x86_Label* try_dyn=x86e->CreateLabel(false,8);
//static guess
x86_Label* stc_hit=x86e->CreateLabel(false,8);
x86e->Emit(op_cmp32,ECX,&block->csc_RetCache);
x86e->Emit(op_je,stc_hit);
//if !eq
{
//if (cached) goto dyn
x86e->Emit(op_cmp32,&block->csc_RetCache,-1);
x86e->Emit(op_jne,try_dyn);
//else, do cache
x86e->Emit(op_mov32,&block->csc_RetCache,ECX);
}
x86e->MarkLabel(stc_hit);
x86e->Emit(op_add32,&ret_stc,1);
if (csc_mode==1)
x86e->Emit(op_rol32,&csc_sidx,1);
x86e->Emit(op_jmp,end);
x86e->MarkLabel(try_dyn);
if (csc_mode==0)
{
//csc !
//x86e->Emit(op_int3);
x86e->Emit(op_mov32,ECX,GetRegPtr(reg_pc_dyn));
x86e->Emit(op_mov32,EAX,ECX);
x86e->Emit(op_shr32,EAX,CSC_SHIFT);
x86e->Emit(op_and32,EAX,CSC_SIZE-1);
x86e->Emit(op_cmp32,x86_mrm(EAX,sib_scale_8,x86_ptr(csc)),ECX);
}
else if (csc_mode==1)
{
//x86e->Emit(op_int3);
x86e->Emit(op_mov32,ECX,GetRegPtr(reg_pc_dyn));
x86e->Emit(op_bsr32,EAX,&csc_sidx);
x86e->Emit(op_rol32,&csc_sidx,1);
x86e->Emit(op_mov32,EDX,x86_mrm(EAX,sib_scale_8,x86_ptr(csc)));
x86e->Emit(op_cmp32,EDX,ECX);
}
x86e->Emit(op_jne,end);
x86e->Emit(op_add32,&ret_hit,1);
//x86e->Emit(op_jmp,end);
x86e->MarkLabel(end);
x86e->Emit(op_add32,&ret_all,1);
}
void DYNACALL PrintBlock(u32 pc)
{
printf("block: 0x%08X\n",pc);
for (int i=0;i<16;i++)
printf("%08X ",r[i]);
printf("\n");
}
u32* GetRegPtr(u32 reg)
{
return Sh4_int_GetRegisterPtr((Sh4RegType)reg);
}
u32 cvld;
u32 rdmt[6];
extern u32 memops_t,memops_l;
void CheckBlock(RuntimeBlockInfo* block,x86_ptr_imm place)
{
s32 sz=block->sh4_code_size;
u32 sa=block->addr;
while(sz>0)
{
void* ptr=(void*)GetMemPtr(sa,4);
if (ptr)
{
if (sz==2)
x86e->Emit(op_cmp16,ptr,*(u16*)ptr);
else
x86e->Emit(op_cmp32,ptr,*(u32*)ptr);
x86e->Emit(op_jne,place);
}
sz-=4;
sa+=4;
}
}
void ngen_Compile(RuntimeBlockInfo* block,bool force_checks, bool reset, bool staging,bool optimise)
{
//initialise stuff
DetectCpuFeatures();
((DynaRBI*)block)->reloc_info=0;
//Setup emitter
x86e = new x86_block();
x86e->Init(0,0);
x86e->x86_buff=(u8*)emit_GetCCPtr();
x86e->x86_size=emit_FreeSpace();
x86e->do_realloc=false;
block->code=(DynarecCodeEntry*)emit_GetCCPtr();
x86e->Emit(op_add32,&memops_t,block->memops);
x86e->Emit(op_add32,&memops_l,block->linkedmemops);
//run register allocator
reg.DoAlloc(block,alloc_regs,xmm_alloc_regs);
//block header//
//block invl. checks
x86e->Emit(op_mov32,ECX,block->addr);
CheckBlock(block,force_checks?x86_ptr_imm(ngen_blockcheckfail):x86_ptr_imm(ngen_blockcheckfail2));
//Scheduler
x86_Label* no_up=x86e->CreateLabel(false,8);
x86e->Emit(op_sub32,&cycle_counter,block->guest_cycles);
x86e->Emit(op_jns,no_up);
{
x86e->Emit(op_call,x86_ptr_imm(intc_sched));
}
x86e->MarkLabel(no_up);
//stating counter
if (staging) x86e->Emit(op_sub32,&block->staging_runs,1);
//profiler
if (prof.enable || 1)
x86e->Emit(op_add32,&block->runs,1);
if (prof.enable)
{
if (force_checks)
x86e->Emit(op_add32,&prof.counters.blkrun.force_check,1);
x86e->Emit(op_add32,&prof.counters.blkrun.cycles[block->guest_cycles],1);
}
for (size_t i=0;i<block->oplist.size();i++)
{
shil_opcode* op=&block->oplist[i];
u32 opcd_start=x86e->opcode_count;
if (prof.enable)
{
x86e->Emit(op_add32,&prof.counters.shil.executed[op->op],1);
}
op->host_offs=x86e->x86_indx;
if (prof.enable)
{
set<int> reg_wt;
set<int> reg_rd;
for (int z=0;op->rd.is_reg() && z<op->rd.count();z++)
reg_wt.insert(op->rd._reg+z);
for (int z=0;op->rd2.is_reg() && z<op->rd2.count();z++)
reg_wt.insert(op->rd2._reg+z);
for (int z=0;op->rs1.is_reg() && z<op->rs1.count();z++)
reg_rd.insert(op->rs1._reg+z);
for (int z=0;op->rs2.is_reg() && z<op->rs2.count();z++)
reg_rd.insert(op->rs2._reg+z);
for (int z=0;op->rs3.is_reg() && z<op->rs3.count();z++)
reg_rd.insert(op->rs3._reg+z);
set<int>::iterator iter=reg_wt.begin();
while( iter != reg_wt.end() )
{
if (reg_rd.count(*iter))
{
reg_rd.erase(*iter);
x86e->Emit(op_add32, &prof.counters.ralloc.reg_rw[*iter], 1);
}
else
{
x86e->Emit(op_add32, &prof.counters.ralloc.reg_w[*iter], 1);
}
++iter;
}
iter=reg_rd.begin();
while( iter != reg_rd.end() )
{
x86e->Emit(op_add32,&prof.counters.ralloc.reg_r[*iter],1);
++iter;
}
}
reg.OpBegin(op,i);
ngen_opcode(block,op,x86e,staging,optimise);
if (prof.enable) x86e->Emit(op_add32,&prof.counters.shil.host_ops[op->op],x86e->opcode_count-opcd_start);
reg.OpEnd(op);
}
block->relink_offset=x86e->x86_indx;
block->relink_data=0;
x86e->x86_indx+=block->Relink();
x86e->Generate();
block->host_code_size=x86e->x86_indx;
block->host_opcodes=x86e->opcode_count;
emit_Skip(block->host_code_size);
delete x86e;
x86e=0;
}
u32 DynaRBI::Relink()
{
x86_block* x86e=new x86_block();
x86e->Init(0,0);
x86e->x86_buff=(u8*)code + relink_offset;
x86e->x86_size=512;
x86e->do_realloc=false;
if (BlockType==BET_StaticCall || BlockType==BET_DynamicCall)
{
//csc_push(this);
}
switch(BlockType)
{
case BET_Cond_0:
case BET_Cond_1:
{
x86e->Emit(op_cmp32,GetRegPtr(has_jcond?reg_pc_dyn:reg_sr_T),BlockType&1);
x86_Label* noBranch=x86e->CreateLabel(0,8);
x86e->Emit(op_jne,noBranch);
{
//branch block
if (pBranchBlock)
x86e->Emit(op_jmp,x86_ptr_imm(pBranchBlock->code));
else
x86e->Emit(op_call,x86_ptr_imm(ngen_LinkBlock_cond_Branch_stub));
}
x86e->MarkLabel(noBranch);
{
//no branch block
if (pNextBlock)
x86e->Emit(op_jmp,x86_ptr_imm(pNextBlock->code));
else
x86e->Emit(op_call,x86_ptr_imm(ngen_LinkBlock_cond_Next_stub));
}
}
break;
case BET_DynamicRet:
{
//csc_pop(this);
}
case BET_DynamicCall:
case BET_DynamicJump:
{
if (relink_data==0)
{
if (pBranchBlock)
{
x86e->Emit(op_cmp32,GetRegPtr(reg_pc_dyn),pBranchBlock->addr);
x86e->Emit(op_je,x86_ptr_imm(pBranchBlock->code));
x86e->Emit(op_call,x86_ptr_imm(ngen_LinkBlock_Generic_stub));
}
else
{
x86e->Emit(op_cmp32,GetRegPtr(reg_pc_dyn),0xFABCDECF);
x86e->Emit(op_call,x86_ptr_imm(ngen_LinkBlock_Generic_stub));
x86e->Emit(op_je,x86_ptr_imm(ngen_LinkBlock_Generic_stub));
}
}
else
{
verify(pBranchBlock==0);
x86e->Emit(op_mov32,ECX,GetRegPtr(reg_pc_dyn));
x86e->Emit(op_jmp,x86_ptr_imm(loop_no_update));
}
}
break;
case BET_StaticCall:
case BET_StaticJump:
{
if (pBranchBlock)
x86e->Emit(op_jmp,x86_ptr_imm(pBranchBlock->code));
else
x86e->Emit(op_call,x86_ptr_imm(ngen_LinkBlock_Generic_stub));
break;
}
case BET_StaticIntr:
case BET_DynamicIntr:
if (BlockType==BET_StaticIntr)
{
x86e->Emit(op_mov32,&next_pc,NextBlock);
}
else
{
x86e->Emit(op_mov32,EAX,GetRegPtr(reg_pc_dyn));
x86e->Emit(op_mov32,&next_pc,EAX);
}
x86e->Emit(op_call,x86_ptr_imm(UpdateINTC));
x86e->Emit(op_mov32,ECX,&next_pc);
x86e->Emit(op_jmp,x86_ptr_imm(loop_no_update));
break;
}
x86e->Generate();
return x86e->x86_indx;
}
/*
//10
R S8 B,M
R S16 B,M
R I32 B,M
R F32 B,M
R F32v2 B{,M}
//13
W I8 B,M
W I16 B,M
W I32 B,S,M
W F32 B,S,M
W F32v2 B,S{,M}
*/
extern u8* virt_ram_base;
#include "hw/sh4/sh4_mmr.h"
enum mem_op_type
{
SZ_8,
SZ_16,
SZ_32I,
SZ_32F,
SZ_64F,
};
void gen_hande(u32 w, u32 sz, u32 mode)
{
static const x86_ptr_imm rwm[2][5]=
{
{x86_ptr_imm(&_vmem_ReadMem8SX32),x86_ptr_imm(&_vmem_ReadMem16SX32),x86_ptr_imm(&ReadMem32),x86_ptr_imm(&ReadMem32),x86_ptr_imm(&ReadMem64),},
{x86_ptr_imm(&WriteMem8),x86_ptr_imm(&WriteMem16),x86_ptr_imm(&WriteMem32),x86_ptr_imm(&WriteMem32),x86_ptr_imm(&WriteMem64),}
};
static const x86_opcode_class opcl_i[2][3]=
{
{op_movsx8to32,op_movsx16to32,op_mov32},
{op_mov8,op_mov16,op_mov32}
};
u32 si=x86e->x86_indx;
if (mode==0)
{
//Buffer
x86e->Emit(op_mov32,EAX,ECX);
x86e->Emit(op_and32,ECX,0x1FFFFFFF);
x86_mrm_t buff=x86_mrm(ECX,virt_ram_base);
x86_mrm_t buff4=x86_mrm(ECX,virt_ram_base+4);
if (sz==SZ_8 || sz==SZ_16 || sz==SZ_32I)
{
if (w==0)
x86e->Emit(opcl_i[w][sz],sz==SZ_8?AL:sz==SZ_16?AX:EAX,buff);
else
x86e->Emit(opcl_i[w][sz],buff,sz==SZ_8?DL:sz==SZ_16?DX:EDX);
}
else
{
if (w==0)
{
x86e->Emit(op_movss,XMM0,buff);
if (sz==SZ_64F)
x86e->Emit(op_movss,XMM1,buff4);
}
else
{
x86e->Emit(op_movss,buff,XMM0);
if (sz==SZ_64F)
x86e->Emit(op_movss,buff4,XMM1);
}
}
}
else if (mode==1)
{
//SQ
verify(w==1);
x86e->Emit(op_mov32,EAX,ECX);
x86e->Emit(op_and32,ECX,0x3f);
x86e->Emit(op_shr32,EAX,26);
x86e->Emit(op_cmp32,EAX,0x38);
x86_Label* l=x86e->CreateLabel(false,8);
x86e->Emit(op_je,l);
x86e->Emit(op_int3);
x86e->MarkLabel(l);
if (sz==SZ_32I)
x86e->Emit(op_mov32,x86_mrm(ECX,sq_both),EDX);
else if (sz==SZ_32F || sz==SZ_64F)
{
x86e->Emit(op_movss,x86_mrm(ECX,sq_both),XMM0);
if (sz==SZ_64F)
x86e->Emit(op_movss,x86_mrm(ECX,sq_both+4),XMM1);
}
else
{
die("Can't happen\n");
}
}
else
{
//General
if ((sz==SZ_32F || sz==SZ_64F) && w==1)
{
if (sz==SZ_32F)
{
x86e->Emit(op_movd_xmm_to_r32,EDX,XMM0);
}
else
{
x86e->Emit(op_sub32,ESP,8);
x86e->Emit(op_movss,x86_mrm(ESP,x86_ptr::create(+4)),XMM1);
x86e->Emit(op_movss,x86_mrm(ESP,x86_ptr::create(-0)),XMM0);
}
}
x86e->Emit(op_call,rwm[w][sz]);
if ((sz==SZ_32F || sz==SZ_64F) && w==0)
{
x86e->Emit(op_movd_xmm_from_r32,XMM0,EAX);
if (sz==SZ_64F)
{
x86e->Emit(op_movd_xmm_from_r32,XMM1,EDX);
}
}
}
x86e->Emit(op_ret);
emit_Skip(x86e->x86_indx-si);
}
unat mem_code_base=0;
unat mem_code_end=0;
void* mem_code[3][2][5];
void ngen_init()
{
//Setup emitter
x86e = new x86_block();
x86e->Init(0,0);
x86e->x86_buff=(u8*)emit_GetCCPtr();
x86e->x86_size=emit_FreeSpace();
x86e->do_realloc=false;
mem_code_base=(unat)emit_GetCCPtr();
for (int sz=0;sz<5;sz++)
{
for (int w=0;w<2;w++)
{
for (int m=0;m<3;m++)
{
if (m==1 && (sz<=SZ_16 || w==0))
continue;
mem_code[m][w][sz]=emit_GetCCPtr();
gen_hande(w,sz,m);
}
}
}
mem_code_end=(unat)emit_GetCCPtr();
x86e->Generate();
delete x86e;
emit_SetBaseAddr();
}
void ngen_ResetBlocks()
{
}
void ngen_GetFeatures(ngen_features* dst)
{
dst->InterpreterFallback=false;
dst->OnlyDynamicEnds=false;
}
RuntimeBlockInfo* ngen_AllocateBlock()
{
return new DynaRBI();
}
bool ngen_Rewrite(unat& addr,unat retadr,unat acc)
{
if (addr>=mem_code_base && addr<mem_code_end)
{
u32 ca=*(u32*)(retadr-4)+retadr;
x86e = new x86_block();
x86e->Init(0,0);
x86e->x86_buff=(u8*)retadr-5;
x86e->x86_size=emit_FreeSpace();
x86e->do_realloc=false;
for (int i=0;i<5;i++)
{
for (int w=0;w<2;w++)
{
if ((u32)mem_code[0][w][i]==ca)
{
//found !
if ((acc >> 26) == 0x38) //sq ?
{
verify(w == 1);
x86e->Emit(op_call, x86_ptr_imm(mem_code[1][w][i]));
}
else
{
x86e->Emit(op_call, x86_ptr_imm(mem_code[2][w][i]));
}
x86e->Generate();
delete x86e;
addr=retadr-5;
//printf("Patched: %08X for access @ %08X\n",addr,acc);
return true;
}
}
}
die("Failed to match the code :(\n");
return false;
}
else
{
return false;
}
}
#endif

1529
core/rec-x86/win86_il.cpp Normal file

File diff suppressed because it is too large Load Diff

128
core/rec-x86/win86_ngen.cpp Normal file
View File

@ -0,0 +1,128 @@
#include "win86_ngen.h"
#if HOST_OS == OS_WINDOWS
naked void ngen_LinkBlock_Shared_stub()
{
__asm
{
pop ecx;
sub ecx,5;
call rdv_LinkBlock;
jmp eax;
}
}
naked void ngen_LinkBlock_cond_Next_stub()
{
__asm
{
mov edx,0
jmp ngen_LinkBlock_Shared_stub;
}
}
naked void ngen_LinkBlock_cond_Branch_stub()
{
__asm
{
mov edx,1
jmp ngen_LinkBlock_Shared_stub;
}
}
const u32 offs=offsetof(Sh4RCB,cntx.jdyn);
naked void ngen_LinkBlock_Generic_stub()
{
__asm
{
mov edx,[p_sh4rcb];
add edx,[offs];
mov edx,[edx];
jmp ngen_LinkBlock_Shared_stub;
}
}
naked void ngen_FailedToFindBlock_()
{
__asm
{
mov ecx,esi;
call rdv_FailedToFindBlock;
jmp eax;
}
}
void (*ngen_FailedToFindBlock)()=&ngen_FailedToFindBlock_;
naked void ngen_mainloop(void* cntx)
{
__asm
{
push esi;
push edi;
push ebp;
push ebx;
mov ecx,0xA0000000;
mov [cycle_counter],SH4_TIMESLICE;
mov [loop_no_update],offset no_update;
mov [intc_sched],offset intc_sched_offs;
mov eax,0;
//next_pc _MUST_ be on ecx
no_update:
mov esi,ecx;
call bm_GetCode
jmp eax;
intc_sched_offs:
add [cycle_counter],SH4_TIMESLICE;
call UpdateSystem;
cmp eax,0;
jnz do_iter;
ret;
do_iter:
pop ecx;
call rdv_DoInterrupts;
mov ecx,eax;
// cmp byte ptr [sh4_int_bCpuRun],0;
// jz cleanup;
jmp no_update;
cleanup:
pop ebx;
pop ebp;
pop edi;
pop esi;
ret;
}
}
naked void DYNACALL ngen_blockcheckfail(u32 addr)
{
__asm
{
call rdv_BlockCheckFail;
jmp eax;
}
}
naked void DYNACALL ngen_blockcheckfail2(u32 addr)
{
__asm
{
int 3;
call rdv_BlockCheckFail;
jmp eax;
}
}
#else
u32 gas_offs=offsetof(Sh4RCB,cntx.jdyn);
void (*ngen_FailedToFindBlock)()=&ngen_FailedToFindBlock_;
#endif

59
core/rec-x86/win86_ngen.h Normal file
View File

@ -0,0 +1,59 @@
#include "types.h"
#include "hw/sh4/sh4_opcode_list.h"
#include "hw/sh4/modules/ccn.h"
#include "hw/sh4/sh4_interrupts.h"
#include "hw/sh4/sh4_core.h"
#include "hw/sh4/dyna/ngen.h"
#include "hw/sh4/sh4_mem.h"
#include "hw/sh4/dyna/regalloc.h"
#include "emitter/x86_emitter.h"
#include "profiler/profiler.h"
#include "oslib/oslib.h"
void ngen_opcode(RuntimeBlockInfo* block, shil_opcode* op,x86_block* x86e, bool staging, bool optimise);
#if BUILD_COMPILER == COMPILER_GCC
extern "C"
{
#endif
void ngen_LinkBlock_Generic_stub();
void ngen_LinkBlock_cond_Next_stub();
void ngen_LinkBlock_cond_Branch_stub();
void ngen_FailedToFindBlock_();
void ngen_mainloop(void* p);
void DYNACALL ngen_blockcheckfail(u32 addr);
void DYNACALL ngen_blockcheckfail2(u32 addr);
#if BUILD_COMPILER == COMPILER_GCC
}
#endif
extern x86_block* x86e;
extern u32 cycle_counter;
extern void* loop_no_update;
extern void* intc_sched;
extern bool sse_1;
extern bool sse_2;
extern bool sse_3;
extern bool ssse_3;
extern bool mmx;
struct x86_reg_alloc: RegAlloc<x86_reg,x86_reg>
{
virtual void Preload(u32 reg,x86_reg nreg);
virtual void Writeback(u32 reg,x86_reg nreg);
virtual void Preload_FPU(u32 reg,x86_reg nreg);
virtual void Writeback_FPU(u32 reg,x86_reg nreg);
void FreezeXMM();
void ThawXMM();
};
extern x86_reg_alloc reg;

View File

@ -13,8 +13,10 @@
#if BUILD_COMPILER==COMPILER_VC
#define DYNACALL __fastcall
#define DYNACALL_T
#else
#define DYNACALL __attribute__((fastcall))
#define DYNACALL
#define DYNACALL_T __attribute__((fastcall))
#endif
#if BUILD_COMPILER==COMPILER_VC

View File

@ -20,7 +20,7 @@ LD=${CC}
MFLAGS := -m32
#-marm -march=armv7-a -mtune=cortex-a9 -mfpu=neon -mfloat-abi=softfp -funroll-loops
ASFLAGS := -m32
ASFLAGS := -32
#-march=armv7-a -mfpu=neon -mfloat-abi=softfp
LDFLAGS := -m32 -g -Wl,-Map,$(notdir $@).map,--gc-sections -Wl,-O3 -Wl,--sort-common
@ -81,7 +81,6 @@ PACKAGE_FILES=$(EXECUTABLE_STRIPPED) default.gcw0.desktop icon-32.png
all: $(CPPFILES) $(EXECUTABLE) $(EXECUTABLE_STRIPPED)
$(EXECUTABLE): $(OBJECTS)
echo $(RZDCY_FILES)
$(CXX) $(MFLAGS) $(EXTRAFLAGS) $(LDFLAGS) $(OBJECTS) $(LIBS) -o $@
$(EXECUTABLE_STRIPPED): $(EXECUTABLE)