#include "ta.h"
#include "ta_ctx.h"
#include "hw/holly/holly_intc.h"

/*
    Threaded TA Implementation

    Main thread -> ta data -> stored (tactx)

    Render/TA thread -> ta data -> draw lists -> draw
*/

// 32-byte, 32-byte-aligned container used to copy one TA data block with a
// single assignment. The element type depends on the host CPU.
#if HOST_CPU == CPU_X86
#include <xmmintrin.h>
struct simd256_t
{
    alignas(32) __m128 data[2];
};
#elif HOST_CPU == CPU_ARM && defined(__ARM_NEON__)
#include <arm_neon.h>
struct simd256_t
{
    alignas(32) uint64x2_t data[2];
};
#else
struct simd256_t
{
    alignas(32) u64 data[4];
};
#endif

/*
    Partial TA parsing for in emu-side handling. Properly tracks 32/64 byte state, and
    Calls a helper function every time something important (SOL/EOL, etc) happens

    Uses a state machine, with 3 bits state and 8 bits (PT:OBJ[6:2]) input
*/

// Parser states. The numeric values are used as the top 3 bits of the ta_fsm
// index, so the order of the enumerators must not change.
// The "->" comments list the states reachable through the ta_fsm table as
// built by fill_fsm(); ta_handle_cmd() may additionally redirect the state
// for entries flagged with "proc".
enum ta_state
{
                   // -> TAS_NS (list start / end is resolved by ta_handle_cmd)
    TAS_NS,        // no list open

                   // -> TAS_NS, TAS_PLV32, TAS_PLHV32, TAS_PLV64, TAS_PLHV64
    TAS_PLV32,     // polygon list PMV, V32

                   // -> TAS_NS, TAS_PLV32, TAS_PLHV32, TAS_PLV64, TAS_PLHV64, TAS_PLV64_H
    TAS_PLV64,     // polygon list PMV, V64

                   // -> TAS_NS, TAS_MLV64, TAS_MLV64_H
    TAS_MLV64,     // mv list

                   // -> TAS_PLV32
    TAS_PLHV32,    // polygon list PMV<64> 2nd half -> V32

                   // -> TAS_PLV64
    TAS_PLHV64,    // polygon list PMV<64> 2nd half -> V64

                   // -> TAS_PLV64
    TAS_PLV64_H,   // polygon list V64 2nd half

                   // -> TAS_MLV64
    TAS_MLV64_H,   // mv list, 64 bit half
};

/* state | PTEOS | OBJ -> next, proc*/

// Table layout: index = (state << 8) | (ParaType << 5) | obj_ctrl[6:2].
// Entry value: low bits = next state, bit 4 (0x10) = "proc" (ta_handle_cmd
// must run), bit 7 (0x80) = entry never filled, i.e. invalid input.
#define ta_cur_state  (ta_fsm[2048])
u8 ta_fsm[2049];    //[2048] stores the current state
// List type of the list currently being received; 7 means "no list open".
u32 ta_fsm_cl=7;

// Fills the transition table entries for state `st`.
//   pt   : parameter type to fill, or -1 for all 8 parameter types
//   obj  : obj_ctrl[6:2] value to fill, or -1 for all 32 values
//   next : state stored in the entry
//   proc : when non-zero, flags the entry so that ta_handle_cmd() is invoked
//   sz64 : currently unused (its encoding is commented out below)
// Each entry may only be filled once: verify() checks that the slot still
// holds the "invalid" marker written by fill_fsm().
static void fill_fsm(ta_state st, s8 pt, s8 obj, ta_state next, u32 proc=0, u32 sz64=0)
{
    for (int i=0;i<8;i++)
    {
        // A specific parameter type was requested: do a single iteration for it
        if (pt != -1) i=pt;

        for (int j=0;j<32;j++)
        {
            // A specific obj value was requested: do a single iteration for it
            if (obj != -1) j=obj;
            verify(ta_fsm[(st<<8)+(i<<5)+j]==(0x80+st));
            ta_fsm[(st<<8)+(i<<5)+j]=next | proc*16 /*| sz64*32*/;
            if (obj != -1) break;
        }

        if (pt != -1) break;
    }
}

// Builds the complete transition table. Run once at load time through the
// ol_fillfsm OnLoad object.
static void fill_fsm()
{
    //initialise to invalid
    // (0x80 marker plus the owning state in the low bits)
    for (int i=0;i<2048;i++)
        ta_fsm[i]=(i>>8) | 0x80;

    for (int i=0;i<8;i++)
    {
        switch(i)
        {
        case ParamType_End_Of_List:
            {
                //End of list -> process it !
                fill_fsm(TAS_NS,ParamType_End_Of_List,-1,TAS_NS,1);
                fill_fsm(TAS_PLV32,ParamType_End_Of_List,-1,TAS_NS,1);
                fill_fsm(TAS_PLV64,ParamType_End_Of_List,-1,TAS_NS,1);
                fill_fsm(TAS_MLV64,ParamType_End_Of_List,-1,TAS_NS,1);
            }
            break;

        case ParamType_User_Tile_Clip:
        case ParamType_Object_List_Set:
            {
                //32B commands, no state change
                fill_fsm(TAS_NS,i,-1,TAS_NS);
                fill_fsm(TAS_PLV32,i,-1,TAS_PLV32);
                fill_fsm(TAS_PLV64,i,-1,TAS_PLV64);
                fill_fsm(TAS_MLV64,i,-1,TAS_MLV64);
            }
            break;

        case 3:
        case 6:
            //invalid
            break;

        case ParamType_Polygon_or_Modifier_Volume:
            {
                //right .. its complicated alirte

                // The next state depends on the polygon type looked up from
                // obj_ctrl: whether the global parameter itself is 64 bytes
                // (p64) and whether its vertices are 64 bytes (v64).
                for (int k=0;k<32;k++)
                {
                    u32 uid = TaTypeLut::instance().table[k * 4];
                    u32 vt=uid & 0x7f;

                    bool v64 = vt == 5 || vt == 6 || vt == 11 || vt == 12 || vt == 13 || vt == 14;
                    bool p64 = uid >> 31;

                    ta_state nxt = p64 ? (v64 ? TAS_PLHV64 : TAS_PLHV32) :
                                         (v64 ? TAS_PLV64  : TAS_PLV32 ) ;

                    fill_fsm(TAS_PLV32,i,k,nxt,0,p64);
                    fill_fsm(TAS_PLV64,i,k,nxt,0,p64);
                }

                //32B command, no state change
                fill_fsm(TAS_MLV64,i,-1,TAS_MLV64);

                //process and start list
                fill_fsm(TAS_NS,i,-1,TAS_NS,1);
            }
            break;

        case ParamType_Sprite:
            {
                //SPR: 32B -> expect 64B data (PL*)
                fill_fsm(TAS_PLV32,i,-1,TAS_PLV64);
                fill_fsm(TAS_PLV64,i,-1,TAS_PLV64);

                //invalid for ML

                //process and start list
                fill_fsm(TAS_NS,i,-1,TAS_NS,1);
            }
            break;

        case ParamType_Vertex_Parameter:
            {
                //VTX: 32 B -> Expect more of it
                fill_fsm(TAS_PLV32,i,-1,TAS_PLV32,0,0);

                //VTX: 64 B -> Expect next 32B
                fill_fsm(TAS_PLV64,i,-1,TAS_PLV64_H,0,1);

                //MVO: 64B -> expect next 32B
                fill_fsm(TAS_MLV64,i,-1,TAS_MLV64_H,0,1);

                //invalid for NS
            }
            break;
        }
    }
    //?

    // Second halves of 64-byte parameters: any input returns to the base state
    fill_fsm(TAS_PLHV32,-1,-1,TAS_PLV32); //64 PH -> expect V32
    fill_fsm(TAS_PLHV64,-1,-1,TAS_PLV64); //64 PH -> expect V64
    fill_fsm(TAS_PLV64_H,-1,-1,TAS_PLV64); //64 VH -> expect V64
    fill_fsm(TAS_MLV64_H,-1,-1,TAS_MLV64); //64 MH -> expect M64
}

// End-of-list interrupt to raise, indexed by list type (0..4).
static const HollyInterruptID ListEndInterrupt[5]=
{
    holly_OPAQUE,
    holly_OPAQUEMOD,
    holly_TRANS,
    holly_TRANSMOD,
    holly_PUNCHTHRU
};

// Slow path of the parser, called by ta_thd_data32_i() when the table entry
// has any of its upper four bits set ("proc" flag or invalid marker).
//   trans : raw table entry; bits 7..4 carry the flags, bits 2..0 the state.
// Handles list start (polygon / sprite) and list end, then recomputes
// ta_cur_state from the resolved state and the parameter just stored.
static NOINLINE void DYNACALL ta_handle_cmd(u32 trans)
{
    // thd_data was already advanced past the block: step back 32 bytes to
    // reach the parameter that triggered this call.
    Ta_Dma* dat=(Ta_Dma*)(ta_tad.thd_data-32);

    u32 cmd = trans>>4;
    trans&=7;
    //printf("Process state transition: %d || %d -> %d \n",cmd,state_in,trans&0xF);

    if (cmd == 8)
    {
        // Entry still carries the 0x80 "invalid" marker: input is ignored
        //printf("Invalid TA Param %d\n", dat->pcw.ParaType);
    }
    else
    {
        if (dat->pcw.ParaType == ParamType_End_Of_List)
        {
            if (ta_fsm_cl==7)
                ta_fsm_cl=dat->pcw.ListType;
            //printf("List %d ended\n",ta_fsm_cl);

            // ListType comes straight from guest data and may be any 3-bit
            // value, while only list types 0..4 have an interrupt. Guard the
            // lookup so that an invalid type cannot read past the table.
            if (ta_fsm_cl < sizeof(ListEndInterrupt) / sizeof(ListEndInterrupt[0]))
                asic_RaiseInterrupt(ListEndInterrupt[ta_fsm_cl]);
            else
                INFO_LOG(PVR, "Warning: End of list with invalid list type %u. No interrupt raised", ta_fsm_cl);
            ta_fsm_cl=7;
            trans=TAS_NS;
        }
        else if (dat->pcw.ParaType == ParamType_Polygon_or_Modifier_Volume)
        {
            if (ta_fsm_cl==7)
                ta_fsm_cl=dat->pcw.ListType;

            if (!IsModVolList(ta_fsm_cl))
                trans=TAS_PLV32;
            else
                trans=TAS_MLV64;
        }
        else if (dat->pcw.ParaType == ParamType_Sprite)
        {
            if (ta_fsm_cl==7)
                ta_fsm_cl=dat->pcw.ListType;

            //verify(!IsModVolList(ta_fsm_cl)); // fails with "F1 World Grand Prix for Dreamcast" and only with dynarec...
            trans=TAS_PLV32;
        }
        else
        {
            die("WTF ?\n");
        }
    }

    // Redo the table lookup from the resolved state; only the low nibble
    // (next state) is kept.
    u32 state_in = (trans<<8) | (dat->pcw.ParaType<<5) | (dat->pcw.obj_ctrl>>2)%32;
    ta_cur_state=(ta_state)(ta_fsm[state_in]&0xF);
    verify(ta_cur_state<=7);
}

// Builds the transition table at load time.
static OnLoad ol_fillfsm(&fill_fsm);

// Selects the current TA context, continues its data buffer and resets the
// parser to "no list open".
void ta_vtx_ListCont()
{
    SetCurrentTARC(TA_CURRENT_CTX);
    ta_tad.Continue();

    ta_cur_state=TAS_NS;
    ta_fsm_cl = 7;
}

// Selects the current TA context, clears its partial data and resets the
// parser to "no list open".
void ta_vtx_ListInit()
{
    SetCurrentTARC(TA_CURRENT_CTX);
    ta_tad.ClearPartial();

    ta_cur_state=TAS_NS;
    ta_fsm_cl = 7;
}

// Resets the parser state only; ta_fsm_cl and the TA context are untouched.
void ta_vtx_SoftReset()
{
    ta_cur_state=TAS_NS;
}

// Stores one 32-byte TA block into the current context buffer and advances
// the parser state machine.
//   data : the 32-byte block; its start is read as the PCW.
// The block is dropped (with a log message) when no context is set or when
// the buffer is full; in the latter case holly_MATR_NOMEM is raised.
static INLINE void DYNACALL ta_thd_data32_i(const simd256_t *data)
{
    if (ta_ctx == NULL)
    {
        INFO_LOG(PVR, "Warning: data sent to TA prior to ListInit. Ignored");
        return;
    }
    if (ta_tad.End() - ta_tad.thd_root >= TA_DATA_SIZE)
    {
        INFO_LOG(PVR, "Warning: TA data buffer overflow");
        asic_RaiseInterrupt(holly_MATR_NOMEM);
        return;
    }

    simd256_t* dst = (simd256_t*)ta_tad.thd_data;

    // First byte is PCW
    PCW pcw = *(const PCW*)data;

    // Copy the TA data
    *dst = *data;

    ta_tad.thd_data += 32;

    //process TA state
    u32 state_in = (ta_cur_state << 8) | (pcw.ParaType << 5) | ((pcw.obj_ctrl >> 2) & 31);

    u32 trans = ta_fsm[state_in];
    ta_cur_state = (ta_state)trans;
    // Any of the upper four bits set: "proc" flag or invalid entry
    bool must_handle = trans & 0xF0;

    if (likely(!must_handle))
    {
        return;
    }
    else
    {
        // ta_handle_cmd() overwrites ta_cur_state with the resolved state
        ta_handle_cmd(trans);
    }
}

// Feeds a single 32-byte block to the TA parser.
void DYNACALL ta_vtx_data32(const SQBuffer *data)
{
    ta_thd_data32_i((const simd256_t *)data);
}

// Feeds `size` consecutive SQBuffer entries, starting at `data`, to the TA
// parser. The main loop is unrolled four times; the second loop handles the
// remaining 0..3 entries.
void ta_vtx_data(const SQBuffer *data, u32 size)
{
    while (size >= 4)
    {
        ta_thd_data32_i((const simd256_t *)data);
        data++;

        ta_thd_data32_i((const simd256_t *)data);
        data++;

        ta_thd_data32_i((const simd256_t *)data);
        data++;

        ta_thd_data32_i((const simd256_t *)data);
        data++;

        size -= 4;
    }

    while (size > 0)
    {
        ta_thd_data32_i((const simd256_t *)data);
        data++;
        size--;
    }
}