diff --git a/Source/Android/PluginRSP/PluginRSP.vcxproj b/Source/Android/PluginRSP/PluginRSP.vcxproj new file mode 100644 index 000000000..81f9dc27c --- /dev/null +++ b/Source/Android/PluginRSP/PluginRSP.vcxproj @@ -0,0 +1,67 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + + {B685BB34-D700-4FCC-8503-9B6AA1A0C95D} + Win32Proj + RSPhle + + + DynamicLibrary + + + + + + + + + RSP-HLE + RSP-HLE_d + $(SolutionDir)Plugin\RSP\ + $(SolutionDir)Plugin64\RSP\ + + + + NotUsing + + + + + + + + + + + + + + + + + Create + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/Source/Android/PluginRSP/PluginRSP.vcxproj.filters b/Source/Android/PluginRSP/PluginRSP.vcxproj.filters new file mode 100644 index 000000000..4b42503c2 --- /dev/null +++ b/Source/Android/PluginRSP/PluginRSP.vcxproj.filters @@ -0,0 +1,90 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + \ No newline at end of file diff --git a/Source/Android/PluginRSP/Rsp.h b/Source/Android/PluginRSP/Rsp.h new file mode 100644 index 000000000..3faa9e143 --- /dev/null +++ b/Source/Android/PluginRSP/Rsp.h @@ -0,0 +1,60 @@ +/**************************************************************************** +* * +* Project64 - A Nintendo 64 emulator. * +* http://www.pj64-emu.com/ * +* Copyright (C) 2016 Project64. 
All rights reserved. * +* * +* License: * +* GNU/GPLv2 http://www.gnu.org/licenses/gpl-2.0.html * +* * +****************************************************************************/ +#pragma once + +#include "Common.h" +/* RSP_INFO: pointers into emulator memory and registers plus callbacks; InitiateRSP (declared at the end of this header) receives one by value. */ +typedef struct +{ + void * hInst; + int32_t MemoryBswaped; /* If this is set to TRUE, then the memory has been pre- + byte-swapped on a dword (32-bit) boundary */ + uint8_t * RDRAM; + uint8_t * DMEM; + uint8_t * IMEM; + + uint32_t * MI_INTR_REG; + + uint32_t * SP_MEM_ADDR_REG; + uint32_t * SP_DRAM_ADDR_REG; + uint32_t * SP_RD_LEN_REG; + uint32_t * SP_WR_LEN_REG; + uint32_t * SP_STATUS_REG; + uint32_t * SP_DMA_FULL_REG; + uint32_t * SP_DMA_BUSY_REG; + uint32_t * SP_PC_REG; + uint32_t * SP_SEMAPHORE_REG; + + uint32_t * DPC_START_REG; + uint32_t * DPC_END_REG; + uint32_t * DPC_CURRENT_REG; + uint32_t * DPC_STATUS_REG; + uint32_t * DPC_CLOCK_REG; + uint32_t * DPC_BUFBUSY_REG; + uint32_t * DPC_PIPEBUSY_REG; + uint32_t * DPC_TMEM_REG; + + void(*CheckInterrupts)(void); + void(*ProcessDList)(void); + void(*ProcessAList)(void); + void(*ProcessRdpList)(void); + void(*ShowCFB)(void); +} RSP_INFO; +/* Plugin entry points. EXPORT and PLUGIN_INFO are not defined in this header (presumably they come from Common.h - confirm). */ +EXPORT void CloseDLL(void); +EXPORT void DllAbout(void * hParent); +EXPORT uint32_t DoRspCycles(uint32_t Cycles); +EXPORT void GetDllInfo(PLUGIN_INFO * PluginInfo); +EXPORT void InitiateRSP(RSP_INFO Rsp_Info, uint32_t * CycleCount); +EXPORT void RomOpen(void); +EXPORT void RomClosed(void); +EXPORT void DllConfig(void * hWnd); +EXPORT void PluginLoaded(void); diff --git a/Source/Android/PluginRSP/Version.h b/Source/Android/PluginRSP/Version.h new file mode 100644 index 000000000..368692156 --- /dev/null +++ b/Source/Android/PluginRSP/Version.h @@ -0,0 +1,41 @@ +/**************************************************************************** +* * +* Project64 - A Nintendo 64 emulator. * +* http://www.pj64-emu.com/ * +* Copyright (C) 2016 Project64. All rights reserved. 
* +* * +* License: * +* GNU/GPLv2 http://www.gnu.org/licenses/gpl-2.0.html * +* * +****************************************************************************/ +#define STRINGIZE2(s) #s +#define STRINGIZE(s) STRINGIZE2(s) +/* Numeric version components. STRINGIZE goes through STRINGIZE2 so that these macros are expanded before being turned into string literals. */ +#define VERSION_MAJOR 1 +#define VERSION_MINOR 0 +#define VERSION_REVISION 0 +#define VERSION_BUILD 9999 +/* Version-resource values. With the numbers above VER_FILE_VERSION_STR concatenates to "1.0.0.9999". NOTE(review): the last line of VER_FILE_VERSION_STR ends in a backslash, so the definition continues onto the following blank line; harmless as written, but anything placed on that line would become part of the macro. */ +#define VER_FILE_DESCRIPTION_STR "RSP HLE Plugin" +#define VER_FILE_VERSION VERSION_MAJOR, VERSION_MINOR, VERSION_REVISION, VERSION_BUILD +#define VER_FILE_VERSION_STR STRINGIZE(VERSION_MAJOR) \ + "." STRINGIZE(VERSION_MINOR) \ + "." STRINGIZE(VERSION_REVISION) \ + "." STRINGIZE(VERSION_BUILD) \ + +#define VER_PRODUCTNAME_STR "RSP-HLE" +#define VER_PRODUCT_VERSION VER_FILE_VERSION +#define VER_PRODUCT_VERSION_STR VER_FILE_VERSION_STR +#define VER_ORIGINAL_FILENAME_STR VER_PRODUCTNAME_STR ".dll" +#define VER_INTERNAL_NAME_STR VER_PRODUCTNAME_STR +#define VER_COPYRIGHT_STR "Copyright (C) 2016" +/* VER_VER_DEBUG is VS_FF_DEBUG when _DEBUG is defined and 0 otherwise; it is used for VER_FILEFLAGS below. */ +#ifdef _DEBUG +#define VER_VER_DEBUG VS_FF_DEBUG +#else +#define VER_VER_DEBUG 0 +#endif +/* VOS_NT_WINDOWS32, VFT_DLL and VS_FF_DEBUG are not defined in this header. */ +#define VER_FILEOS VOS_NT_WINDOWS32 +#define VER_FILEFLAGS VER_VER_DEBUG +#define VER_FILETYPE VFT_DLL diff --git a/Source/Android/PluginRSP/alist.cpp b/Source/Android/PluginRSP/alist.cpp new file mode 100644 index 000000000..36cc56eda --- /dev/null +++ b/Source/Android/PluginRSP/alist.cpp @@ -0,0 +1,963 @@ +/**************************************************************************** +* * +* Project64 - A Nintendo 64 emulator. * +* http://www.pj64-emu.com/ * +* Copyright (C) 2016 Project64. All rights reserved. 
* +* * +* License: * +* GNU/GPLv2 http://www.gnu.org/licenses/gpl-2.0.html * +* * +****************************************************************************/ +#include "stdafx.h" +#include +#include "alist.h" +#include "arithmetics.h" +#include "audio.h" +#include "mem.h" +/* Ramp state advanced by ramp_step(): value moves by step on each call until it reaches target. ramp_step() returns value >> 16, so the low 16 bits act as a fraction. */ +struct ramp_t +{ + int64_t value; + int64_t step; + int64_t target; +}; + +/* local functions (swap exchanges the two pointers that a and b point at) */ +static void swap(int16_t **a, int16_t **b) +{ + int16_t* tmp = *b; + *b = *a; + *a = tmp; +} +/* Pointer to 16-bit sample number (pos ^ S) of the alist buffer; S is defined outside this file. */ +static int16_t* sample(CHle * hle, unsigned pos) +{ + return (int16_t*)hle->alist_buffer() + (pos ^ S); +} +/* Byte pointer into the alist buffer for offset dmem, obtained through the external u8() helper. */ +static uint8_t* alist_u8(CHle * hle, uint16_t dmem) +{ + return u8(hle->alist_buffer(), dmem); +} +/* 16-bit pointer into the alist buffer for offset dmem, obtained through the external u16() helper. */ +static int16_t* alist_s16(CHle * hle, uint16_t dmem) +{ + return (int16_t*)u16(hle->alist_buffer(), dmem); +} +/* Add (src * gain) >> 15 to *dst and clamp the result with clamp_s16(). */ +static void sample_mix(int16_t* dst, int16_t src, int16_t gain) +{ + *dst = clamp_s16(*dst + ((src * gain) >> 15)); +} +/* Mix src into the first n destinations, each with its own gain. */ +static void alist_envmix_mix(size_t n, int16_t** dst, const int16_t* gains, int16_t src) +{ + size_t i; + + for (i = 0; i < n; ++i) + { + sample_mix(dst[i], src, gains[i]); + } +} +/* Advance the ramp by one step. A step <= 0 is treated as a downward ramp, otherwise upward; once value reaches or passes target it is pinned to target and step is zeroed. Returns value >> 16. */ +static int16_t ramp_step(struct ramp_t* ramp) +{ + bool target_reached; + + ramp->value += ramp->step; + + target_reached = (ramp->step <= 0) ? 
(ramp->value <= ramp->target) : (ramp->value >= ramp->target); + + if (target_reached) + { + ramp->value = ramp->target; + ramp->step = 0; + } + + return (int16_t)(ramp->value >> 16); +} +/* global functions */ +/* alist_process: run every two-word command of the task's audio list through abi[], indexed by bits 24-30 of the first word; an index >= abi_size only produces a warning. NOTE(review): the loop compares alist != alist_end while consuming two words per pass, so it presumably relies on the list size being a multiple of 8 bytes - confirm. */ +void alist_process(CHle * hle, const acmd_callback_t abi[], unsigned int abi_size) +{ + uint32_t w1, w2; + unsigned int acmd; + + const uint32_t *alist = dram_u32(hle, *dmem_u32(hle, TASK_DATA_PTR)); + const uint32_t *const alist_end = alist + (*dmem_u32(hle, TASK_DATA_SIZE) >> 2); + + while (alist != alist_end) + { + w1 = *(alist++); + w2 = *(alist++); + + acmd = (w1 >> 24) & 0x7f; + + if (acmd < abi_size) + { + (*abi[acmd])(hle, w1, w2); + } + else + { + hle->WarnMessage("Invalid ABI command %u", acmd); + } + } +} +/* Resolve a segmented address: bits 24-29 of so select the segment, the low 24 bits are the offset. An out-of-range segment is reported and the bare offset is returned. */ +uint32_t alist_get_address(CHle * hle, uint32_t so, const uint32_t *segments, size_t n) +{ + uint8_t segment = (so >> 24) & 0x3f; + uint32_t offset = (so & 0xffffff); + + if (segment >= n) + { + hle->WarnMessage("Invalid segment %u", segment); + return offset; + } + + return segments[segment] + offset; +} +/* Store the low 24 bits of so as the base of the segment selected by bits 24-29; an out-of-range segment is reported and nothing is written. */ +void alist_set_address(CHle * hle, uint32_t so, uint32_t *segments, size_t n) +{ + uint8_t segment = (so >> 24) & 0x3f; + uint32_t offset = (so & 0xffffff); + + if (segment >= n) + { + hle->WarnMessage("Invalid segment %u", segment); + return; + } + + segments[segment] = offset; +} +/* Zero count bytes of the alist buffer starting at dmem, one byte at a time through alist_u8(). */ +void alist_clear(CHle * hle, uint16_t dmem, uint16_t count) +{ + while (count != 0) + { + *alist_u8(hle, dmem++) = 0; + --count; + } +} +/* Copy from DRAM at address into the alist buffer at dmem after masking dmem to 4 bytes and address to 8 bytes and passing count through align(count, 8). */ +void alist_load(CHle * hle, uint16_t dmem, uint32_t address, uint16_t count) +{ + /* enforce DMA alignment constraints */ + dmem &= ~3; + address &= ~7; + count = align(count, 8); + memcpy(hle->alist_buffer() + dmem, hle->dram() + address, count); +} +/* Reverse of alist_load: copy from the alist buffer at dmem to DRAM at address, with the same masking and alignment. */ +void alist_save(CHle * hle, uint16_t dmem, uint32_t address, uint16_t count) +{ + /* enforce DMA alignment constraints */ + dmem &= ~3; + address &= ~7; + count = align(count, 8); + memcpy(hle->dram() + address, hle->alist_buffer() + dmem, count); +} +/* alist_move: copy count bytes inside the alist buffer from dmemi to dmemo, one byte at a time in ascending order. */ +void 
alist_move(CHle * hle, uint16_t dmemo, uint16_t dmemi, uint16_t count) +{ + while (count != 0) + { + *alist_u8(hle, dmemo++) = *alist_u8(hle, dmemi++); + --count; + } +} +/* Copy count 16-bit samples from dmemi to dmemo, advancing the source by 4 bytes and the destination by 2 bytes per sample. */ +void alist_copy_every_other_sample(CHle * hle, uint16_t dmemo, uint16_t dmemi, uint16_t count) +{ + while (count != 0) + { + *alist_s16(hle, dmemo) = *alist_s16(hle, dmemi); + dmemo += 2; + dmemi += 4; + --count; + } +} +/* Write count consecutive copies of the 128 bytes found at dmemi to dmemo; the source bytes are copied to a local buffer first. */ +void alist_repeat64(CHle * hle, uint16_t dmemo, uint16_t dmemi, uint8_t count) +{ + uint16_t buffer[64]; + memcpy(buffer, hle->alist_buffer() + dmemi, 128); + + while(count != 0) + { + memcpy(hle->alist_buffer() + dmemo, buffer, 128); + dmemo += 128; + --count; + } +} +/* Copy count blocks of block_size bytes in 0x20-byte pieces. Both loops are do/while, so one 0x20-byte piece is copied even when count or block_size is 0, and each block is effectively rounded up to a multiple of 0x20. */ +void alist_copy_blocks(CHle * hle, uint16_t dmemo, uint16_t dmemi, uint16_t block_size, uint8_t count) +{ + int block_left = count; + + do + { + int bytes_left = block_size; + do + { + memcpy(hle->alist_buffer() + dmemo, hle->alist_buffer() + dmemi, 0x20); + bytes_left -= 0x20; + + dmemi += 0x20; + dmemo += 0x20; + } while(bytes_left > 0); + + --block_left; + } while(block_left > 0); +} +/* Interleave the 16-bit streams at left and right into dmemo. Each of the count >> 2 iterations reads two samples from each side and writes four; the store order depends on M64P_BIG_ENDIAN. */ +void alist_interleave(CHle * hle, uint16_t dmemo, uint16_t left, uint16_t right, uint16_t count) +{ + uint16_t *dst = (uint16_t*)(hle->alist_buffer() + dmemo); + const uint16_t *srcL = (uint16_t*)(hle->alist_buffer() + left); + const uint16_t *srcR = (uint16_t*)(hle->alist_buffer() + right); + + count >>= 2; + + while (count != 0) + { + uint16_t l1 = *(srcL++); + uint16_t l2 = *(srcL++); + uint16_t r1 = *(srcR++); + uint16_t r2 = *(srcR++); + +#if M64P_BIG_ENDIAN + *(dst++) = l1; + *(dst++) = r1; + *(dst++) = l2; + *(dst++) = r2; +#else + *(dst++) = r2; + *(dst++) = l2; + *(dst++) = r1; + *(dst++) = l1; +#endif + --count; + } +} +/* Envelope mixer whose ramp steps are recomputed for every group of 8 samples from exp_seq, which is multiplied by exp_rates each time. With init set the state is built from vol/target/rate; otherwise it is reloaded, together with wet and dry, from the 80-byte record at address in DRAM. The record is written back on exit. aux selects 4 output buffers instead of 2. NOTE(review): on the init path save_buffer is only partly filled (entries 0, 2 and 4-19) yet all 80 bytes are copied to DRAM. */ +void alist_envmix_exp( CHle * hle, bool init, bool aux, uint16_t dmem_dl, uint16_t dmem_dr, uint16_t dmem_wl, uint16_t dmem_wr, uint16_t dmemi, uint16_t count, int16_t dry, int16_t wet, const int16_t *vol, const int16_t *target, const int32_t *rate, uint32_t address) +{ + size_t n = (aux) ? 
4 : 2; + + const int16_t* const in = (int16_t*)(hle->alist_buffer() + dmemi); + int16_t* const dl = (int16_t*)(hle->alist_buffer() + dmem_dl); + int16_t* const dr = (int16_t*)(hle->alist_buffer() + dmem_dr); + int16_t* const wl = (int16_t*)(hle->alist_buffer() + dmem_wl); + int16_t* const wr = (int16_t*)(hle->alist_buffer() + dmem_wr); + + struct ramp_t ramps[2]; + int32_t exp_seq[2]; + int32_t exp_rates[2]; + + uint32_t ptr = 0; + int x, y; + short save_buffer[40]; + + if (init) + { + ramps[0].value = (vol[0] << 16); + ramps[1].value = (vol[1] << 16); + ramps[0].target = (target[0] << 16); + ramps[1].target = (target[1] << 16); + exp_rates[0] = rate[0]; + exp_rates[1] = rate[1]; + exp_seq[0] = (vol[0] * rate[0]); + exp_seq[1] = (vol[1] * rate[1]); + } + else + { + memcpy((uint8_t *)save_buffer, (hle->dram() + address), 80); + wet = *(int16_t *)(save_buffer + 0); /* 0-1 */ + dry = *(int16_t *)(save_buffer + 2); /* 2-3 */ + ramps[0].target = *(int32_t *)(save_buffer + 4); /* 4-5 */ + ramps[1].target = *(int32_t *)(save_buffer + 6); /* 6-7 */ + exp_rates[0] = *(int32_t *)(save_buffer + 8); /* 8-9 (save_buffer is a 16bit pointer) */ + exp_rates[1] = *(int32_t *)(save_buffer + 10); /* 10-11 */ + exp_seq[0] = *(int32_t *)(save_buffer + 12); /* 12-13 */ + exp_seq[1] = *(int32_t *)(save_buffer + 14); /* 14-15 */ + ramps[0].value = *(int32_t *)(save_buffer + 16); /* 16-17 */ + ramps[1].value = *(int32_t *)(save_buffer + 18); /* 18-19 */ + } + + /* initial step, which ensures ramp.step != 0 iff ramp.value != ramp.target */ + ramps[0].step = ramps[0].target - ramps[0].value; + ramps[1].step = ramps[1].target - ramps[1].value; + + for (y = 0; y < count; y += 16) + { + if (ramps[0].step != 0) + { + exp_seq[0] = ((int64_t)exp_seq[0] * (int64_t)exp_rates[0]) >> 16; + ramps[0].step = (exp_seq[0] - ramps[0].value) >> 3; + } + + if (ramps[1].step != 0) + { + exp_seq[1] = ((int64_t)exp_seq[1] * (int64_t)exp_rates[1]) >> 16; + ramps[1].step = (exp_seq[1] - ramps[1].value) >> 3; + } + + for (x 
= 0; x < 8; ++x) + { + int16_t gains[4]; + int16_t* buffers[4]; + int16_t l_vol = ramp_step(&ramps[0]); + int16_t r_vol = ramp_step(&ramps[1]); + + buffers[0] = dl + (ptr^S); + buffers[1] = dr + (ptr^S); + buffers[2] = wl + (ptr^S); + buffers[3] = wr + (ptr^S); + + gains[0] = clamp_s16((l_vol * dry + 0x4000) >> 15); + gains[1] = clamp_s16((r_vol * dry + 0x4000) >> 15); + gains[2] = clamp_s16((l_vol * wet + 0x4000) >> 15); + gains[3] = clamp_s16((r_vol * wet + 0x4000) >> 15); + + alist_envmix_mix(n, buffers, gains, in[ptr^S]); + ++ptr; + } + } + + *(int16_t *)(save_buffer + 0) = wet; /* 0-1 */ + *(int16_t *)(save_buffer + 2) = dry; /* 2-3 */ + *(int32_t *)(save_buffer + 4) = (int32_t)ramps[0].target; /* 4-5 */ + *(int32_t *)(save_buffer + 6) = (int32_t)ramps[1].target; /* 6-7 */ + *(int32_t *)(save_buffer + 8) = exp_rates[0]; /* 8-9 (save_buffer is a 16bit pointer) */ + *(int32_t *)(save_buffer + 10) = exp_rates[1]; /* 10-11 */ + *(int32_t *)(save_buffer + 12) = exp_seq[0]; /* 12-13 */ + *(int32_t *)(save_buffer + 14) = exp_seq[1]; /* 14-15 */ + *(int32_t *)(save_buffer + 16) = (int32_t)ramps[0].value; /* 16-17 */ + *(int32_t *)(save_buffer + 18) = (int32_t)ramps[1].value; /* 18-19 */ + memcpy(hle->dram() + address, (uint8_t *)save_buffer, 80); +} +/* Envelope mixer whose volumes ramp by a fixed step (rate / 8 on init). State is either built from vol/target/rate or reloaded from the 80-byte record at address, and is written back on exit; entries 12-15 of the record are not used by this variant. Processes count >> 1 samples; aux selects 4 output buffers instead of 2. */ +void alist_envmix_ge( CHle * hle, bool init, bool aux, uint16_t dmem_dl, uint16_t dmem_dr, uint16_t dmem_wl, uint16_t dmem_wr, uint16_t dmemi, uint16_t count, int16_t dry, int16_t wet, const int16_t *vol, const int16_t *target, const int32_t *rate, uint32_t address) +{ + unsigned k; + size_t n = (aux) ? 
4 : 2; + + const int16_t* const in = (int16_t*)(hle->alist_buffer() + dmemi); + int16_t* const dl = (int16_t*)(hle->alist_buffer() + dmem_dl); + int16_t* const dr = (int16_t*)(hle->alist_buffer() + dmem_dr); + int16_t* const wl = (int16_t*)(hle->alist_buffer() + dmem_wl); + int16_t* const wr = (int16_t*)(hle->alist_buffer() + dmem_wr); + + struct ramp_t ramps[2]; + short save_buffer[40]; + + if (init) + { + ramps[0].value = (vol[0] << 16); + ramps[1].value = (vol[1] << 16); + ramps[0].target = (target[0] << 16); + ramps[1].target = (target[1] << 16); + ramps[0].step = rate[0] / 8; + ramps[1].step = rate[1] / 8; + } + else + { + memcpy((uint8_t *)save_buffer, (hle->dram() + address), 80); + wet = *(int16_t *)(save_buffer + 0); /* 0-1 */ + dry = *(int16_t *)(save_buffer + 2); /* 2-3 */ + ramps[0].target = *(int32_t *)(save_buffer + 4); /* 4-5 */ + ramps[1].target = *(int32_t *)(save_buffer + 6); /* 6-7 */ + ramps[0].step = *(int32_t *)(save_buffer + 8); /* 8-9 (save_buffer is a 16bit pointer) */ + ramps[1].step = *(int32_t *)(save_buffer + 10); /* 10-11 */ + /* *(int32_t *)(save_buffer + 12);*/ /* 12-13 */ + /* *(int32_t *)(save_buffer + 14);*/ /* 14-15 */ + ramps[0].value = *(int32_t *)(save_buffer + 16); /* 16-17 */ + ramps[1].value = *(int32_t *)(save_buffer + 18); /* 18-19 */ + } + + count >>= 1; + for (k = 0; k < count; ++k) + { + int16_t gains[4]; + int16_t* buffers[4]; + int16_t l_vol = ramp_step(&ramps[0]); + int16_t r_vol = ramp_step(&ramps[1]); + + buffers[0] = dl + (k^S); + buffers[1] = dr + (k^S); + buffers[2] = wl + (k^S); + buffers[3] = wr + (k^S); + + gains[0] = clamp_s16((l_vol * dry + 0x4000) >> 15); + gains[1] = clamp_s16((r_vol * dry + 0x4000) >> 15); + gains[2] = clamp_s16((l_vol * wet + 0x4000) >> 15); + gains[3] = clamp_s16((r_vol * wet + 0x4000) >> 15); + + alist_envmix_mix(n, buffers, gains, in[k^S]); + } + + *(int16_t *)(save_buffer + 0) = wet; /* 0-1 */ + *(int16_t *)(save_buffer + 2) = dry; /* 2-3 */ + *(int32_t *)(save_buffer + 4) = 
(int32_t)ramps[0].target; /* 4-5 */ + *(int32_t *)(save_buffer + 6) = (int32_t)ramps[1].target; /* 6-7 */ + *(int32_t *)(save_buffer + 8) = (int32_t)ramps[0].step; /* 8-9 (save_buffer is a 16bit pointer) */ + *(int32_t *)(save_buffer + 10) = (int32_t)ramps[1].step; /* 10-11 */ + /**(int32_t *)(save_buffer + 12);*/ /* 12-13 */ + /**(int32_t *)(save_buffer + 14);*/ /* 14-15 */ + *(int32_t *)(save_buffer + 16) = (int32_t)ramps[0].value; /* 16-17 */ + *(int32_t *)(save_buffer + 18) = (int32_t)ramps[1].value; /* 18-19 */ + memcpy(hle->dram() + address, (uint8_t *)save_buffer, 80); +} +/* Envelope mixer with a fixed step (rate / 8 on init) that always mixes into all four buffers. Unlike the other variants, the saved targets are stored as 16-bit values (target >> 16) and shifted back up by 16 when reloaded. Processes count >> 1 samples. */ +void alist_envmix_lin(CHle * hle, bool init, uint16_t dmem_dl, uint16_t dmem_dr, uint16_t dmem_wl, uint16_t dmem_wr, uint16_t dmemi, uint16_t count, int16_t dry, int16_t wet, const int16_t *vol, const int16_t *target, const int32_t *rate, uint32_t address) +{ + size_t k; + struct ramp_t ramps[2]; + int16_t save_buffer[40]; + + const int16_t * const in = (int16_t*)(hle->alist_buffer() + dmemi); + int16_t* const dl = (int16_t*)(hle->alist_buffer() + dmem_dl); + int16_t* const dr = (int16_t*)(hle->alist_buffer() + dmem_dr); + int16_t* const wl = (int16_t*)(hle->alist_buffer() + dmem_wl); + int16_t* const wr = (int16_t*)(hle->alist_buffer() + dmem_wr); + + if (init) + { + ramps[0].step = rate[0] / 8; + ramps[0].value = (vol[0] << 16); + ramps[0].target = (target[0] << 16); + ramps[1].step = rate[1] / 8; + ramps[1].value = (vol[1] << 16); + ramps[1].target = (target[1] << 16); + } + else + { + memcpy((uint8_t *)save_buffer, hle->dram() + address, 80); + wet = *(int16_t *)(save_buffer + 0); /* 0-1 */ + dry = *(int16_t *)(save_buffer + 2); /* 2-3 */ + ramps[0].target = *(int16_t *)(save_buffer + 4) << 16; /* 4-5 */ + ramps[1].target = *(int16_t *)(save_buffer + 6) << 16; /* 6-7 */ + ramps[0].step = *(int32_t *)(save_buffer + 8); /* 8-9 (save_buffer is a 16bit pointer) */ + ramps[1].step = *(int32_t *)(save_buffer + 10); /* 10-11 */ + ramps[0].value = *(int32_t *)(save_buffer + 16); /* 16-17 */ 
+ ramps[1].value = *(int32_t *)(save_buffer + 18); /* 18-19 */ + } + + count >>= 1; + for(k = 0; k < count; ++k) + { + int16_t gains[4]; + int16_t* buffers[4]; + int16_t l_vol = ramp_step(&ramps[0]); + int16_t r_vol = ramp_step(&ramps[1]); + + buffers[0] = dl + (k^S); + buffers[1] = dr + (k^S); + buffers[2] = wl + (k^S); + buffers[3] = wr + (k^S); + + gains[0] = clamp_s16((l_vol * dry + 0x4000) >> 15); + gains[1] = clamp_s16((r_vol * dry + 0x4000) >> 15); + gains[2] = clamp_s16((l_vol * wet + 0x4000) >> 15); + gains[3] = clamp_s16((r_vol * wet + 0x4000) >> 15); + + alist_envmix_mix(4, buffers, gains, in[k^S]); + } + + *(int16_t *)(save_buffer + 0) = wet; /* 0-1 */ + *(int16_t *)(save_buffer + 2) = dry; /* 2-3 */ + *(int16_t *)(save_buffer + 4) = (int16_t)(ramps[0].target >> 16); /* 4-5 */ + *(int16_t *)(save_buffer + 6) = (int16_t)(ramps[1].target >> 16); /* 6-7 */ + *(int32_t *)(save_buffer + 8) = (int32_t)ramps[0].step; /* 8-9 (save_buffer is a 16bit pointer) */ + *(int32_t *)(save_buffer + 10) = (int32_t)ramps[1].step; /* 10-11 */ + *(int32_t *)(save_buffer + 16) = (int32_t)ramps[0].value; /* 16-17 */ + *(int32_t *)(save_buffer + 18) = (int32_t)ramps[1].value; /* 18-19 */ + memcpy(hle->dram() + address, (uint8_t *)save_buffer, 80); +} +/* Mix count >> 1 samples from dmemi into dmemo with one gain, using sample_mix() for each sample. */ +void alist_mix(CHle * hle, uint16_t dmemo, uint16_t dmemi, uint16_t count, int16_t gain) +{ + int16_t *dst = (int16_t*)(hle->alist_buffer() + dmemo); + const int16_t *src = (int16_t*)(hle->alist_buffer() + dmemi); + + count >>= 1; + + while (count != 0) + { + sample_mix(dst, *src, gain); + + ++dst; + ++src; + --count; + } +} +/* Envelope mixer driven by three caller-owned envelope values, which are advanced by env_steps after every group of 8 samples (env_values is modified in place). The scaled samples are XORed with xors[0..3] before being added to the dry/wet buffers; swap_wet_LR exchanges the two wet destinations. NOTE(review): the products mix an int32_t and a uint32_t operand, so they are computed as unsigned - confirm that is intended for negative samples. */ +void alist_envmix_nead( CHle * hle, bool swap_wet_LR, uint16_t dmem_dl, uint16_t dmem_dr, uint16_t dmem_wl, uint16_t dmem_wr, uint16_t dmemi, unsigned count, uint16_t *env_values, uint16_t *env_steps, const int16_t *xors) +{ + int16_t *in = (int16_t*)(hle->alist_buffer() + dmemi); + int16_t *dl = (int16_t*)(hle->alist_buffer() + dmem_dl); + int16_t *dr = (int16_t*)(hle->alist_buffer() + dmem_dr); + 
int16_t *wl = (int16_t*)(hle->alist_buffer() + dmem_wl); + int16_t *wr = (int16_t*)(hle->alist_buffer() + dmem_wr); + + /* make sure count is a multiple of 8 */ + count = align(count, 8); + + if (swap_wet_LR) + { + swap(&wl, &wr); + } + + while (count != 0) + { + size_t i; + for(i = 0; i < 8; ++i) + { + int16_t l = (((int32_t)in[i^S] * (uint32_t)env_values[0]) >> 16) ^ xors[0]; + int16_t r = (((int32_t)in[i^S] * (uint32_t)env_values[1]) >> 16) ^ xors[1]; + int16_t l2 = (((int32_t)l * (uint32_t)env_values[2]) >> 16) ^ xors[2]; + int16_t r2 = (((int32_t)r * (uint32_t)env_values[2]) >> 16) ^ xors[3]; + + dl[i^S] = clamp_s16(dl[i^S] + l); + dr[i^S] = clamp_s16(dr[i^S] + r); + wl[i^S] = clamp_s16(wl[i^S] + l2); + wr[i^S] = clamp_s16(wr[i^S] + r2); + } + + env_values[0] += env_steps[0]; + env_values[1] += env_steps[1]; + env_values[2] += env_steps[2]; + + dl += 8; + dr += 8; + wl += 8; + wr += 8; + in += 8; + count -= 8; + } +} +/* Add count >> 1 samples from dmemi onto dmemo, clamping each sum with clamp_s16(). */ +void alist_add(CHle * hle, uint16_t dmemo, uint16_t dmemi, uint16_t count) +{ + int16_t *dst = (int16_t*)(hle->alist_buffer() + dmemo); + const int16_t *src = (int16_t*)(hle->alist_buffer() + dmemi); + + count >>= 1; + + while(count != 0) + { + *dst = clamp_s16(*dst + *src); + + ++dst; + ++src; + --count; + } +} +/* Scale count >> 1 samples in place: multiply by gain, shift right by 4, clamp. */ +void alist_multQ44(CHle * hle, uint16_t dmem, uint16_t count, int8_t gain) +{ + int16_t *dst = (int16_t*)(hle->alist_buffer() + dmem); + + count >>= 1; + + while(count != 0) + { + *dst = clamp_s16(*dst * gain >> 4); + + ++dst; + --count; + } +} +/* Zero the four samples starting at pos and reset the pitch accumulator. */ +static void alist_resample_reset(CHle * hle, uint16_t pos, uint32_t* pitch_accu) +{ + unsigned k; + + for (k = 0; k < 4; ++k) + { + *sample(hle, pos + k) = 0; + } + *pitch_accu = 0; +} +/* Load four samples and then the pitch accumulator from five consecutive 16-bit DRAM values starting at address. */ +static void alist_resample_load(CHle * hle, uint32_t address, uint16_t pos, uint32_t* pitch_accu) +{ + *sample(hle, pos + 0) = *dram_u16(hle, address + 0); + *sample(hle, pos + 1) = *dram_u16(hle, address + 2); + *sample(hle, pos + 2) = *dram_u16(hle, address + 4); + *sample(hle, pos + 3) = *dram_u16(hle, 
address + 6); + + *pitch_accu = *dram_u16(hle, address + 8); +} +/* Counterpart of alist_resample_load: store the four samples at pos and the pitch accumulator to DRAM in the same layout. */ +static void alist_resample_save(CHle * hle, uint32_t address, uint16_t pos, uint32_t pitch_accu) +{ + *dram_u16(hle, address + 0) = *sample(hle, pos + 0); + *dram_u16(hle, address + 2) = *sample(hle, pos + 1); + *dram_u16(hle, address + 4) = *sample(hle, pos + 2); + *dram_u16(hle, address + 6) = *sample(hle, pos + 3); + + *dram_u16(hle, address + 8) = pitch_accu; +} +/* 4-tap resampler. Offsets and count are converted to 16-bit sample units. The four samples just before the input (ipos - 4) are zeroed on init or loaded from DRAM at address; the four samples at the final input position and the 16-bit fractional position are saved back at the end. The top 6 bits of the fraction select the group of four RESAMPLE_LUT coefficients. flag2 only triggers a warning. */ +void alist_resample( CHle * hle, bool init, bool flag2, uint16_t dmemo, uint16_t dmemi, uint16_t count, uint32_t pitch /* Q16.16 */, uint32_t address) +{ + uint32_t pitch_accu; + + uint16_t ipos = dmemi >> 1; + uint16_t opos = dmemo >> 1; + count >>= 1; + ipos -= 4; + + if (flag2) + { + hle->WarnMessage("alist_resample: flag2 is not implemented"); + } + + if (init) + { + alist_resample_reset(hle, ipos, &pitch_accu); + } + else + { + alist_resample_load(hle, address, ipos, &pitch_accu); + } + + while (count != 0) + { + const int16_t* lut = RESAMPLE_LUT + ((pitch_accu & 0xfc00) >> 8); + + *sample(hle, opos++) = clamp_s16( + ((*sample(hle, ipos) * lut[0]) >> 15) + + ((*sample(hle, ipos + 1) * lut[1]) >> 15) + + ((*sample(hle, ipos + 2) * lut[2]) >> 15) + + ((*sample(hle, ipos + 3) * lut[3]) >> 15)); + + pitch_accu += pitch; + ipos += (pitch_accu >> 16); + pitch_accu &= 0xffff; + --count; + } + + alist_resample_save(hle, address, ipos, pitch_accu); +} +/* Resample without filtering: each output sample is a copy of the input sample at the current position, which advances by pitch (16 fractional bits) per output. Nothing is saved; pitch_accu is a by-value parameter. */ +void alist_resample_zoh(CHle * hle, uint16_t dmemo, uint16_t dmemi, uint16_t count, uint32_t pitch, uint32_t pitch_accu) +{ + uint16_t ipos = dmemi >> 1; + uint16_t opos = dmemo >> 1; + count >>= 1; + + while(count != 0) + { + *sample(hle, opos++) = *sample(hle, ipos); + + pitch_accu += pitch; + ipos += (pitch_accu >> 16); + pitch_accu &= 0xffff; + --count; + } +} +/* Frame unpacker: fills dst from the alist buffer at dmemi and returns the number of input bytes it consumed. */ +typedef unsigned int(*adpcm_predict_frame_t)(CHle * hle, int16_t* dst, uint16_t dmemi, unsigned char scale); +/* Unpack 8 bytes into 16 values, two 4-bit fields per byte, through adpcm_predict_sample(); returns 8. */ +static unsigned int adpcm_predict_frame_4bits(CHle * hle, int16_t* dst, uint16_t dmemi, unsigned char scale) +{ + 
unsigned int i; + unsigned int rshift = (scale < 12) ? 12 - scale : 0; + + for (i = 0; i < 8; ++i) + { + uint8_t byte = *alist_u8(hle, dmemi++); + + *(dst++) = adpcm_predict_sample(byte, 0xf0, 8, rshift); + *(dst++) = adpcm_predict_sample(byte, 0x0f, 12, rshift); + } + + return 8; +} +/* Unpack 4 bytes into 16 values, four 2-bit fields per byte, through adpcm_predict_sample(); returns 4. */ +static unsigned int adpcm_predict_frame_2bits(CHle * hle, int16_t* dst, uint16_t dmemi, unsigned char scale) +{ + unsigned int i; + unsigned int rshift = (scale < 14) ? 14 - scale : 0; + + for (i = 0; i < 4; ++i) + { + uint8_t byte = *alist_u8(hle, dmemi++); + + *(dst++) = adpcm_predict_sample(byte, 0xc0, 8, rshift); + *(dst++) = adpcm_predict_sample(byte, 0x30, 10, rshift); + *(dst++) = adpcm_predict_sample(byte, 0x0c, 12, rshift); + *(dst++) = adpcm_predict_sample(byte, 0x03, 14, rshift); + } + + return 4; +} +/* ADPCM decoder. The previous frame is zeroed on init, otherwise loaded from loop_address (loop) or last_frame_address; it is written to dmemo first. Then, for every 32 bytes of count, one header byte is read (high nibble = scale, low nibble = codebook entry of 16 coefficients), a frame is unpacked and 16 decoded samples are written. The last decoded frame is stored at last_frame_address. count must be a multiple of 32 (asserted). */ +void alist_adpcm( CHle * hle, bool init, bool loop, bool two_bit_per_sample, uint16_t dmemo, uint16_t dmemi, uint16_t count, const int16_t* codebook, uint32_t loop_address, uint32_t last_frame_address) +{ + int16_t last_frame[16]; + size_t i; + + adpcm_predict_frame_t predict_frame = (two_bit_per_sample) + ? adpcm_predict_frame_2bits + : adpcm_predict_frame_4bits; + + assert((count & 0x1f) == 0); + + if (init) + { + memset(last_frame, 0, 16 * sizeof(last_frame[0])); + } + else + { + dram_load_u16(hle, (uint16_t*)last_frame, (loop) ? 
loop_address : last_frame_address, 16); + } + + for (i = 0; i < 16; ++i, dmemo += 2) + { + *alist_s16(hle, dmemo) = last_frame[i]; + } + while (count != 0) + { + int16_t frame[16]; + uint8_t code = *alist_u8(hle, dmemi++); + unsigned char scale = (code & 0xf0) >> 4; + const int16_t* const cb_entry = codebook + ((code & 0xf) << 4); + + dmemi += predict_frame(hle, frame, dmemi, scale); + + adpcm_compute_residuals(last_frame, frame, cb_entry, last_frame + 14, 8); + adpcm_compute_residuals(last_frame + 8, frame + 8, cb_entry, last_frame + 6, 8); + + for (i = 0; i < 16; ++i, dmemo += 2) + { + *alist_s16(hle, dmemo) = last_frame[i]; + } + count -= 32; + } + + dram_store_u16(hle, (uint16_t*)last_frame, last_frame_address, 16); +} +/* 8-tap filter over the alist buffer at dmem. The two coefficient tables in DRAM (lut_address[0] and [1]) are first averaged and both overwritten with the result. in1 starts at the 16 bytes stored at address in DRAM and in2 at the input; the last 8 input samples are written back to address, and the output replaces the input. NOTE(review): outbuff holds 0x3c0 samples and nothing here bounds count against it - presumably callers guarantee that; confirm. */ +void alist_filter( CHle * hle, uint16_t dmem,uint16_t count, uint32_t address, const uint32_t* lut_address) +{ + int x; + int16_t outbuff[0x3c0]; + int16_t *outp = outbuff; + + int16_t* const lutt6 = (int16_t*)(hle->dram() + lut_address[0]); + int16_t* const lutt5 = (int16_t*)(hle->dram() + lut_address[1]); + + int16_t* in1 = (int16_t*)(hle->dram() + address); + int16_t* in2 = (int16_t*)(hle->alist_buffer() + dmem); + + for (x = 0; x < 8; ++x) + { + int32_t v = (lutt5[x] + lutt6[x]) >> 1; + lutt5[x] = lutt6[x] = v; + } + + for (x = 0; x < count; x += 16) + { + int32_t v[8]; + + v[1] = in1[0] * lutt6[6]; + v[1] += in1[3] * lutt6[7]; + v[1] += in1[2] * lutt6[4]; + v[1] += in1[5] * lutt6[5]; + v[1] += in1[4] * lutt6[2]; + v[1] += in1[7] * lutt6[3]; + v[1] += in1[6] * lutt6[0]; + v[1] += in2[1] * lutt6[1]; /* 1 */ + + v[0] = in1[3] * lutt6[6]; + v[0] += in1[2] * lutt6[7]; + v[0] += in1[5] * lutt6[4]; + v[0] += in1[4] * lutt6[5]; + v[0] += in1[7] * lutt6[2]; + v[0] += in1[6] * lutt6[3]; + v[0] += in2[1] * lutt6[0]; + v[0] += in2[0] * lutt6[1]; + + v[3] = in1[2] * lutt6[6]; + v[3] += in1[5] * lutt6[7]; + v[3] += in1[4] * lutt6[4]; + v[3] += in1[7] * lutt6[5]; + v[3] += in1[6] * lutt6[2]; + v[3] += in2[1] * lutt6[3]; + v[3] += in2[0] * lutt6[0]; + 
v[3] += in2[3] * lutt6[1]; + + v[2] = in1[5] * lutt6[6]; + v[2] += in1[4] * lutt6[7]; + v[2] += in1[7] * lutt6[4]; + v[2] += in1[6] * lutt6[5]; + v[2] += in2[1] * lutt6[2]; + v[2] += in2[0] * lutt6[3]; + v[2] += in2[3] * lutt6[0]; + v[2] += in2[2] * lutt6[1]; + + v[5] = in1[4] * lutt6[6]; + v[5] += in1[7] * lutt6[7]; + v[5] += in1[6] * lutt6[4]; + v[5] += in2[1] * lutt6[5]; + v[5] += in2[0] * lutt6[2]; + v[5] += in2[3] * lutt6[3]; + v[5] += in2[2] * lutt6[0]; + v[5] += in2[5] * lutt6[1]; + + v[4] = in1[7] * lutt6[6]; + v[4] += in1[6] * lutt6[7]; + v[4] += in2[1] * lutt6[4]; + v[4] += in2[0] * lutt6[5]; + v[4] += in2[3] * lutt6[2]; + v[4] += in2[2] * lutt6[3]; + v[4] += in2[5] * lutt6[0]; + v[4] += in2[4] * lutt6[1]; + + v[7] = in1[6] * lutt6[6]; + v[7] += in2[1] * lutt6[7]; + v[7] += in2[0] * lutt6[4]; + v[7] += in2[3] * lutt6[5]; + v[7] += in2[2] * lutt6[2]; + v[7] += in2[5] * lutt6[3]; + v[7] += in2[4] * lutt6[0]; + v[7] += in2[7] * lutt6[1]; + + v[6] = in2[1] * lutt6[6]; + v[6] += in2[0] * lutt6[7]; + v[6] += in2[3] * lutt6[4]; + v[6] += in2[2] * lutt6[5]; + v[6] += in2[5] * lutt6[2]; + v[6] += in2[4] * lutt6[3]; + v[6] += in2[7] * lutt6[0]; + v[6] += in2[6] * lutt6[1]; + + outp[1] = ((v[1] + 0x4000) >> 15); + outp[0] = ((v[0] + 0x4000) >> 15); + outp[3] = ((v[3] + 0x4000) >> 15); + outp[2] = ((v[2] + 0x4000) >> 15); + outp[5] = ((v[5] + 0x4000) >> 15); + outp[4] = ((v[4] + 0x4000) >> 15); + outp[7] = ((v[7] + 0x4000) >> 15); + outp[6] = ((v[6] + 0x4000) >> 15); + in1 = in2; + in2 += 8; + outp += 8; + } + + memcpy(hle->dram() + address, in2 - 8, 16); + memcpy(hle->alist_buffer() + dmem, outbuff, count); +} +/* Pole filter over 8-sample frames. table holds h1 (first 8 entries) and h2 (next 8); h2 is rescaled by gain in place, so the caller's table is modified. l1/l2 start at 0 on init or are loaded from DRAM at address + 4 and + 6, and afterwards track the last two outputs of each frame. The last four outputs are stored at address. NOTE(review): the do/while body runs before count is tested, so if align() can return 0 the uint16_t count wraps on the first count -= 16 - confirm callers never pass 0. */ +void alist_polef(CHle * hle, bool init, uint16_t dmemo, uint16_t dmemi, uint16_t count, uint16_t gain, int16_t* table, uint32_t address) +{ + int16_t *dst = (int16_t*)(hle->alist_buffer() + dmemo); + + const int16_t* const h1 = table; + int16_t* const h2 = table + 8; + + unsigned i; + int16_t l1, l2; + int16_t h2_before[8]; + + count = align(count, 16); + + 
if (init) + { + l1 = 0; + l2 = 0; + } + else + { + l1 = *dram_u16(hle, address + 4); + l2 = *dram_u16(hle, address + 6); + } + + for (i = 0; i < 8; ++i) + { + h2_before[i] = h2[i]; + h2[i] = (((int32_t)h2[i] * gain) >> 14); + } + + do + { + int16_t frame[8]; + + for (i = 0; i < 8; ++i, dmemi += 2) + { + frame[i] = *alist_s16(hle, dmemi); + } + + for (i = 0; i < 8; ++i) + { + int32_t accu = frame[i] * gain; + accu += h1[i] * l1 + h2_before[i] * l2 + rdot(i, h2, frame); + dst[i^S] = clamp_s16(accu >> 14); + } + + l1 = dst[6 ^ S]; + l2 = dst[7 ^ S]; + + dst += 8; + count -= 16; + } while (count != 0); + + dram_store_u16(hle, (uint16_t*)(dst - 4), address, 4); +} +/* IIR filter over 8-sample frames using table[0], [1], [8] and [9]. State (frame[6], frame[7], ibuf[1], ibuf[2]) is zeroed on init or loaded from DRAM at address + 4 .. + 10, and stored back at the end. NOTE(review): the final dram_store_u16 calls pass 4 and 2 as the count; if that count is in 16-bit elements, as the 16-element last_frame store in alist_adpcm suggests, they read past the end of frame[8] and possibly ibuf[4] - confirm against dram_store_u16. NOTE(review): as in alist_polef, the do/while runs once before count is tested, so a zero count would wrap. */ +void alist_iirf(CHle * hle, bool init, uint16_t dmemo, uint16_t dmemi, uint16_t count, int16_t* table, uint32_t address) +{ + int16_t *dst = (int16_t*)(hle->alist_buffer() + dmemo); + int32_t i, prev; + int16_t frame[8]; + int16_t ibuf[4]; + uint16_t index = 7; + + count = align(count, 16); + + if(init) + { + for(i = 0; i < 8; ++i) + { + frame[i] = 0; + } + ibuf[1] = 0; + ibuf[2] = 0; + } + else + { + frame[6] = *dram_u16(hle, address + 4); + frame[7] = *dram_u16(hle, address + 6); + ibuf[1] = (int16_t)*dram_u16(hle, address + 8); + ibuf[2] = (int16_t)*dram_u16(hle, address + 10); + } + + prev = vmulf(table[9], frame[6]) * 2; + do + { + for(i = 0; i < 8; ++i) + { + int32_t accu; + ibuf[index&3] = *alist_s16(hle, dmemi); + + accu = prev + vmulf(table[0], ibuf[index&3]) + vmulf(table[1], ibuf[(index-1)&3]) + vmulf(table[0], ibuf[(index-2)&3]); + accu += vmulf(table[8], frame[index]) * 2; + prev = vmulf(table[9], frame[index]) * 2; + dst[i^S] = frame[i] = accu; + + index=(index+1)&7; + dmemi += 2; + } + dst += 8; + count -= 0x10; + } while (count > 0); + + dram_store_u16(hle, (uint16_t*)&frame[6], address + 4, 4); + dram_store_u16(hle, (uint16_t*)&ibuf[(index-2)&3], address+8, 2); + dram_store_u16(hle, (uint16_t*)&ibuf[(index-1)&3], address+10, 2); +} \ No newline at end of file diff --git 
a/Source/Android/PluginRSP/alist.h b/Source/Android/PluginRSP/alist.h new file mode 100644 index 000000000..f6783c2d8 --- /dev/null +++ b/Source/Android/PluginRSP/alist.h @@ -0,0 +1,56 @@ +/**************************************************************************** +* * +* Project64 - A Nintendo 64 emulator. * +* http://www.pj64-emu.com/ * +* Copyright (C) 2016 Project64. All rights reserved. * +* * +* License: * +* GNU/GPLv2 http://www.gnu.org/licenses/gpl-2.0.html * +* * +****************************************************************************/ +#pragma once +class CHle; +/* Handler for one audio command; it receives the command's two 32-bit words. */ +typedef void(*acmd_callback_t)(CHle* hle, uint32_t w1, uint32_t w2); +/* Audio-list primitives. The dmem* parameters are 16-bit offsets and address parameters are 32-bit values; this header includes nothing itself, so the fixed-width types, size_t and bool must already be declared by the including file. */ +void alist_process(CHle * hle, const acmd_callback_t abi[], unsigned int abi_size); +uint32_t alist_get_address(CHle * hle, uint32_t so, const uint32_t *segments, size_t n); +void alist_set_address(CHle * hle, uint32_t so, uint32_t *segments, size_t n); +void alist_clear(CHle * hle, uint16_t dmem, uint16_t count); +void alist_load(CHle * hle, uint16_t dmem, uint32_t address, uint16_t count); +void alist_save(CHle * hle, uint16_t dmem, uint32_t address, uint16_t count); +void alist_move(CHle * hle, uint16_t dmemo, uint16_t dmemi, uint16_t count); +void alist_copy_every_other_sample(CHle * hle, uint16_t dmemo, uint16_t dmemi, uint16_t count); +void alist_repeat64(CHle * hle, uint16_t dmemo, uint16_t dmemi, uint8_t count); +void alist_copy_blocks(CHle * hle, uint16_t dmemo, uint16_t dmemi, uint16_t block_size, uint8_t count); +void alist_interleave(CHle * hle, uint16_t dmemo, uint16_t left, uint16_t right, uint16_t count); +void alist_envmix_exp( CHle * hle, bool init, bool aux, uint16_t dmem_dl, uint16_t dmem_dr, uint16_t dmem_wl, uint16_t dmem_wr, uint16_t dmemi, uint16_t count, int16_t dry, int16_t wet, const int16_t *vol, const int16_t *target, const int32_t *rate, uint32_t address); +void alist_envmix_ge( CHle * hle, bool init, bool aux, uint16_t dmem_dl, uint16_t dmem_dr, uint16_t dmem_wl, uint16_t dmem_wr, 
uint16_t dmemi, uint16_t count, int16_t dry, int16_t wet, const int16_t *vol, const int16_t *target, const int32_t *rate, uint32_t address); +void alist_envmix_lin( CHle * hle, bool init, uint16_t dmem_dl, uint16_t dmem_dr, uint16_t dmem_wl, uint16_t dmem_wr, uint16_t dmemi, uint16_t count, int16_t dry, int16_t wet, const int16_t *vol, const int16_t *target, const int32_t *rate, uint32_t address); +void alist_envmix_nead( CHle * hle, bool swap_wet_LR, uint16_t dmem_dl, uint16_t dmem_dr, uint16_t dmem_wl, uint16_t dmem_wr, uint16_t dmemi, unsigned count, uint16_t *env_values, uint16_t *env_steps, const int16_t *xors); +void alist_mix(CHle * hle, uint16_t dmemo, uint16_t dmemi, uint16_t count, int16_t gain); +void alist_multQ44(CHle * hle, uint16_t dmem, uint16_t count, int8_t gain); +void alist_add(CHle * hle, uint16_t dmemo, uint16_t dmemi, uint16_t count); +void alist_adpcm( CHle * hle, bool init, bool loop, bool two_bit_per_sample, uint16_t dmemo, uint16_t dmemi, uint16_t count, const int16_t* codebook, uint32_t loop_address, uint32_t last_frame_address); +void alist_resample( CHle * hle, bool init, bool flag2, uint16_t dmemo, uint16_t dmemi, uint16_t count, uint32_t pitch, uint32_t address); +void alist_polef( CHle * hle, bool init,uint16_t dmemo, uint16_t dmemi, uint16_t count, uint16_t gain, int16_t* table, uint32_t address); +void alist_iirf( CHle * hle, bool init, uint16_t dmemo, uint16_t dmemi, uint16_t count, int16_t* table, uint32_t address); +void alist_resample_zoh( CHle * hle, uint16_t dmemo, uint16_t dmemi, uint16_t count, uint32_t pitch, uint32_t pitch_accu); +void alist_filter( CHle * hle, uint16_t dmem, uint16_t count, uint32_t address, const uint32_t* lut_address); + +/* +* Audio flags (bit values; note that several names share the same value, and some are 0x00) +*/ + +#define A_INIT 0x01 +#define A_CONTINUE 0x00 +#define A_LOOP 0x02 +#define A_OUT 0x02 +#define A_LEFT 0x02 +#define A_RIGHT 0x00 +#define A_VOL 0x04 +#define A_RATE 0x00 +#define A_AUX 0x08 +#define A_NOAUX 0x00 +#define A_MAIN 0x00 +#define A_MIX 0x10 diff 
--git a/Source/Android/PluginRSP/alist_audio.cpp b/Source/Android/PluginRSP/alist_audio.cpp new file mode 100644 index 000000000..a97fb7033 --- /dev/null +++ b/Source/Android/PluginRSP/alist_audio.cpp @@ -0,0 +1,306 @@ +/**************************************************************************** +* * +* Project64 - A Nintendo 64 emulator. * +* http://www.pj64-emu.com/ * +* Copyright (C) 2016 Project64. All rights reserved. * +* * +* License: * +* GNU/GPLv2 http://www.gnu.org/licenses/gpl-2.0.html * +* * +****************************************************************************/ +#include "stdafx.h" +#include + +#include "alist.h" +#include "common.h" +#include "hle.h" +#include "mem.h" +#include "ucodes.h" + +enum { DMEM_BASE = 0x5c0 }; + +/* helper functions */ +static uint32_t get_address(CHle * hle, uint32_t so) +{ + return alist_get_address(hle, so, hle->alist_audio().segments, N_SEGMENTS); +} + +static void set_address(CHle * hle, uint32_t so) +{ + alist_set_address(hle, so, hle->alist_audio().segments, N_SEGMENTS); +} + +static void clear_segments(CHle * hle) +{ + memset(hle->alist_audio().segments, 0, N_SEGMENTS*sizeof(hle->alist_audio().segments[0])); +} + +/* audio commands definition */ +static void SPNOOP(CHle * UNUSED(hle), uint32_t UNUSED(w1), uint32_t UNUSED(w2)) +{ +} + +static void CLEARBUFF(CHle * hle, uint32_t w1, uint32_t w2) +{ + uint16_t dmem = (w1 + DMEM_BASE) & 0xFFFF; + uint16_t count = w2; + + if (count == 0) + return; + + alist_clear(hle, dmem, align(count, 16)); +} + +static void ENVMIXER(CHle * hle, uint32_t w1, uint32_t w2) +{ + uint8_t flags = (w1 >> 16) & 0xFF; + uint32_t address = get_address(hle, w2); + + alist_envmix_exp( + hle, + flags & A_INIT, + flags & A_AUX, + hle->alist_audio().out, hle->alist_audio().dry_right, + hle->alist_audio().wet_left, hle->alist_audio().wet_right, + hle->alist_audio().in, hle->alist_audio().count, + hle->alist_audio().dry, hle->alist_audio().wet, + hle->alist_audio().vol, + 
hle->alist_audio().target, + hle->alist_audio().rate, + address); +} + +static void ENVMIXER_GE(CHle * hle, uint32_t w1, uint32_t w2) +{ + uint8_t flags = (w1 >> 16); + uint32_t address = get_address(hle, w2); + + alist_envmix_ge( + hle, + flags & A_INIT, + flags & A_AUX, + hle->alist_audio().out, hle->alist_audio().dry_right, + hle->alist_audio().wet_left, hle->alist_audio().wet_right, + hle->alist_audio().in, hle->alist_audio().count, + hle->alist_audio().dry, hle->alist_audio().wet, + hle->alist_audio().vol, + hle->alist_audio().target, + hle->alist_audio().rate, + address); +} + +static void RESAMPLE(CHle * hle, uint32_t w1, uint32_t w2) +{ + uint8_t flags = (w1 >> 16) & 0xFF; + uint16_t pitch = w1 & 0xFFFF; + uint32_t address = get_address(hle, w2); + + alist_resample( + hle, + flags & 0x1, + flags & 0x2, + hle->alist_audio().out, + hle->alist_audio().in, + align(hle->alist_audio().count, 16), + pitch << 1, + address); +} + +static void SETVOL(CHle * hle, uint32_t w1, uint32_t w2) +{ + uint8_t flags = (w1 >> 16) & 0xFF; + + if (flags & A_AUX) + { + hle->alist_audio().dry = w1 & 0xFFFF; + hle->alist_audio().wet = w2 & 0xFFFF; + } + else + { + unsigned lr = (flags & A_LEFT) ? 
0 : 1; + + if (flags & A_VOL) + { + hle->alist_audio().vol[lr] = w1 & 0xFFFF; + } + else + { + hle->alist_audio().target[lr] = w1 & 0xFFFF; + hle->alist_audio().rate[lr] = w2; + } + } +} + +static void SETLOOP(CHle * hle, uint32_t UNUSED(w1), uint32_t w2) +{ + hle->alist_audio().loop = get_address(hle, w2); +} + +static void ADPCM(CHle * hle, uint32_t w1, uint32_t w2) +{ + uint8_t flags = (w1 >> 16) & 0xFF; + uint32_t address = get_address(hle, w2); + + alist_adpcm( + hle, + flags & 0x1, + flags & 0x2, + false, /* unsupported in this ucode */ + hle->alist_audio().out, + hle->alist_audio().in, + align(hle->alist_audio().count, 32), + hle->alist_audio().table, + hle->alist_audio().loop, + address); +} + +static void LOADBUFF(CHle * hle, uint32_t UNUSED(w1), uint32_t w2) +{ + uint32_t address = get_address(hle, w2); + + if (hle->alist_audio().count == 0) + { + return; + } + + alist_load(hle, hle->alist_audio().in, address, hle->alist_audio().count); +} + +static void SAVEBUFF(CHle * hle, uint32_t UNUSED(w1), uint32_t w2) +{ + uint32_t address = get_address(hle, w2); + + if (hle->alist_audio().count == 0) + { + return; + } + alist_save(hle, hle->alist_audio().out, address, hle->alist_audio().count); +} + +static void SETBUFF(CHle * hle, uint32_t w1, uint32_t w2) +{ + uint8_t flags = (w1 >> 16) & 0xFF; + + if (flags & A_AUX) + { + hle->alist_audio().dry_right = (w1 + DMEM_BASE) & 0xFFFF; + hle->alist_audio().wet_left = (w2 >> 16) + DMEM_BASE; + hle->alist_audio().wet_right = (w2 + DMEM_BASE) & 0xFFFF; + } + else + { + hle->alist_audio().in = (w1 + DMEM_BASE) & 0xFFFF; + hle->alist_audio().out = ((w2 >> 16) + DMEM_BASE) & 0xFFFF; + hle->alist_audio().count = w2 & 0xFFFF; + } +} + +static void DMEMMOVE(CHle * hle, uint32_t w1, uint32_t w2) +{ + uint16_t dmemi = (w1 + DMEM_BASE) & 0xFFFF; + uint16_t dmemo = (w2 >> 16) + DMEM_BASE; + uint16_t count = (w2)& 0xFFFF; + + if (count == 0) + return; + + alist_move(hle, dmemo, dmemi, align(count, 16)); +} + +static void 
LOADADPCM(CHle * hle, uint32_t w1, uint32_t w2) +{ + uint16_t count = (w1 & 0xFFFF); + uint32_t address = get_address(hle, w2); + + dram_load_u16(hle, (uint16_t*)hle->alist_audio().table, address, align(count, 8) >> 1); +} + +static void INTERLEAVE(CHle * hle, uint32_t UNUSED(w1), uint32_t w2) +{ + uint16_t left = (w2 >> 16) + DMEM_BASE; + uint16_t right = (w2 + DMEM_BASE) & 0xFFFF; + + if (hle->alist_audio().count == 0) + return; + + alist_interleave(hle, hle->alist_audio().out, left, right, align(hle->alist_audio().count, 16)); +} + +static void MIXER(CHle * hle, uint32_t w1, uint32_t w2) +{ + int16_t gain = (w1)& 0xFFFF; + uint16_t dmemi = ((w2 >> 16) + DMEM_BASE) & 0xFFFF; + uint16_t dmemo = (w2 + DMEM_BASE) & 0xFFFF; + + if (hle->alist_audio().count == 0) + return; + + alist_mix(hle, dmemo, dmemi, align(hle->alist_audio().count, 32), gain); +} + +static void SEGMENT(CHle * hle, uint32_t UNUSED(w1), uint32_t w2) +{ + set_address(hle, w2); +} + +static void POLEF(CHle * hle, uint32_t w1, uint32_t w2) +{ + uint8_t flags = (w1 >> 16); + uint16_t gain = w1; + uint32_t address = get_address(hle, w2); + + if (hle->alist_audio().count == 0) + return; + + alist_polef( + hle, + flags & A_INIT, + hle->alist_audio().out, + hle->alist_audio().in, + align(hle->alist_audio().count, 16), + gain, + hle->alist_audio().table, + address); +} + +/* global functions */ +void alist_process_audio(CHle * hle) +{ + static const acmd_callback_t ABI[0x10] = { + SPNOOP, ADPCM, CLEARBUFF, ENVMIXER, + LOADBUFF, RESAMPLE, SAVEBUFF, SEGMENT, + SETBUFF, SETVOL, DMEMMOVE, LOADADPCM, + MIXER, INTERLEAVE, POLEF, SETLOOP + }; + + clear_segments(hle); + alist_process(hle, ABI, 0x10); +} + +void alist_process_audio_ge(CHle * hle) +{ + static const acmd_callback_t ABI[0x10] = + { + SPNOOP, ADPCM, CLEARBUFF, ENVMIXER_GE, + LOADBUFF, RESAMPLE, SAVEBUFF, SEGMENT, + SETBUFF, SETVOL, DMEMMOVE, LOADADPCM, + MIXER, INTERLEAVE, POLEF, SETLOOP + }; + + clear_segments(hle); + alist_process(hle, ABI, 0x10); +} 
+ +void alist_process_audio_bc(CHle * hle) +{ + static const acmd_callback_t ABI[0x10] = + { + SPNOOP, ADPCM, CLEARBUFF, ENVMIXER_GE, + LOADBUFF, RESAMPLE, SAVEBUFF, SEGMENT, + SETBUFF, SETVOL, DMEMMOVE, LOADADPCM, + MIXER, INTERLEAVE, POLEF, SETLOOP + }; + + clear_segments(hle); + alist_process(hle, ABI, 0x10); +} \ No newline at end of file diff --git a/Source/Android/PluginRSP/alist_naudio.cpp b/Source/Android/PluginRSP/alist_naudio.cpp new file mode 100644 index 000000000..e619ce32c --- /dev/null +++ b/Source/Android/PluginRSP/alist_naudio.cpp @@ -0,0 +1,293 @@ +/**************************************************************************** +* * +* Project64 - A Nintendo 64 emulator. * +* http://www.pj64-emu.com/ * +* Copyright (C) 2016 Project64. All rights reserved. * +* * +* License: * +* GNU/GPLv2 http://www.gnu.org/licenses/gpl-2.0.html * +* * +****************************************************************************/ +#include "stdafx.h" + +#include "alist.h" +#include "mem.h" + +enum { NAUDIO_COUNT = 0x170 }; /* ie 184 samples */ +enum { + NAUDIO_MAIN = 0x4f0, + NAUDIO_MAIN2 = 0x660, + NAUDIO_DRY_LEFT = 0x9d0, + NAUDIO_DRY_RIGHT = 0xb40, + NAUDIO_WET_LEFT = 0xcb0, + NAUDIO_WET_RIGHT = 0xe20 +}; + +/* audio commands definition */ +static void UNKNOWN(CHle * hle, uint32_t w1, uint32_t w2) +{ + uint8_t acmd = (w1 >> 24); + + hle->WarnMessage("Unknown audio command %d: %08x %08x", acmd, w1, w2); +} + +static void SPNOOP(CHle * UNUSED(hle), uint32_t UNUSED(w1), uint32_t UNUSED(w2)) +{ +} + +static void NAUDIO_0000(CHle * hle, uint32_t w1, uint32_t w2) +{ + /* ??? 
*/ + UNKNOWN(hle, w1, w2); +} + +static void NAUDIO_02B0(CHle * hle, uint32_t UNUSED(w1), uint32_t w2) +{ + /* emulate code at 0x12b0 (inside SETVOL), because PC always execute in IMEM */ + hle->alist_naudio().rate[1] &= ~0xffff; + hle->alist_naudio().rate[1] |= (w2 & 0xffff); +} + +static void NAUDIO_14(CHle * hle, uint32_t w1, uint32_t w2) +{ + uint8_t flags = (w1 >> 16); + uint16_t gain = w1; + uint8_t select_main = (w2 >> 24); + uint32_t address = (w2 & 0xffffff); + + uint16_t dmem = (select_main == 0) ? NAUDIO_MAIN : NAUDIO_MAIN2; + + if (hle->alist_naudio().table[0] == 0 && hle->alist_naudio().table[1] == 0) + { + alist_polef(hle, flags & A_INIT, dmem, dmem, NAUDIO_COUNT, gain, hle->alist_naudio().table, address); + } + else + { + alist_iirf( hle, flags & A_INIT, dmem, dmem, NAUDIO_COUNT, hle->alist_naudio().table, address); + } +} + +static void SETVOL(CHle * hle, uint32_t w1, uint32_t w2) +{ + uint8_t flags = (w1 >> 16); + + if (flags & 0x4) { + if (flags & 0x2) { + hle->alist_naudio().vol[0] = w1; + hle->alist_naudio().dry = (w2 >> 16); + hle->alist_naudio().wet = w2; + } + else { + hle->alist_naudio().target[1] = w1; + hle->alist_naudio().rate[1] = w2; + } + } + else + { + hle->alist_naudio().target[0] = w1; + hle->alist_naudio().rate[0] = w2; + } +} + +static void ENVMIXER(CHle * hle, uint32_t w1, uint32_t w2) +{ + uint8_t flags = (w1 >> 16); + uint32_t address = (w2 & 0xffffff); + + hle->alist_naudio().vol[1] = w1; + + alist_envmix_lin( + hle, + flags & 0x1, + NAUDIO_DRY_LEFT, + NAUDIO_DRY_RIGHT, + NAUDIO_WET_LEFT, + NAUDIO_WET_RIGHT, + NAUDIO_MAIN, + NAUDIO_COUNT, + hle->alist_naudio().dry, + hle->alist_naudio().wet, + hle->alist_naudio().vol, + hle->alist_naudio().target, + hle->alist_naudio().rate, + address); +} + +static void CLEARBUFF(CHle * hle, uint32_t w1, uint32_t w2) +{ + uint16_t dmem = w1 + NAUDIO_MAIN; + uint16_t count = w2; + + alist_clear(hle, dmem, count); +} + +static void MIXER(CHle * hle, uint32_t w1, uint32_t w2) +{ + int16_t gain = 
w1; + uint16_t dmemi = (w2 >> 16) + NAUDIO_MAIN; + uint16_t dmemo = w2 + NAUDIO_MAIN; + + alist_mix(hle, dmemo, dmemi, NAUDIO_COUNT, gain); +} + +static void LOADBUFF(CHle * hle, uint32_t w1, uint32_t w2) +{ + uint16_t count = (w1 >> 12) & 0xfff; + uint16_t dmem = (w1 & 0xfff) + NAUDIO_MAIN; + uint32_t address = (w2 & 0xffffff); + + alist_load(hle, dmem, address, count); +} + +static void SAVEBUFF(CHle * hle, uint32_t w1, uint32_t w2) +{ + uint16_t count = (w1 >> 12) & 0xfff; + uint16_t dmem = (w1 & 0xfff) + NAUDIO_MAIN; + uint32_t address = (w2 & 0xffffff); + + alist_save(hle, dmem, address, count); +} + +static void LOADADPCM(CHle * hle, uint32_t w1, uint32_t w2) +{ + uint16_t count = w1; + uint32_t address = (w2 & 0xffffff); + + dram_load_u16(hle, (uint16_t*)hle->alist_naudio().table, address, count >> 1); +} + +static void DMEMMOVE(CHle * hle, uint32_t w1, uint32_t w2) +{ + uint16_t dmemi = w1 + NAUDIO_MAIN; + uint16_t dmemo = (w2 >> 16) + NAUDIO_MAIN; + uint16_t count = w2; + + alist_move(hle, dmemo, dmemi, (count + 3) & ~3); +} + +static void SETLOOP(CHle * hle, uint32_t UNUSED(w1), uint32_t w2) +{ + hle->alist_naudio().loop = (w2 & 0xffffff); +} + +static void ADPCM(CHle * hle, uint32_t w1, uint32_t w2) +{ + uint32_t address = (w1 & 0xffffff); + uint8_t flags = (w2 >> 28); + uint16_t count = (w2 >> 16) & 0xfff; + uint16_t dmemi = ((w2 >> 12) & 0xf) + NAUDIO_MAIN; + uint16_t dmemo = (w2 & 0xfff) + NAUDIO_MAIN; + + alist_adpcm( + hle, + flags & 0x1, + flags & 0x2, + false, /* unsuported by this ucode */ + dmemo, + dmemi, + (count + 0x1f) & ~0x1f, + hle->alist_naudio().table, + hle->alist_naudio().loop, + address); +} + +static void RESAMPLE(CHle * hle, uint32_t w1, uint32_t w2) +{ + uint32_t address = (w1 & 0xffffff); + uint8_t flags = (w2 >> 30); + uint16_t pitch = (w2 >> 14); + uint16_t dmemi = ((w2 >> 2) & 0xfff) + NAUDIO_MAIN; + uint16_t dmemo = (w2 & 0x3) ? 
NAUDIO_MAIN2 : NAUDIO_MAIN; + + alist_resample( + hle, + flags & 0x1, + false, /* TODO: check which ABI supports it */ + dmemo, + dmemi, + NAUDIO_COUNT, + pitch << 1, + address); +} + +static void INTERLEAVE(CHle * hle, uint32_t UNUSED(w1), uint32_t UNUSED(w2)) +{ + alist_interleave(hle, NAUDIO_MAIN, NAUDIO_DRY_LEFT, NAUDIO_DRY_RIGHT, NAUDIO_COUNT); +} + +static void MP3ADDY(CHle * UNUSED(hle), uint32_t UNUSED(w1), uint32_t UNUSED(w2)) +{ +} + +static void MP3(CHle * hle, uint32_t w1, uint32_t w2) +{ + unsigned index = (w1 & 0x1e); + uint32_t address = (w2 & 0xffffff); + + mp3_task(hle, index, address); +} + +/* global functions */ +void alist_process_naudio(CHle * hle) +{ + static const acmd_callback_t ABI[0x10] = + { + SPNOOP, ADPCM, CLEARBUFF, ENVMIXER, + LOADBUFF, RESAMPLE, SAVEBUFF, NAUDIO_0000, + NAUDIO_0000, SETVOL, DMEMMOVE, LOADADPCM, + MIXER, INTERLEAVE, NAUDIO_02B0, SETLOOP + }; + + alist_process(hle, ABI, 0x10); +} + +void alist_process_naudio_bk(CHle * hle) +{ + /* TODO: see what differs from alist_process_naudio */ + static const acmd_callback_t ABI[0x10] = { + SPNOOP, ADPCM, CLEARBUFF, ENVMIXER, + LOADBUFF, RESAMPLE, SAVEBUFF, NAUDIO_0000, + NAUDIO_0000, SETVOL, DMEMMOVE, LOADADPCM, + MIXER, INTERLEAVE, NAUDIO_02B0, SETLOOP + }; + + alist_process(hle, ABI, 0x10); +} + +void alist_process_naudio_dk(CHle * hle) +{ + /* TODO: see what differs from alist_process_naudio */ + static const acmd_callback_t ABI[0x10] = { + SPNOOP, ADPCM, CLEARBUFF, ENVMIXER, + LOADBUFF, RESAMPLE, SAVEBUFF, MIXER, + MIXER, SETVOL, DMEMMOVE, LOADADPCM, + MIXER, INTERLEAVE, NAUDIO_02B0, SETLOOP + }; + + alist_process(hle, ABI, 0x10); +} + +void alist_process_naudio_mp3(CHle * hle) +{ + static const acmd_callback_t ABI[0x10] = { + UNKNOWN, ADPCM, CLEARBUFF, ENVMIXER, + LOADBUFF, RESAMPLE, SAVEBUFF, MP3, + MP3ADDY, SETVOL, DMEMMOVE, LOADADPCM, + MIXER, INTERLEAVE, NAUDIO_14, SETLOOP + }; + + alist_process(hle, ABI, 0x10); +} + +void alist_process_naudio_cbfd(CHle * hle) +{ + /* 
TODO: see what differs from alist_process_naudio_mp3 */ + static const acmd_callback_t ABI[0x10] = { + UNKNOWN, ADPCM, CLEARBUFF, ENVMIXER, + LOADBUFF, RESAMPLE, SAVEBUFF, MP3, + MP3ADDY, SETVOL, DMEMMOVE, LOADADPCM, + MIXER, INTERLEAVE, NAUDIO_14, SETLOOP + }; + + alist_process(hle, ABI, 0x10); +} \ No newline at end of file diff --git a/Source/Android/PluginRSP/alist_nead.cpp b/Source/Android/PluginRSP/alist_nead.cpp new file mode 100644 index 000000000..cbdbbb073 --- /dev/null +++ b/Source/Android/PluginRSP/alist_nead.cpp @@ -0,0 +1,494 @@ +/**************************************************************************** +* * +* Project64 - A Nintendo 64 emulator. * +* http://www.pj64-emu.com/ * +* Copyright (C) 2016 Project64. All rights reserved. * +* * +* License: * +* GNU/GPLv2 http://www.gnu.org/licenses/gpl-2.0.html * +* * +****************************************************************************/ +#include "stdafx.h" + +#include "alist.h" +#include "mem.h" + +/* audio commands definition */ +static void UNKNOWN(CHle * hle, uint32_t w1, uint32_t w2) +{ + uint8_t acmd = (w1 >> 24); + hle->WarnMessage("Unknown audio command %d: %08x %08x", acmd, w1, w2); +} + +static void SPNOOP(CHle * UNUSED(hle), uint32_t UNUSED(w1), uint32_t UNUSED(w2)) +{ +} + +static void LOADADPCM(CHle * hle, uint32_t w1, uint32_t w2) +{ + uint16_t count = w1; + uint32_t address = (w2 & 0xffffff); + + dram_load_u16(hle, (uint16_t*)hle->alist_nead().table, address, count >> 1); +} + +static void SETLOOP(CHle * hle, uint32_t UNUSED(w1), uint32_t w2) +{ + hle->alist_nead().loop = w2 & 0xffffff; +} + +static void SETBUFF(CHle * hle, uint32_t w1, uint32_t w2) +{ + hle->alist_nead().in = w1; + hle->alist_nead().out = (w2 >> 16); + hle->alist_nead().count = w2; +} + +static void ADPCM(CHle * hle, uint32_t w1, uint32_t w2) +{ + uint8_t flags = (w1 >> 16); + uint32_t address = (w2 & 0xffffff); + + alist_adpcm( + hle, + flags & 0x1, + flags & 0x2, + flags & 0x4, + hle->alist_nead().out, + 
hle->alist_nead().in, + (hle->alist_nead().count + 0x1f) & ~0x1f, + hle->alist_nead().table, + hle->alist_nead().loop, + address); +} + +static void CLEARBUFF(CHle * hle, uint32_t w1, uint32_t w2) +{ + uint16_t dmem = w1; + uint16_t count = w2; + + if (count == 0) + return; + + alist_clear(hle, dmem, count); +} + +static void LOADBUFF(CHle * hle, uint32_t w1, uint32_t w2) +{ + uint16_t count = (w1 >> 12) & 0xfff; + uint16_t dmem = (w1 & 0xfff); + uint32_t address = (w2 & 0xffffff); + + alist_load(hle, dmem, address, count); +} + +static void SAVEBUFF(CHle * hle, uint32_t w1, uint32_t w2) +{ + uint16_t count = (w1 >> 12) & 0xfff; + uint16_t dmem = (w1 & 0xfff); + uint32_t address = (w2 & 0xffffff); + + alist_save(hle, dmem, address, count); +} + +static void MIXER(CHle * hle, uint32_t w1, uint32_t w2) +{ + uint16_t count = (w1 >> 12) & 0xff0; + int16_t gain = w1; + uint16_t dmemi = (w2 >> 16); + uint16_t dmemo = w2; + + alist_mix(hle, dmemo, dmemi, count, gain); +} + +static void RESAMPLE(CHle * hle, uint32_t w1, uint32_t w2) +{ + uint8_t flags = (w1 >> 16); + uint16_t pitch = w1; + uint32_t address = (w2 & 0xffffff); + + alist_resample( + hle, + flags & 0x1, + false, /* TODO: check which ABI supports it */ + hle->alist_nead().out, + hle->alist_nead().in, + (hle->alist_nead().count + 0xf) & ~0xf, + pitch << 1, + address); +} + +static void RESAMPLE_ZOH(CHle * hle, uint32_t w1, uint32_t w2) +{ + uint16_t pitch = w1; + uint16_t pitch_accu = w2; + + alist_resample_zoh( + hle, + hle->alist_nead().out, + hle->alist_nead().in, + hle->alist_nead().count, + pitch << 1, + pitch_accu); +} + +static void DMEMMOVE(CHle * hle, uint32_t w1, uint32_t w2) +{ + uint16_t dmemi = w1; + uint16_t dmemo = (w2 >> 16); + uint16_t count = w2; + + if (count == 0) + return; + + alist_move(hle, dmemo, dmemi, (count + 3) & ~3); +} + +static void ENVSETUP1_MK(CHle * hle, uint32_t w1, uint32_t w2) +{ + hle->alist_nead().env_values[2] = (w1 >> 8) & 0xff00; + hle->alist_nead().env_steps[2] = 0; + 
hle->alist_nead().env_steps[0] = (w2 >> 16); + hle->alist_nead().env_steps[1] = w2; +} + +static void ENVSETUP1(CHle * hle, uint32_t w1, uint32_t w2) +{ + hle->alist_nead().env_values[2] = (w1 >> 8) & 0xff00; + hle->alist_nead().env_steps[2] = w1; + hle->alist_nead().env_steps[0] = (w2 >> 16); + hle->alist_nead().env_steps[1] = w2; +} + +static void ENVSETUP2(CHle * hle, uint32_t UNUSED(w1), uint32_t w2) +{ + hle->alist_nead().env_values[0] = (w2 >> 16); + hle->alist_nead().env_values[1] = w2; +} + +static void ENVMIXER_MK(CHle * hle, uint32_t w1, uint32_t w2) +{ + int16_t xors[4]; + + uint16_t dmemi = (w1 >> 12) & 0xff0; + uint8_t count = (w1 >> 8) & 0xff; + uint16_t dmem_dl = (w2 >> 20) & 0xff0; + uint16_t dmem_dr = (w2 >> 12) & 0xff0; + uint16_t dmem_wl = (w2 >> 4) & 0xff0; + uint16_t dmem_wr = (w2 << 4) & 0xff0; + + xors[2] = 0; /* unsupported by this ucode */ + xors[3] = 0; /* unsupported by this ucode */ + xors[0] = 0 - (int16_t)((w1 & 0x2) >> 1); + xors[1] = 0 - (int16_t)((w1 & 0x1) ); + + alist_envmix_nead( + hle, + false, /* unsupported by this ucode */ + dmem_dl, dmem_dr, + dmem_wl, dmem_wr, + dmemi, count, + hle->alist_nead().env_values, + hle->alist_nead().env_steps, + xors); +} + +static void ENVMIXER(CHle * hle, uint32_t w1, uint32_t w2) +{ + int16_t xors[4]; + + uint16_t dmemi = (w1 >> 12) & 0xff0; + uint8_t count = (w1 >> 8) & 0xff; + bool swap_wet_LR = (w1 >> 4) & 0x1; + uint16_t dmem_dl = (w2 >> 20) & 0xff0; + uint16_t dmem_dr = (w2 >> 12) & 0xff0; + uint16_t dmem_wl = (w2 >> 4) & 0xff0; + uint16_t dmem_wr = (w2 << 4) & 0xff0; + + xors[2] = 0 - (int16_t)((w1 & 0x8) >> 1); + xors[3] = 0 - (int16_t)((w1 & 0x4) >> 1); + xors[0] = 0 - (int16_t)((w1 & 0x2) >> 1); + xors[1] = 0 - (int16_t)((w1 & 0x1) ); + + alist_envmix_nead( + hle, + swap_wet_LR, + dmem_dl, dmem_dr, + dmem_wl, dmem_wr, + dmemi, count, + hle->alist_nead().env_values, + hle->alist_nead().env_steps, + xors); +} + +static void DUPLICATE(CHle * hle, uint32_t w1, uint32_t w2) +{ + uint8_t 
count = (w1 >> 16); + uint16_t dmemi = w1; + uint16_t dmemo = (w2 >> 16); + + alist_repeat64(hle, dmemo, dmemi, count); +} + +static void INTERL(CHle * hle, uint32_t w1, uint32_t w2) +{ + uint16_t count = w1; + uint16_t dmemi = (w2 >> 16); + uint16_t dmemo = w2; + + alist_copy_every_other_sample(hle, dmemo, dmemi, count); +} + +static void INTERLEAVE_MK(CHle * hle, uint32_t UNUSED(w1), uint32_t w2) +{ + uint16_t left = (w2 >> 16); + uint16_t right = w2; + + if (hle->alist_nead().count == 0) + return; + + alist_interleave(hle, hle->alist_nead().out, left, right, hle->alist_nead().count); +} + +static void INTERLEAVE(CHle * hle, uint32_t w1, uint32_t w2) +{ + uint16_t count = ((w1 >> 12) & 0xff0); + uint16_t dmemo = w1; + uint16_t left = (w2 >> 16); + uint16_t right = w2; + + alist_interleave(hle, dmemo, left, right, count); +} + +static void ADDMIXER(CHle * hle, uint32_t w1, uint32_t w2) +{ + uint16_t count = (w1 >> 12) & 0xff0; + uint16_t dmemi = (w2 >> 16); + uint16_t dmemo = w2; + + alist_add(hle, dmemo, dmemi, count); +} + +static void HILOGAIN(CHle * hle, uint32_t w1, uint32_t w2) +{ + int8_t gain = (w1 >> 16); /* Q4.4 signed */ + uint16_t count = w1; + uint16_t dmem = (w2 >> 16); + + alist_multQ44(hle, dmem, count, gain); +} + +static void FILTER(CHle * hle, uint32_t w1, uint32_t w2) +{ + uint8_t flags = (w1 >> 16); + uint32_t address = (w2 & 0xffffff); + + if (flags > 1) { + hle->alist_nead().filter_count = w1; + hle->alist_nead().filter_lut_address[0] = address; /* t6 */ + } + else { + uint16_t dmem = w1; + + hle->alist_nead().filter_lut_address[1] = address + 0x10; /* t5 */ + alist_filter(hle, dmem, hle->alist_nead().filter_count, address, hle->alist_nead().filter_lut_address); + } +} + +static void SEGMENT(CHle * UNUSED(hle), uint32_t UNUSED(w1), uint32_t UNUSED(w2)) +{ +} + +static void NEAD_16(CHle * hle, uint32_t w1, uint32_t w2) +{ + uint8_t count = (w1 >> 16); + uint16_t dmemi = w1; + uint16_t dmemo = (w2 >> 16); + uint16_t block_size = w2; + + 
alist_copy_blocks(hle, dmemo, dmemi, block_size, count); +} + +static void POLEF(CHle * hle, uint32_t w1, uint32_t w2) +{ + uint8_t flags = (w1 >> 16); + uint16_t gain = w1; + uint32_t address = (w2 & 0xffffff); + + if (hle->alist_nead().count == 0) + return; + + alist_polef( + hle, + flags & A_INIT, + hle->alist_nead().out, + hle->alist_nead().in, + hle->alist_nead().count, + gain, + hle->alist_nead().table, + address); +} + +void alist_process_nead_mk(CHle * hle) +{ + static const acmd_callback_t ABI[0x20] = { + SPNOOP, ADPCM, CLEARBUFF, SPNOOP, + SPNOOP, RESAMPLE, SPNOOP, SEGMENT, + SETBUFF, SPNOOP, DMEMMOVE, LOADADPCM, + MIXER, INTERLEAVE_MK, POLEF, SETLOOP, + NEAD_16, INTERL, ENVSETUP1_MK, ENVMIXER_MK, + LOADBUFF, SAVEBUFF, ENVSETUP2, SPNOOP, + SPNOOP, SPNOOP, SPNOOP, SPNOOP, + SPNOOP, SPNOOP, SPNOOP, SPNOOP + }; + + alist_process(hle, ABI, 0x20); +} + +void alist_process_nead_sf(CHle * hle) +{ + static const acmd_callback_t ABI[0x20] = { + SPNOOP, ADPCM, CLEARBUFF, SPNOOP, + ADDMIXER, RESAMPLE, RESAMPLE_ZOH, SPNOOP, + SETBUFF, SPNOOP, DMEMMOVE, LOADADPCM, + MIXER, INTERLEAVE_MK, POLEF, SETLOOP, + NEAD_16, INTERL, ENVSETUP1, ENVMIXER, + LOADBUFF, SAVEBUFF, ENVSETUP2, SPNOOP, + HILOGAIN, UNKNOWN, DUPLICATE, SPNOOP, + SPNOOP, SPNOOP, SPNOOP, SPNOOP + }; + + alist_process(hle, ABI, 0x20); +} + +void alist_process_nead_sfj(CHle * hle) +{ + static const acmd_callback_t ABI[0x20] = { + SPNOOP, ADPCM, CLEARBUFF, SPNOOP, + ADDMIXER, RESAMPLE, RESAMPLE_ZOH, SPNOOP, + SETBUFF, SPNOOP, DMEMMOVE, LOADADPCM, + MIXER, INTERLEAVE_MK, POLEF, SETLOOP, + NEAD_16, INTERL, ENVSETUP1, ENVMIXER, + LOADBUFF, SAVEBUFF, ENVSETUP2, UNKNOWN, + HILOGAIN, UNKNOWN, DUPLICATE, SPNOOP, + SPNOOP, SPNOOP, SPNOOP, SPNOOP + }; + + alist_process(hle, ABI, 0x20); +} + +void alist_process_nead_fz(CHle * hle) +{ + static const acmd_callback_t ABI[0x20] = { + UNKNOWN, ADPCM, CLEARBUFF, SPNOOP, + ADDMIXER, RESAMPLE, SPNOOP, SPNOOP, + SETBUFF, SPNOOP, DMEMMOVE, LOADADPCM, + MIXER, INTERLEAVE, SPNOOP, 
SETLOOP, + NEAD_16, INTERL, ENVSETUP1, ENVMIXER, + LOADBUFF, SAVEBUFF, ENVSETUP2, UNKNOWN, + SPNOOP, UNKNOWN, DUPLICATE, SPNOOP, + SPNOOP, SPNOOP, SPNOOP, SPNOOP + }; + + alist_process(hle, ABI, 0x20); +} + +void alist_process_nead_wrjb(CHle * hle) +{ + static const acmd_callback_t ABI[0x20] = { + SPNOOP, ADPCM, CLEARBUFF, UNKNOWN, + ADDMIXER, RESAMPLE, RESAMPLE_ZOH, SPNOOP, + SETBUFF, SPNOOP, DMEMMOVE, LOADADPCM, + MIXER, INTERLEAVE, SPNOOP, SETLOOP, + NEAD_16, INTERL, ENVSETUP1, ENVMIXER, + LOADBUFF, SAVEBUFF, ENVSETUP2, UNKNOWN, + HILOGAIN, UNKNOWN, DUPLICATE, FILTER, + SPNOOP, SPNOOP, SPNOOP, SPNOOP + }; + + alist_process(hle, ABI, 0x20); +} + +void alist_process_nead_ys(CHle * hle) +{ + static const acmd_callback_t ABI[0x18] = { + UNKNOWN, ADPCM, CLEARBUFF, UNKNOWN, + ADDMIXER, RESAMPLE, RESAMPLE_ZOH, FILTER, + SETBUFF, DUPLICATE, DMEMMOVE, LOADADPCM, + MIXER, INTERLEAVE, HILOGAIN, SETLOOP, + NEAD_16, INTERL, ENVSETUP1, ENVMIXER, + LOADBUFF, SAVEBUFF, ENVSETUP2, UNKNOWN + }; + + alist_process(hle, ABI, 0x18); +} + +void alist_process_nead_1080(CHle * hle) +{ + static const acmd_callback_t ABI[0x18] = { + UNKNOWN, ADPCM, CLEARBUFF, UNKNOWN, + ADDMIXER, RESAMPLE, RESAMPLE_ZOH, FILTER, + SETBUFF, DUPLICATE, DMEMMOVE, LOADADPCM, + MIXER, INTERLEAVE, HILOGAIN, SETLOOP, + NEAD_16, INTERL, ENVSETUP1, ENVMIXER, + LOADBUFF, SAVEBUFF, ENVSETUP2, UNKNOWN + }; + + alist_process(hle, ABI, 0x18); +} + +void alist_process_nead_oot(CHle * hle) +{ + static const acmd_callback_t ABI[0x18] = { + UNKNOWN, ADPCM, CLEARBUFF, UNKNOWN, + ADDMIXER, RESAMPLE, RESAMPLE_ZOH, FILTER, + SETBUFF, DUPLICATE, DMEMMOVE, LOADADPCM, + MIXER, INTERLEAVE, HILOGAIN, SETLOOP, + NEAD_16, INTERL, ENVSETUP1, ENVMIXER, + LOADBUFF, SAVEBUFF, ENVSETUP2, UNKNOWN + }; + + alist_process(hle, ABI, 0x18); +} + +void alist_process_nead_mm(CHle * hle) +{ + static const acmd_callback_t ABI[0x18] = { + UNKNOWN, ADPCM, CLEARBUFF, SPNOOP, + ADDMIXER, RESAMPLE, RESAMPLE_ZOH, FILTER, + SETBUFF, DUPLICATE, DMEMMOVE, 
/* Saturate x to the signed 16-bit range [INT16_MIN, INT16_MAX]. */
static inline int16_t clamp_s16(int_fast32_t x)
{
    if (x < INT16_MIN)
    {
        return INT16_MIN;
    }
    if (x > INT16_MAX)
    {
        return INT16_MAX;
    }
    return (int16_t)x;
}

/* Fixed-point multiply: (x * y + 0x4000) >> 15, computed in 32 bits.
   Adding 0x4000 rounds to nearest before the 15-bit shift. */
static inline int32_t vmulf(int16_t x, int16_t y)
{
    const int32_t product = (int32_t)x * (int32_t)y;

    return (product + 0x4000) >> 15;
}
(int16_t)0x2264u, (int16_t)0xff3au, + (int16_t)0x015bu, (int16_t)0x5b53u, (int16_t)0x241eu, (int16_t)0xff2cu, + (int16_t)0x0101u, (int16_t)0x59fcu, (int16_t)0x25e0u, (int16_t)0xff1eu, + (int16_t)0x00aeu, (int16_t)0x5896u, (int16_t)0x27a9u, (int16_t)0xff10u, + (int16_t)0x0063u, (int16_t)0x5720u, (int16_t)0x297au, (int16_t)0xff02u, + (int16_t)0x001fu, (int16_t)0x559du, (int16_t)0x2b50u, (int16_t)0xfef4u, + (int16_t)0xffe2u, (int16_t)0x540du, (int16_t)0x2d2cu, (int16_t)0xfee8u, + (int16_t)0xffacu, (int16_t)0x5270u, (int16_t)0x2f0du, (int16_t)0xfedbu, + (int16_t)0xff7cu, (int16_t)0x50c7u, (int16_t)0x30f3u, (int16_t)0xfed0u, + (int16_t)0xff53u, (int16_t)0x4f14u, (int16_t)0x32dcu, (int16_t)0xfec6u, + (int16_t)0xff2eu, (int16_t)0x4d57u, (int16_t)0x34c8u, (int16_t)0xfebdu, + (int16_t)0xff0fu, (int16_t)0x4b91u, (int16_t)0x36b6u, (int16_t)0xfeb6u, + (int16_t)0xfef5u, (int16_t)0x49c2u, (int16_t)0x38a5u, (int16_t)0xfeb0u, + (int16_t)0xfedfu, (int16_t)0x47edu, (int16_t)0x3a95u, (int16_t)0xfeacu, + (int16_t)0xfeceu, (int16_t)0x4611u, (int16_t)0x3c85u, (int16_t)0xfeabu, + (int16_t)0xfec0u, (int16_t)0x4430u, (int16_t)0x3e74u, (int16_t)0xfeacu, + (int16_t)0xfeb6u, (int16_t)0x424au, (int16_t)0x4060u, (int16_t)0xfeafu, + (int16_t)0xfeafu, (int16_t)0x4060u, (int16_t)0x424au, (int16_t)0xfeb6u, + (int16_t)0xfeacu, (int16_t)0x3e74u, (int16_t)0x4430u, (int16_t)0xfec0u, + (int16_t)0xfeabu, (int16_t)0x3c85u, (int16_t)0x4611u, (int16_t)0xfeceu, + (int16_t)0xfeacu, (int16_t)0x3a95u, (int16_t)0x47edu, (int16_t)0xfedfu, + (int16_t)0xfeb0u, (int16_t)0x38a5u, (int16_t)0x49c2u, (int16_t)0xfef5u, + (int16_t)0xfeb6u, (int16_t)0x36b6u, (int16_t)0x4b91u, (int16_t)0xff0fu, + (int16_t)0xfebdu, (int16_t)0x34c8u, (int16_t)0x4d57u, (int16_t)0xff2eu, + (int16_t)0xfec6u, (int16_t)0x32dcu, (int16_t)0x4f14u, (int16_t)0xff53u, + (int16_t)0xfed0u, (int16_t)0x30f3u, (int16_t)0x50c7u, (int16_t)0xff7cu, + (int16_t)0xfedbu, (int16_t)0x2f0du, (int16_t)0x5270u, (int16_t)0xffacu, + (int16_t)0xfee8u, (int16_t)0x2d2cu, 
(int16_t)0x540du, (int16_t)0xffe2u, + (int16_t)0xfef4u, (int16_t)0x2b50u, (int16_t)0x559du, (int16_t)0x001fu, + (int16_t)0xff02u, (int16_t)0x297au, (int16_t)0x5720u, (int16_t)0x0063u, + (int16_t)0xff10u, (int16_t)0x27a9u, (int16_t)0x5896u, (int16_t)0x00aeu, + (int16_t)0xff1eu, (int16_t)0x25e0u, (int16_t)0x59fcu, (int16_t)0x0101u, + (int16_t)0xff2cu, (int16_t)0x241eu, (int16_t)0x5b53u, (int16_t)0x015bu, + (int16_t)0xff3au, (int16_t)0x2264u, (int16_t)0x5c9au, (int16_t)0x01beu, + (int16_t)0xff48u, (int16_t)0x20b3u, (int16_t)0x5dd0u, (int16_t)0x022au, + (int16_t)0xff56u, (int16_t)0x1f0bu, (int16_t)0x5ef5u, (int16_t)0x029fu, + (int16_t)0xff64u, (int16_t)0x1d6cu, (int16_t)0x6007u, (int16_t)0x031cu, + (int16_t)0xff71u, (int16_t)0x1bd7u, (int16_t)0x6106u, (int16_t)0x03a4u, + (int16_t)0xff7eu, (int16_t)0x1a4cu, (int16_t)0x61f3u, (int16_t)0x0435u, + (int16_t)0xff8au, (int16_t)0x18cbu, (int16_t)0x62cbu, (int16_t)0x04d1u, + (int16_t)0xff96u, (int16_t)0x1756u, (int16_t)0x638fu, (int16_t)0x0577u, + (int16_t)0xffa1u, (int16_t)0x15ebu, (int16_t)0x643fu, (int16_t)0x0628u, + (int16_t)0xffacu, (int16_t)0x148cu, (int16_t)0x64d9u, (int16_t)0x06e4u, + (int16_t)0xffb6u, (int16_t)0x1338u, (int16_t)0x655eu, (int16_t)0x07abu, + (int16_t)0xffbfu, (int16_t)0x11f0u, (int16_t)0x65cdu, (int16_t)0x087du, + (int16_t)0xffc8u, (int16_t)0x10b4u, (int16_t)0x6626u, (int16_t)0x095au, + (int16_t)0xffd0u, (int16_t)0x0f83u, (int16_t)0x6669u, (int16_t)0x0a44u, + (int16_t)0xffd8u, (int16_t)0x0e5fu, (int16_t)0x6696u, (int16_t)0x0b39u, + (int16_t)0xffdfu, (int16_t)0x0d46u, (int16_t)0x66adu, (int16_t)0x0c39u +}; + +int32_t rdot(size_t n, const int16_t *x, const int16_t *y) +{ + int32_t accu = 0; + + y += n; + + while (n != 0) { + accu += *(x++) * *(--y); + --n; + } + + return accu; +} + +void adpcm_compute_residuals(int16_t* dst, const int16_t* src, + const int16_t* cb_entry, const int16_t* last_samples, size_t count) +{ + const int16_t* const book1 = cb_entry; + const int16_t* const book2 = cb_entry + 8; + + 
const int16_t l1 = last_samples[0]; + const int16_t l2 = last_samples[1]; + + size_t i; + + assert(count <= 8); + + for (i = 0; i < count; ++i) + { + int32_t accu = (int32_t)src[i] << 11; + accu += book1[i] * l1 + book2[i] * l2 + rdot(i, book2, src); + dst[i] = clamp_s16(accu >> 11); + } +} \ No newline at end of file diff --git a/Source/Android/PluginRSP/audio.h b/Source/Android/PluginRSP/audio.h new file mode 100644 index 000000000..6d7a66ae7 --- /dev/null +++ b/Source/Android/PluginRSP/audio.h @@ -0,0 +1,26 @@ +/**************************************************************************** +* * +* Project64 - A Nintendo 64 emulator. * +* http://www.pj64-emu.com/ * +* Copyright (C) 2016 Project64. All rights reserved. * +* * +* License: * +* GNU/GPLv2 http://www.gnu.org/licenses/gpl-2.0.html * +* * +****************************************************************************/ +#pragma once + +extern const int16_t RESAMPLE_LUT[64 * 4]; + +int32_t rdot(size_t n, const int16_t *x, const int16_t *y); + +static inline int16_t adpcm_predict_sample(uint8_t byte, uint8_t mask, + unsigned lshift, unsigned rshift) +{ + int16_t sample = (uint16_t)(byte & mask) << lshift; + sample >>= rshift; /* signed */ + return sample; +} + +void adpcm_compute_residuals(int16_t* dst, const int16_t* src, + const int16_t* cb_entry, const int16_t* last_samples, size_t count); diff --git a/Source/Android/PluginRSP/cicx105.cpp b/Source/Android/PluginRSP/cicx105.cpp new file mode 100644 index 000000000..e8be95f6d --- /dev/null +++ b/Source/Android/PluginRSP/cicx105.cpp @@ -0,0 +1,39 @@ +/**************************************************************************** +* * +* Project64 - A Nintendo 64 emulator. * +* http://www.pj64-emu.com/ * +* Copyright (C) 2016 Project64. All rights reserved. 
*
+*                                                                          *
+* License:                                                                 *
+* GNU/GPLv2 http://www.gnu.org/licenses/gpl-2.0.html                       *
+*                                                                          *
+****************************************************************************/
+#include "stdafx.h"
+#include <string.h>
+
+/**
+* During the IPL3 stage of CIC x105 games, the RSP performs some checks and transactions
+* necessary for booting the game.
+*
+* We only implement the DMA transactions needed for booting (copies between hle->imem() and hle->dram()).
+*
+* (Found in Banjo-Tooie, Zelda, Perfect Dark, ...)
+**/
+void cicx105_ucode(CHle * hle)
+{
+    /* memcpy is okay to use because access constraints are met (alignment, size) */
+    unsigned int i;
+    unsigned char *dst = hle->dram() + 0x2fb1f0;
+    unsigned char *src = hle->imem() + 0x120;
+
+    /* dma_read(0x1120, 0x1e8, 0x1e8): copies 0x1f0 bytes from DRAM + 0x1e8 into IMEM + 0x120 */
+    memcpy(hle->imem() + 0x120, hle->dram() + 0x1e8, 0x1f0);
+
+    /* dma_write(0x1120, 0x2fb1f0, 0xfe817000): 24 chunks of 8 bytes, destination stride 0xff0, source stride 8 */
+    for (i = 0; i < 24; ++i)
+    {
+        memcpy(dst, src, 8);
+        dst += 0xff0;
+        src += 0x8;
+    }
+}
\ No newline at end of file
diff --git a/Source/Android/PluginRSP/common.h b/Source/Android/PluginRSP/common.h
new file mode 100644
index 000000000..e9a86660c
--- /dev/null
+++ b/Source/Android/PluginRSP/common.h
@@ -0,0 +1,56 @@
+/****************************************************************************
+*                                                                          *
+* Project64 - A Nintendo 64 emulator.                                      *
+* http://www.pj64-emu.com/                                                 *
+* Copyright (C) 2016 Project64. All rights reserved.
*
+*                                                                          *
+* License:                                                                 *
+* GNU/GPLv2 http://www.gnu.org/licenses/gpl-2.0.html                       *
+*                                                                          *
+****************************************************************************/
+#pragma once
+
+/* macro for unused variable warning suppression */
+#ifdef __GNUC__
+# define UNUSED(x) UNUSED_ ## x __attribute__((__unused__))
+#else
+# define UNUSED(x) /* x */
+#endif
+
+/* map the inline keyword to __inline when building with MSVC */
+#ifdef _MSC_VER
+#define inline __inline
+#endif
+
+/* DLL function linking: EXPORT marks a C-linkage exported symbol, CALL is the calling convention */
+#if defined(_WIN32)
+#define EXPORT extern "C" __declspec(dllexport)
+#define CALL __cdecl
+#else
+#define EXPORT extern "C" __attribute__((visibility("default")))
+#define CALL
+#endif
+
+/* Plugin types (value stored in PLUGIN_INFO::Type) */
+enum
+{
+    PLUGIN_TYPE_RSP = 1,
+    PLUGIN_TYPE_GFX = 2,
+    PLUGIN_TYPE_AUDIO = 3,
+    PLUGIN_TYPE_CONTROLLER = 4,
+};
+
+/***** Structures *****/
+
+typedef struct
+{
+    uint16_t Version;        /* Should be set to 0x0101 */
+    uint16_t Type;           /* Set to PLUGIN_TYPE_RSP */
+    char Name[100];          /* Name of the DLL */
+
+    /* If the DLL supports one of these memory options then set it to TRUE, or to FALSE
+       if it does not support it */
+    int NormalMemory;        /* a normal BYTE array */
+    int MemoryBswaped;       /* a normal BYTE array where the memory has been pre
+                                bswap on a dword (32 bits) boundary */
+} PLUGIN_INFO;
diff --git a/Source/Android/PluginRSP/hle.cpp b/Source/Android/PluginRSP/hle.cpp
new file mode 100644
index 000000000..def475ba6
--- /dev/null
+++ b/Source/Android/PluginRSP/hle.cpp
@@ -0,0 +1,336 @@
+/****************************************************************************
+*                                                                          *
+* Project64 - A Nintendo 64 emulator.                                      *
+* http://www.pj64-emu.com/                                                 *
+* Copyright (C) 2016 Project64. All rights reserved.                       *
+*                                                                          *
+* License:                                                                 *
+* GNU/GPLv2 http://www.gnu.org/licenses/gpl-2.0.html                       *
+*                                                                          *
+****************************************************************************/
+#include "stdafx.h"
+#include "mem.h"
+#include "ucodes.h"
+#include <string.h>
+
+#define min(a,b) (((a) < (b)) ?
(a) : (b)) + +/* helper functions prototypes */ +static unsigned int sum_bytes(const uint8_t *bytes, uint32_t size); + +CHle::CHle(const RSP_INFO & Rsp_Info) : + m_dram(Rsp_Info.RDRAM), + m_dmem(Rsp_Info.DMEM), + m_imem(Rsp_Info.IMEM), + m_mi_intr(Rsp_Info.MI_INTR_REG), + m_sp_mem_addr(Rsp_Info.SP_MEM_ADDR_REG), + m_sp_dram_addr(Rsp_Info.SP_DRAM_ADDR_REG), + m_sp_rd_length(Rsp_Info.SP_RD_LEN_REG), + m_sp_wr_length(Rsp_Info.SP_WR_LEN_REG), + m_sp_status(Rsp_Info.SP_STATUS_REG), + m_sp_dma_full(Rsp_Info.SP_DMA_FULL_REG), + m_sp_dma_busy(Rsp_Info.SP_DMA_BUSY_REG), + m_sp_pc(Rsp_Info.SP_PC_REG), + m_sp_semaphore(Rsp_Info.SP_SEMAPHORE_REG), + m_dpc_start(Rsp_Info.DPC_START_REG), + m_dpc_end(Rsp_Info.DPC_END_REG), + m_dpc_current(Rsp_Info.DPC_CURRENT_REG), + m_dpc_status(Rsp_Info.DPC_STATUS_REG), + m_dpc_clock(Rsp_Info.DPC_CLOCK_REG), + m_dpc_bufbusy(Rsp_Info.DPC_BUFBUSY_REG), + m_dpc_pipebusy(Rsp_Info.DPC_PIPEBUSY_REG), + m_dpc_tmem(Rsp_Info.DPC_TMEM_REG), + m_CheckInterrupts(Rsp_Info.CheckInterrupts), + m_ProcessDList(Rsp_Info.ProcessDList), + m_ProcessAList(Rsp_Info.ProcessAList), + m_ProcessRdpList(Rsp_Info.ProcessRdpList), + m_ShowCFB(Rsp_Info.ShowCFB), + m_AudioHle(false), + m_GraphicsHle(true), + m_ForwardAudio(false), + m_ForwardGFX(true) +{ + //m_AudioHle = ReadCfgInt("Settings", "AudioHle", false); + //m_GraphicsHle = ReadCfgInt("Settings", "GraphicsHle", true); + memset(&m_alist_buffer, 0, sizeof(m_alist_buffer)); + memset(&m_alist_audio, 0, sizeof(m_alist_audio)); + memset(&m_alist_naudio, 0, sizeof(m_alist_naudio)); + memset(&m_alist_nead, 0, sizeof(m_alist_nead)); + memset(&m_mp3_buffer, 0, sizeof(m_mp3_buffer)); +} + +CHle::~CHle() +{ +} + +void CHle::rsp_break(unsigned int setbits) +{ + *m_sp_status |= setbits | SP_STATUS_BROKE | SP_STATUS_HALT; + + if ((*m_sp_status & SP_STATUS_INTR_BREAK)) + { + *m_mi_intr |= MI_INTR_SP; + m_CheckInterrupts(); + } +} + +void CHle::hle_execute(void) +{ + if (is_task()) + { + if (!try_fast_task_dispatching()) + { + 
normal_task_dispatching();
+        }
+        rsp_break(SP_STATUS_SIG2);
+    }
+    else
+    {
+        non_task_dispatching();
+        rsp_break(0);
+    }
+}
+
+/* local functions: sum_bytes returns the sum of the `size` bytes starting at `bytes` */
+static unsigned int sum_bytes(const uint8_t * bytes, unsigned int size)
+{
+    unsigned int sum = 0;
+    const unsigned char *const bytes_end = bytes + size;
+
+    while (bytes != bytes_end)
+    {
+        sum += *bytes++;
+    }
+    return sum;
+}
+
+/**
+* Try to figure out if the RSP was launched using osSpTask* functions
+* and not run directly (in which case DMEM[0xfc0-0xfff] is meaningless).
+*
+* Previously, the ucode_size field was used to determine this,
+* but it is not robust enough (hi Pokemon Stadium !) because games could write anything
+* in this field : most ucode_boot discard the value and just use 0xf7f anyway.
+*
+* Using ucode_boot_size should be more robust in this regard (a task is assumed when it is <= 0x1000).
+**/
+bool CHle::is_task(void)
+{
+    return (*dmem_u32(this, TASK_UCODE_BOOT_SIZE) <= 0x1000);
+}
+
+bool CHle::try_fast_task_dispatching(void)
+{
+    /* identify task ucode by its type; returns true when the task was handled here */
+    switch (*dmem_u32(this, TASK_TYPE))
+    {
+    case 1: /* forwarded to the ProcessDList callback when m_ForwardGFX is set */
+        if (m_ForwardGFX)
+        {
+            m_ProcessDList();
+            return true;
+        }
+        break;
+    case 2: /* ProcessAList callback when m_AudioHle is set, otherwise audio ucode detection below */
+        if (m_AudioHle)
+        {
+            m_ProcessAList();
+            return true;
+        }
+        else if (try_fast_audio_dispatching())
+        {
+            return true;
+        }
+        break;
+    case 7: /* always forwarded to the ShowCFB callback */
+        m_ShowCFB();
+        return true;
+    }
+    return false;
+}
+
+bool CHle::try_fast_audio_dispatching(void)
+{
+    /* identify audio ucode by using the content of ucode_data; true is returned once a known ucode has run */
+    uint32_t ucode_data = *dmem_u32(this, TASK_UCODE_DATA);
+    uint32_t v;
+
+    if (*dram_u32(this, ucode_data) == 0x00000001)
+    {
+        if (*dram_u32(this, ucode_data + 0x30) == 0xf0000f00)
+        {
+            v = *dram_u32(this, ucode_data + 0x28);
+            switch (v)
+            {
+            case 0x1e24138c: /* audio ABI (most common) */
+                alist_process_audio(this);
+                return true;
+            case 0x1dc8138c: /* GoldenEye */
+                alist_process_audio_ge(this);
+                return true;
+            case 0x1e3c1390: /* BlastCorp, DiddyKongRacing */
+                alist_process_audio_bc(this);
+                return true;
+            default:
+
WarnMessage("ABI1 identification regression: v=%08x", v); + } + } + else + { + v = *dram_u32(this, ucode_data + 0x10); + switch (v) + { + case 0x11181350: /* MarioKart, WaveRace (E) */ + alist_process_nead_mk(this); + return true; + case 0x111812e0: /* StarFox (J) */ + alist_process_nead_sfj(this); + return true; + case 0x110412ac: /* WaveRace (J RevB) */ + alist_process_nead_wrjb(this); + return true; + case 0x110412cc: /* StarFox/LylatWars (except J) */ + alist_process_nead_sf(this); + return true; + case 0x1cd01250: /* FZeroX */ + alist_process_nead_fz(this); + return true; + case 0x1f08122c: /* YoshisStory */ + alist_process_nead_ys(this); + return true; + case 0x1f38122c: /* 1080° Snowboarding */ + alist_process_nead_1080(this); + return true; + case 0x1f681230: /* Zelda OoT / Zelda MM (J, J RevA) */ + alist_process_nead_oot(this); + return true; + case 0x1f801250: /* Zelda MM (except J, J RevA, E Beta), PokemonStadium 2 */ + alist_process_nead_mm(this); + return true; + case 0x109411f8: /* Zelda MM (E Beta) */ + alist_process_nead_mmb(this); + return true; + case 0x1eac11b8: /* AnimalCrossing */ + alist_process_nead_ac(this); + return true; + case 0x00010010: /* MusyX v2 (IndianaJones, BattleForNaboo) */ + musyx_v2_task(this); + return true; + default: + WarnMessage("ABI2 identification regression: v=%08x", v); + } + } + } + else + { + v = *dram_u32(this, ucode_data + 0x10); + switch (v) + { + case 0x00000001: /* MusyX v1 + RogueSquadron, ResidentEvil2, PolarisSnoCross, + TheWorldIsNotEnough, RugratsInParis, NBAShowTime, + HydroThunder, Tarzan, GauntletLegend, Rush2049 */ + musyx_v1_task(this); + return true; + case 0x0000127c: /* naudio (many games) */ + alist_process_naudio(this); + return true; + case 0x00001280: /* BanjoKazooie */ + alist_process_naudio_bk(this); + return true; + case 0x1c58126c: /* DonkeyKong */ + alist_process_naudio_dk(this); + return true; + case 0x1ae8143c: /* BanjoTooie, JetForceGemini, MickeySpeedWayUSA, PerfectDark */ + 
alist_process_naudio_mp3(this);
+            return true;
+        case 0x1ab0140c: /* ConkerBadFurDay */
+            alist_process_naudio_cbfd(this);
+            return true;
+        default:
+            WarnMessage("ABI3 identification regression: v=%08x", v);
+        }
+    }
+    return false;
+}
+
+void CHle::normal_task_dispatching(void)
+{
+    const unsigned int sum = /* byte sum over the first min(ucode_size, 0xf80) / 2 bytes of the task ucode */
+        sum_bytes((const uint8_t *)dram_u32(this, *dmem_u32(this, TASK_UCODE)), min(*dmem_u32(this, TASK_UCODE_SIZE), 0xf80) >> 1);
+
+    switch (sum) {
+        /* StoreVe12: found in Zelda Ocarina of Time [misleading task->type == 4] */
+    case 0x278:
+        /* Nothing to emulate */
+        return;
+
+        /* GFX: Twintris [misleading task->type == 0] */
+    case 0x212ee:
+        if (m_ForwardGFX)
+        {
+            m_ProcessDList();
+            return;
+        }
+        break;
+
+        /* JPEG: found in Pokemon Stadium J */
+    case 0x2c85a:
+        jpeg_decode_PS0(this);
+        return;
+
+        /* JPEG: found in Zelda Ocarina of Time, Pokemon Stadium 1, Pokemon Stadium 2 */
+    case 0x2caa6:
+        jpeg_decode_PS(this);
+        return;
+
+        /* JPEG: found in Ogre Battle, Bottom of the 9th */
+    case 0x130de:
+    case 0x278b0:
+        jpeg_decode_OB(this);
+        return;
+    }
+
+    WarnMessage("unknown OSTask: sum: %x PC:%x", sum, *m_sp_pc);
+#ifdef ENABLE_TASK_DUMP
+    dump_unknown_task(this, sum);
+#endif
+}
+
+void CHle::non_task_dispatching(void)
+{
+    const unsigned int sum = sum_bytes(m_imem, 44); /* byte sum of the first 44 bytes of IMEM */
+
+    if (sum == 0x9e2)
+    {
+        /* CIC x105 ucode (used during boot of CIC x105 games) */
+        cicx105_ucode(this);
+        return;
+    }
+
+    WarnMessage("unknown RSP code: sum: %x PC:%x", sum, *m_sp_pc);
+#ifdef ENABLE_TASK_DUMP
+    dump_unknown_non_task(this, sum); /* member function: the context is `this`, as in normal_task_dispatching */
+#endif
+}
+
+#if defined(_WIN32) && defined(_DEBUG)
+#include <windows.h>
+#endif
+
+void CHle::VerboseMessage(const char *message, ...) /* currently a no-op: the message box is compiled out */
+{
+#if defined(_WIN32) && defined(_DEBUG)
+    // These can get annoying.
+#if 0
+    MessageBox(NULL, message, "HLE Verbose Message", MB_OK);
+#endif
+#endif
+}
+
+void CHle::WarnMessage(const char *message, ...)
+{ +#if defined(_WIN32) && defined(_DEBUG) + MessageBox(NULL, message, "HLE Warning Message", MB_OK); +#endif +} \ No newline at end of file diff --git a/Source/Android/PluginRSP/hle.h b/Source/Android/PluginRSP/hle.h new file mode 100644 index 000000000..a9e5ca4f5 --- /dev/null +++ b/Source/Android/PluginRSP/hle.h @@ -0,0 +1,160 @@ +/**************************************************************************** +* * +* Project64 - A Nintendo 64 emulator. * +* http://www.pj64-emu.com/ * +* Copyright (C) 2016 Project64. All rights reserved. * +* * +* License: * +* GNU/GPLv2 http://www.gnu.org/licenses/gpl-2.0.html * +* * +****************************************************************************/ +#pragma once +#include +#include "Rsp.h" +#include "ucodes.h" + +//Signal Processor interface flags +enum +{ + SP_CLR_HALT = 0x00001, /* Bit 0: clear halt */ + SP_SET_HALT = 0x00002, /* Bit 1: set halt */ + SP_CLR_BROKE = 0x00004, /* Bit 2: clear broke */ + SP_CLR_INTR = 0x00008, /* Bit 3: clear intr */ + SP_SET_INTR = 0x00010, /* Bit 4: set intr */ + SP_CLR_SSTEP = 0x00020, /* Bit 5: clear sstep */ + SP_SET_SSTEP = 0x00040, /* Bit 6: set sstep */ + SP_CLR_INTR_BREAK = 0x00080, /* Bit 7: clear intr on break */ + SP_SET_INTR_BREAK = 0x00100, /* Bit 8: set intr on break */ + SP_CLR_SIG0 = 0x00200, /* Bit 9: clear signal 0 */ + SP_SET_SIG0 = 0x00400, /* Bit 10: set signal 0 */ + SP_CLR_SIG1 = 0x00800, /* Bit 11: clear signal 1 */ + SP_SET_SIG1 = 0x01000, /* Bit 12: set signal 1 */ + SP_CLR_SIG2 = 0x02000, /* Bit 13: clear signal 2 */ + SP_SET_SIG2 = 0x04000, /* Bit 14: set signal 2 */ + SP_CLR_SIG3 = 0x08000, /* Bit 15: clear signal 3 */ + SP_SET_SIG3 = 0x10000, /* Bit 16: set signal 3 */ + SP_CLR_SIG4 = 0x20000, /* Bit 17: clear signal 4 */ + SP_SET_SIG4 = 0x40000, /* Bit 18: set signal 4 */ + SP_CLR_SIG5 = 0x80000, /* Bit 19: clear signal 5 */ + SP_SET_SIG5 = 0x100000, /* Bit 20: set signal 5 */ + SP_CLR_SIG6 = 0x200000, /* Bit 21: clear signal 6 */ + SP_SET_SIG6 = 
0x400000, /* Bit 22: set signal 6 */ + SP_CLR_SIG7 = 0x800000, /* Bit 23: clear signal 7 */ + SP_SET_SIG7 = 0x1000000, /* Bit 24: set signal 7 */ + + SP_STATUS_HALT = 0x001, /* Bit 0: halt */ + SP_STATUS_BROKE = 0x002, /* Bit 1: broke */ + SP_STATUS_DMA_BUSY = 0x004, /* Bit 2: dma busy */ + SP_STATUS_DMA_FULL = 0x008, /* Bit 3: dma full */ + SP_STATUS_IO_FULL = 0x010, /* Bit 4: io full */ + SP_STATUS_SSTEP = 0x020, /* Bit 5: single step */ + SP_STATUS_INTR_BREAK = 0x040, /* Bit 6: interrupt on break */ + SP_STATUS_SIG0 = 0x080, /* Bit 7: signal 0 set */ + SP_STATUS_SIG1 = 0x100, /* Bit 8: signal 1 set */ + SP_STATUS_SIG2 = 0x200, /* Bit 9: signal 2 set */ + SP_STATUS_SIG3 = 0x400, /* Bit 10: signal 3 set */ + SP_STATUS_SIG4 = 0x800, /* Bit 11: signal 4 set */ + SP_STATUS_SIG5 = 0x1000, /* Bit 12: signal 5 set */ + SP_STATUS_SIG6 = 0x2000, /* Bit 13: signal 6 set */ + SP_STATUS_SIG7 = 0x4000, /* Bit 14: signal 7 set */ +}; + +//Mips interface flags +enum +{ + MI_INTR_SP = 0x01, /* Bit 0: SP intr */ + MI_INTR_SI = 0x02, /* Bit 1: SI intr */ + MI_INTR_AI = 0x04, /* Bit 2: AI intr */ + MI_INTR_VI = 0x08, /* Bit 3: VI intr */ + MI_INTR_PI = 0x10, /* Bit 4: PI intr */ + MI_INTR_DP = 0x20, /* Bit 5: DP intr */ +}; + +class CHle +{ +public: + CHle(const RSP_INFO & Rsp_Info); + ~CHle(); + + uint8_t * dram() { return m_dram; } + uint8_t * dmem() { return m_dmem; } + uint8_t * imem() { return m_imem; } + + bool AudioHle() { return m_AudioHle; } + bool GraphicsHle() { return m_GraphicsHle; } + struct alist_audio_t & alist_audio() { return m_alist_audio; } + struct alist_naudio_t & alist_naudio() { return m_alist_naudio; } + struct alist_nead_t & alist_nead() { return m_alist_nead; } + uint8_t * mp3_buffer() { return &m_mp3_buffer[0]; } + + uint8_t * alist_buffer() { return &m_alist_buffer[0]; } + + void VerboseMessage(const char *message, ...); + void WarnMessage(const char *message, ...); + void ErrorMessage(const char *message, ...); + + void rsp_break(uint32_t setbits); + 
void hle_execute(void); + +private: + CHle(void); // Disable default constructor + CHle(const CHle&); // Disable copy constructor + CHle& operator=(const CHle&); // Disable assignment + + bool is_task(void); + bool try_fast_audio_dispatching(void); + bool try_fast_task_dispatching(void); + void normal_task_dispatching(void); + void non_task_dispatching(void); + + uint8_t * m_dram; + uint8_t * m_dmem; + uint8_t * m_imem; + + uint32_t* m_mi_intr; + + uint32_t* m_sp_mem_addr; + uint32_t* m_sp_dram_addr; + uint32_t* m_sp_rd_length; + uint32_t* m_sp_wr_length; + uint32_t* m_sp_status; + uint32_t* m_sp_dma_full; + uint32_t* m_sp_dma_busy; + uint32_t* m_sp_pc; + uint32_t* m_sp_semaphore; + + uint32_t* m_dpc_start; + uint32_t* m_dpc_end; + uint32_t* m_dpc_current; + uint32_t* m_dpc_status; + uint32_t* m_dpc_clock; + uint32_t* m_dpc_bufbusy; + uint32_t* m_dpc_pipebusy; + uint32_t* m_dpc_tmem; + + void(*m_CheckInterrupts)(void); + void(*m_ProcessDList)(void); + void(*m_ProcessAList)(void); + void(*m_ProcessRdpList)(void); + void(*m_ShowCFB)(void); + + /* alist.cpp */ + uint8_t m_alist_buffer[0x1000]; + + /* alist_audio.cpp */ + struct alist_audio_t m_alist_audio; + + /* alist_naudio.cpp */ + struct alist_naudio_t m_alist_naudio; + + /* alist_nead.cpp */ + struct alist_nead_t m_alist_nead; + + /* mp3.cpp */ + uint8_t m_mp3_buffer[0x1000]; + + bool m_AudioHle; + bool m_GraphicsHle; + bool m_ForwardAudio; + bool m_ForwardGFX; +}; diff --git a/Source/Android/PluginRSP/jpeg.cpp b/Source/Android/PluginRSP/jpeg.cpp new file mode 100644 index 000000000..c88e5f6f5 --- /dev/null +++ b/Source/Android/PluginRSP/jpeg.cpp @@ -0,0 +1,594 @@ +/**************************************************************************** +* * +* Project64 - A Nintendo 64 emulator. * +* http://www.pj64-emu.com/ * +* Copyright (C) 2016 Project64. All rights reserved. 
* +* * +* License: * +* GNU/GPLv2 http://www.gnu.org/licenses/gpl-2.0.html * +* * +****************************************************************************/ +#include "stdafx.h" +#include + +#include "arithmetics.h" +#include "mem.h" + +#define SUBBLOCK_SIZE 64 + +typedef void(*tile_line_emitter_t)(CHle * hle, const int16_t *y, const int16_t *u, uint32_t address); +typedef void(*subblock_transform_t)(int16_t *dst, const int16_t *src); + +/* standard jpeg ucode decoder */ +static void jpeg_decode_std(CHle * hle, + const char *const version, + const subblock_transform_t transform_luma, + const subblock_transform_t transform_chroma, + const tile_line_emitter_t emit_line); + +/* helper functions */ +static uint8_t clamp_u8(int16_t x); +static int16_t clamp_s12(int16_t x); +static uint16_t clamp_RGBA_component(int16_t x); + +/* pixel conversion & formatting */ +static uint32_t GetUYVY(int16_t y1, int16_t y2, int16_t u, int16_t v); +static uint16_t GetRGBA(int16_t y, int16_t u, int16_t v); + +/* tile line emitters */ +static void EmitYUVTileLine(CHle * hle, const int16_t *y, const int16_t *u, uint32_t address); +static void EmitRGBATileLine(CHle * hle, const int16_t *y, const int16_t *u, uint32_t address); + +/* macroblocks operations */ +static void decode_macroblock_ob(int16_t *macroblock, int32_t *y_dc, int32_t *u_dc, int32_t *v_dc, const int16_t *qtable); +static void decode_macroblock_std(const subblock_transform_t transform_luma, + const subblock_transform_t transform_chroma, + int16_t *macroblock, + unsigned int subblock_count, + const int16_t qtables[3][SUBBLOCK_SIZE]); +static void EmitTilesMode0(CHle * hle, const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address); +static void EmitTilesMode2(CHle * hle, const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address); + +/* subblocks operations */ +static void TransposeSubBlock(int16_t *dst, const int16_t *src); +static void ZigZagSubBlock(int16_t *dst, const 
int16_t *src); +static void ReorderSubBlock(int16_t *dst, const int16_t *src, const unsigned int *table); +static void MultSubBlocks(int16_t *dst, const int16_t *src1, const int16_t *src2, unsigned int shift); +static void ScaleSubBlock(int16_t *dst, const int16_t *src, int16_t scale); +static void RShiftSubBlock(int16_t *dst, const int16_t *src, unsigned int shift); +static void InverseDCT1D(const float *const x, float *dst, unsigned int stride); +static void InverseDCTSubBlock(int16_t *dst, const int16_t *src); +static void RescaleYSubBlock(int16_t *dst, const int16_t *src); +static void RescaleUVSubBlock(int16_t *dst, const int16_t *src); + +/* transposed dequantization table */ +static const int16_t DEFAULT_QTABLE[SUBBLOCK_SIZE] = { + 16, 12, 14, 14, 18, 24, 49, 72, + 11, 12, 13, 17, 22, 35, 64, 92, + 10, 14, 16, 22, 37, 55, 78, 95, + 16, 19, 24, 29, 56, 64, 87, 98, + 24, 26, 40, 51, 68, 81, 103, 112, + 40, 58, 57, 87, 109, 104, 121, 100, + 51, 60, 69, 80, 103, 113, 120, 103, + 61, 55, 56, 62, 77, 92, 101, 99 +}; + +/* zig-zag indices */ +static const unsigned int ZIGZAG_TABLE[SUBBLOCK_SIZE] = { + 0, 1, 5, 6, 14, 15, 27, 28, + 2, 4, 7, 13, 16, 26, 29, 42, + 3, 8, 12, 17, 25, 30, 41, 43, + 9, 11, 18, 24, 31, 40, 44, 53, + 10, 19, 23, 32, 39, 45, 52, 54, + 20, 22, 33, 38, 46, 51, 55, 60, + 21, 34, 37, 47, 50, 56, 59, 61, + 35, 36, 48, 49, 57, 58, 62, 63 +}; + +/* transposition indices */ +static const unsigned int TRANSPOSE_TABLE[SUBBLOCK_SIZE] = { + 0, 8, 16, 24, 32, 40, 48, 56, + 1, 9, 17, 25, 33, 41, 49, 57, + 2, 10, 18, 26, 34, 42, 50, 58, + 3, 11, 19, 27, 35, 43, 51, 59, + 4, 12, 20, 28, 36, 44, 52, 60, + 5, 13, 21, 29, 37, 45, 53, 61, + 6, 14, 22, 30, 38, 46, 54, 62, + 7, 15, 23, 31, 39, 47, 55, 63 +}; + +/* IDCT related constants + * Cn = alpha * cos(n * PI / 16) (alpha is chosen such as C4 = 1) */ +static const float IDCT_C3 = 1.175875602f; +static const float IDCT_C6 = 0.541196100f; +static const float IDCT_K[10] = { + 0.765366865f, /* C2-C6 */ + 
-1.847759065f, /* -C2-C6 */ + -0.390180644f, /* C5-C3 */ + -1.961570561f, /* -C5-C3 */ + 1.501321110f, /* C1+C3-C5-C7 */ + 2.053119869f, /* C1+C3-C5+C7 */ + 3.072711027f, /* C1+C3+C5-C7 */ + 0.298631336f, /* -C1+C3+C5-C7 */ + -0.899976223f, /* C7-C3 */ + -2.562915448f /* -C1-C3 */ +}; + +/* global functions */ + +/*************************************************************************** +* JPEG decoding ucode found in Japanese exclusive version of Pokemon Stadium. +**************************************************************************/ +void jpeg_decode_PS0(CHle * hle) +{ + jpeg_decode_std(hle, "PS0", RescaleYSubBlock, RescaleUVSubBlock, EmitYUVTileLine); +} + +/*************************************************************************** +* JPEG decoding ucode found in Ocarina of Time, Pokemon Stadium 1 and +* Pokemon Stadium 2. +**************************************************************************/ +void jpeg_decode_PS(CHle * hle) +{ + jpeg_decode_std(hle, "PS", NULL, NULL, EmitRGBATileLine); +} + +/*************************************************************************** +* JPEG decoding ucode found in Ogre Battle and Bottom of the 9th. 
+**************************************************************************/
+void jpeg_decode_OB(CHle * hle)
+{
+    int16_t qtable[SUBBLOCK_SIZE];
+    unsigned int mb;
+
+    int32_t y_dc = 0; /* running DC accumulators, carried across macroblocks */
+    int32_t u_dc = 0;
+    int32_t v_dc = 0;
+
+    uint32_t address = *dmem_u32(hle, TASK_DATA_PTR);
+    const unsigned int macroblock_count = *dmem_u32(hle, TASK_DATA_SIZE);
+    const int qscale = *dmem_u32(hle, TASK_YIELD_DATA_SIZE); /* > 0: passed to ScaleSubBlock, < 0: negated and passed to RShiftSubBlock, 0: no qtable */
+
+    hle->VerboseMessage("jpeg_decode_OB: *buffer=%x, #MB=%d, qscale=%d", address, macroblock_count, qscale);
+
+    if (qscale != 0)
+    {
+        if (qscale > 0)
+        {
+            ScaleSubBlock(qtable, DEFAULT_QTABLE, qscale);
+        }
+        else
+        {
+            RShiftSubBlock(qtable, DEFAULT_QTABLE, -qscale);
+        }
+    }
+
+    for (mb = 0; mb < macroblock_count; ++mb)
+    {
+        int16_t macroblock[6 * SUBBLOCK_SIZE];
+
+        dram_load_u16(hle, (uint16_t *)macroblock, address, 6 * SUBBLOCK_SIZE);
+        decode_macroblock_ob(macroblock, &y_dc, &u_dc, &v_dc, (qscale != 0) ? qtable : NULL);
+        EmitTilesMode2(hle, EmitYUVTileLine, macroblock, address); /* output is emitted at the address the macroblock was loaded from */
+
+        address += (2 * 6 * SUBBLOCK_SIZE); /* advance by 6 subblocks of 16-bit values */
+    }
+}
+
+/* local functions */
+static void jpeg_decode_std(CHle * hle, const char *const version, const subblock_transform_t transform_luma, const subblock_transform_t transform_chroma, const tile_line_emitter_t emit_line)
+{
+    int16_t qtables[3][SUBBLOCK_SIZE];
+    unsigned int mb;
+    uint32_t address;
+    uint32_t macroblock_count;
+    uint32_t mode;
+    uint32_t qtableY_ptr;
+    uint32_t qtableU_ptr;
+    uint32_t qtableV_ptr;
+    unsigned int subblock_count;
+    unsigned int macroblock_size;
+    /* macroblock contains at most 6 subblocks */
+    int16_t macroblock[6 * SUBBLOCK_SIZE];
+    uint32_t data_ptr;
+
+    if (*dmem_u32(hle, TASK_FLAGS) & 0x1)
+    {
+        hle->WarnMessage("jpeg_decode_%s: task yielding not implemented", version);
+        return;
+    }
+
+    data_ptr = *dmem_u32(hle, TASK_DATA_PTR); /* DRAM address of the parameter words read below */
+    address = *dram_u32(hle, data_ptr);
+    macroblock_count = *dram_u32(hle, data_ptr + 4);
+    mode = *dram_u32(hle, data_ptr + 8);
+    qtableY_ptr = *dram_u32(hle, data_ptr +
12); + qtableU_ptr = *dram_u32(hle, data_ptr + 16); + qtableV_ptr = *dram_u32(hle, data_ptr + 20); + + hle->VerboseMessage("jpeg_decode_%s: *buffer=%x, #MB=%d, mode=%d, *Qy=%x, *Qu=%x, *Qv=%x", version, address, macroblock_count, mode, qtableY_ptr, qtableU_ptr, qtableV_ptr); + + if (mode != 0 && mode != 2) + { + hle->WarnMessage("jpeg_decode_%s: invalid mode %d", version, mode); + return; + } + + subblock_count = mode + 4; + macroblock_size = subblock_count * SUBBLOCK_SIZE; + + dram_load_u16(hle, (uint16_t *)qtables[0], qtableY_ptr, SUBBLOCK_SIZE); + dram_load_u16(hle, (uint16_t *)qtables[1], qtableU_ptr, SUBBLOCK_SIZE); + dram_load_u16(hle, (uint16_t *)qtables[2], qtableV_ptr, SUBBLOCK_SIZE); + + for (mb = 0; mb < macroblock_count; ++mb) + { + dram_load_u16(hle, (uint16_t *)macroblock, address, macroblock_size); + decode_macroblock_std(transform_luma, transform_chroma, macroblock, subblock_count, (const int16_t(*)[SUBBLOCK_SIZE])qtables); + + if (mode == 0) + { + EmitTilesMode0(hle, emit_line, macroblock, address); + } + else + { + EmitTilesMode2(hle, emit_line, macroblock, address); + } + address += 2 * macroblock_size; + } +} + +static uint8_t clamp_u8(int16_t x) +{ + return (x & (0xff00)) ? 
((x < 0) ? 0 : 0xff) : x; /* saturate: negative -> 0, above 255 -> 255 (no reliance on signed right shift) */
+}
+
+static int16_t clamp_s12(int16_t x) /* clamps x to [-0x800, 0x7f0] */
+{
+    if (x < -0x800)
+    {
+        x = -0x800;
+    }
+    else if (x > 0x7f0)
+    {
+        x = 0x7f0;
+    }
+    return x;
+}
+
+static uint16_t clamp_RGBA_component(int16_t x) /* clamps x to [0, 0xff0], then keeps bits 7..11 (mask 0xf80) */
+{
+    if (x > 0xff0)
+    {
+        x = 0xff0;
+    }
+    else if (x < 0)
+    {
+        x = 0;
+    }
+    return (x & 0xf80);
+}
+
+static uint32_t GetUYVY(int16_t y1, int16_t y2, int16_t u, int16_t v) /* packs U, Y1, V, Y2 as bytes, U in the top byte */
+{
+    return (uint32_t)clamp_u8(u) << 24 |
+        (uint32_t)clamp_u8(y1) << 16 |
+        (uint32_t)clamp_u8(v) << 8 |
+        (uint32_t)clamp_u8(y2);
+}
+
+static uint16_t GetRGBA(int16_t y, int16_t u, int16_t v) /* YUV -> 16-bit pixel: 5 bits each of R, G, B plus a set low bit */
+{
+    const float fY = (float)y + 2048.0f;
+    const float fU = (float)u;
+    const float fV = (float)v;
+
+    const uint16_t r = clamp_RGBA_component((int16_t)(fY + 1.4025 * fV)); /* NOTE(review): these casts assume the value fits in int16_t - confirm input range */
+    const uint16_t g = clamp_RGBA_component((int16_t)(fY - 0.3443 * fU - 0.7144 * fV));
+    const uint16_t b = clamp_RGBA_component((int16_t)(fY + 1.7729 * fU));
+
+    return (r << 4) | (g >> 1) | (b >> 6) | 1;
+}
+
+static void EmitYUVTileLine(CHle * hle, const int16_t *y, const int16_t *u, uint32_t address) /* stores one 16-pixel line as 8 UYVY words */
+{
+    uint32_t uyvy[8];
+
+    const int16_t *const v = u + SUBBLOCK_SIZE;   /* V samples follow the U subblock */
+    const int16_t *const y2 = y + SUBBLOCK_SIZE;  /* right half of the line comes from the next luma subblock */
+
+    uyvy[0] = GetUYVY(y[0], y[1], u[0], v[0]);
+    uyvy[1] = GetUYVY(y[2], y[3], u[1], v[1]);
+    uyvy[2] = GetUYVY(y[4], y[5], u[2], v[2]);
+    uyvy[3] = GetUYVY(y[6], y[7], u[3], v[3]);
+    uyvy[4] = GetUYVY(y2[0], y2[1], u[4], v[4]);
+    uyvy[5] = GetUYVY(y2[2], y2[3], u[5], v[5]);
+    uyvy[6] = GetUYVY(y2[4], y2[5], u[6], v[6]);
+    uyvy[7] = GetUYVY(y2[6], y2[7], u[7], v[7]);
+
+    dram_store_u32(hle, uyvy, address, 8);
+}
+
+static void EmitRGBATileLine(CHle * hle, const int16_t *y, const int16_t *u, uint32_t address) /* stores one 16-pixel line as 16 RGBA halfwords */
+{
+    uint16_t rgba[16];
+
+    const int16_t *const v = u + SUBBLOCK_SIZE;
+    const int16_t *const y2 = y + SUBBLOCK_SIZE;
+
+    rgba[0] = GetRGBA(y[0], u[0], v[0]);
+    rgba[1] = GetRGBA(y[1], u[0], v[0]);
+    rgba[2] = GetRGBA(y[2], u[1], v[1]);
+    rgba[3] = GetRGBA(y[3], u[1], v[1]);
+    rgba[4] =
GetRGBA(y[4], u[2], v[2]); + rgba[5] = GetRGBA(y[5], u[2], v[2]); + rgba[6] = GetRGBA(y[6], u[3], v[3]); + rgba[7] = GetRGBA(y[7], u[3], v[3]); + rgba[8] = GetRGBA(y2[0], u[4], v[4]); + rgba[9] = GetRGBA(y2[1], u[4], v[4]); + rgba[10] = GetRGBA(y2[2], u[5], v[5]); + rgba[11] = GetRGBA(y2[3], u[5], v[5]); + rgba[12] = GetRGBA(y2[4], u[6], v[6]); + rgba[13] = GetRGBA(y2[5], u[6], v[6]); + rgba[14] = GetRGBA(y2[6], u[7], v[7]); + rgba[15] = GetRGBA(y2[7], u[7], v[7]); + + dram_store_u16(hle, rgba, address, 16); +} + +static void EmitTilesMode0(CHle * hle, const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address) +{ + unsigned int i; + + unsigned int y_offset = 0; + unsigned int u_offset = 2 * SUBBLOCK_SIZE; + + for (i = 0; i < 8; ++i) { + emit_line(hle, ¯oblock[y_offset], ¯oblock[u_offset], address); + + y_offset += 8; + u_offset += 8; + address += 32; + } +} + +static void EmitTilesMode2(CHle * hle, const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address) +{ + unsigned int i; + + unsigned int y_offset = 0; + unsigned int u_offset = 4 * SUBBLOCK_SIZE; + + for (i = 0; i < 8; ++i) + { + emit_line(hle, ¯oblock[y_offset], ¯oblock[u_offset], address); + emit_line(hle, ¯oblock[y_offset + 8], ¯oblock[u_offset], address + 32); + + y_offset += (i == 3) ? 
SUBBLOCK_SIZE + 16 : 16; + u_offset += 8; + address += 64; + } +} + +static void decode_macroblock_ob(int16_t *macroblock, int32_t *y_dc, int32_t *u_dc, int32_t *v_dc, const int16_t *qtable) +{ + int sb; + + for (sb = 0; sb < 6; ++sb) { + int16_t tmp_sb[SUBBLOCK_SIZE]; + + /* update DC */ + int32_t dc = (int32_t)macroblock[0]; + switch (sb) { + case 0: + case 1: + case 2: + case 3: + *y_dc += dc; + macroblock[0] = *y_dc & 0xffff; + break; + case 4: + *u_dc += dc; + macroblock[0] = *u_dc & 0xffff; + break; + case 5: + *v_dc += dc; + macroblock[0] = *v_dc & 0xffff; + break; + } + + ZigZagSubBlock(tmp_sb, macroblock); + if (qtable != NULL) + { + MultSubBlocks(tmp_sb, tmp_sb, qtable, 0); + } + TransposeSubBlock(macroblock, tmp_sb); + InverseDCTSubBlock(macroblock, macroblock); + + macroblock += SUBBLOCK_SIZE; + } +} + +static void decode_macroblock_std(const subblock_transform_t transform_luma, + const subblock_transform_t transform_chroma, + int16_t *macroblock, + unsigned int subblock_count, + const int16_t qtables[3][SUBBLOCK_SIZE]) +{ + unsigned int sb; + unsigned int q = 0; + + for (sb = 0; sb < subblock_count; ++sb) + { + int16_t tmp_sb[SUBBLOCK_SIZE]; + const int isChromaSubBlock = (subblock_count - sb <= 2); + + if (isChromaSubBlock) + { + ++q; + } + + MultSubBlocks(macroblock, macroblock, qtables[q], 4); + ZigZagSubBlock(tmp_sb, macroblock); + InverseDCTSubBlock(macroblock, tmp_sb); + + if (isChromaSubBlock) + { + if (transform_chroma != NULL) + { + transform_chroma(macroblock, macroblock); + } + } + else + { + if (transform_luma != NULL) + { + transform_luma(macroblock, macroblock); + } + } + macroblock += SUBBLOCK_SIZE; + } +} + +static void TransposeSubBlock(int16_t *dst, const int16_t *src) +{ + ReorderSubBlock(dst, src, TRANSPOSE_TABLE); +} + +static void ZigZagSubBlock(int16_t *dst, const int16_t *src) +{ + ReorderSubBlock(dst, src, ZIGZAG_TABLE); +} + +static void ReorderSubBlock(int16_t *dst, const int16_t *src, const unsigned int *table) +{ + unsigned 
int i; + + /* source and destination sublocks cannot overlap */ + assert(abs(dst - src) > SUBBLOCK_SIZE); + + for (i = 0; i < SUBBLOCK_SIZE; ++i) + dst[i] = src[table[i]]; +} + +static void MultSubBlocks(int16_t *dst, const int16_t *src1, const int16_t *src2, unsigned int shift) +{ + unsigned int i; + + for (i = 0; i < SUBBLOCK_SIZE; ++i) + { + int32_t v = src1[i] * src2[i]; + dst[i] = clamp_s16(v) << shift; + } +} + +static void ScaleSubBlock(int16_t *dst, const int16_t *src, int16_t scale) +{ + unsigned int i; + + for (i = 0; i < SUBBLOCK_SIZE; ++i) { + int32_t v = src[i] * scale; + dst[i] = clamp_s16(v); + } +} + +static void RShiftSubBlock(int16_t *dst, const int16_t *src, unsigned int shift) +{ + unsigned int i; + + for (i = 0; i < SUBBLOCK_SIZE; ++i) + dst[i] = src[i] >> shift; +} + +/*************************************************************************** + * Fast 2D IDCT using separable formulation and normalization + * Computations use single precision floats + * Implementation based on Wikipedia : + * http://fr.wikipedia.org/wiki/Transform%C3%A9e_en_cosinus_discr%C3%A8te + **************************************************************************/ +static void InverseDCT1D(const float *const x, float *dst, unsigned int stride) +{ + float e[4]; + float f[4]; + float x26, x1357, x15, x37, x17, x35; + + x15 = IDCT_K[2] * (x[1] + x[5]); + x37 = IDCT_K[3] * (x[3] + x[7]); + x17 = IDCT_K[8] * (x[1] + x[7]); + x35 = IDCT_K[9] * (x[3] + x[5]); + x1357 = IDCT_C3 * (x[1] + x[3] + x[5] + x[7]); + x26 = IDCT_C6 * (x[2] + x[6]); + + f[0] = x[0] + x[4]; + f[1] = x[0] - x[4]; + f[2] = x26 + IDCT_K[0] * x[2]; + f[3] = x26 + IDCT_K[1] * x[6]; + + e[0] = x1357 + x15 + IDCT_K[4] * x[1] + x17; + e[1] = x1357 + x37 + IDCT_K[6] * x[3] + x35; + e[2] = x1357 + x15 + IDCT_K[5] * x[5] + x35; + e[3] = x1357 + x37 + IDCT_K[7] * x[7] + x17; + + *dst = f[0] + f[2] + e[0]; + dst += stride; + *dst = f[1] + f[3] + e[1]; + dst += stride; + *dst = f[1] - f[3] + e[2]; + dst += stride; + 
*dst = f[0] - f[2] + e[3]; + dst += stride; + *dst = f[0] - f[2] - e[3]; + dst += stride; + *dst = f[1] - f[3] - e[2]; + dst += stride; + *dst = f[1] + f[3] - e[1]; + dst += stride; + *dst = f[0] + f[2] - e[0]; +} + +static void InverseDCTSubBlock(int16_t *dst, const int16_t *src) +{ + float x[8]; + float block[SUBBLOCK_SIZE]; + unsigned int i, j; + + /* idct 1d on rows (+transposition) */ + for (i = 0; i < 8; ++i) + { + for (j = 0; j < 8; ++j) + { + x[j] = (float)src[i * 8 + j]; + } + InverseDCT1D(x, &block[i], 8); + } + + /* idct 1d on columns (thanks to previous transposition) */ + for (i = 0; i < 8; ++i) + { + InverseDCT1D(&block[i * 8], x, 1); + + /* C4 = 1 normalization implies a division by 8 */ + for (j = 0; j < 8; ++j) + { + dst[i + j * 8] = (int16_t)x[j] >> 3; + } + } +} +static void RescaleYSubBlock(int16_t *dst, const int16_t *src) +{ + unsigned int i; + + for (i = 0; i < SUBBLOCK_SIZE; ++i) + { + dst[i] = (((uint32_t)(clamp_s12(src[i]) + 0x800) * 0xdb0) >> 16) + 0x10; + } +} +static void RescaleUVSubBlock(int16_t *dst, const int16_t *src) +{ + unsigned int i; + + for (i = 0; i < SUBBLOCK_SIZE; ++i) + { + dst[i] = (((int)clamp_s12(src[i]) * 0xe00) >> 16) + 0x80; + } +} \ No newline at end of file diff --git a/Source/Android/PluginRSP/main.cpp b/Source/Android/PluginRSP/main.cpp new file mode 100644 index 000000000..708fc0617 --- /dev/null +++ b/Source/Android/PluginRSP/main.cpp @@ -0,0 +1,142 @@ +/**************************************************************************** +* * +* Project64 - A Nintendo 64 emulator. * +* http://www.pj64-emu.com/ * +* Copyright (C) 2016 Project64. All rights reserved. 
* +* * +* License: * +* GNU/GPLv2 http://www.gnu.org/licenses/gpl-2.0.html * +* * +****************************************************************************/ +#include "stdafx.h" +#include "Rsp.h" + +CHle * g_hle = NULL; + +#ifdef _WIN32 +#include + +void * g_hinstDLL; + +BOOL WINAPI DllMain(void * hinst, DWORD /*fdwReason*/, LPVOID /*lpvReserved*/) +{ + g_hinstDLL = hinst; + return true; +} +#endif + +/****************************************************************** + Function: CloseDLL + Purpose: This function is called when the emulator is closing + down allowing the dll to de-initialise. + input: none + output: none + *******************************************************************/ +void CloseDLL(void) +{ + if (g_hle) + { + delete g_hle; + g_hle = NULL; + } +} + +/****************************************************************** + Function: DllAbout + Purpose: This function is optional function that is provided + to give further information about the DLL. + input: a handle to the window that calls this function + output: none + *******************************************************************/ +void DllAbout(void * hParent) +{ +#ifdef _WIN32 + MessageBox((HWND)hParent, "need to do", "About", MB_OK | MB_ICONINFORMATION); +#endif +} + +/****************************************************************** +Function: DoRspCycles +Purpose: This function is to allow the RSP to run in parrel with +the r4300 switching control back to the r4300 once the +function ends. +input: The number of cylces that is meant to be executed +output: The number of cycles that was executed. This value can +be greater than the number of cycles that the RSP +should have performed. 
+(this value is ignored if the RSP is stoped) +*******************************************************************/ +uint32_t DoRspCycles(uint32_t Cycles) +{ + if (g_hle) + { + g_hle->hle_execute(); + } + return Cycles; +} + +/****************************************************************** + Function: GetDllInfo + Purpose: This function allows the emulator to gather information + about the dll by filling in the PluginInfo structure. + input: a pointer to a PLUGIN_INFO stucture that needs to be + filled by the function. (see def above) + output: none + *******************************************************************/ +void GetDllInfo(PLUGIN_INFO * PluginInfo) +{ + PluginInfo->Version = 0x0102; + PluginInfo->Type = PLUGIN_TYPE_RSP; +#ifdef _DEBUG + sprintf(PluginInfo->Name, "RSP HLE Debug Plugin %s", VER_FILE_VERSION_STR); +#else + sprintf(PluginInfo->Name, "RSP HLE Plugin %s", VER_FILE_VERSION_STR); +#endif + PluginInfo->NormalMemory = false; + PluginInfo->MemoryBswaped = true; +} + +/****************************************************************** +Function: InitiateRSP +Purpose: This function is called when the DLL is started to give +information from the emulator that the n64 RSP +interface needs +input: Rsp_Info is passed to this function which is defined +above. +CycleCount is the number of cycles between switching +control between teh RSP and r4300i core. +output: none +*******************************************************************/ +void InitiateRSP(RSP_INFO Rsp_Info, uint32_t * /*CycleCount*/) +{ + if (g_hle) + { + delete g_hle; + g_hle = NULL; + } + g_hle = new CHle(Rsp_Info); +} + +/****************************************************************** +Function: RomOpen +Purpose: This function is called when a rom is opened. 
+input: none +output: none +*******************************************************************/ +void RomOpen(void) +{ +} + +/****************************************************************** +Function: RomClosed +Purpose: This function is called when a rom is closed. +input: none +output: none +*******************************************************************/ +void RomClosed(void) +{ +} + +void PluginLoaded(void) +{ +} \ No newline at end of file diff --git a/Source/Android/PluginRSP/mem.cpp b/Source/Android/PluginRSP/mem.cpp new file mode 100644 index 000000000..0491eb556 --- /dev/null +++ b/Source/Android/PluginRSP/mem.cpp @@ -0,0 +1,57 @@ +/**************************************************************************** +* * +* Project64 - A Nintendo 64 emulator. * +* http://www.pj64-emu.com/ * +* Copyright (C) 2016 Project64. All rights reserved. * +* * +* License: * +* GNU/GPLv2 http://www.gnu.org/licenses/gpl-2.0.html * +* * +****************************************************************************/ +#include "stdafx.h" +#include + +#include "mem.h" + +/* Global functions */ +void load_u8(uint8_t* dst, const unsigned char* buffer, unsigned address, size_t count) +{ + while (count != 0) + { + *(dst++) = *u8(buffer, address); + address += 1; + --count; + } +} + +void store_u16(unsigned char* buffer, unsigned address, const uint16_t* src, size_t count) +{ + while (count != 0) + { + *u16(buffer, address) = *(src++); + address += 2; + --count; + } +} + +void load_u32(uint32_t* dst, const unsigned char* buffer, unsigned address, size_t count) +{ + /* Optimization for uint32_t */ + memcpy(dst, u32(buffer, address), count * sizeof(uint32_t)); +} + +void load_u16(uint16_t* dst, const unsigned char* buffer, unsigned address, size_t count) +{ + while (count != 0) + { + *(dst++) = *u16(buffer, address); + address += 2; + --count; + } +} + +void store_u32(unsigned char* buffer, unsigned address, const uint32_t* src, size_t count) +{ + /* Optimization for uint32_t */ 
#pragma once
/* The header name had been lost from this directive; u16() and u32() below
   use assert(), which is declared in <assert.h>. */
#include <assert.h>

/* XOR masks applied to byte addresses by the accessors below:
   S16 by u16(), S8 by u8(). */
#define S 1
#define S16 2
#define S8 3

/* Offsets of the task header fields (0xfc0 .. 0xffc, one every 4 bytes). */
enum
{
    TASK_TYPE = 0xfc0,
    TASK_FLAGS = 0xfc4,
    TASK_UCODE_BOOT = 0xfc8,
    TASK_UCODE_BOOT_SIZE = 0xfcc,
    TASK_UCODE = 0xfd0,
    TASK_UCODE_SIZE = 0xfd4,
    TASK_UCODE_DATA = 0xfd8,
    TASK_UCODE_DATA_SIZE = 0xfdc,
    TASK_DRAM_STACK = 0xfe0,
    TASK_DRAM_STACK_SIZE = 0xfe4,
    TASK_OUTPUT_BUFF = 0xfe8,
    TASK_OUTPUT_BUFF_SIZE = 0xfec,
    TASK_DATA_PTR = 0xff0,
    TASK_DATA_SIZE = 0xff4,
    TASK_YIELD_DATA_PTR = 0xff8,
    TASK_YIELD_DATA_SIZE = 0xffc
};

/*
 * Round x up to the next multiple of amount.
 * The mask arithmetic is only meaningful when amount is a power of two.
 */
static inline unsigned int align(unsigned int x, unsigned amount)
{
    --amount;
    return (x + amount) & ~amount;
}

/* Pointer to the byte at address, located at buffer + (address ^ S8). */
static inline uint8_t* u8(const unsigned char* buffer, unsigned address)
{
    return (uint8_t*)(buffer + (address ^ S8));
}

/*
 * Pointer to the 16-bit value at address, located at
 * buffer + (address ^ S16).  address must be even.
 */
static inline uint16_t* u16(const unsigned char* buffer, unsigned address)
{
    assert((address & 1) == 0);
    return (uint16_t*)(buffer + (address ^ S16));
}

/*
 * Pointer to the 32-bit value at address (no address swizzling).
 * address must be a multiple of 4.
 */
static inline uint32_t* u32(const unsigned char* buffer, unsigned address)
{
    assert((address & 3) == 0);
    return (uint32_t*)(buffer + address);
}
unsigned address, size_t count); +void store_u16(unsigned char* buffer, unsigned address, const uint16_t* src, size_t count); +void store_u32(unsigned char* buffer, unsigned address, const uint32_t* src, size_t count); + +static inline uint32_t* dmem_u32(CHle * hle, uint16_t address) +{ + return u32(hle->dmem(), address & 0xfff); +} + +static inline void dmem_store_u32(CHle * hle, const uint32_t* src, uint16_t address, size_t count) +{ + store_u32(hle->dmem(), address & 0xfff, src, count); +} + +/* convenient functions DRAM access */ +static inline uint8_t* dram_u8(CHle * hle, uint32_t address) +{ + return u8(hle->dram(), address & 0xffffff); +} + +static inline uint16_t* dram_u16(CHle * hle, uint32_t address) +{ + return u16(hle->dram(), address & 0xffffff); +} + +static inline uint32_t* dram_u32(CHle * hle, uint32_t address) +{ + return u32(hle->dram(), address & 0xffffff); +} + +static inline void dram_load_u8(CHle * hle, uint8_t* dst, uint32_t address, size_t count) +{ + load_u8(dst, hle->dram(), address & 0xffffff, count); +} + +static inline void dram_load_u16(CHle * hle, uint16_t* dst, uint32_t address, size_t count) +{ + load_u16(dst, hle->dram(), address & 0xffffff, count); +} + +static inline void dram_load_u32(CHle * hle, uint32_t* dst, uint32_t address, size_t count) +{ + load_u32(dst, hle->dram(), address & 0xffffff, count); +} + +static inline void dram_store_u16(CHle * hle, const uint16_t* src, uint32_t address, size_t count) +{ + store_u16(hle->dram(), address & 0xffffff, src, count); +} + +static inline void dram_store_u32(CHle * hle, const uint32_t* src, uint32_t address, size_t count) +{ + store_u32(hle->dram(), address & 0xffffff, src, count); +} diff --git a/Source/Android/PluginRSP/mp3.cpp b/Source/Android/PluginRSP/mp3.cpp new file mode 100644 index 000000000..4ec7d8261 --- /dev/null +++ b/Source/Android/PluginRSP/mp3.cpp @@ -0,0 +1,681 @@ +/**************************************************************************** +* * +* Project64 - A 
Nintendo 64 emulator. * +* http://www.pj64-emu.com/ * +* Copyright (C) 2016 Project64. All rights reserved. * +* * +* License: * +* GNU/GPLv2 http://www.gnu.org/licenses/gpl-2.0.html * +* * +****************************************************************************/ +#include "stdafx.h" +#include + +#include "arithmetics.h" +#include "mem.h" + +static void InnerLoop(CHle * hle, uint32_t outPtr, uint32_t inPtr, uint32_t t6, uint32_t t5, uint32_t t4); + +static const uint16_t DeWindowLUT [0x420] = { + 0x0000, 0xFFF3, 0x005D, 0xFF38, 0x037A, 0xF736, 0x0B37, 0xC00E, + 0x7FFF, 0x3FF2, 0x0B37, 0x08CA, 0x037A, 0x00C8, 0x005D, 0x000D, + 0x0000, 0xFFF3, 0x005D, 0xFF38, 0x037A, 0xF736, 0x0B37, 0xC00E, + 0x7FFF, 0x3FF2, 0x0B37, 0x08CA, 0x037A, 0x00C8, 0x005D, 0x000D, + 0x0000, 0xFFF2, 0x005F, 0xFF1D, 0x0369, 0xF697, 0x0A2A, 0xBCE7, + 0x7FEB, 0x3CCB, 0x0C2B, 0x082B, 0x0385, 0x00AF, 0x005B, 0x000B, + 0x0000, 0xFFF2, 0x005F, 0xFF1D, 0x0369, 0xF697, 0x0A2A, 0xBCE7, + 0x7FEB, 0x3CCB, 0x0C2B, 0x082B, 0x0385, 0x00AF, 0x005B, 0x000B, + 0x0000, 0xFFF1, 0x0061, 0xFF02, 0x0354, 0xF5F9, 0x0905, 0xB9C4, + 0x7FB0, 0x39A4, 0x0D08, 0x078C, 0x038C, 0x0098, 0x0058, 0x000A, + 0x0000, 0xFFF1, 0x0061, 0xFF02, 0x0354, 0xF5F9, 0x0905, 0xB9C4, + 0x7FB0, 0x39A4, 0x0D08, 0x078C, 0x038C, 0x0098, 0x0058, 0x000A, + 0x0000, 0xFFEF, 0x0062, 0xFEE6, 0x033B, 0xF55C, 0x07C8, 0xB6A4, + 0x7F4D, 0x367E, 0x0DCE, 0x06EE, 0x038F, 0x0080, 0x0056, 0x0009, + 0x0000, 0xFFEF, 0x0062, 0xFEE6, 0x033B, 0xF55C, 0x07C8, 0xB6A4, + 0x7F4D, 0x367E, 0x0DCE, 0x06EE, 0x038F, 0x0080, 0x0056, 0x0009, + 0x0000, 0xFFEE, 0x0063, 0xFECA, 0x031C, 0xF4C3, 0x0671, 0xB38C, + 0x7EC2, 0x335D, 0x0E7C, 0x0652, 0x038E, 0x006B, 0x0053, 0x0008, + 0x0000, 0xFFEE, 0x0063, 0xFECA, 0x031C, 0xF4C3, 0x0671, 0xB38C, + 0x7EC2, 0x335D, 0x0E7C, 0x0652, 0x038E, 0x006B, 0x0053, 0x0008, + 0x0000, 0xFFEC, 0x0064, 0xFEAC, 0x02F7, 0xF42C, 0x0502, 0xB07C, + 0x7E12, 0x3041, 0x0F14, 0x05B7, 0x038A, 0x0056, 0x0050, 0x0007, + 0x0000, 0xFFEC, 0x0064, 0xFEAC, 0x02F7, 
0xF42C, 0x0502, 0xB07C, + 0x7E12, 0x3041, 0x0F14, 0x05B7, 0x038A, 0x0056, 0x0050, 0x0007, + 0x0000, 0xFFEB, 0x0064, 0xFE8E, 0x02CE, 0xF399, 0x037A, 0xAD75, + 0x7D3A, 0x2D2C, 0x0F97, 0x0520, 0x0382, 0x0043, 0x004D, 0x0007, + 0x0000, 0xFFEB, 0x0064, 0xFE8E, 0x02CE, 0xF399, 0x037A, 0xAD75, + 0x7D3A, 0x2D2C, 0x0F97, 0x0520, 0x0382, 0x0043, 0x004D, 0x0007, + 0xFFFF, 0xFFE9, 0x0063, 0xFE6F, 0x029E, 0xF30B, 0x01D8, 0xAA7B, + 0x7C3D, 0x2A1F, 0x1004, 0x048B, 0x0377, 0x0030, 0x004A, 0x0006, + 0xFFFF, 0xFFE9, 0x0063, 0xFE6F, 0x029E, 0xF30B, 0x01D8, 0xAA7B, + 0x7C3D, 0x2A1F, 0x1004, 0x048B, 0x0377, 0x0030, 0x004A, 0x0006, + 0xFFFF, 0xFFE7, 0x0062, 0xFE4F, 0x0269, 0xF282, 0x001F, 0xA78D, + 0x7B1A, 0x271C, 0x105D, 0x03F9, 0x036A, 0x001F, 0x0046, 0x0006, + 0xFFFF, 0xFFE7, 0x0062, 0xFE4F, 0x0269, 0xF282, 0x001F, 0xA78D, + 0x7B1A, 0x271C, 0x105D, 0x03F9, 0x036A, 0x001F, 0x0046, 0x0006, + 0xFFFF, 0xFFE4, 0x0061, 0xFE2F, 0x022F, 0xF1FF, 0xFE4C, 0xA4AF, + 0x79D3, 0x2425, 0x10A2, 0x036C, 0x0359, 0x0010, 0x0043, 0x0005, + 0xFFFF, 0xFFE4, 0x0061, 0xFE2F, 0x022F, 0xF1FF, 0xFE4C, 0xA4AF, + 0x79D3, 0x2425, 0x10A2, 0x036C, 0x0359, 0x0010, 0x0043, 0x0005, + 0xFFFF, 0xFFE2, 0x005E, 0xFE10, 0x01EE, 0xF184, 0xFC61, 0xA1E1, + 0x7869, 0x2139, 0x10D3, 0x02E3, 0x0346, 0x0001, 0x0040, 0x0004, + 0xFFFF, 0xFFE2, 0x005E, 0xFE10, 0x01EE, 0xF184, 0xFC61, 0xA1E1, + 0x7869, 0x2139, 0x10D3, 0x02E3, 0x0346, 0x0001, 0x0040, 0x0004, + 0xFFFF, 0xFFE0, 0x005B, 0xFDF0, 0x01A8, 0xF111, 0xFA5F, 0x9F27, + 0x76DB, 0x1E5C, 0x10F2, 0x025E, 0x0331, 0xFFF3, 0x003D, 0x0004, + 0xFFFF, 0xFFE0, 0x005B, 0xFDF0, 0x01A8, 0xF111, 0xFA5F, 0x9F27, + 0x76DB, 0x1E5C, 0x10F2, 0x025E, 0x0331, 0xFFF3, 0x003D, 0x0004, + 0xFFFF, 0xFFDE, 0x0057, 0xFDD0, 0x015B, 0xF0A7, 0xF845, 0x9C80, + 0x752C, 0x1B8E, 0x1100, 0x01DE, 0x0319, 0xFFE7, 0x003A, 0x0003, + 0xFFFF, 0xFFDE, 0x0057, 0xFDD0, 0x015B, 0xF0A7, 0xF845, 0x9C80, + 0x752C, 0x1B8E, 0x1100, 0x01DE, 0x0319, 0xFFE7, 0x003A, 0x0003, + 0xFFFE, 0xFFDB, 0x0053, 0xFDB0, 0x0108, 0xF046, 0xF613, 
0x99EE, + 0x735C, 0x18D1, 0x10FD, 0x0163, 0x0300, 0xFFDC, 0x0037, 0x0003, + 0xFFFE, 0xFFDB, 0x0053, 0xFDB0, 0x0108, 0xF046, 0xF613, 0x99EE, + 0x735C, 0x18D1, 0x10FD, 0x0163, 0x0300, 0xFFDC, 0x0037, 0x0003, + 0xFFFE, 0xFFD8, 0x004D, 0xFD90, 0x00B0, 0xEFF0, 0xF3CC, 0x9775, + 0x716C, 0x1624, 0x10EA, 0x00EE, 0x02E5, 0xFFD2, 0x0033, 0x0003, + 0xFFFE, 0xFFD8, 0x004D, 0xFD90, 0x00B0, 0xEFF0, 0xF3CC, 0x9775, + 0x716C, 0x1624, 0x10EA, 0x00EE, 0x02E5, 0xFFD2, 0x0033, 0x0003, + 0xFFFE, 0xFFD6, 0x0047, 0xFD72, 0x0051, 0xEFA6, 0xF16F, 0x9514, + 0x6F5E, 0x138A, 0x10C8, 0x007E, 0x02CA, 0xFFC9, 0x0030, 0x0003, + 0xFFFE, 0xFFD6, 0x0047, 0xFD72, 0x0051, 0xEFA6, 0xF16F, 0x9514, + 0x6F5E, 0x138A, 0x10C8, 0x007E, 0x02CA, 0xFFC9, 0x0030, 0x0003, + 0xFFFE, 0xFFD3, 0x0040, 0xFD54, 0xFFEC, 0xEF68, 0xEEFC, 0x92CD, + 0x6D33, 0x1104, 0x1098, 0x0014, 0x02AC, 0xFFC0, 0x002D, 0x0002, + 0xFFFE, 0xFFD3, 0x0040, 0xFD54, 0xFFEC, 0xEF68, 0xEEFC, 0x92CD, + 0x6D33, 0x1104, 0x1098, 0x0014, 0x02AC, 0xFFC0, 0x002D, 0x0002, + 0x0030, 0xFFC9, 0x02CA, 0x007E, 0x10C8, 0x138A, 0x6F5E, 0x9514, + 0xF16F, 0xEFA6, 0x0051, 0xFD72, 0x0047, 0xFFD6, 0xFFFE, 0x0003, + 0x0030, 0xFFC9, 0x02CA, 0x007E, 0x10C8, 0x138A, 0x6F5E, 0x9514, + 0xF16F, 0xEFA6, 0x0051, 0xFD72, 0x0047, 0xFFD6, 0xFFFE, 0x0003, + 0x0033, 0xFFD2, 0x02E5, 0x00EE, 0x10EA, 0x1624, 0x716C, 0x9775, + 0xF3CC, 0xEFF0, 0x00B0, 0xFD90, 0x004D, 0xFFD8, 0xFFFE, 0x0003, + 0x0033, 0xFFD2, 0x02E5, 0x00EE, 0x10EA, 0x1624, 0x716C, 0x9775, + 0xF3CC, 0xEFF0, 0x00B0, 0xFD90, 0x004D, 0xFFD8, 0xFFFE, 0x0003, + 0x0037, 0xFFDC, 0x0300, 0x0163, 0x10FD, 0x18D1, 0x735C, 0x99EE, + 0xF613, 0xF046, 0x0108, 0xFDB0, 0x0053, 0xFFDB, 0xFFFE, 0x0003, + 0x0037, 0xFFDC, 0x0300, 0x0163, 0x10FD, 0x18D1, 0x735C, 0x99EE, + 0xF613, 0xF046, 0x0108, 0xFDB0, 0x0053, 0xFFDB, 0xFFFE, 0x0003, + 0x003A, 0xFFE7, 0x0319, 0x01DE, 0x1100, 0x1B8E, 0x752C, 0x9C80, + 0xF845, 0xF0A7, 0x015B, 0xFDD0, 0x0057, 0xFFDE, 0xFFFF, 0x0003, + 0x003A, 0xFFE7, 0x0319, 0x01DE, 0x1100, 0x1B8E, 0x752C, 0x9C80, + 0xF845, 
0xF0A7, 0x015B, 0xFDD0, 0x0057, 0xFFDE, 0xFFFF, 0x0004, + 0x003D, 0xFFF3, 0x0331, 0x025E, 0x10F2, 0x1E5C, 0x76DB, 0x9F27, + 0xFA5F, 0xF111, 0x01A8, 0xFDF0, 0x005B, 0xFFE0, 0xFFFF, 0x0004, + 0x003D, 0xFFF3, 0x0331, 0x025E, 0x10F2, 0x1E5C, 0x76DB, 0x9F27, + 0xFA5F, 0xF111, 0x01A8, 0xFDF0, 0x005B, 0xFFE0, 0xFFFF, 0x0004, + 0x0040, 0x0001, 0x0346, 0x02E3, 0x10D3, 0x2139, 0x7869, 0xA1E1, + 0xFC61, 0xF184, 0x01EE, 0xFE10, 0x005E, 0xFFE2, 0xFFFF, 0x0004, + 0x0040, 0x0001, 0x0346, 0x02E3, 0x10D3, 0x2139, 0x7869, 0xA1E1, + 0xFC61, 0xF184, 0x01EE, 0xFE10, 0x005E, 0xFFE2, 0xFFFF, 0x0005, + 0x0043, 0x0010, 0x0359, 0x036C, 0x10A2, 0x2425, 0x79D3, 0xA4AF, + 0xFE4C, 0xF1FF, 0x022F, 0xFE2F, 0x0061, 0xFFE4, 0xFFFF, 0x0005, + 0x0043, 0x0010, 0x0359, 0x036C, 0x10A2, 0x2425, 0x79D3, 0xA4AF, + 0xFE4C, 0xF1FF, 0x022F, 0xFE2F, 0x0061, 0xFFE4, 0xFFFF, 0x0006, + 0x0046, 0x001F, 0x036A, 0x03F9, 0x105D, 0x271C, 0x7B1A, 0xA78D, + 0x001F, 0xF282, 0x0269, 0xFE4F, 0x0062, 0xFFE7, 0xFFFF, 0x0006, + 0x0046, 0x001F, 0x036A, 0x03F9, 0x105D, 0x271C, 0x7B1A, 0xA78D, + 0x001F, 0xF282, 0x0269, 0xFE4F, 0x0062, 0xFFE7, 0xFFFF, 0x0006, + 0x004A, 0x0030, 0x0377, 0x048B, 0x1004, 0x2A1F, 0x7C3D, 0xAA7B, + 0x01D8, 0xF30B, 0x029E, 0xFE6F, 0x0063, 0xFFE9, 0xFFFF, 0x0006, + 0x004A, 0x0030, 0x0377, 0x048B, 0x1004, 0x2A1F, 0x7C3D, 0xAA7B, + 0x01D8, 0xF30B, 0x029E, 0xFE6F, 0x0063, 0xFFE9, 0xFFFF, 0x0007, + 0x004D, 0x0043, 0x0382, 0x0520, 0x0F97, 0x2D2C, 0x7D3A, 0xAD75, + 0x037A, 0xF399, 0x02CE, 0xFE8E, 0x0064, 0xFFEB, 0x0000, 0x0007, + 0x004D, 0x0043, 0x0382, 0x0520, 0x0F97, 0x2D2C, 0x7D3A, 0xAD75, + 0x037A, 0xF399, 0x02CE, 0xFE8E, 0x0064, 0xFFEB, 0x0000, 0x0007, + 0x0050, 0x0056, 0x038A, 0x05B7, 0x0F14, 0x3041, 0x7E12, 0xB07C, + 0x0502, 0xF42C, 0x02F7, 0xFEAC, 0x0064, 0xFFEC, 0x0000, 0x0007, + 0x0050, 0x0056, 0x038A, 0x05B7, 0x0F14, 0x3041, 0x7E12, 0xB07C, + 0x0502, 0xF42C, 0x02F7, 0xFEAC, 0x0064, 0xFFEC, 0x0000, 0x0008, + 0x0053, 0x006B, 0x038E, 0x0652, 0x0E7C, 0x335D, 0x7EC2, 0xB38C, + 0x0671, 0xF4C3, 0x031C, 
/*
 * Multiply a by the 16.16 constant k and keep bits 16..31 of the 32-bit
 * product, sign-extended.
 *
 * The product is formed in uint32_t so that it wraps instead of overflowing
 * a signed int (undefined behaviour).  The wrapped value is what the previous
 * signed multiplication produced in practice, so results are unchanged.
 */
static int32_t mp3_mul_shift16(int32_t a, uint32_t k)
{
    return (int32_t)((uint32_t)a * k) >> 0x10;
}

/*
 * Butterfly stages 2 to 4 of the MP3 transform, in place.
 *
 * v must point to 32 values.  v[0..15] are the inputs; on return
 * v[16..31] hold the results (v[0..15] are overwritten with the
 * intermediate values of stage 3).
 */
static void MP3AB0(int32_t* v)
{
    /* Part 2 - 100% Accurate */
    static const uint16_t LUT2[8] =
    {
        0xFEC4, 0xF4FA, 0xC5E4, 0xE1C4,
        0x1916, 0x4A50, 0xA268, 0x78AE
    };
    static const uint16_t LUT3[4] = { 0xFB14, 0xD4DC, 0x31F2, 0x8E3A };
    int i;

    for (i = 0; i < 8; i++)
    {
        v[16 + i] = v[0 + i] + v[8 + i];
        v[24 + i] = mp3_mul_shift16(v[0 + i] - v[8 + i], LUT2[i]);
    }

    /* Part 3: 4-wide butterflies */

    for (i = 0; i < 4; i++)
    {
        v[0 + i] = v[16 + i] + v[20 + i];
        v[4 + i] = mp3_mul_shift16(v[16 + i] - v[20 + i], LUT3[i]);

        v[8 + i] = v[24 + i] + v[28 + i];
        v[12 + i] = mp3_mul_shift16(v[24 + i] - v[28 + i], LUT3[i]);
    }

    /* Part 4: 2-wide butterflies - 100% Accurate */

    for (i = 0; i < 16; i += 4)
    {
        v[16 + i] = v[0 + i] + v[2 + i];
        v[18 + i] = mp3_mul_shift16(v[0 + i] - v[2 + i], 0xEC84);

        v[17 + i] = v[1 + i] + v[3 + i];
        v[19 + i] = mp3_mul_shift16(v[1 + i] - v[3 + i], 0x61F8);
    }
}
int32_t v2 = 0, v4 = 0, v6 = 0, v8 = 0; + uint32_t offset; + uint32_t addptr; + int x; + int32_t mult6; + int32_t mult4; + int tmp; + int32_t hi0; + int32_t hi1; + int32_t vt; + int32_t v[32]; + + v[0] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x00 ^ S16)); + v[31] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x3E ^ S16)); + v[0] += v[31]; + v[1] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x02 ^ S16)); + v[30] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x3C ^ S16)); + v[1] += v[30]; + v[2] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x06 ^ S16)); + v[28] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x38 ^ S16)); + v[2] += v[28]; + v[3] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x04 ^ S16)); + v[29] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x3A ^ S16)); + v[3] += v[29]; + + v[4] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x0E ^ S16)); + v[24] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x30 ^ S16)); + v[4] += v[24]; + v[5] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x0C ^ S16)); + v[25] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x32 ^ S16)); + v[5] += v[25]; + v[6] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x08 ^ S16)); + v[27] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x36 ^ S16)); + v[6] += v[27]; + v[7] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x0A ^ S16)); + v[26] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x34 ^ S16)); + v[7] += v[26]; + + v[8] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x1E ^ S16)); + v[16] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x20 ^ S16)); + v[8] += v[16]; + v[9] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x1C ^ S16)); + v[17] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x22 ^ S16)); + v[9] += v[17]; + v[10] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x18 ^ S16)); + v[19] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x26 ^ S16)); + v[10] += v[19]; + v[11] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x1A ^ S16)); + v[18] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x24 ^ S16)); + v[11] += v[18]; + + v[12] = 
*(int16_t *)(hle->mp3_buffer() + inPtr + (0x10 ^ S16)); + v[23] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x2E ^ S16)); + v[12] += v[23]; + v[13] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x12 ^ S16)); + v[22] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x2C ^ S16)); + v[13] += v[22]; + v[14] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x16 ^ S16)); + v[20] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x28 ^ S16)); + v[14] += v[20]; + v[15] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x14 ^ S16)); + v[21] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x2A ^ S16)); + v[15] += v[21]; + + /* Part 2-4 */ + + MP3AB0(v); + + /* Part 5 - 1-Wide Butterflies - 100% Accurate but need SSVs!!! */ + + t0 = t6 + 0x100; + t1 = t6 + 0x200; + t2 = t5 + 0x100; + t3 = t5 + 0x200; + + /* 0x13A8 */ + v[1] = 0; + v[11] = ((v[16] - v[17]) * 0xB504) >> 0x10; + + v[16] = -v[16] - v[17]; + v[2] = v[18] + v[19]; + /* ** Store v[11] -> (T6 + 0)** */ + *(int16_t *)(hle->mp3_buffer() + ((t6 + (short)0x0))) = (short)v[11]; + + v[11] = -v[11]; + /* ** Store v[16] -> (T3 + 0)** */ + *(int16_t *)(hle->mp3_buffer() + ((t3 + (short)0x0))) = (short)v[16]; + /* ** Store v[11] -> (T5 + 0)** */ + *(int16_t *)(hle->mp3_buffer() + ((t5 + (short)0x0))) = (short)v[11]; + /* 0x13E8 - Verified.... 
*/ + v[2] = -v[2]; + /* ** Store v[2] -> (T2 + 0)** */ + *(int16_t *)(hle->mp3_buffer() + ((t2 + (short)0x0))) = (short)v[2]; + v[3] = (((v[18] - v[19]) * 0x16A09) >> 0x10) + v[2]; + /* ** Store v[3] -> (T0 + 0)** */ + *(int16_t *)(hle->mp3_buffer() + ((t0 + (short)0x0))) = (short)v[3]; + /* 0x1400 - Verified */ + v[4] = -v[20] - v[21]; + v[6] = v[22] + v[23]; + v[5] = ((v[20] - v[21]) * 0x16A09) >> 0x10; + /* ** Store v[4] -> (T3 + 0xFF80) */ + *(int16_t *)(hle->mp3_buffer() + ((t3 + (short)0xFF80))) = (short)v[4]; + v[7] = ((v[22] - v[23]) * 0x2D413) >> 0x10; + v[5] = v[5] - v[4]; + v[7] = v[7] - v[5]; + v[6] = v[6] + v[6]; + v[5] = v[5] - v[6]; + v[4] = -v[4] - v[6]; + /* *** Store v[7] -> (T1 + 0xFF80) */ + *(int16_t *)(hle->mp3_buffer() + ((t1 + (short)0xFF80))) = (short)v[7]; + /* *** Store v[4] -> (T2 + 0xFF80) */ + *(int16_t *)(hle->mp3_buffer() + ((t2 + (short)0xFF80))) = (short)v[4]; + /* *** Store v[5] -> (T0 + 0xFF80) */ + *(int16_t *)(hle->mp3_buffer() + ((t0 + (short)0xFF80))) = (short)v[5]; + v[8] = v[24] + v[25]; + + v[9] = ((v[24] - v[25]) * 0x16A09) >> 0x10; + v[2] = v[8] + v[9]; + v[11] = ((v[26] - v[27]) * 0x2D413) >> 0x10; + v[13] = ((v[28] - v[29]) * 0x2D413) >> 0x10; + + v[10] = v[26] + v[27]; + v[10] = v[10] + v[10]; + v[12] = v[28] + v[29]; + v[12] = v[12] + v[12]; + v[14] = v[30] + v[31]; + v[3] = v[8] + v[10]; + v[14] = v[14] + v[14]; + v[13] = (v[13] - v[2]) + v[12]; + v[15] = (((v[30] - v[31]) * 0x5A827) >> 0x10) - (v[11] + v[2]); + v[14] = -(v[14] + v[14]) + v[3]; + v[17] = v[13] - v[10]; + v[9] = v[9] + v[14]; + /* ** Store v[9] -> (T6 + 0x40) */ + *(int16_t *)(hle->mp3_buffer() + ((t6 + (short)0x40))) = (short)v[9]; + v[11] = v[11] - v[13]; + /* ** Store v[17] -> (T0 + 0xFFC0) */ + *(int16_t *)(hle->mp3_buffer() + ((t0 + (short)0xFFC0))) = (short)v[17]; + v[12] = v[8] - v[12]; + /* ** Store v[11] -> (T0 + 0x40) */ + *(int16_t *)(hle->mp3_buffer() + ((t0 + (short)0x40))) = (short)v[11]; + v[8] = -v[8]; + /* ** Store v[15] -> (T1 + 
0xFFC0) */ + *(int16_t *)(hle->mp3_buffer() + ((t1 + (short)0xFFC0))) = (short)v[15]; + v[10] = -v[10] - v[12]; + /* ** Store v[12] -> (T2 + 0x40) */ + *(int16_t *)(hle->mp3_buffer() + ((t2 + (short)0x40))) = (short)v[12]; + /* ** Store v[8] -> (T3 + 0xFFC0) */ + *(int16_t *)(hle->mp3_buffer() + ((t3 + (short)0xFFC0))) = (short)v[8]; + /* ** Store v[14] -> (T5 + 0x40) */ + *(int16_t *)(hle->mp3_buffer() + ((t5 + (short)0x40))) = (short)v[14]; + /* ** Store v[10] -> (T2 + 0xFFC0) */ + *(int16_t *)(hle->mp3_buffer() + ((t2 + (short)0xFFC0))) = (short)v[10]; + /* 0x14FC - Verified... */ + + /* Part 6 - 100% Accurate */ + + v[0] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x00 ^ S16)); + v[31] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x3E ^ S16)); + v[0] -= v[31]; + v[1] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x02 ^ S16)); + v[30] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x3C ^ S16)); + v[1] -= v[30]; + v[2] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x06 ^ S16)); + v[28] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x38 ^ S16)); + v[2] -= v[28]; + v[3] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x04 ^ S16)); + v[29] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x3A ^ S16)); + v[3] -= v[29]; + + v[4] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x0E ^ S16)); + v[24] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x30 ^ S16)); + v[4] -= v[24]; + v[5] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x0C ^ S16)); + v[25] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x32 ^ S16)); + v[5] -= v[25]; + v[6] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x08 ^ S16)); + v[27] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x36 ^ S16)); + v[6] -= v[27]; + v[7] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x0A ^ S16)); + v[26] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x34 ^ S16)); + v[7] -= v[26]; + + v[8] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x1E ^ S16)); + v[16] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x20 ^ S16)); + v[8] -= v[16]; + v[9] = *(int16_t 
*)(hle->mp3_buffer() + inPtr + (0x1C ^ S16)); + v[17] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x22 ^ S16)); + v[9] -= v[17]; + v[10] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x18 ^ S16)); + v[19] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x26 ^ S16)); + v[10] -= v[19]; + v[11] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x1A ^ S16)); + v[18] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x24 ^ S16)); + v[11] -= v[18]; + + v[12] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x10 ^ S16)); + v[23] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x2E ^ S16)); + v[12] -= v[23]; + v[13] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x12 ^ S16)); + v[22] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x2C ^ S16)); + v[13] -= v[22]; + v[14] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x16 ^ S16)); + v[20] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x28 ^ S16)); + v[14] -= v[20]; + v[15] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x14 ^ S16)); + v[21] = *(int16_t *)(hle->mp3_buffer() + inPtr + (0x2A ^ S16)); + v[15] -= v[21]; + + for (i = 0; i < 16; i++) + { + v[0 + i] = (v[0 + i] * LUT6[i]) >> 0x10; + } + v[0] = v[0] + v[0]; + v[1] = v[1] + v[1]; + v[2] = v[2] + v[2]; + v[3] = v[3] + v[3]; + v[4] = v[4] + v[4]; + v[5] = v[5] + v[5]; + v[6] = v[6] + v[6]; + v[7] = v[7] + v[7]; + v[12] = v[12] + v[12]; + v[13] = v[13] + v[13]; + v[15] = v[15] + v[15]; + + MP3AB0(v); + + /* Part 7: - 100% Accurate + SSV - Unoptimized */ + + v[0] = (v[17] + v[16]) >> 1; + v[1] = ((v[17] * (int)((short)0xA57E * 2)) + (v[16] * 0xB504)) >> 0x10; + v[2] = -v[18] - v[19]; + v[3] = ((v[18] - v[19]) * 0x16A09) >> 0x10; + v[4] = v[20] + v[21] + v[0]; + v[5] = (((v[20] - v[21]) * 0x16A09) >> 0x10) + v[1]; + v[6] = (((v[22] + v[23]) << 1) + v[0]) - v[2]; + v[7] = (((v[22] - v[23]) * 0x2D413) >> 0x10) + v[0] + v[1] + v[3]; + /* 0x16A8 */ + /* Save v[0] -> (T3 + 0xFFE0) */ + *(int16_t *)(hle->mp3_buffer() + ((t3 + (short)0xFFE0))) = (short) - v[0]; + v[8] = v[24] + v[25]; + v[9] = ((v[24] - v[25]) * 
0x16A09) >> 0x10; + v[10] = ((v[26] + v[27]) << 1) + v[8]; + v[11] = (((v[26] - v[27]) * 0x2D413) >> 0x10) + v[8] + v[9]; + v[12] = v[4] - ((v[28] + v[29]) << 1); + /* ** Store v12 -> (T2 + 0x20) */ + *(int16_t *)(hle->mp3_buffer() + ((t2 + (short)0x20))) = (short)v[12]; + v[13] = (((v[28] - v[29]) * 0x2D413) >> 0x10) - v[12] - v[5]; + v[14] = v[30] + v[31]; + v[14] = v[14] + v[14]; + v[14] = v[14] + v[14]; + v[14] = v[6] - v[14]; + v[15] = (((v[30] - v[31]) * 0x5A827) >> 0x10) - v[7]; + /* Store v14 -> (T5 + 0x20) */ + *(int16_t *)(hle->mp3_buffer() + ((t5 + (short)0x20))) = (short)v[14]; + v[14] = v[14] + v[1]; + /* Store v[14] -> (T6 + 0x20) */ + *(int16_t *)(hle->mp3_buffer() + ((t6 + (short)0x20))) = (short)v[14]; + /* Store v[15] -> (T1 + 0xFFE0) */ + *(int16_t *)(hle->mp3_buffer() + ((t1 + (short)0xFFE0))) = (short)v[15]; + v[9] = v[9] + v[10]; + v[1] = v[1] + v[6]; + v[6] = v[10] - v[6]; + v[1] = v[9] - v[1]; + /* Store v[6] -> (T5 + 0x60) */ + *(int16_t *)(hle->mp3_buffer() + ((t5 + (short)0x60))) = (short)v[6]; + v[10] = v[10] + v[2]; + v[10] = v[4] - v[10]; + /* Store v[10] -> (T2 + 0xFFA0) */ + *(int16_t *)(hle->mp3_buffer() + ((t2 + (short)0xFFA0))) = (short)v[10]; + v[12] = v[2] - v[12]; + /* Store v[12] -> (T2 + 0xFFE0) */ + *(int16_t *)(hle->mp3_buffer() + ((t2 + (short)0xFFE0))) = (short)v[12]; + v[5] = v[4] + v[5]; + v[4] = v[8] - v[4]; + /* Store v[4] -> (T2 + 0x60) */ + *(int16_t *)(hle->mp3_buffer() + ((t2 + (short)0x60))) = (short)v[4]; + v[0] = v[0] - v[8]; + /* Store v[0] -> (T3 + 0xFFA0) */ + *(int16_t *)(hle->mp3_buffer() + ((t3 + (short)0xFFA0))) = (short)v[0]; + v[7] = v[7] - v[11]; + /* Store v[7] -> (T1 + 0xFFA0) */ + *(int16_t *)(hle->mp3_buffer() + ((t1 + (short)0xFFA0))) = (short)v[7]; + v[11] = v[11] - v[3]; + /* Store v[1] -> (T6 + 0x60) */ + *(int16_t *)(hle->mp3_buffer() + ((t6 + (short)0x60))) = (short)v[1]; + v[11] = v[11] - v[5]; + /* Store v[11] -> (T0 + 0x60) */ + *(int16_t *)(hle->mp3_buffer() + ((t0 + (short)0x60))) = 
(short)v[11]; + v[3] = v[3] - v[13]; + /* Store v[3] -> (T0 + 0x20) */ + *(int16_t *)(hle->mp3_buffer() + ((t0 + (short)0x20))) = (short)v[3]; + v[13] = v[13] + v[2]; + /* Store v[13] -> (T0 + 0xFFE0) */ + *(int16_t *)(hle->mp3_buffer() + ((t0 + (short)0xFFE0))) = (short)v[13]; + v[2] = (v[5] - v[2]) - v[9]; + /* Store v[2] -> (T0 + 0xFFA0) */ + *(int16_t *)(hle->mp3_buffer() + ((t0 + (short)0xFFA0))) = (short)v[2]; + /* 0x7A8 - Verified... */ + + /* Step 8 - Dewindowing */ + + addptr = t6 & 0xFFE0; + + offset = 0x10 - (t4 >> 1); + for (x = 0; x < 8; x++) + { + int32_t v0; + int32_t v18; + v2 = v4 = v6 = v8 = 0; + + for (i = 7; i >= 0; i--) + { + v2 += ((int) * (int16_t *)(hle->mp3_buffer() + (addptr) + 0x00) * (short)DeWindowLUT[offset + 0x00] + 0x4000) >> 0xF; + v4 += ((int) * (int16_t *)(hle->mp3_buffer() + (addptr) + 0x10) * (short)DeWindowLUT[offset + 0x08] + 0x4000) >> 0xF; + v6 += ((int) * (int16_t *)(hle->mp3_buffer() + (addptr) + 0x20) * (short)DeWindowLUT[offset + 0x20] + 0x4000) >> 0xF; + v8 += ((int) * (int16_t *)(hle->mp3_buffer() + (addptr) + 0x30) * (short)DeWindowLUT[offset + 0x28] + 0x4000) >> 0xF; + addptr += 2; + offset++; + } + v0 = v2 + v4; + v18 = v6 + v8; + /* Clamp(v0); */ + /* Clamp(v18); */ + /* clamp??? 
*/ + *(int16_t *)(hle->mp3_buffer() + (outPtr ^ S16)) = v0; + *(int16_t *)(hle->mp3_buffer() + ((outPtr + 2)^S16)) = v18; + outPtr += 4; + addptr += 0x30; + offset += 0x38; + } + + offset = 0x10 - (t4 >> 1) + 8 * 0x40; + v2 = v4 = 0; + for (i = 0; i < 4; i++) + { + v2 += ((int) * (int16_t *)(hle->mp3_buffer() + (addptr) + 0x00) * (short)DeWindowLUT[offset + 0x00] + 0x4000) >> 0xF; + v2 += ((int) * (int16_t *)(hle->mp3_buffer() + (addptr) + 0x10) * (short)DeWindowLUT[offset + 0x08] + 0x4000) >> 0xF; + addptr += 2; + offset++; + v4 += ((int) * (int16_t *)(hle->mp3_buffer() + (addptr) + 0x00) * (short)DeWindowLUT[offset + 0x00] + 0x4000) >> 0xF; + v4 += ((int) * (int16_t *)(hle->mp3_buffer() + (addptr) + 0x10) * (short)DeWindowLUT[offset + 0x08] + 0x4000) >> 0xF; + addptr += 2; + offset++; + } + mult6 = *(int32_t *)(hle->mp3_buffer() + 0xCE8); + mult4 = *(int32_t *)(hle->mp3_buffer() + 0xCEC); + if (t4 & 0x2) + { + v2 = (v2 **(uint32_t *)(hle->mp3_buffer() + 0xCE8)) >> 0x10; + *(int16_t *)(hle->mp3_buffer() + (outPtr ^ S16)) = v2; + } + else + { + v4 = (v4 **(uint32_t *)(hle->mp3_buffer() + 0xCE8)) >> 0x10; + *(int16_t *)(hle->mp3_buffer() + (outPtr ^ S16)) = v4; + mult4 = *(uint32_t *)(hle->mp3_buffer() + 0xCE8); + } + addptr -= 0x50; + + for (x = 0; x < 8; x++) + { + int32_t v0; + int32_t v18; + v2 = v4 = v6 = v8 = 0; + + offset = (0x22F - (t4 >> 1) + x * 0x40); + + for (i = 0; i < 4; i++) + { + v2 += ((int) * (int16_t *)(hle->mp3_buffer() + (addptr) + 0x20) * (short)DeWindowLUT[offset + 0x00] + 0x4000) >> 0xF; + v2 -= ((int) * (int16_t *)(hle->mp3_buffer() + ((addptr + 2)) + 0x20) * (short)DeWindowLUT[offset + 0x01] + 0x4000) >> 0xF; + v4 += ((int) * (int16_t *)(hle->mp3_buffer() + (addptr) + 0x30) * (short)DeWindowLUT[offset + 0x08] + 0x4000) >> 0xF; + v4 -= ((int) * (int16_t *)(hle->mp3_buffer() + ((addptr + 2)) + 0x30) * (short)DeWindowLUT[offset + 0x09] + 0x4000) >> 0xF; + v6 += ((int) * (int16_t *)(hle->mp3_buffer() + (addptr) + 0x00) * 
(short)DeWindowLUT[offset + 0x20] + 0x4000) >> 0xF; + v6 -= ((int) * (int16_t *)(hle->mp3_buffer() + ((addptr + 2)) + 0x00) * (short)DeWindowLUT[offset + 0x21] + 0x4000) >> 0xF; + v8 += ((int) * (int16_t *)(hle->mp3_buffer() + (addptr) + 0x10) * (short)DeWindowLUT[offset + 0x28] + 0x4000) >> 0xF; + v8 -= ((int) * (int16_t *)(hle->mp3_buffer() + ((addptr + 2)) + 0x10) * (short)DeWindowLUT[offset + 0x29] + 0x4000) >> 0xF; + addptr += 4; + offset += 2; + } + v0 = v2 + v4; + v18 = v6 + v8; + /* Clamp(v0); */ + /* Clamp(v18); */ + /* clamp??? */ + *(int16_t *)(hle->mp3_buffer() + ((outPtr + 2)^S16)) = v0; + *(int16_t *)(hle->mp3_buffer() + ((outPtr + 4)^S16)) = v18; + outPtr += 4; + addptr -= 0x50; + } + + tmp = outPtr; + hi0 = mult6; + hi1 = mult4; + + hi0 = (int)hi0 >> 0x10; + hi1 = (int)hi1 >> 0x10; + for (i = 0; i < 8; i++) + { + /* v0 */ + vt = (*(int16_t *)(hle->mp3_buffer() + ((tmp - 0x40)^S16)) * hi0); + *(int16_t *)((uint8_t *)hle->mp3_buffer() + ((tmp - 0x40)^S16)) = clamp_s16(vt); + + /* v17 */ + vt = (*(int16_t *)(hle->mp3_buffer() + ((tmp - 0x30)^S16)) * hi0); + *(int16_t *)((uint8_t *)hle->mp3_buffer() + ((tmp - 0x30)^S16)) = clamp_s16(vt); + + /* v2 */ + vt = (*(int16_t *)(hle->mp3_buffer() + ((tmp - 0x1E)^S16)) * hi1); + *(int16_t *)((uint8_t *)hle->mp3_buffer() + ((tmp - 0x1E)^S16)) = clamp_s16(vt); + + /* v4 */ + vt = (*(int16_t *)(hle->mp3_buffer() + ((tmp - 0xE)^S16)) * hi1); + *(int16_t *)((uint8_t *)hle->mp3_buffer() + ((tmp - 0xE)^S16)) = clamp_s16(vt); + + tmp += 2; + } +} \ No newline at end of file diff --git a/Source/Android/PluginRSP/musyx.cpp b/Source/Android/PluginRSP/musyx.cpp new file mode 100644 index 000000000..a16c7ae06 --- /dev/null +++ b/Source/Android/PluginRSP/musyx.cpp @@ -0,0 +1,920 @@ +/**************************************************************************** +* * +* Project64 - A Nintendo 64 emulator. * +* http://www.pj64-emu.com/ * +* Copyright (C) 2016 Project64. All rights reserved. 
* +* * +* License: * +* GNU/GPLv2 http://www.gnu.org/licenses/gpl-2.0.html * +* * +****************************************************************************/ +#include "stdafx.h" +#include +/* NOTE(review): the #include directive just before this note carries no header name in this copy of the file; memcpy and memset are called further down, so it is presumably <string.h> - confirm against the repository */ +#include "arithmetics.h" +#include "audio.h" +#include "mem.h" + +/* various constants: SUBFRAME_SIZE is the number of int16_t samples in each internal subframe, MAX_VOICES the number of voice records that follow a subframe descriptor header, SAMPLE_BUFFER_SIZE the capacity (in int16_t samples) of the per-voice sample scratch buffer */ +enum { SUBFRAME_SIZE = 192 }; +enum { MAX_VOICES = 32 }; + +enum { SAMPLE_BUFFER_SIZE = 0x200 }; +/* byte offsets of the fields of a subframe descriptor (SFD); they are added to sfd_ptr when reading DRAM. The voice records start at SFD_VOICES for the v1 task and at SFD2_VOICES for the v2 task. */ +enum +{ + SFD_SFX_INDEX = 0x2, + SFD_VOICE_BITMASK = 0x4, + SFD_STATE_PTR = 0x8, + SFD_SFX_PTR = 0xc, + SFD_VOICES = 0x10, + + /* v2 only */ + SFD2_10_PTR = 0x10, + SFD2_14_BITMASK = 0x14, + SFD2_15_BITMASK = 0x15, + SFD2_16_BITMASK = 0x16, + SFD2_18_PTR = 0x18, + SFD2_1C_PTR = 0x1c, + SFD2_20_PTR = 0x20, + SFD2_24_PTR = 0x24, + SFD2_VOICES = 0x28 +}; +/* byte offsets of the fields of one voice record; they are added to voice_ptr. VOICE_SIZE is the distance between two consecutive voice records. Offset 0x40 is read differently for PCM16 and ADPCM voices. */ +enum +{ + VOICE_ENV_BEGIN = 0x00, + VOICE_ENV_STEP = 0x10, + VOICE_PITCH_Q16 = 0x20, + VOICE_PITCH_SHIFT = 0x22, + VOICE_CATSRC_0 = 0x24, + VOICE_CATSRC_1 = 0x30, + VOICE_ADPCM_FRAMES = 0x3c, + VOICE_SKIP_SAMPLES = 0x3e, + + /* for PCM16 */ + VOICE_U16_40 = 0x40, + VOICE_U16_42 = 0x42, + + /* for ADPCM */ + VOICE_ADPCM_TABLE_PTR = 0x40, + + VOICE_INTERLEAVED_PTR = 0x44, + VOICE_END_POINT = 0x48, + VOICE_RESTART_POINT = 0x4a, + VOICE_U16_4E = 0x4e, + + VOICE_SIZE = 0x50 +}; +/* byte offsets inside a CATSRC record (two source pointers followed by their two 16-bit sizes), as read by dma_cat8 and dma_cat16 */ +enum +{ + CATSRC_PTR1 = 0x00, + CATSRC_PTR2 = 0x04, + CATSRC_SIZE1 = 0x08, + CATSRC_SIZE2 = 0x0a +}; +/* byte offsets inside the state block addressed by state_ptr; the v1 and v2 tasks keep the last-4 history at different offsets */ +enum +{ + STATE_LAST_SAMPLE = 0x0, + STATE_BASE_VOL = 0x100, + STATE_CC0 = 0x110, + STATE_740_LAST4_V1 = 0x290, + + STATE_740_LAST4_V2 = 0x110 +}; +/* byte offsets inside the sfx parameter block addressed by sfx_ptr, as read by sfx_stage */ +enum +{ + SFX_CBUFFER_PTR = 0x00, + SFX_CBUFFER_LENGTH = 0x04, + SFX_TAP_COUNT = 0x08, + SFX_FIR4_HGAIN = 0x0a, + SFX_TAP_DELAYS = 0x0c, + SFX_TAP_GAINS = 0x2c, + SFX_U16_3C = 0x3c, + SFX_U16_3E = 0x3e, + SFX_FIR4_HCOEFFS = 0x40 +}; + +/* struct definition: working data of one MusyX task - four internal subframes, their base volumes and the FIR4 history */ +typedef struct +{ + /* internal subframes */ + int16_t left[SUBFRAME_SIZE]; + int16_t right[SUBFRAME_SIZE]; + int16_t cc0[SUBFRAME_SIZE]; + int16_t e50[SUBFRAME_SIZE]; + + /* internal subframes base volumes */ + int32_t base_vol[4]; + + /* last 4 samples of the previous sfx tap mix; sfx_stage copies them in front of the current tap mix before calling mix_fir4 */ + int16_t 
subframe_740_last4[4]; +} musyx_t; +/* mixer callback handed to sfx_stage: it adds the tap-mix subframe to the main subframes of musyx; gains points at the two 16-bit values read from SFX_U16_3C and SFX_U16_3E (ignored by the v1 mixer) */ +typedef void (*mix_sfx_with_main_subframes_t)(musyx_t *musyx, const int16_t *subframe, const uint16_t* gains); + +/* helper functions prototypes: base volume load/store/update and subframe initialisation */ +static void load_base_vol(CHle * hle, int32_t *base_vol, uint32_t address); +static void save_base_vol(CHle * hle, const int32_t *base_vol, uint32_t address); +static void update_base_vol(CHle * hle, int32_t *base_vol, uint32_t voice_mask, uint32_t last_sample_ptr, uint8_t mask_15, uint32_t ptr_24); +static void init_subframes_v1(musyx_t *musyx); +static void init_subframes_v2(musyx_t *musyx); +/* voice stage and sfx stage, with the sample loading and ADPCM decoding helpers they rely on */ +static uint32_t voice_stage(CHle * hle, musyx_t *musyx, uint32_t voice_ptr, uint32_t last_sample_ptr); +static void dma_cat8(CHle * hle, uint8_t *dst, uint32_t catsrc_ptr); +static void dma_cat16(CHle * hle, uint16_t *dst, uint32_t catsrc_ptr); +static void sfx_stage(CHle * hle, mix_sfx_with_main_subframes_t mix_sfx_with_main_subframes, musyx_t *musyx, uint32_t sfx_ptr, uint16_t idx); +static void load_samples_PCM16(CHle * hle, uint32_t voice_ptr, int16_t *samples, unsigned *segbase, unsigned *offset); +static void load_samples_ADPCM(CHle * hle, uint32_t voice_ptr, int16_t *samples, unsigned *segbase, unsigned *offset); +static void mix_voice_samples(CHle * hle, musyx_t *musyx, uint32_t voice_ptr, const int16_t *samples, unsigned segbase, unsigned offset, uint32_t last_sample_ptr); +static void adpcm_decode_frames(CHle * hle, int16_t *dst, const uint8_t *src, const int16_t *table, uint8_t count, uint8_t skip_samples); +static void adpcm_predict_frame(int16_t *dst, const uint8_t *src, const uint8_t *nibbles, unsigned int rshift); +/* the two mix_sfx_with_main_subframes_t implementations: v1 is passed by musyx_v1_task, v2 by musyx_v2_task */ +static void mix_sfx_with_main_subframes_v1(musyx_t *musyx, const int16_t *subframe, const uint16_t* gains); +static void mix_sfx_with_main_subframes_v2(musyx_t *musyx, const int16_t *subframe, const uint16_t* gains); +/* accumulation primitives: a single sample, a whole subframe, and a 4-tap FIR */ +static void mix_samples(int16_t *y, int16_t x, int16_t hgain); +static void mix_subframes(int16_t *y, const int16_t *x, int16_t hgain); +static 
void mix_fir4(int16_t *y, const int16_t *x, int16_t hgain, const int16_t *hcoeffs); + +static void interleave_stage_v1(CHle * hle, musyx_t *musyx, uint32_t output_ptr); + +static void interleave_stage_v2(CHle * hle, musyx_t *musyx, uint16_t mask_16, uint32_t ptr_18, uint32_t ptr_1c, uint32_t output_ptr); +/* dot4: 4-term dot product of x[0..3] and y[0..3]. Each product is shifted right by 15 and the running sum is passed through clamp_s16 after every term rather than once at the end. */ +static int32_t dot4(const int16_t *x, const int16_t *y) +{ + int32_t accu = 0; + + for (size_t i = 0; i < 4; ++i) + { + accu = clamp_s16(accu + (((int32_t)x[i] * (int32_t)y[i]) >> 15)); + } + return accu; +} + +/************************************************************************** +* MusyX v1 audio ucode +* +* Reads the address of the first subframe descriptor and the descriptor +* count from DMEM (TASK_DATA_PTR / TASK_DATA_SIZE), then runs +* update_base_vol, init_subframes_v1, voice_stage, sfx_stage and +* interleave_stage_v1 once per descriptor. base_vol, cc0 and the last-4 +* history are loaded from the state block once before the loop and written +* back once after it. +**************************************************************************/ +void musyx_v1_task(CHle * hle) +{ + uint32_t sfd_ptr = *dmem_u32(hle, TASK_DATA_PTR); + uint32_t sfd_count = *dmem_u32(hle, TASK_DATA_SIZE); + uint32_t state_ptr; + musyx_t musyx; + + hle->VerboseMessage("musyx_v1_task: *data=%x, #SF=%d", sfd_ptr,sfd_count); + + state_ptr = *dram_u32(hle, sfd_ptr + SFD_STATE_PTR); + + /* load initial state (only from the state block of the first descriptor) */ + load_base_vol(hle, musyx.base_vol, state_ptr + STATE_BASE_VOL); + dram_load_u16(hle, (uint16_t *)musyx.cc0, state_ptr + STATE_CC0, SUBFRAME_SIZE); + dram_load_u16(hle, (uint16_t *)musyx.subframe_740_last4, state_ptr + STATE_740_LAST4_V1, 4); + + for (;;) + { + /* parse SFD structure */ + uint16_t sfx_index = *dram_u16(hle, sfd_ptr + SFD_SFX_INDEX); + uint32_t voice_mask = *dram_u32(hle, sfd_ptr + SFD_VOICE_BITMASK); + uint32_t sfx_ptr = *dram_u32(hle, sfd_ptr + SFD_SFX_PTR); + uint32_t voice_ptr = sfd_ptr + SFD_VOICES; + uint32_t last_sample_ptr = state_ptr + STATE_LAST_SAMPLE; + uint32_t output_ptr; + + /* initialize internal subframes using updated base volumes (the v1 task passes 0 for mask_15 and ptr_24, so only voice contributions are added) */ + update_base_vol(hle, musyx.base_vol, voice_mask, last_sample_ptr, 0, 0); + init_subframes_v1(&musyx); + + /* active voices get mixed into L,R,cc0,e50 subframes (optional) */ + output_ptr = voice_stage(hle, &musyx, voice_ptr, last_sample_ptr); + + /* apply delay-based effects 
(optional) */ + sfx_stage(hle, mix_sfx_with_main_subframes_v1, &musyx, sfx_ptr, sfx_index); + + /* emit interleaved L,R subframes */ + interleave_stage_v1(hle, &musyx, output_ptr); +/* NOTE(review): the count is decremented before it is tested, so a count of 0 on entry wraps around instead of ending the loop - presumably the task is never started with 0 descriptors; confirm */ + --sfd_count; + if (sfd_count == 0) + { + break; + } +/* step to the next descriptor (header followed by MAX_VOICES voice records) and pick up its state block address */ + sfd_ptr += SFD_VOICES + MAX_VOICES * VOICE_SIZE; + state_ptr = *dram_u32(hle, sfd_ptr + SFD_STATE_PTR); + } + + /* writeback updated state (state_ptr is the one of the last descriptor processed) */ + save_base_vol(hle, musyx.base_vol, state_ptr + STATE_BASE_VOL); + dram_store_u16(hle, (uint16_t *)musyx.cc0, state_ptr + STATE_CC0, SUBFRAME_SIZE); + dram_store_u16(hle, (uint16_t *)musyx.subframe_740_last4, state_ptr + STATE_740_LAST4_V1, 4); +} + +/************************************************************************** +* MusyX v2 audio ucode +* +* Same outline as the v1 task, with these visible differences: the voice +* records start at SFD2_VOICES, the state (base volumes and last-4 history) +* is loaded and stored inside the loop for every descriptor, the left, right +* and cc0 subframes are stored at output_ptr by the task itself, and +* interleave_stage_v2 only runs when mask_16 is non-zero. +**************************************************************************/ +void musyx_v2_task(CHle * hle) +{ + uint32_t sfd_ptr = *dmem_u32(hle, TASK_DATA_PTR); + uint32_t sfd_count = *dmem_u32(hle, TASK_DATA_SIZE); + musyx_t musyx; + + hle->VerboseMessage("musyx_v2_task: *data=%x, #SF=%d", sfd_ptr, sfd_count); + + for (;;) + { + /* parse SFD structure */ + uint16_t sfx_index = *dram_u16(hle, sfd_ptr + SFD_SFX_INDEX); + uint32_t voice_mask = *dram_u32(hle, sfd_ptr + SFD_VOICE_BITMASK); + uint32_t state_ptr = *dram_u32(hle, sfd_ptr + SFD_STATE_PTR); + uint32_t sfx_ptr = *dram_u32(hle, sfd_ptr + SFD_SFX_PTR); + uint32_t voice_ptr = sfd_ptr + SFD2_VOICES; + + uint32_t ptr_10 = *dram_u32(hle, sfd_ptr + SFD2_10_PTR); + uint8_t mask_14 = *dram_u8 (hle, sfd_ptr + SFD2_14_BITMASK); + uint8_t mask_15 = *dram_u8 (hle, sfd_ptr + SFD2_15_BITMASK); + uint16_t mask_16 = *dram_u16(hle, sfd_ptr + SFD2_16_BITMASK); + uint32_t ptr_18 = *dram_u32(hle, sfd_ptr + SFD2_18_PTR); + uint32_t ptr_1c = *dram_u32(hle, sfd_ptr + SFD2_1C_PTR); + uint32_t ptr_20 = *dram_u32(hle, sfd_ptr + SFD2_20_PTR); + uint32_t ptr_24 = *dram_u32(hle, sfd_ptr + SFD2_24_PTR); + + uint32_t last_sample_ptr = state_ptr + STATE_LAST_SAMPLE; + uint32_t output_ptr; + + /* load state */ + 
load_base_vol(hle, musyx.base_vol, state_ptr + STATE_BASE_VOL); + dram_load_u16(hle, (uint16_t *)musyx.subframe_740_last4, state_ptr + STATE_740_LAST4_V2, 4); + + /* initialize internal subframes using updated base volumes */ + update_base_vol(hle, musyx.base_vol, voice_mask, last_sample_ptr, mask_15, ptr_24); + init_subframes_v2(&musyx); + + if (ptr_10) + { + /* TODO: a non-zero ptr_10 is only reported here; nothing else in this function uses ptr_10 or mask_14 */ + hle->WarnMessage("ptr_10=%08x mask_14=%02x ptr_24=%08x", ptr_10, mask_14, ptr_24); + } + + /* active voices get mixed into L,R,cc0,e50 subframes (optional) */ + output_ptr = voice_stage(hle, &musyx, voice_ptr, last_sample_ptr); + + /* apply delay-based effects (optional) */ + sfx_stage(hle, mix_sfx_with_main_subframes_v2, &musyx, sfx_ptr, sfx_index); +/* store left, right and cc0 one after the other: each holds SUBFRAME_SIZE 16-bit samples, i.e. 2*SUBFRAME_SIZE bytes */ + dram_store_u16(hle, (uint16_t*)musyx.left, output_ptr , SUBFRAME_SIZE); + dram_store_u16(hle, (uint16_t*)musyx.right, output_ptr + 2*SUBFRAME_SIZE, SUBFRAME_SIZE); + dram_store_u16(hle, (uint16_t*)musyx.cc0, output_ptr + 4*SUBFRAME_SIZE, SUBFRAME_SIZE); + + /* store state */ + save_base_vol(hle, musyx.base_vol, state_ptr + STATE_BASE_VOL); + dram_store_u16(hle, (uint16_t*)musyx.subframe_740_last4, state_ptr + STATE_740_LAST4_V2, 4); + + if (mask_16) + { + interleave_stage_v2(hle, &musyx, mask_16, ptr_18, ptr_1c, ptr_20); + } +/* NOTE(review): the count is decremented before it is tested, so a count of 0 on entry wraps around instead of ending the loop - presumably the task is never started with 0 descriptors; confirm */ + --sfd_count; + if (sfd_count == 0) + { + break; + } + + sfd_ptr += SFD2_VOICES + MAX_VOICES * VOICE_SIZE; + } +} +/* load_base_vol: reads the 4 base volumes stored at address. The four upper 16-bit halves come first (address + 0, 2, 4, 6) and the four lower halves follow (address + 8, 10, 12, 14); each pair is recombined into one 32-bit value. */ +static void load_base_vol(CHle * hle, int32_t *base_vol, uint32_t address) +{ + base_vol[0] = ((uint32_t)(*dram_u16(hle, address)) << 16) | (*dram_u16(hle, address + 8)); + base_vol[1] = ((uint32_t)(*dram_u16(hle, address + 2)) << 16) | (*dram_u16(hle, address + 10)); + base_vol[2] = ((uint32_t)(*dram_u16(hle, address + 4)) << 16) | (*dram_u16(hle, address + 12)); + base_vol[3] = ((uint32_t)(*dram_u16(hle, address + 6)) << 16) | (*dram_u16(hle, address + 14)); +} +/* save_base_vol: writes the 4 base volumes back in the layout load_base_vol reads - first the four upper 16-bit halves, then the four lower halves, as 8 consecutive 16-bit stores starting at address */ +static void save_base_vol(CHle * hle, const int32_t *base_vol, uint32_t address) +{ + unsigned k; + + for (k = 0; k < 4; ++k) + { + 
*dram_u16(hle, address) = (uint16_t)(base_vol[k] >> 16); + address += 2; + } + + for (k = 0; k < 4; ++k) + { + *dram_u16(hle, address) = (uint16_t)(base_vol[k]); + address += 2; + } +} +/* update_base_vol: for every bit set in voice_mask (bit i = voice i, MAX_VOICES bits) adds the four signed 16-bit values stored at last_sample_ptr + 8 * i to base_vol[0..3]; does the same for the low 4 bits of mask_15 with the 8-byte entries starting at ptr_24; finally scales every base volume by 0xf850 / 0x10000. */ +static void update_base_vol(CHle * hle, int32_t *base_vol, + uint32_t voice_mask, uint32_t last_sample_ptr, + uint8_t mask_15, uint32_t ptr_24) +{ + unsigned i, k; + uint32_t mask; + + hle->VerboseMessage("base_vol voice_mask = %08x", voice_mask); + hle->VerboseMessage("BEFORE: base_vol = %08x %08x %08x %08x", base_vol[0], base_vol[1], base_vol[2], base_vol[3]); + + /* optim: skip voices contributions entirely if voice_mask is empty */ + if (voice_mask != 0) + { + for (i = 0, mask = 1; i < MAX_VOICES; ++i, mask <<= 1, last_sample_ptr += 8) + { + if ((voice_mask & mask) == 0) + { + continue; + } + + for (k = 0; k < 4; ++k) + { + base_vol[k] += (int16_t)*dram_u16(hle, last_sample_ptr + k * 2); + } + } + } + + /* optim: skip contributions entirely if mask_15 is empty */ + if (mask_15 != 0) + { + for(i = 0, mask = 1; i < 4; ++i, mask <<= 1, ptr_24 += 8) + { + if ((mask_15 & mask) == 0) + { + continue; + } + + for(k = 0; k < 4; ++k) + { + base_vol[k] += (int16_t)*dram_u16(hle, ptr_24 + k * 2); + } + } + } + + /* apply 3% decay (0xf850 / 0x10000 is about 0.97). NOTE(review): the product is formed in 32-bit signed arithmetic before the shift, and it looks like it overflows once the magnitude of base_vol[k] exceeds about 33000 - confirm whether a 64-bit intermediate is wanted */ + for (k = 0; k < 4; ++k) + { + base_vol[k] = (base_vol[k] * 0x0000f850) >> 16; + } + hle->VerboseMessage("AFTER: base_vol = %08x %08x %08x %08x", base_vol[0], base_vol[1], base_vol[2], base_vol[3]); +} +/* init_subframes_v1: prepares the four subframes for a new v1 subframe. e50 is filled with clamp_s16(base_vol[3]); left gets clamp_s16(cc0 + base) and right clamp_s16(-cc0 - base), with base = clamp_s16(base_vol[2]) and cc0 the value the cc0 subframe held on entry; cc0 is then zeroed. base_vol[0] and base_vol[1] are not used here. */ +static void init_subframes_v1(musyx_t *musyx) +{ + unsigned i; + + int16_t base_cc0 = clamp_s16(musyx->base_vol[2]); + int16_t base_e50 = clamp_s16(musyx->base_vol[3]); + + int16_t *left = musyx->left; + int16_t *right = musyx->right; + int16_t *cc0 = musyx->cc0; + int16_t *e50 = musyx->e50; + + for (i = 0; i < SUBFRAME_SIZE; ++i) + { + *(e50++) = base_e50; + *(left++) = clamp_s16(*cc0 + base_cc0); + *(right++) = clamp_s16(-*cc0 - base_cc0); + *(cc0++) = 0; + } +} +/* init_subframes_v2: fills left, right, cc0 and e50 with the constants clamp_s16(base_vol[0]), [1], [2] and [3] respectively */ +static void init_subframes_v2(musyx_t *musyx) +{ + unsigned i,k; + int16_t values[4]; + 
int16_t* subframes[4]; +/* one constant fill value per subframe, taken from the base volume with the same index */ + for(k = 0; k < 4; ++k) + { + values[k] = clamp_s16(musyx->base_vol[k]); + } + + subframes[0] = musyx->left; + subframes[1] = musyx->right; + subframes[2] = musyx->cc0; + subframes[3] = musyx->e50; + + for (i = 0; i < SUBFRAME_SIZE; ++i) + { + for(k = 0; k < 4; ++k) + { + *(subframes[k]++) = values[k]; + } + } +} + +/* voice_stage: processes the voice records starting at voice_ptr and returns the interleaved subframe destination address, i.e. the first non-zero VOICE_INTERLEAVED_PTR found (or the first voice's pointer when the stage is skipped). Voice i stores its last samples at last_sample_ptr + 8 * i. NOTE(review): the loop is not bounded by MAX_VOICES; it ends only when a voice with a non-zero VOICE_INTERLEAVED_PTR is read - confirm the data always contains one. */ +static uint32_t voice_stage(CHle * hle, musyx_t *musyx, uint32_t voice_ptr, uint32_t last_sample_ptr) +{ + uint32_t output_ptr; + int i = 0; + + /* voice stage can be skipped if first voice has no samples (its first CATSRC size field is 0) */ + if (*dram_u16(hle, voice_ptr + VOICE_CATSRC_0 + CATSRC_SIZE1) == 0) + { + hle->VerboseMessage("Skipping Voice stage"); + output_ptr = *dram_u32(hle, voice_ptr + VOICE_INTERLEAVED_PTR); + } + else + { + /* otherwise process voices until a non-null output_ptr is encountered */ + for (;;) + { + /* load voice samples (PCM16 or ADPCM); a zero byte at VOICE_ADPCM_FRAMES selects PCM16 */ + int16_t samples[SAMPLE_BUFFER_SIZE]; + unsigned segbase; + unsigned offset; + + hle->VerboseMessage("Processing Voice #%d", i); + + if (*dram_u8(hle, voice_ptr + VOICE_ADPCM_FRAMES) == 0) + { + load_samples_PCM16(hle, voice_ptr, samples, &segbase, &offset); + } + else + { + load_samples_ADPCM(hle, voice_ptr, samples, &segbase, &offset); + } + + /* mix them into each of the internal subframes */ + mix_voice_samples(hle, musyx, voice_ptr, samples, segbase, offset, last_sample_ptr + i * 8); + + /* check break condition */ + output_ptr = *dram_u32(hle, voice_ptr + VOICE_INTERLEAVED_PTR); + if (output_ptr != 0) + { + break; + } + + /* next voice */ + ++i; + voice_ptr += VOICE_SIZE; + } + } + + return output_ptr; +} +/* dma_cat8: copies the two DRAM byte ranges described by the CATSRC record at catsrc_ptr into dst, the second directly after the first. The sizes come straight from DRAM and are not checked against the capacity of dst here. */ +static void dma_cat8(CHle * hle, uint8_t *dst, uint32_t catsrc_ptr) +{ + uint32_t ptr1 = *dram_u32(hle, catsrc_ptr + CATSRC_PTR1); + uint32_t ptr2 = *dram_u32(hle, catsrc_ptr + CATSRC_PTR2); + uint16_t size1 = *dram_u16(hle, catsrc_ptr + CATSRC_SIZE1); + uint16_t size2 = *dram_u16(hle, catsrc_ptr + CATSRC_SIZE2); + 
+/* the two sizes are used unchanged as element counts for the 8-bit copies */ size_t count1 = size1; + size_t count2 = size2; + + hle->VerboseMessage("dma_cat: %08x %08x %04x %04x", ptr1, ptr2, size1, size2); + + dram_load_u8(hle, dst, ptr1, count1); +/* the second range is optional; when present it is appended right after the first */ + if (size2 == 0) + { + return; + } + + dram_load_u8(hle, dst + count1, ptr2, count2); +} +/* dma_cat16: 16-bit counterpart of dma_cat8. The two sizes of the CATSRC record are halved to obtain element counts, and the second range is appended count1 elements after the start of dst. The sizes are not checked against the capacity of dst here. */ +static void dma_cat16(CHle * hle, uint16_t *dst, uint32_t catsrc_ptr) +{ + uint32_t ptr1 = *dram_u32(hle, catsrc_ptr + CATSRC_PTR1); + uint32_t ptr2 = *dram_u32(hle, catsrc_ptr + CATSRC_PTR2); + uint16_t size1 = *dram_u16(hle, catsrc_ptr + CATSRC_SIZE1); + uint16_t size2 = *dram_u16(hle, catsrc_ptr + CATSRC_SIZE2); + + size_t count1 = size1 >> 1; + size_t count2 = size2 >> 1; + + hle->VerboseMessage("dma_cat: %08x %08x %04x %04x", ptr1, ptr2, size1, size2); + + dram_load_u16(hle, dst, ptr1, count1); + + if (size2 == 0) + { + return; + } + dram_load_u16(hle, dst + count1, ptr2, count2); +} +/* load_samples_PCM16: loads the PCM16 samples of the voice at voice_ptr into samples. The data described by VOICE_CATSRC_0 is loaded at samples + *segbase, with *segbase = SAMPLE_BUFFER_SIZE - count; when VOICE_U16_42 is non-zero the data described by VOICE_CATSRC_1 is loaded at the start of samples. *offset receives the byte read from VOICE_SKIP_SAMPLES. NOTE(review): count is not checked against SAMPLE_BUFFER_SIZE, so a larger value would wrap *segbase - presumably excluded by the data; confirm. */ +static void load_samples_PCM16(CHle * hle, uint32_t voice_ptr, int16_t *samples, unsigned *segbase, unsigned *offset) +{ + uint8_t u8_3e = *dram_u8(hle, voice_ptr + VOICE_SKIP_SAMPLES); + uint16_t u16_40 = *dram_u16(hle, voice_ptr + VOICE_U16_40); + uint16_t u16_42 = *dram_u16(hle, voice_ptr + VOICE_U16_42); + + unsigned count = align(u16_40 + u8_3e, 4); + + hle->VerboseMessage("Format: PCM16"); + + *segbase = SAMPLE_BUFFER_SIZE - count; + *offset = u8_3e; + + dma_cat16(hle, (uint16_t *)samples + *segbase, voice_ptr + VOICE_CATSRC_0); + + if (u16_42 != 0) + { + dma_cat16(hle, (uint16_t *)samples, voice_ptr + VOICE_CATSRC_1); + } +} +/* load_samples_ADPCM: decodes the ADPCM data of the voice at voice_ptr into samples. The bytes at VOICE_ADPCM_FRAMES and VOICE_ADPCM_FRAMES + 1 are frame counts for VOICE_CATSRC_0 and VOICE_CATSRC_1; the first source is decoded to samples + *segbase and, when its frame count is non-zero, the second one to the start of samples. *offset receives the low 5 bits of the VOICE_SKIP_SAMPLES byte. */ +static void load_samples_ADPCM(CHle * hle, uint32_t voice_ptr, int16_t *samples, unsigned *segbase, unsigned *offset) +{ + /* decompressed samples cannot exceed 0x400 bytes; + * ADPCM has a compression ratio of 5/16 */ + uint8_t buffer[SAMPLE_BUFFER_SIZE * 2 * 5 / 16]; + int16_t adpcm_table[128]; + + uint8_t u8_3c = *dram_u8(hle, voice_ptr + VOICE_ADPCM_FRAMES ); + uint8_t u8_3d = *dram_u8(hle, voice_ptr + VOICE_ADPCM_FRAMES + 1); + uint8_t u8_3e = *dram_u8(hle, voice_ptr + 
VOICE_SKIP_SAMPLES ); + uint8_t u8_3f = *dram_u8(hle, voice_ptr + VOICE_SKIP_SAMPLES + 1); + uint32_t adpcm_table_ptr = *dram_u32(hle, voice_ptr + VOICE_ADPCM_TABLE_PTR); + unsigned count; + + hle->VerboseMessage("Format: ADPCM"); + + hle->VerboseMessage("Loading ADPCM table: %08x", adpcm_table_ptr); + dram_load_u16(hle, (uint16_t *)adpcm_table, adpcm_table_ptr, 128); +/* every frame decodes to 32 samples. NOTE(review): with more than 16 frames count exceeds SAMPLE_BUFFER_SIZE and *segbase wraps - presumably excluded by the data; confirm */ + count = u8_3c << 5; + + *segbase = SAMPLE_BUFFER_SIZE - count; + *offset = u8_3e & 0x1f; + + dma_cat8(hle, buffer, voice_ptr + VOICE_CATSRC_0); + adpcm_decode_frames(hle, samples + *segbase, buffer, adpcm_table, u8_3c, u8_3e); + + if (u8_3d != 0) + { + dma_cat8(hle, buffer, voice_ptr + VOICE_CATSRC_1); + adpcm_decode_frames(hle, samples, buffer, adpcm_table, u8_3d, u8_3f); + } +} +/* adpcm_decode_frames: decodes count frames of 32 samples each from src into dst. As the pointer steps below show, two frames share a 40-byte group: two 4-byte headers (read through src) followed by two 16-byte nibble blocks (read through nibbles). A skip_samples value of 32 or more makes decoding start at the second frame of the first group. The upper nibble of the first byte of a nibble block selects the codebook inside table, the lower nibble is the shift passed to adpcm_predict_frame. */ +static void adpcm_decode_frames(CHle * hle, int16_t *dst, const uint8_t *src, const int16_t *table, uint8_t count, uint8_t skip_samples) +{ + int16_t frame[32]; + const uint8_t *nibbles = src + 8; + unsigned i; + bool jump_gap = false; + + hle->VerboseMessage("ADPCM decode: count=%d, skip=%d", count, skip_samples); + + if (skip_samples >= 32) + { + jump_gap = true; + nibbles += 16; + src += 4; + } + + for (i = 0; i < count; ++i) + { + uint8_t c2 = nibbles[0]; + + const int16_t *book = (c2 & 0xf0) + table; + unsigned int rshift = (c2 & 0x0f); + + adpcm_predict_frame(frame, src, nibbles, rshift); +/* the first two samples are copied as they are; the other 30 go through adpcm_compute_residuals in runs of 6, 8, 8 and 8, each run being handed a pointer to the two output samples that precede it */ + memcpy(dst, frame, 2 * sizeof(frame[0])); + adpcm_compute_residuals(dst + 2, frame + 2, book, dst , 6); + adpcm_compute_residuals(dst + 8, frame + 8, book, dst + 6, 8); + adpcm_compute_residuals(dst + 16, frame + 16, book, dst + 14, 8); + adpcm_compute_residuals(dst + 24, frame + 24, book, dst + 22, 8); +/* after the second frame of a group, step over the rest of the group to reach the next one */ + if (jump_gap) + { + nibbles += 8; + src += 32; + } + + jump_gap = !jump_gap; + nibbles += 16; + src += 4; + dst += 32; + } +} +/* adpcm_predict_frame: fills dst with the 32 values of one frame. The first two are the big-endian 16-bit values in src[0..3]; the other 30 are produced by adpcm_predict_sample from the upper and lower nibble of nibbles[1] to nibbles[15]. */ +static void adpcm_predict_frame(int16_t *dst, const uint8_t *src, const uint8_t *nibbles, unsigned int rshift) +{ + unsigned int i; + + *(dst++) = (src[0] << 8) | src[1]; + *(dst++) = (src[2] << 
8) | src[3]; +/* the loop starts at 1: nibbles[0] is not sample data here (adpcm_decode_frames reads the codebook selector and the shift from that byte) */ + for (i = 1; i < 16; ++i) + { + uint8_t byte = nibbles[i]; + + *(dst++) = adpcm_predict_sample(byte, 0xf0, 8, rshift); + *(dst++) = adpcm_predict_sample(byte, 0x0f, 12, rshift); + } +} +/* mix_voice_samples: resamples the decoded samples of one voice and adds the result to the left, right, cc0 and e50 subframes of musyx, each through its own envelope. segbase and offset are the values produced by load_samples_PCM16 / load_samples_ADPCM. The last mixed value of each of the four channels is stored at last_sample_ptr. */ +static void mix_voice_samples(CHle * hle, musyx_t *musyx, uint32_t voice_ptr, const int16_t *samples, unsigned segbase, unsigned offset, uint32_t last_sample_ptr) +{ + int i, k; + + /* parse VOICE structure */ + const uint16_t pitch_q16 = *dram_u16(hle, voice_ptr + VOICE_PITCH_Q16); + const uint16_t pitch_shift = *dram_u16(hle, voice_ptr + VOICE_PITCH_SHIFT); /* Q4.12 */ + + const uint16_t end_point = *dram_u16(hle, voice_ptr + VOICE_END_POINT); + const uint16_t restart_point = *dram_u16(hle, voice_ptr + VOICE_RESTART_POINT); + + const uint16_t u16_4e = *dram_u16(hle, voice_ptr + VOICE_U16_4E); + + /* init values and pointers */ + const int16_t *sample = samples + segbase + offset + u16_4e; + const int16_t *const sample_end = samples + segbase + end_point; +/* bit 15 of restart_point selects the origin of the restart index (its low 15 bits): the start of samples when set, samples + segbase when clear */ const int16_t *const sample_restart = samples + (restart_point & 0x7fff) + + (((restart_point & 0x8000) != 0) ? 
0x000 : segbase); +/* pitch_accu is a 16.16 fixed-point read position: the part above bit 15 advances sample, and the top 6 bits of the low half select a group of 4 coefficients in RESAMPLE_LUT. pitch_step is the per-output-sample increment. */ + uint32_t pitch_accu = pitch_q16; + uint32_t pitch_step = pitch_shift << 4; + + int32_t v4_env[4]; + int32_t v4_env_step[4]; + int16_t *v4_dst[4]; + int16_t v4[4]; + + dram_load_u32(hle, (uint32_t *)v4_env, voice_ptr + VOICE_ENV_BEGIN, 4); + dram_load_u32(hle, (uint32_t *)v4_env_step, voice_ptr + VOICE_ENV_STEP, 4); + + v4_dst[0] = musyx->left; + v4_dst[1] = musyx->right; + v4_dst[2] = musyx->cc0; + v4_dst[3] = musyx->e50; + + hle->VerboseMessage("Voice debug: segbase=%d" "\tu16_4e=%04x\n" "\tpitch: frac0=%04x shift=%04x\n" "\tend_point=%04x restart_point=%04x\n" "\tenv = %08x %08x %08x %08x\n" "\tenv_step = %08x %08x %08x %08x\n", segbase, u16_4e, pitch_q16, pitch_shift, end_point, restart_point, v4_env[0], v4_env[1], v4_env[2], v4_env[3], v4_env_step[0], v4_env_step[1], v4_env_step[2], v4_env_step[3]); + + for (i = 0; i < SUBFRAME_SIZE; ++i) + { + /* update sample and lut pointers and then pitch_accu */ + const int16_t *lut = (RESAMPLE_LUT + ((pitch_accu & 0xfc00) >> 8)); + int dist; + int16_t v; + + sample += (pitch_accu >> 16); + pitch_accu &= 0xffff; + pitch_accu += pitch_step; + + /* handle end/restart points: once the read position reaches sample_end it continues from sample_restart, keeping the overshoot */ + dist = sample - sample_end; + if (dist >= 0) + { + sample = sample_restart + dist; + } + + /* apply resample filter (4 taps: sample[0..3] against lut[0..3]) */ + v = clamp_s16(dot4(sample, lut)); + + for (k = 0; k < 4; ++k) + { + /* envmix: scale by the upper 16 bits of the envelope, then accumulate into the destination subframe */ + int32_t accu = (v * (v4_env[k] >> 16)) >> 15; + v4[k] = clamp_s16(accu); + *(v4_dst[k]) = clamp_s16(accu + *(v4_dst[k])); + + /* update envelopes and dst pointers */ + ++(v4_dst[k]); + v4_env[k] += v4_env_step[k]; + } + } + + /* save last resampled sample of each of the 4 channels (update_base_vol reads 4 such values per voice) */ + dram_store_u16(hle, (uint16_t *)v4, last_sample_ptr, 4); + + hle->VerboseMessage("last_sample = %04x %04x %04x %04x", v4[0], v4[1], v4[2], v4[3]); +} +/* sfx_stage: delay-based effect. Does nothing when sfx_ptr is 0. Otherwise it reads the parameters at sfx_ptr, mixes tap_count delayed subframes read from the circular buffer in DRAM, hands the mix to the mix_sfx_with_main_subframes callback, runs mix_fir4 on it into musyx->e50 and stores e50 into the circular buffer at sample position idx * SUBFRAME_SIZE. */ +static void sfx_stage(CHle * hle, mix_sfx_with_main_subframes_t mix_sfx_with_main_subframes, musyx_t *musyx, uint32_t sfx_ptr, uint16_t idx) +{ + unsigned int i; + + int16_t buffer[SUBFRAME_SIZE + 4]; + int16_t *subframe = 
buffer + 4; + + uint32_t tap_delays[8]; + int16_t tap_gains[8]; + int16_t fir4_hcoeffs[4]; + + int16_t delayed[SUBFRAME_SIZE]; + int dpos, dlength; + + const uint32_t pos = idx * SUBFRAME_SIZE; + + uint32_t cbuffer_ptr; + uint32_t cbuffer_length; + uint16_t tap_count; + int16_t fir4_hgain; + uint16_t sfx_gains[2]; + + hle->VerboseMessage("SFX: %08x, idx=%d", sfx_ptr, idx); + + if (sfx_ptr == 0) + { + return; + } + + /* load sfx parameters */ + cbuffer_ptr = *dram_u32(hle, sfx_ptr + SFX_CBUFFER_PTR); + cbuffer_length = *dram_u32(hle, sfx_ptr + SFX_CBUFFER_LENGTH); + + tap_count = *dram_u16(hle, sfx_ptr + SFX_TAP_COUNT); + + dram_load_u32(hle, tap_delays, sfx_ptr + SFX_TAP_DELAYS, 8); + dram_load_u16(hle, (uint16_t *)tap_gains, sfx_ptr + SFX_TAP_GAINS, 8); + + fir4_hgain = *dram_u16(hle, sfx_ptr + SFX_FIR4_HGAIN); + dram_load_u16(hle, (uint16_t *)fir4_hcoeffs, sfx_ptr + SFX_FIR4_HCOEFFS, 4); + + sfx_gains[0] = *dram_u16(hle, sfx_ptr + SFX_U16_3C); + sfx_gains[1] = *dram_u16(hle, sfx_ptr + SFX_U16_3E); + + hle->VerboseMessage("cbuffer: ptr=%08x length=%x", cbuffer_ptr, cbuffer_length); + hle->VerboseMessage("fir4: hgain=%04x hcoeff=%04x %04x %04x %04x", fir4_hgain, fir4_hcoeffs[0], fir4_hcoeffs[1], fir4_hcoeffs[2], fir4_hcoeffs[3]); + hle->VerboseMessage("tap count=%d\n" "delays: %08x %08x %08x %08x %08x %08x %08x %08x\n" "gains: %04x %04x %04x %04x %04x %04x %04x %04x", tap_count, tap_delays[0], tap_delays[1], tap_delays[2], tap_delays[3], tap_delays[4], tap_delays[5], tap_delays[6], tap_delays[7], tap_gains[0], tap_gains[1], tap_gains[2], tap_gains[3], tap_gains[4], tap_gains[5], tap_gains[6], tap_gains[7]); + hle->VerboseMessage("sfx_gains=%04x %04x", sfx_gains[0], sfx_gains[1]); + + /* mix up to 8 delayed subframes */ + memset(subframe, 0, SUBFRAME_SIZE * sizeof(subframe[0])); + for (i = 0; i < tap_count; ++i) + { + dpos = pos - tap_delays[i]; + if (dpos <= 0) + { + dpos += cbuffer_length; + } + dlength = SUBFRAME_SIZE; + + if ((uint32_t)(dpos + SUBFRAME_SIZE) > 
cbuffer_length) + { + dlength = cbuffer_length - dpos; + dram_load_u16(hle, (uint16_t *)delayed + dlength, cbuffer_ptr, SUBFRAME_SIZE - dlength); + } + + dram_load_u16(hle, (uint16_t *)delayed, cbuffer_ptr + dpos * 2, dlength); + + mix_subframes(subframe, delayed, tap_gains[i]); + } + + /* add resulting subframe to main subframes */ + mix_sfx_with_main_subframes(musyx, subframe, sfx_gains); + + /* apply FIR4 filter and writeback filtered result */ + memcpy(buffer, musyx->subframe_740_last4, 4 * sizeof(int16_t)); + memcpy(musyx->subframe_740_last4, subframe + SUBFRAME_SIZE - 4, 4 * sizeof(int16_t)); + mix_fir4(musyx->e50, buffer + 1, fir4_hgain, fir4_hcoeffs); + dram_store_u16(hle, (uint16_t *)musyx->e50, cbuffer_ptr + pos * 2, SUBFRAME_SIZE); +} + +static void mix_sfx_with_main_subframes_v1(musyx_t *musyx, const int16_t *subframe, const uint16_t* UNUSED(gains)) +{ + unsigned i; + + for (i = 0; i < SUBFRAME_SIZE; ++i) + { + int16_t v = subframe[i]; + musyx->left[i] = clamp_s16(musyx->left[i] + v); + musyx->right[i] = clamp_s16(musyx->right[i] + v); + } +} + +static void mix_sfx_with_main_subframes_v2(musyx_t *musyx, const int16_t *subframe, const uint16_t* gains) +{ + unsigned i; + + for (i = 0; i < SUBFRAME_SIZE; ++i) + { + int16_t v = subframe[i]; + int16_t v1 = (int32_t)(v * gains[0]) >> 16; + int16_t v2 = (int32_t)(v * gains[1]) >> 16; + + musyx->left[i] = clamp_s16(musyx->left[i] + v1); + musyx->right[i] = clamp_s16(musyx->right[i] + v1); + musyx->cc0[i] = clamp_s16(musyx->cc0[i] + v2); + } +} + +static void mix_samples(int16_t *y, int16_t x, int16_t hgain) +{ + *y = clamp_s16(*y + ((x * hgain + 0x4000) >> 15)); +} + +static void mix_subframes(int16_t *y, const int16_t *x, int16_t hgain) +{ + for (unsigned int i = 0; i < SUBFRAME_SIZE; ++i) + { + mix_samples(&y[i], x[i], hgain); + } +} + +static void mix_fir4(int16_t *y, const int16_t *x, int16_t hgain, const int16_t *hcoeffs) +{ + unsigned int i; + int32_t h[4]; + + h[0] = (hgain * hcoeffs[0]) >> 15; + h[1] = 
(hgain * hcoeffs[1]) >> 15; + h[2] = (hgain * hcoeffs[2]) >> 15; + h[3] = (hgain * hcoeffs[3]) >> 15; + + for (i = 0; i < SUBFRAME_SIZE; ++i) + { + int32_t v = (h[0] * x[i] + h[1] * x[i + 1] + h[2] * x[i + 2] + h[3] * x[i + 3]) >> 15; + y[i] = clamp_s16(y[i] + v); + } +} + +/* interleave_stage_v1: adds musyx->base_vol[0] and base_vol[1] (each clamped to s16) to the left and right subframes and writes SUBFRAME_SIZE packed 32-bit words to DRAM at output_ptr, left sample in the upper 16 bits and right sample in the lower 16 bits */ static void interleave_stage_v1(CHle * hle, musyx_t *musyx, uint32_t output_ptr) +{ + size_t i; + + int16_t base_left; + int16_t base_right; + + int16_t *left; + int16_t *right; + uint32_t *dst; + + hle->VerboseMessage("interleave: %08x", output_ptr); + + base_left = clamp_s16(musyx->base_vol[0]); + base_right = clamp_s16(musyx->base_vol[1]); + + left = musyx->left; + right = musyx->right; + dst = dram_u32(hle, output_ptr); + + for (i = 0; i < SUBFRAME_SIZE; ++i) + { + uint16_t l = clamp_s16(*(left++) + base_left); + uint16_t r = clamp_s16(*(right++) + base_right); + + /* widen before shifting: a uint16_t promotes to int, and shifting a value with bit 15 set left by 16 overflows it */ *(dst++) = ((uint32_t)l << 16) | r; + } +} + +/* interleave_stage_v2: seeds left with the subframe stored at ptr_1c and right with its negation, then for every bit set in mask_16 (8 entries of 8 bytes starting at ptr_18: a buffer address at offset 0 and a gain at offset 4) mixes three consecutive SUBFRAME_SIZE-sample buffers into left, right and a local subframe. Finally writes the packed left/right words to output_ptr and the local subframe back to ptr_1c. */ static void interleave_stage_v2(CHle * hle, musyx_t *musyx, uint16_t mask_16, uint32_t ptr_18, uint32_t ptr_1c, uint32_t output_ptr) +{ + unsigned i, k; + int16_t subframe[SUBFRAME_SIZE]; + uint32_t *dst; + uint16_t mask; + + hle->VerboseMessage("mask_16=%04x ptr_18=%08x ptr_1c=%08x output_ptr=%08x", mask_16, ptr_18, ptr_1c, output_ptr); + + /* compute L_total, R_total and update subframe @ptr_1c */ + memset(subframe, 0, SUBFRAME_SIZE*sizeof(subframe[0])); + + for(i = 0; i < SUBFRAME_SIZE; ++i) + { + int16_t v = *dram_u16(hle, ptr_1c + i*2); + musyx->left[i] = v; + musyx->right[i] = clamp_s16(-v); + } + + for (k = 0, mask = 1; k < 8; ++k, mask <<= 1, ptr_18 += 8) + { + int16_t hgain; + uint32_t address; + + /* entry k is skipped when its bit is clear; ptr_18 still advances to the next entry */ if ((mask_16 & mask) == 0) + { + continue; + } + + address = *dram_u32(hle, ptr_18); + hgain = *dram_u16(hle, ptr_18 + 4); + + for(i = 0; i < SUBFRAME_SIZE; ++i, address += 2) + { + mix_samples(&musyx->left[i], *dram_u16(hle, address), hgain); + mix_samples(&musyx->right[i], *dram_u16(hle, address + 2*SUBFRAME_SIZE), hgain); + mix_samples(&subframe[i], *dram_u16(hle, address + 
4*SUBFRAME_SIZE), hgain); + } + } + + /* interleave L_total and R_total */ + dst = dram_u32(hle, output_ptr); + for(i = 0; i < SUBFRAME_SIZE; ++i) + { + uint16_t l = musyx->left[i]; + uint16_t r = musyx->right[i]; + /* widen before shifting so a sample with bit 15 set does not overflow the promoted int */ *(dst++) = ((uint32_t)l << 16) | r; + } + + /* writeback subframe @ptr_1c */ + dram_store_u16(hle, (uint16_t*)subframe, ptr_1c, SUBFRAME_SIZE); +} \ No newline at end of file diff --git a/Source/Android/PluginRSP/stdafx.cpp b/Source/Android/PluginRSP/stdafx.cpp new file mode 100644 index 000000000..e8d17943b --- /dev/null +++ b/Source/Android/PluginRSP/stdafx.cpp @@ -0,0 +1,11 @@ +/**************************************************************************** +* * +* Project64 - A Nintendo 64 emulator. * +* http://www.pj64-emu.com/ * +* Copyright (C) 2016 Project64. All rights reserved. * +* * +* License: * +* GNU/GPLv2 http://www.gnu.org/licenses/gpl-2.0.html * +* * +****************************************************************************/ +#include "stdafx.h" \ No newline at end of file diff --git a/Source/Android/PluginRSP/stdafx.h b/Source/Android/PluginRSP/stdafx.h new file mode 100644 index 000000000..e8844fade --- /dev/null +++ b/Source/Android/PluginRSP/stdafx.h @@ -0,0 +1,16 @@ +/**************************************************************************** +* * +* Project64 - A Nintendo 64 emulator. * +* http://www.pj64-emu.com/ * +* Copyright (C) 2016 Project64. All rights reserved. * +* * +* License: * +* GNU/GPLv2 http://www.gnu.org/licenses/gpl-2.0.html * +* * +****************************************************************************/ +#include +#include +#include +#include "Version.h" +#include "Rsp.h" +#include "hle.h" diff --git a/Source/Android/PluginRSP/ucodes.h b/Source/Android/PluginRSP/ucodes.h new file mode 100644 index 000000000..87eb7c4a5 --- /dev/null +++ b/Source/Android/PluginRSP/ucodes.h @@ -0,0 +1,126 @@ +/**************************************************************************** +* * +* Project64 - A Nintendo 64 emulator. 
* +* http://www.pj64-emu.com/ * +* Copyright (C) 2016 Project64. All rights reserved. * +* * +* License: * +* GNU/GPLv2 http://www.gnu.org/licenses/gpl-2.0.html * +* * +****************************************************************************/ +#pragma once + +/* forward declaration only: every prototype in this header takes the CHle by pointer */ class CHle; + +/* cic_x105 ucode */ +void cicx105_ucode(CHle * hle); + +/* audio list ucodes - audio */ +/* N_SEGMENTS is the number of entries in alist_audio_t::segments */ enum { N_SEGMENTS = 16 }; + +/* state for the "audio" alist variant: segment table, main and auxiliary buffers, dry/wet gains, per-channel envelopes, ADPCM loop address and coefficient table */ struct alist_audio_t +{ + /* segments */ + uint32_t segments[N_SEGMENTS]; + + /* main buffers */ + uint16_t in; + uint16_t out; + uint16_t count; + + /* auxiliary buffers */ + uint16_t dry_right; + uint16_t wet_left; + uint16_t wet_right; + + /* gains */ + int16_t dry; + int16_t wet; + + /* envelopes (0:left, 1:right) */ + int16_t vol[2]; + int16_t target[2]; + int32_t rate[2]; + + /* ADPCM loop point address */ + uint32_t loop; + + /* storage for ADPCM table and polef coefficients */ + int16_t table[16 * 8]; +}; + +void alist_process_audio(CHle * hle); +void alist_process_audio_ge(CHle * hle); +void alist_process_audio_bc(CHle * hle); + +/* audio list ucodes - naudio */ +/* same gain, envelope and ADPCM fields as alist_audio_t, without the segment table and buffer fields */ struct alist_naudio_t +{ + /* gains */ + int16_t dry; + int16_t wet; + + /* envelopes (0:left, 1:right) */ + int16_t vol[2]; + int16_t target[2]; + int32_t rate[2]; + + /* ADPCM loop point address */ + uint32_t loop; + + /* storage for ADPCM table and polef coefficients */ + int16_t table[16 * 8]; +}; + +void alist_process_naudio(CHle * hle); +void alist_process_naudio_bk(CHle * hle); +void alist_process_naudio_dk(CHle * hle); +void alist_process_naudio_mp3(CHle * hle); +void alist_process_naudio_cbfd(CHle * hle); + +/* audio list ucodes - nead */ +/* unlike the two structs above, the envelope state here is three ramp values with three steps, and extra state is kept for the filter command */ struct alist_nead_t +{ + /* main buffers */ + uint16_t in; + uint16_t out; + uint16_t count; + + /* envmixer ramps */ + uint16_t env_values[3]; + uint16_t env_steps[3]; + + /* ADPCM loop point address */ + uint32_t loop; + + /* storage for ADPCM table and polef coefficients */ + int16_t table[16 * 8]; + + /* filter audio command state */ + uint16_t filter_count; 
+ uint32_t filter_lut_address[2]; +}; + +/* one entry point per nead variant; all share the same signature */ void alist_process_nead_mk(CHle * hle); +void alist_process_nead_sfj(CHle * hle); +void alist_process_nead_sf(CHle * hle); +void alist_process_nead_fz(CHle * hle); +void alist_process_nead_wrjb(CHle * hle); +void alist_process_nead_ys(CHle * hle); +void alist_process_nead_1080(CHle * hle); +void alist_process_nead_oot(CHle * hle); +void alist_process_nead_mm(CHle * hle); +void alist_process_nead_mmb(CHle * hle); +void alist_process_nead_ac(CHle * hle); + +/* mp3 ucode */ +/* the only entry point here that takes more than the CHle pointer; NOTE(review): the meaning of index and address is not visible from this header, confirm against the definition */ void mp3_task(CHle * hle, unsigned int index, uint32_t address); + +/* musyx ucodes */ +void musyx_v1_task(CHle * hle); +void musyx_v2_task(CHle * hle); + +/* jpeg ucodes */ +void jpeg_decode_PS0(CHle * hle); +void jpeg_decode_PS(CHle * hle); +void jpeg_decode_OB(CHle * hle);