Fix all known div32 issues

Support libretro Naomi and Atomiswave savestates
This commit is contained in:
Flyinghead 2020-06-06 18:53:47 +02:00
parent 0fd51fb52a
commit 8740f22c0a
7 changed files with 275 additions and 102 deletions

View File

@ -907,10 +907,14 @@ static bool dec_generic(u32 op)
{
verify(!state.cpu.is_delayslot);
//div32s
Emit(shop_xor, mk_reg(reg_sr_T), mk_reg(div_som_reg3), mk_reg(div_som_reg2)); // get quotient sign
Emit(shop_and, mk_reg(reg_sr_T), mk_reg(reg_sr_T), mk_imm(1 << 31)); // isolate sign bit
Emit(shop_div32s, mk_reg(div_som_reg1), mk_reg(div_som_reg1), mk_reg(div_som_reg2), 0, mk_reg(div_som_reg3), mk_reg(div_som_reg3));
Emit(shop_and, mk_reg(reg_sr_T), mk_reg(div_som_reg1), mk_imm((1 << 31) | 1)); // set lsb and sign of quotient in T
Emit(shop_and, mk_reg(reg_temp), mk_reg(div_som_reg1), mk_imm(1)); // set quotient lsb in temp reg
Emit(shop_sar, mk_reg(div_som_reg1), mk_reg(div_som_reg1), mk_imm(1)); // shift quotient right
Emit(shop_or, mk_reg(reg_sr_T), mk_reg(reg_sr_T), mk_reg(reg_temp)); // store quotient lsb in T
Emit(shop_div32p2, mk_reg(div_som_reg3), mk_reg(div_som_reg3), mk_reg(div_som_reg2), 0, mk_reg(reg_sr_T));
@ -922,29 +926,22 @@ static bool dec_generic(u32 op)
}
else
{
//sr.Q=r[n]>>31;
//sr.M=r[m]>>31;
//sr.T=sr.M^sr.Q;
//This is nasty because there isn't a temp reg ..
//VERY NASTY
//Clear Q & M
Emit(shop_and,mk_reg(reg_sr_status),mk_reg(reg_sr_status),mk_imm(~((1<<8)|(1<<9))));
Emit(shop_and, mk_reg(reg_sr_status), mk_reg(reg_sr_status), mk_imm(~((1 << 8) | (1 << 9))));
//sr.Q=r[n]>>31;
Emit(shop_sar,mk_reg(reg_sr_T),rs1,mk_imm(31));
Emit(shop_and,mk_reg(reg_sr_T),mk_reg(reg_sr_T),mk_imm(1<<8));
Emit(shop_or,mk_reg(reg_sr_status),mk_reg(reg_sr_status),mk_reg(reg_sr_T));
Emit(shop_sar, mk_reg(reg_sr_T),rs1,mk_imm(31));
Emit(shop_and, mk_reg(reg_sr_T), mk_reg(reg_sr_T), mk_imm(1 << 8));
Emit(shop_or, mk_reg(reg_sr_status), mk_reg(reg_sr_status), mk_reg(reg_sr_T));
//sr.M=r[m]>>31;
Emit(shop_sar,mk_reg(reg_sr_T),rs2,mk_imm(31));
Emit(shop_and,mk_reg(reg_sr_T),mk_reg(reg_sr_T),mk_imm(1<<9));
Emit(shop_or,mk_reg(reg_sr_status),mk_reg(reg_sr_status),mk_reg(reg_sr_T));
Emit(shop_sar, mk_reg(reg_sr_T), rs2, mk_imm(31));
Emit(shop_and, mk_reg(reg_sr_T), mk_reg(reg_sr_T), mk_imm(1 << 9));
Emit(shop_or, mk_reg(reg_sr_status), mk_reg(reg_sr_status), mk_reg(reg_sr_T));
//sr.T=sr.M^sr.Q;
Emit(shop_xor,mk_reg(reg_sr_T),rs1,rs2);
Emit(shop_shr,mk_reg(reg_sr_T),mk_reg(reg_sr_T),mk_imm(31));
Emit(shop_xor, mk_reg(reg_sr_T), rs1, rs2);
Emit(shop_shr, mk_reg(reg_sr_T), mk_reg(reg_sr_T), mk_imm(31));
}
}
}

View File

@ -609,28 +609,36 @@ shil_opc_end()
shil_opc(div32s)
shil_canonical
(
u64,f1,(u32 r1, u32 r2, s32 r3),
u64,f1,(u32 r1, s32 r2, s32 r3),
s64 dividend = ((s64)r3 << 32) | r1;
bool negative;
if (dividend < 0)
if ((r3 ^ r2) & 0x80000000)
{
// 1's complement -> 2's complement
dividend++;
if (dividend < 0)
dividend++;
negative = true;
}
else
{
negative = false;
}
s32 quo = dividend / r2;
s32 rem = dividend % r2;
s32 quo = (s32)(dividend / r2);
s32 rem = dividend - quo * r2;
// 2's complement -> 1's complement
if (negative)
{
if (quo <= 0)
quo--;
if (rem <= 0)
if (rem < 0 || (rem == 0 && r2 > 0))
rem--;
quo--;
}
else
{
// edge case
if (rem == 0 && dividend < 0 && !(quo & 1))
{
quo--;
rem += r2;
}
}
u64 rv;
@ -655,19 +663,17 @@ shil_opc(div32p2)
shil_canonical
(
u32,f1,(s32 a,s32 b,u32 T),
a += b * (((T >> 31) ^ ~T) & 1) * (2 * (T & 1) - 1);
// This is equivalent to this:
// (the sign of the quotient is stored in bit 31 of T)
// if (quo >= 0)
// {
// if (!T)
// rem -= divisor;
// }
// else
// {
// if (T)
// rem += divisor;
// }
// the sign of the quotient is stored in bit 31 of T
if (!(T & 0x80000000))
{
if (!(T & 1))
a -= b;
}
else
{
if (T & 1)
a += b;
}
return a;
)

View File

@ -239,6 +239,9 @@ static u32* Sh4_int_GetRegisterPtr(Sh4RegType reg)
case reg_pc_dyn:
return &Sh4cntx.jdyn;
case reg_temp:
return &Sh4cntx.temp_reg;
default:
ERROR_LOG(SH4, "Unknown register ID %d", reg);
die("Invalid reg");

View File

@ -88,6 +88,7 @@ enum Sh4RegType
reg_fpscr,
reg_pc_dyn, //Write only, for dynarec only (dynamic block exit address)
reg_temp,
sh4_reg_count,
@ -286,6 +287,7 @@ struct Sh4Context
u32 interrupt_pend;
u32 exception_pc;
u32 temp_reg;
};
u64 raw[64-8];
};

View File

@ -22,6 +22,7 @@
#include "hw/sh4/sh4_if.h"
#include "hw/pvr/Renderer_if.h"
#include "hw/pvr/spg.h"
#include "hw/aica/aica_if.h"
#include "hw/aica/dsp.h"
#include "imgread/common.h"
#include "rend/gui.h"
@ -155,41 +156,6 @@ static void LoadSpecialSettings()
settings.rend.TranslucentPolygonDepthMask = 1;
tr_poly_depth_mask_game = true;
}
// Demolition Racer
if (!strncmp("T15112N", prod_id, 7)
// Ducati World - Racing Challenge (NTSC)
|| !strncmp("T-8113N", prod_id, 7)
// Ducati World (PAL)
|| !strncmp("T-8121D-50", prod_id, 10)
// Aqua GT
|| !strncmp("T40509D 50", prod_id, 10)
// Rayman 2 (NTSC)
|| !strncmp("17707N", prod_id, 6)
// Rayman 2 (PAL)
|| !strncmp("17707D", prod_id, 6)
// Elysion
|| !strncmp("T20116M", prod_id, 7)
// Silent Scope (NTSC)
|| !strncmp("T9507N", prod_id, 6)
// Silent Scope (PAL)
|| !strncmp("T9505D", prod_id, 6)
// Power Stone (US)
|| !strncmp("T1201N", prod_id, 6)
// Power Stone (JP)
|| !strncmp("T1201M", prod_id, 6)
// Power Stone (PAL)
|| !strncmp("T36801D 50", prod_id, 10)
// Metropolis Street Racer (NTSC)
|| !strncmp("MK-51012", prod_id, 8)
// Metropolis Street Racer (PAL)
|| !strncmp("MK-5102250", prod_id, 10)
// Donald Duck Goin' Quackers
|| !strncmp("T17714D50", prod_id, 9))
{
INFO_LOG(BOOT, "Enabling Dynarec safe mode for game %s", prod_id);
settings.dynarec.safemode = 1;
safemode_game = true;
}
// NHL 2K2
if (!strncmp("MK-51182", prod_id, 8))
{
@ -307,16 +273,6 @@ static void LoadSpecialSettings()
else if (settings.platform.system == DC_PLATFORM_NAOMI || settings.platform.system == DC_PLATFORM_ATOMISWAVE)
{
NOTICE_LOG(BOOT, "Game ID is [%s]", naomi_game_id);
if (!strcmp("METAL SLUG 6", naomi_game_id)
|| !strcmp("WAVE RUNNER GP", naomi_game_id)
|| !strcmp("STREET FIGHTER ZERO3 UPPER", naomi_game_id)
|| !strcmp("ALIEN FRONT", naomi_game_id))
{
INFO_LOG(BOOT, "Enabling Dynarec safe mode for game %s", naomi_game_id);
settings.dynarec.safemode = 1;
safemode_game = true;
}
if (!strcmp("SAMURAI SPIRITS 6", naomi_game_id))
{
INFO_LOG(BOOT, "Enabling Extra depth scaling for game %s", naomi_game_id);
@ -417,7 +373,8 @@ static void LoadSpecialSettings()
INFO_LOG(BOOT, "Enabling specific JVS setup for game %s", naomi_game_id);
settings.input.JammaSetup = JVS::DogWalking;
}
else if (!strcmp(" TOUCH DE UNOH -------------", naomi_game_id))
else if (!strcmp(" TOUCH DE UNOH -------------", naomi_game_id)
|| !strcmp("POKASUKA GHOST (JAPANESE)", naomi_game_id))
{
INFO_LOG(BOOT, "Enabling specific JVS setup for game %s", naomi_game_id);
settings.input.JammaSetup = JVS::TouchDeUno;
@ -819,12 +776,12 @@ void InitSettings()
settings.network.server = "";
#if SUPPORT_DISPMANX
settings.dispmanx.Width = 640;
settings.dispmanx.Height = 480;
settings.dispmanx.Width = 0;
settings.dispmanx.Height = 0;
settings.dispmanx.Keep_Aspect = true;
#endif
#if defined(__ANDROID__) || defined(TARGET_PANDORA)
#if HOST_CPU == CPU_ARM
settings.aica.BufferSize = 5644; // 128 ms
#else
settings.aica.BufferSize = 2822; // 64 ms

View File

@ -571,19 +571,31 @@ static bool dc_unserialize_libretro(void **data, unsigned int *total_size)
REICAST_US(SB_FFST_rc);
REICAST_US(SB_FFST);
REICAST_US(i); //LIBRETRO_S(sys_nvmem_sram.size);
verify(i == 0);
REICAST_US(i); //LIBRETRO_S(sys_nvmem_sram.mask);
//LIBRETRO_SA(sys_nvmem_sram.data,sys_nvmem_sram.size);
REICAST_US(sys_nvmem->size);
REICAST_US(sys_nvmem->mask);
if (settings.platform.system == DC_PLATFORM_DREAMCAST)
REICAST_US(static_cast<DCFlashChip*>(sys_nvmem)->state);
if (settings.platform.system == DC_PLATFORM_NAOMI)
{
REICAST_US(sys_nvmem->size); // Naomi
REICAST_US(sys_nvmem->mask);
REICAST_USA(sys_nvmem->data, sys_nvmem->size);
}
else
// FIXME
die("Naomi/Atomiswave libretro savestates are not supported");
REICAST_USA(sys_nvmem->data, sys_nvmem->size);
{
REICAST_US(i);
REICAST_US(i);
}
if (settings.platform.system == DC_PLATFORM_DREAMCAST)
{
REICAST_US(sys_nvmem->size);
REICAST_US(sys_nvmem->mask);
REICAST_US(static_cast<DCFlashChip*>(sys_nvmem)->state);
REICAST_USA(sys_nvmem->data, sys_nvmem->size);
}
else
{
REICAST_US(i);
REICAST_US(i);
REICAST_US(i);
}
REICAST_US(GD_HardwareInfo);

196
tests/src/div32_test.cpp Normal file
View File

@ -0,0 +1,196 @@
#include "gtest/gtest.h"
#include "types.h"
#include "hw/mem/_vmem.h"
#include "emulator.h"
#include "hw/sh4/sh4_core.h"
#include "hw/sh4/dyna/shil.h"
#define SHIL_MODE 2
#include "hw/sh4/dyna/shil_canonical.h"
// Step-by-step ("slow") signed division used as the reference result that
// div32s_fast is compared against.
// Seeds sr.Q, sr.M and sr.T from the top bits of r3 and r2, then performs 32
// rounds, each made of one rocl step on r1 followed by one DIV1 step on r3.
// r1 and r3 are updated in place; r2 is only read. sr.Q, sr.M and sr.T are
// left holding the state produced by the last round.
// NOTE(review): presumably r3:r1 is the 64-bit dividend and r2 the divisor,
// mirroring the SH4 DIV0S + 32 x (ROTCL, DIV1) idiom -- confirm.
void div32s_slow(u32& r1, u32 r2, u32& r3)
{
// Q = top bit of r3, M = top bit of r2, T = Q ^ M
sr.Q = r3 >> 31;
sr.M = r2 >> 31;
sr.T = sr.Q ^ sr.M;
for (int i = 0; i < 32; i++)
{
// rocl step: the helper's 64-bit result carries the new r1 in its low
// 32 bits and the new T in the bits above
u64 rv = shil_opcl_rocl::f1::impl(r1, sr.T);
r1 = (u32)rv;
sr.T = rv >> 32;
// DIV1 step
unsigned char old_q = sr.Q;
// Q takes the bit that is about to be shifted out of r3
sr.Q = (u8)((0x80000000 & r3) !=0);
// shift r3 left by one and bring T in as the new low bit
r3 <<= 1;
r3 |= (unsigned long)sr.T;
// remember r3 before the add/subtract for the comparison below
u32 tmp0 = r3;
// adds r2 when old_q differs from M, subtracts r2 when they are equal
r3 += (2 * (old_q ^ sr.M) - 1) * r2;
// Q = Q ^ old_q ^ (unsigned comparison of the updated r3 with tmp0:
// strict '>' when M is set, '>=' otherwise)
sr.Q ^= old_q ^ (sr.M ? r3 > tmp0 : r3 >= tmp0);
// T is set when Q equals M
sr.T = (sr.Q == sr.M);
}
}
// Signed division through the canonical div32s / div32p2 implementations,
// sequenced the same way the decoder emits them: quotient sign into bit 31
// of T, div32s, quotient lsb into T and arithmetic shift right of the
// quotient, div32p2 on the remainder, and finally T reduced to its low bit.
// r1 and r3 are updated in place; r2 is only read.
void div32s_fast(u32& r1, u32 r2, u32& r3)
{
	// bit 31 of T = sign of the quotient (sign of r2 XOR sign of r3)
	sr.T = (r2 ^ r3) & 0x80000000;

	// div32s packs the quotient in the low word and the remainder in the high word
	const u64 packed = shil_opcl_div32s::f1::impl(r1, r2, r3);
	const u32 quotient = static_cast<u32>(packed);
	const u32 remainder = static_cast<u32>(packed >> 32);

	// quotient lsb goes to T, the remaining bits stay in r1
	sr.T |= quotient & 1;
	r1 = static_cast<s32>(quotient) >> 1;

	// remainder adjustment driven by the sign and lsb stored in T
	r3 = shil_opcl_div32p2::f1::impl(remainder, r2, sr.T);

	// drop the sign marker, leaving only the quotient lsb
	sr.T &= 1;
}
// Unsigned division through the canonical div32u / div32p2 implementations:
// div32u, quotient lsb into T and logical shift right of the quotient, then
// div32p2 on the remainder.
// r1 and r3 are updated in place; r2 is only read.
void div32u_fast(u32& r1, u32 r2, u32& r3)
{
	// div32u packs the quotient in the low word and the remainder in the high word
	const u64 packed = shil_opcl_div32u::f1::impl(r1, r2, r3);
	const u32 quotient = static_cast<u32>(packed);
	const u32 remainder = static_cast<u32>(packed >> 32);

	// quotient lsb goes to T, the remaining bits stay in r1
	sr.T = quotient & 1;
	r1 = quotient >> 1;

	// remainder adjustment driven by T
	r3 = shil_opcl_div32p2::f1::impl(remainder, r2, sr.T);
}
// Step-by-step ("slow") unsigned division used as the reference result that
// div32u_fast is compared against.
// Clears sr.Q, sr.M and sr.T, then performs 32 rounds, each made of one rocl
// step on r1 followed by one DIV1 step on r3.
// r1 and r3 are updated in place; r2 is only read. sr.Q, sr.M and sr.T are
// left holding the state produced by the last round.
// NOTE(review): presumably r3:r1 is the 64-bit dividend and r2 the divisor,
// mirroring the SH4 DIV0U + 32 x (ROTCL, DIV1) idiom -- confirm.
void div32u_slow(u32& r1, u32 r2, u32& r3)
{
sr.Q = 0;
sr.M = 0;
sr.T = 0;
for (int i = 0; i < 32; i++)
{
// rocl step: the helper's 64-bit result carries the new r1 in its low
// 32 bits and the new T in the bits above
u64 rv = shil_opcl_rocl::f1::impl(r1, sr.T);
r1 = (u32)rv;
sr.T = rv >> 32;
// DIV1 step
unsigned char old_q = sr.Q;
// Q takes the bit that is about to be shifted out of r3
sr.Q = (u8)((0x80000000 & r3) !=0);
// shift r3 left by one and bring T in as the new low bit
r3 <<= 1;
r3 |= (unsigned long)sr.T;
// remember r3 before the add/subtract for the comparison below
u32 tmp0 = r3;
// adds r2 when old_q differs from M, subtracts r2 when they are equal
r3 += (2 * (old_q ^ sr.M) - 1) * r2;
// Q = Q ^ old_q ^ (unsigned comparison of the updated r3 with tmp0:
// strict '>' when M is set, '>=' otherwise)
sr.Q ^= old_q ^ (sr.M ? r3 > tmp0 : r3 >= tmp0);
// T is set when Q equals M
sr.T = (sr.Q == sr.M);
}
}
// Fixture that checks the fast canonical div32 implementations against the
// step-by-step reference implementations.
class Div32Test : public ::testing::Test {
protected:
	// Runs before each test: calls _vmem_reserve() (aborting through die() if
	// it fails), then dc_init() and dc_reset(true).
	void SetUp() override {
		if (!_vmem_reserve())
			die("_vmem_reserve failed");
		dc_init();
		dc_reset(true);
	}

	// Signed case. n1 is the low word and n3 the high word of the 64-bit
	// dividend, n2 is the divisor. Both implementations start from the same
	// inputs and must leave identical values in r1 and r3.
	void div32s(u32 n1, u32 n2, u32 n3)
	{
		u32 r1s = n1;
		u32 r3s = n3;
		u32 r1f = n1;
		u32 r3f = n3;
		div32s_slow(r1s, n2, r3s);
		div32s_fast(r1f, n2, r3f);
		// compare as signed values so a failure prints readable numbers
		ASSERT_EQ((s32)r1s, (s32)r1f);
		ASSERT_EQ((s32)r3s, (s32)r3f);
	}

	// Unsigned case. Same argument layout as div32s. Both the quotient
	// register (r1) and the remainder register (r3) must match; checking only
	// r3 would let a wrong quotient from the fast path go unnoticed.
	void div32u(u32 n1, u32 n2, u32 n3)
	{
		u32 r1s = n1;
		u32 r3s = n3;
		u32 r1f = n1;
		u32 r3f = n3;
		div32u_slow(r1s, n2, r3s);
		div32u_fast(r1f, n2, r3f);
		ASSERT_EQ(r1s, r1f);
		ASSERT_EQ(r3s, r3f);
	}
};
// Signed division cases: each row is passed to the div32s() helper as
// (n1, n2, n3), in the order listed.
TEST_F(Div32Test, Div32sTest)
{
	static const struct {
		int n1;
		int n2;
		int n3;
	} kCases[] = {
		{ 0, 1, 0 },
		{ 1, 1, 0 },
		{ 2, 1, 0 },
		{ 4, 2, 0 },
		{ 5, 2, 0 },
		{ 1000, 100, 0 },
		{ 1001, 100, 0 },
		{ 1099, 100, 0 },
		{ 1100, 100, 0 },
		{ 37, 5, 0 },
		{ -37, 5, -1 },
		{ -37, -5, -1 },
		{ 37, -5, 0 },
		{ 42, 5, 0 },
		{ 42, -5, 0 },
		{ -42, 5, -1 },
		{ -42, -5, -1 },
		{ 5, 7, 0 },
		{ 5, -7, 0 },
		{ -5, 7, -1 },
		{ -5, -7, -1 },
		{ -1846, -1643, -1 },
		{ -496138, -1042, -1 },
		{ -416263, -1037, -1 },
		{ -270831, -13276, -1 },
		{ -3338802, -7266, -1 },
		{ -3106, -354865, -1 },
		{ -4446, -4095, -1 },
		{ -64, -8, -1 },
		{ -72, -8, -1 },
		{ 217781009, -45, 0 },
		{ 1858552, -8, 0 },
		{ 64, -8, 0 },
		{ -64, 8, -1 },
		{ -9415081, 130765, -1 },
		{ -3639715, 78, -1 },
		{ -11361399, 107183, -1 },
	};

	for (const auto& c : kCases)
		div32s(c.n1, c.n2, c.n3);
}
// Unsigned division cases: each row is passed to the div32u() helper as
// (n1, n2, n3), in the order listed.
TEST_F(Div32Test, Div32uTest)
{
	static const struct {
		u32 n1;
		u32 n2;
		u32 n3;
	} kCases[] = {
		{ 0, 1, 0 },
		{ 1, 1, 0 },
		{ 2, 1, 0 },
		{ 4, 2, 0 },
		{ 5, 2, 0 },
		{ 1000, 100, 0 },
		{ 1001, 100, 0 },
		{ 1099, 100, 0 },
		{ 1100, 100, 0 },
		{ 1964671145, 123383161, 0 },
		{ 1867228769, 653467280, 0 },
		{ 1523687288, 32181601, 0 },
		{ 3499805483, 1401792939, 29611 },
	};

	for (const auto& c : kCases)
		div32u(c.n1, c.n2, c.n3);
}