mirror of https://github.com/PCSX2/pcsx2.git
IPU: MultiISA IPU
This commit is contained in:
parent
44e69a9603
commit
1a383de5c4
|
@ -971,6 +971,31 @@ else()
|
|||
)
|
||||
endif()
|
||||
|
||||
# IPU sources that are listed unconditionally in pcsx2IPUSources.
# The ISA-specific IPU files live in pcsx2IPUSourcesUnshared instead; the else()
# branch further down appends that list to this one.
|
||||
set(pcsx2IPUSources
|
||||
IPU/IPU.cpp
|
||||
IPU/IPU_Fifo.cpp
|
||||
IPU/IPUdma.cpp
|
||||
)
|
||||
|
||||
# IPU sources that are compiled per instruction set.
# In the visible build logic this list is consumed in one of two ways:
#   - inside the foreach() over "sse4" "avx" "avx2" it is added, together with
#     pcsx2GSSourcesUnshared, to each GS-${isa} static library;
#   - in the else() branch it is appended to pcsx2IPUSources.
set(pcsx2IPUSourcesUnshared
|
||||
IPU/IPU_MultiISA.cpp
|
||||
IPU/IPUdither.cpp
|
||||
IPU/mpeg2lib/Idct.cpp
|
||||
IPU/mpeg2lib/Mpeg.cpp
|
||||
IPU/yuv2rgb.cpp
|
||||
)
|
||||
|
||||
# IPU headers (includes IPU_MultiISA.h, the header paired with IPU/IPU_MultiISA.cpp).
|
||||
set(pcsx2IPUHeaders
|
||||
IPU/IPU.h
|
||||
|
||||
IPU/IPU_Fifo.h
|
||||
IPU/IPU_MultiISA.h
|
||||
IPU/IPUdma.h
|
||||
IPU/mpeg2lib/Mpeg.h
|
||||
IPU/mpeg2lib/Vlc.h
|
||||
IPU/yuv2rgb.h
|
||||
)
|
||||
|
||||
if(DISABLE_ADVANCE_SIMD)
|
||||
target_compile_definitions(PCSX2 PRIVATE MULTI_ISA_SHARED_COMPILATION)
|
||||
|
@ -997,7 +1022,7 @@ if(DISABLE_ADVANCE_SIMD)
|
|||
# Note: ld64 (macOS's linker) does not act the same way when presented with .a files, unless linked with `-force_load` (cmake WHOLE_ARCHIVE).
|
||||
set(is_first_isa "1")
|
||||
foreach(isa "sse4" "avx" "avx2")
|
||||
add_library(GS-${isa} STATIC ${pcsx2GSSourcesUnshared})
|
||||
add_library(GS-${isa} STATIC ${pcsx2GSSourcesUnshared} ${pcsx2IPUSourcesUnshared})
|
||||
target_link_libraries(GS-${isa} PRIVATE PCSX2_FLAGS)
|
||||
target_compile_definitions(GS-${isa} PRIVATE MULTI_ISA_UNSHARED_COMPILATION=isa_${isa} MULTI_ISA_IS_FIRST=${is_first_isa} ${pcsx2_defs_${isa}})
|
||||
target_compile_options(GS-${isa} PRIVATE ${compile_options_${isa}})
|
||||
|
@ -1012,6 +1037,7 @@ if(DISABLE_ADVANCE_SIMD)
|
|||
endforeach()
|
||||
else()
|
||||
list(APPEND pcsx2GSSources ${pcsx2GSSourcesUnshared})
|
||||
list(APPEND pcsx2IPUSources ${pcsx2IPUSourcesUnshared})
|
||||
endif()
|
||||
|
||||
# DebugTools sources
|
||||
|
@ -1341,26 +1367,6 @@ set(pcsx2GuiResources
|
|||
${res_bin}/Breakpoint_Inactive.h
|
||||
)
|
||||
|
||||
# IPU sources
|
||||
set(pcsx2IPUSources
|
||||
IPU/IPU.cpp
|
||||
IPU/IPU_Fifo.cpp
|
||||
IPU/IPUdither.cpp
|
||||
IPU/IPUdma.cpp
|
||||
IPU/mpeg2lib/Idct.cpp
|
||||
IPU/mpeg2lib/Mpeg.cpp
|
||||
IPU/yuv2rgb.cpp)
|
||||
|
||||
# IPU headers
|
||||
set(pcsx2IPUHeaders
|
||||
IPU/IPUdma.h
|
||||
IPU/IPU_Fifo.h
|
||||
IPU/IPU.h
|
||||
IPU/mpeg2lib/Mpeg.h
|
||||
IPU/mpeg2lib/Vlc.h
|
||||
IPU/yuv2rgb.h
|
||||
)
|
||||
|
||||
# Linux sources
|
||||
set(pcsx2LinuxSources
|
||||
CDVD/Linux/DriveUtility.cpp
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include "common/emitter/tools.h"
|
||||
#include "common/General.h"
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
class SettingsInterface;
|
||||
class SettingsWrapper;
|
||||
|
|
|
@ -17,8 +17,8 @@
|
|||
#include "Common.h"
|
||||
|
||||
#include "IPU.h"
|
||||
#include "IPU_MultiISA.h"
|
||||
#include "IPUdma.h"
|
||||
#include "yuv2rgb.h"
|
||||
#include "mpeg2lib/Mpeg.h"
|
||||
|
||||
#include <limits.h>
|
||||
|
@ -31,7 +31,7 @@ alignas(16) tIPU_cmd ipu_cmd;
|
|||
alignas(16) tIPU_BP g_BP;
|
||||
alignas(16) decoder_t decoder;
|
||||
|
||||
void IPUWorker();
|
||||
static void (*IPUWorker)();
|
||||
|
||||
// Color conversion stuff, the memory layout is a total hack
|
||||
// convert_data_buffer is a pointer to the internal rgb struct (the first param in convert_init_t)
|
||||
|
@ -40,11 +40,11 @@ void IPUWorker();
|
|||
//u8 PCT[] = {'r', 'I', 'P', 'B', 'D', '-', '-', '-'}; // unused?
|
||||
|
||||
// Quantization matrix
|
||||
static rgb16_t vqclut[16]; //clut conversion table
|
||||
static u16 s_thresh[2]; //thresholds for color conversions
|
||||
rgb16_t g_ipu_vqclut[16]; //clut conversion table
|
||||
u16 g_ipu_thresh[2]; //thresholds for color conversions
|
||||
int coded_block_pattern = 0;
|
||||
|
||||
alignas(16) static u8 indx4[16*16/2];
|
||||
alignas(16) u8 g_ipu_indx4[16*16/2];
|
||||
|
||||
uint eecount_on_last_vdec = 0;
|
||||
bool FMVstarted = false;
|
||||
|
@ -67,6 +67,7 @@ __fi void IPUProcessInterrupt()
|
|||
|
||||
void ipuReset()
|
||||
{
|
||||
IPUWorker = MULTI_ISA_SELECT(IPUWorker);
|
||||
memzero(ipuRegs);
|
||||
memzero(g_BP);
|
||||
memzero(decoder);
|
||||
|
@ -84,8 +85,8 @@ void ReportIPU()
|
|||
Console.WriteLn(ipu_fifo.in.desc());
|
||||
Console.WriteLn(ipu_fifo.out.desc());
|
||||
Console.WriteLn(g_BP.desc());
|
||||
Console.WriteLn("vqclut = 0x%x.", vqclut);
|
||||
Console.WriteLn("s_thresh = 0x%x.", s_thresh);
|
||||
Console.WriteLn("vqclut = 0x%x.", g_ipu_vqclut);
|
||||
Console.WriteLn("thresh = 0x%x.", g_ipu_thresh);
|
||||
Console.WriteLn("coded_block_pattern = 0x%x.", coded_block_pattern);
|
||||
Console.WriteLn("g_decoder = 0x%x.", &decoder);
|
||||
Console.WriteLn("mpeg2_scan = 0x%x.", &mpeg2_scan);
|
||||
|
@ -101,8 +102,8 @@ void SaveStateBase::ipuFreeze()
|
|||
Freeze(ipu_fifo);
|
||||
|
||||
Freeze(g_BP);
|
||||
Freeze(vqclut);
|
||||
Freeze(s_thresh);
|
||||
Freeze(g_ipu_vqclut);
|
||||
Freeze(g_ipu_thresh);
|
||||
Freeze(coded_block_pattern);
|
||||
Freeze(decoder);
|
||||
Freeze(ipu_cmd);
|
||||
|
@ -408,305 +409,13 @@ static __ri void ipuBDEC(tIPU_CMD_BDEC bdec)
|
|||
memzero_sse_a(decoder.mb16);
|
||||
}
|
||||
|
||||
static __fi bool ipuVDEC(u32 val)
|
||||
{
|
||||
static int count = 0;
|
||||
if (count++ > 5) {
|
||||
if (!FMVstarted) {
|
||||
EnableFMV = true;
|
||||
FMVstarted = true;
|
||||
}
|
||||
count = 0;
|
||||
}
|
||||
eecount_on_last_vdec = cpuRegs.cycle;
|
||||
|
||||
switch (ipu_cmd.pos[0])
|
||||
{
|
||||
case 0:
|
||||
if (!bitstream_init()) return false;
|
||||
|
||||
switch ((val >> 26) & 3)
|
||||
{
|
||||
case 0://Macroblock Address Increment
|
||||
decoder.mpeg1 = ipuRegs.ctrl.MP1;
|
||||
ipuRegs.cmd.DATA = get_macroblock_address_increment();
|
||||
break;
|
||||
|
||||
case 1://Macroblock Type
|
||||
decoder.frame_pred_frame_dct = 1;
|
||||
decoder.coding_type = ipuRegs.ctrl.PCT > 0 ? ipuRegs.ctrl.PCT : 1; // Kaiketsu Zorro Mezase doesn't set a Picture type, seems happy with I
|
||||
ipuRegs.cmd.DATA = get_macroblock_modes();
|
||||
break;
|
||||
|
||||
case 2://Motion Code
|
||||
ipuRegs.cmd.DATA = get_motion_delta(0);
|
||||
break;
|
||||
|
||||
case 3://DMVector
|
||||
ipuRegs.cmd.DATA = get_dmv();
|
||||
break;
|
||||
|
||||
jNO_DEFAULT
|
||||
}
|
||||
|
||||
// HACK ATTACK! This code OR's the MPEG decoder's bitstream position into the upper
|
||||
// 16 bits of DATA; which really doesn't make sense since (a) we already rewound the bits
|
||||
// back into the IPU internal buffer above, and (b) the IPU doesn't have an MPEG internal
|
||||
// 32-bit decoder buffer of its own anyway. Furthermore, setting the upper 16 bits to
|
||||
// any value other than zero appears to work fine. When set to zero, however, FMVs run
|
||||
// very choppy (basically only decoding/updating every 30th frame or so). So yeah,
|
||||
// someone with knowledge on the subject please feel free to explain this one. :) --air
|
||||
|
||||
// The upper bits are the "length" of the decoded command, where the lower is the address.
|
||||
// This is due to differences with IPU and the MPEG standard. See get_macroblock_address_increment().
|
||||
|
||||
ipuRegs.ctrl.ECD = (ipuRegs.cmd.DATA == 0);
|
||||
[[fallthrough]];
|
||||
|
||||
case 1:
|
||||
if (!getBits32((u8*)&ipuRegs.top, 0))
|
||||
{
|
||||
ipu_cmd.pos[0] = 1;
|
||||
return false;
|
||||
}
|
||||
|
||||
ipuRegs.top = BigEndian(ipuRegs.top);
|
||||
|
||||
IPU_LOG("VDEC command data 0x%x(0x%x). Skip 0x%X bits/Table=%d (%s), pct %d",
|
||||
ipuRegs.cmd.DATA, ipuRegs.cmd.DATA >> 16, val & 0x3f, (val >> 26) & 3, (val >> 26) & 1 ?
|
||||
((val >> 26) & 2 ? "DMV" : "MBT") : (((val >> 26) & 2 ? "MC" : "MBAI")), ipuRegs.ctrl.PCT);
|
||||
|
||||
return true;
|
||||
|
||||
jNO_DEFAULT
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static __ri bool ipuFDEC(u32 val)
|
||||
{
|
||||
if (!getBits32((u8*)&ipuRegs.cmd.DATA, 0)) return false;
|
||||
|
||||
ipuRegs.cmd.DATA = BigEndian(ipuRegs.cmd.DATA);
|
||||
ipuRegs.top = ipuRegs.cmd.DATA;
|
||||
|
||||
IPU_LOG("FDEC read: 0x%08x", ipuRegs.top);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool ipuSETIQ(u32 val)
|
||||
{
|
||||
if ((val >> 27) & 1)
|
||||
{
|
||||
u8 (&niq)[64] = decoder.niq;
|
||||
|
||||
for(;ipu_cmd.pos[0] < 8; ipu_cmd.pos[0]++)
|
||||
{
|
||||
if (!getBits64((u8*)niq + 8 * ipu_cmd.pos[0], 1)) return false;
|
||||
}
|
||||
|
||||
IPU_LOG("Read non-intra quantization matrix from FIFO.");
|
||||
for (uint i = 0; i < 8; i++)
|
||||
{
|
||||
IPU_LOG("%02X %02X %02X %02X %02X %02X %02X %02X",
|
||||
niq[i * 8 + 0], niq[i * 8 + 1], niq[i * 8 + 2], niq[i * 8 + 3],
|
||||
niq[i * 8 + 4], niq[i * 8 + 5], niq[i * 8 + 6], niq[i * 8 + 7]);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
u8 (&iq)[64] = decoder.iq;
|
||||
|
||||
for(;ipu_cmd.pos[0] < 8; ipu_cmd.pos[0]++)
|
||||
{
|
||||
if (!getBits64((u8*)iq + 8 * ipu_cmd.pos[0], 1)) return false;
|
||||
}
|
||||
|
||||
IPU_LOG("Read intra quantization matrix from FIFO.");
|
||||
for (uint i = 0; i < 8; i++)
|
||||
{
|
||||
IPU_LOG("%02X %02X %02X %02X %02X %02X %02X %02X",
|
||||
iq[i * 8 + 0], iq[i * 8 + 1], iq[i * 8 + 2], iq[i *8 + 3],
|
||||
iq[i * 8 + 4], iq[i * 8 + 5], iq[i * 8 + 6], iq[i *8 + 7]);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool ipuSETVQ(u32 val)
|
||||
{
|
||||
for(;ipu_cmd.pos[0] < 4; ipu_cmd.pos[0]++)
|
||||
{
|
||||
if (!getBits64(((u8*)vqclut) + 8 * ipu_cmd.pos[0], 1)) return false;
|
||||
}
|
||||
|
||||
IPU_LOG("SETVQ command. Read VQCLUT table from FIFO.\n"
|
||||
"%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d\n"
|
||||
"%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d\n"
|
||||
"%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d\n"
|
||||
"%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d",
|
||||
vqclut[0].r, vqclut[0].g, vqclut[0].b,
|
||||
vqclut[1].r, vqclut[1].g, vqclut[1].b,
|
||||
vqclut[2].r, vqclut[2].g, vqclut[2].b,
|
||||
vqclut[3].r, vqclut[3].g, vqclut[3].b,
|
||||
vqclut[4].r, vqclut[4].g, vqclut[4].b,
|
||||
vqclut[5].r, vqclut[5].g, vqclut[5].b,
|
||||
vqclut[6].r, vqclut[6].g, vqclut[6].b,
|
||||
vqclut[7].r, vqclut[7].g, vqclut[7].b,
|
||||
vqclut[8].r, vqclut[8].g, vqclut[8].b,
|
||||
vqclut[9].r, vqclut[9].g, vqclut[9].b,
|
||||
vqclut[10].r, vqclut[10].g, vqclut[10].b,
|
||||
vqclut[11].r, vqclut[11].g, vqclut[11].b,
|
||||
vqclut[12].r, vqclut[12].g, vqclut[12].b,
|
||||
vqclut[13].r, vqclut[13].g, vqclut[13].b,
|
||||
vqclut[14].r, vqclut[14].g, vqclut[14].b,
|
||||
vqclut[15].r, vqclut[15].g, vqclut[15].b);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// IPU Transfers are split into 8Qwords so we need to send ALL the data
|
||||
static __ri bool ipuCSC(tIPU_CMD_CSC csc)
|
||||
{
|
||||
csc.log_from_YCbCr();
|
||||
|
||||
for (;ipu_cmd.index < (int)csc.MBC; ipu_cmd.index++)
|
||||
{
|
||||
for(;ipu_cmd.pos[0] < 48; ipu_cmd.pos[0]++)
|
||||
{
|
||||
if (!getBits64((u8*)&decoder.mb8 + 8 * ipu_cmd.pos[0], 1)) return false;
|
||||
}
|
||||
|
||||
ipu_csc(decoder.mb8, decoder.rgb32, 0);
|
||||
if (csc.OFM) ipu_dither(decoder.rgb32, decoder.rgb16, csc.DTE);
|
||||
|
||||
if (csc.OFM)
|
||||
{
|
||||
ipu_cmd.pos[1] += ipu_fifo.out.write(((u32*) & decoder.rgb16) + 4 * ipu_cmd.pos[1], 32 - ipu_cmd.pos[1]);
|
||||
if (ipu_cmd.pos[1] < 32) return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
ipu_cmd.pos[1] += ipu_fifo.out.write(((u32*) & decoder.rgb32) + 4 * ipu_cmd.pos[1], 64 - ipu_cmd.pos[1]);
|
||||
if (ipu_cmd.pos[1] < 64) return false;
|
||||
}
|
||||
|
||||
ipu_cmd.pos[0] = 0;
|
||||
ipu_cmd.pos[1] = 0;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static __ri bool ipuPACK(tIPU_CMD_CSC csc)
|
||||
{
|
||||
csc.log_from_RGB32();
|
||||
|
||||
for (;ipu_cmd.index < (int)csc.MBC; ipu_cmd.index++)
|
||||
{
|
||||
for(;ipu_cmd.pos[0] < (int)sizeof(macroblock_rgb32) / 8; ipu_cmd.pos[0]++)
|
||||
{
|
||||
if (!getBits64((u8*)&decoder.rgb32 + 8 * ipu_cmd.pos[0], 1)) return false;
|
||||
}
|
||||
|
||||
ipu_dither(decoder.rgb32, decoder.rgb16, csc.DTE);
|
||||
|
||||
if (!csc.OFM) ipu_vq(decoder.rgb16, indx4);
|
||||
|
||||
if (csc.OFM)
|
||||
{
|
||||
ipu_cmd.pos[1] += ipu_fifo.out.write(((u32*) & decoder.rgb16) + 4 * ipu_cmd.pos[1], 32 - ipu_cmd.pos[1]);
|
||||
if (ipu_cmd.pos[1] < 32) return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
ipu_cmd.pos[1] += ipu_fifo.out.write(((u32*)indx4) + 4 * ipu_cmd.pos[1], 8 - ipu_cmd.pos[1]);
|
||||
if (ipu_cmd.pos[1] < 8) return false;
|
||||
}
|
||||
|
||||
ipu_cmd.pos[0] = 0;
|
||||
ipu_cmd.pos[1] = 0;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void ipuSETTH(u32 val)
|
||||
{
|
||||
s_thresh[0] = (val & 0x1ff);
|
||||
s_thresh[1] = ((val >> 16) & 0x1ff);
|
||||
g_ipu_thresh[0] = (val & 0x1ff);
|
||||
g_ipu_thresh[1] = ((val >> 16) & 0x1ff);
|
||||
IPU_LOG("SETTH (Set threshold value)command %x.", val&0x1ff01ff);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
// CORE Functions (referenced from MPEG library)
|
||||
// --------------------------------------------------------------------------------------
|
||||
__fi void ipu_csc(macroblock_8& mb8, macroblock_rgb32& rgb32, int sgn)
|
||||
{
|
||||
int i;
|
||||
u8* p = (u8*)&rgb32;
|
||||
|
||||
yuv2rgb();
|
||||
|
||||
if (s_thresh[0] > 0)
|
||||
{
|
||||
for (i = 0; i < 16*16; i++, p += 4)
|
||||
{
|
||||
if ((p[0] < s_thresh[0]) && (p[1] < s_thresh[0]) && (p[2] < s_thresh[0]))
|
||||
*(u32*)p = 0;
|
||||
else if ((p[0] < s_thresh[1]) && (p[1] < s_thresh[1]) && (p[2] < s_thresh[1]))
|
||||
p[3] = 0x40;
|
||||
}
|
||||
}
|
||||
else if (s_thresh[1] > 0)
|
||||
{
|
||||
for (i = 0; i < 16*16; i++, p += 4)
|
||||
{
|
||||
if ((p[0] < s_thresh[1]) && (p[1] < s_thresh[1]) && (p[2] < s_thresh[1]))
|
||||
p[3] = 0x40;
|
||||
}
|
||||
}
|
||||
if (sgn)
|
||||
{
|
||||
for (i = 0; i < 16*16; i++, p += 4)
|
||||
{
|
||||
*(u32*)p ^= 0x808080;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__fi void ipu_vq(macroblock_rgb16& rgb16, u8* indx4)
|
||||
{
|
||||
const auto closest_index = [&](int i, int j) {
|
||||
u8 index = 0;
|
||||
int min_distance = std::numeric_limits<int>::max();
|
||||
for (u8 k = 0; k < 16; ++k)
|
||||
{
|
||||
const int dr = rgb16.c[i][j].r - vqclut[k].r;
|
||||
const int dg = rgb16.c[i][j].g - vqclut[k].g;
|
||||
const int db = rgb16.c[i][j].b - vqclut[k].b;
|
||||
const int distance = dr * dr + dg * dg + db * db;
|
||||
|
||||
// XXX: If two distances are the same which index is used?
|
||||
if (min_distance > distance)
|
||||
{
|
||||
index = k;
|
||||
min_distance = distance;
|
||||
}
|
||||
}
|
||||
|
||||
return index;
|
||||
};
|
||||
|
||||
for (int i = 0; i < 16; ++i)
|
||||
for (int j = 0; j < 8; ++j)
|
||||
indx4[i * 8 + j] = closest_index(i, 2 * j + 1) << 4 | closest_index(i, 2 * j);
|
||||
}
|
||||
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
// Buffer reader
|
||||
// --------------------------------------------------------------------------------------
|
||||
|
@ -902,7 +611,7 @@ __fi void IPUCMD_WRITE(u32 val)
|
|||
break;
|
||||
|
||||
jNO_DEFAULT;
|
||||
}
|
||||
}
|
||||
|
||||
ipuRegs.ctrl.BUSY = 1;
|
||||
|
||||
|
@ -916,71 +625,3 @@ __fi void IPUCMD_WRITE(u32 val)
|
|||
else
|
||||
IPUWorker();
|
||||
}
|
||||
|
||||
__noinline void IPUWorker()
|
||||
{
|
||||
pxAssert(ipuRegs.ctrl.BUSY);
|
||||
|
||||
switch (ipu_cmd.CMD)
|
||||
{
|
||||
// These are unreachable (BUSY will always be 0 for them)
|
||||
//case SCE_IPU_BCLR:
|
||||
//case SCE_IPU_SETTH:
|
||||
//break;
|
||||
|
||||
case SCE_IPU_IDEC:
|
||||
if (!mpeg2sliceIDEC()) return;
|
||||
|
||||
//ipuRegs.ctrl.OFC = 0;
|
||||
ipuRegs.topbusy = 0;
|
||||
ipuRegs.cmd.BUSY = 0;
|
||||
break;
|
||||
|
||||
case SCE_IPU_BDEC:
|
||||
if (!mpeg2_slice()) return;
|
||||
|
||||
ipuRegs.topbusy = 0;
|
||||
ipuRegs.cmd.BUSY = 0;
|
||||
|
||||
//if (ipuRegs.ctrl.SCD || ipuRegs.ctrl.ECD) hwIntcIrq(INTC_IPU);
|
||||
break;
|
||||
|
||||
case SCE_IPU_VDEC:
|
||||
if (!ipuVDEC(ipu_cmd.current)) return;
|
||||
|
||||
ipuRegs.topbusy = 0;
|
||||
ipuRegs.cmd.BUSY = 0;
|
||||
break;
|
||||
|
||||
case SCE_IPU_FDEC:
|
||||
if (!ipuFDEC(ipu_cmd.current)) return;
|
||||
|
||||
ipuRegs.topbusy = 0;
|
||||
ipuRegs.cmd.BUSY = 0;
|
||||
break;
|
||||
|
||||
case SCE_IPU_SETIQ:
|
||||
if (!ipuSETIQ(ipu_cmd.current)) return;
|
||||
break;
|
||||
|
||||
case SCE_IPU_SETVQ:
|
||||
if (!ipuSETVQ(ipu_cmd.current)) return;
|
||||
break;
|
||||
|
||||
case SCE_IPU_CSC:
|
||||
if (!ipuCSC(ipu_cmd.current)) return;
|
||||
break;
|
||||
|
||||
case SCE_IPU_PACK:
|
||||
if (!ipuPACK(ipu_cmd.current)) return;
|
||||
break;
|
||||
|
||||
jNO_DEFAULT
|
||||
}
|
||||
|
||||
// success
|
||||
IPU_LOG("IPU Command finished");
|
||||
ipuRegs.ctrl.BUSY = 0;
|
||||
//ipu_cmd.current = 0xffffffff;
|
||||
hwIntcIrq(INTC_IPU);
|
||||
}
|
||||
|
|
|
@ -288,7 +288,11 @@ union tIPU_cmd
|
|||
|
||||
static IPUregisters& ipuRegs = (IPUregisters&)eeHw[0x2000];
|
||||
|
||||
extern bool FMVstarted;
|
||||
extern bool EnableFMV;
|
||||
|
||||
alignas(16) extern tIPU_cmd ipu_cmd;
|
||||
extern uint eecount_on_last_vdec;
|
||||
extern int coded_block_pattern;
|
||||
extern bool CommandExecuteQueued;
|
||||
|
||||
|
|
|
@ -0,0 +1,387 @@
|
|||
/* PCSX2 - PS2 Emulator for PCs
|
||||
* Copyright (C) 2002-2022 PCSX2 Dev Team
|
||||
*
|
||||
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
|
||||
* of the GNU Lesser General Public License as published by the Free Software Found-
|
||||
* ation, either version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
||||
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
* PURPOSE. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with PCSX2.
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "IPU_MultiISA.h"
|
||||
|
||||
#include "IPU.h"
|
||||
#include "IPUdma.h"
|
||||
#include "yuv2rgb.h"
|
||||
|
||||
MULTI_ISA_UNSHARED_START
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// IPU Commands (exec on worker thread only)
|
||||
|
||||
// VDEC command: decodes one variable-length symbol from the bitstream into
// ipuRegs.cmd.DATA and then refreshes ipuRegs.top with the next 32 bits.
//
// Bits 26-27 of `val` choose what is decoded:
//   0 = macroblock address increment, 1 = macroblock type,
//   2 = motion code,                  3 = DMVector.
//
// Returns true when the command completed. Returns false when it has to be
// resumed later; ipu_cmd.pos[0] tells the next call which stage to restart at
// (0 = decode the symbol, 1 = only the trailing 32-bit fetch is outstanding).
static __fi bool ipuVDEC(u32 val)
{
	// FMV detection: once more than six calls have piled up, flag FMV playback
	// (only the first time) and restart the tally.
	static int s_vdec_calls = 0;
	const bool tally_exceeded = (s_vdec_calls++ > 5);
	if (tally_exceeded)
	{
		if (!FMVstarted)
		{
			EnableFMV = true;
			FMVstarted = true;
		}
		s_vdec_calls = 0;
	}
	eecount_on_last_vdec = cpuRegs.cycle;

	const u32 table = (val >> 26) & 3;

	switch (ipu_cmd.pos[0])
	{
		case 0:
			if (!bitstream_init())
				return false;

			switch (table)
			{
				case 0: // Macroblock Address Increment
					decoder.mpeg1 = ipuRegs.ctrl.MP1;
					ipuRegs.cmd.DATA = get_macroblock_address_increment();
					break;

				case 1: // Macroblock Type
					decoder.frame_pred_frame_dct = 1;
					// A picture type of 0 is decoded as type 1: Kaiketsu Zorro Mezase
					// doesn't set a picture type and seems happy with I.
					decoder.coding_type = (ipuRegs.ctrl.PCT > 0) ? ipuRegs.ctrl.PCT : 1;
					ipuRegs.cmd.DATA = get_macroblock_modes();
					break;

				case 2: // Motion Code
					ipuRegs.cmd.DATA = get_motion_delta(0);
					break;

				case 3: // DMVector
					ipuRegs.cmd.DATA = get_dmv();
					break;

				jNO_DEFAULT
			}

			// Notes carried over from the earlier comments on this code: the upper
			// 16 bits of DATA hold the "length" of the decoded command and the lower
			// bits the address, a difference between the IPU and the MPEG standard
			// (see get_macroblock_address_increment()). Leaving the upper half at
			// zero was observed to make FMVs very choppy.

			// A decoded value of zero is reported through the ECD flag.
			ipuRegs.ctrl.ECD = (ipuRegs.cmd.DATA == 0);
			[[fallthrough]];

		case 1:
			if (!getBits32((u8*)&ipuRegs.top, 0))
			{
				// Remember that only the final fetch is left to do.
				ipu_cmd.pos[0] = 1;
				return false;
			}

			ipuRegs.top = BigEndian(ipuRegs.top);

			IPU_LOG("VDEC command data 0x%x(0x%x). Skip 0x%X bits/Table=%d (%s), pct %d",
				ipuRegs.cmd.DATA, ipuRegs.cmd.DATA >> 16, val & 0x3f, table,
				(table & 1) ? ((table & 2) ? "DMV" : "MBT") : ((table & 2) ? "MC" : "MBAI"),
				ipuRegs.ctrl.PCT);

			return true;

		jNO_DEFAULT
	}

	return false;
}
|
||||
|
||||
// FDEC command: reads 32 bits of the bitstream into ipuRegs.cmd.DATA, byte-swaps
// them with BigEndian() and mirrors the result into ipuRegs.top.
//
// `val` is not used. Returns false when getBits32() could not deliver the bits
// (the command is then retried), true otherwise.
// NOTE(review): the second getBits32 argument is 0 here while the getBits64 calls
// in this file pass 1 - presumably an "advance" flag; confirm against getBits32.
static __ri bool ipuFDEC(u32 val)
{
	const bool fetched = getBits32((u8*)&ipuRegs.cmd.DATA, 0);
	if (!fetched)
		return false;

	ipuRegs.cmd.DATA = BigEndian(ipuRegs.cmd.DATA);
	ipuRegs.top = ipuRegs.cmd.DATA;

	IPU_LOG("FDEC read: 0x%08x", ipuRegs.top);

	return true;
}
|
||||
|
||||
// SETIQ command: loads a 64-byte quantization matrix from the input stream.
//
// Bit 27 of `val` selects the destination: set -> decoder.niq (non-intra),
// clear -> decoder.iq (intra). The matrix arrives as eight 64-bit reads;
// ipu_cmd.pos[0] counts the reads already done so an interrupted command
// continues where it stopped. Returns false while data is still missing.
static bool ipuSETIQ(u32 val)
{
	const bool non_intra = ((val >> 27) & 1) != 0;
	u8 (&matrix)[64] = non_intra ? decoder.niq : decoder.iq;

	while (ipu_cmd.pos[0] < 8)
	{
		if (!getBits64((u8*)matrix + 8 * ipu_cmd.pos[0], 1))
			return false;
		ipu_cmd.pos[0]++;
	}

	if (non_intra)
	{
		IPU_LOG("Read non-intra quantization matrix from FIFO.");
	}
	else
	{
		IPU_LOG("Read intra quantization matrix from FIFO.");
	}

	// Dump the matrix, one row of eight coefficients per log line.
	for (uint row = 0; row < 8; row++)
	{
		IPU_LOG("%02X %02X %02X %02X %02X %02X %02X %02X",
			matrix[row * 8 + 0], matrix[row * 8 + 1], matrix[row * 8 + 2], matrix[row * 8 + 3],
			matrix[row * 8 + 4], matrix[row * 8 + 5], matrix[row * 8 + 6], matrix[row * 8 + 7]);
	}

	return true;
}
|
||||
|
||||
// SETVQ command: fills g_ipu_vqclut (the 16-entry colour table used by ipu_vq)
// from the input stream using four 64-bit reads.
//
// ipu_cmd.pos[0] counts completed reads so the command can resume after running
// out of input. `val` is not used. Returns false while data is still missing.
static bool ipuSETVQ(u32 val)
{
	while (ipu_cmd.pos[0] < 4)
	{
		if (!getBits64(((u8*)g_ipu_vqclut) + 8 * ipu_cmd.pos[0], 1))
			return false;
		ipu_cmd.pos[0]++;
	}

	// Four table entries (r:g:b) per output line.
	IPU_LOG("SETVQ command. Read VQCLUT table from FIFO.\n"
		"%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d\n"
		"%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d\n"
		"%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d\n"
		"%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d",
		g_ipu_vqclut[0].r, g_ipu_vqclut[0].g, g_ipu_vqclut[0].b,
		g_ipu_vqclut[1].r, g_ipu_vqclut[1].g, g_ipu_vqclut[1].b,
		g_ipu_vqclut[2].r, g_ipu_vqclut[2].g, g_ipu_vqclut[2].b,
		g_ipu_vqclut[3].r, g_ipu_vqclut[3].g, g_ipu_vqclut[3].b,

		g_ipu_vqclut[4].r, g_ipu_vqclut[4].g, g_ipu_vqclut[4].b,
		g_ipu_vqclut[5].r, g_ipu_vqclut[5].g, g_ipu_vqclut[5].b,
		g_ipu_vqclut[6].r, g_ipu_vqclut[6].g, g_ipu_vqclut[6].b,
		g_ipu_vqclut[7].r, g_ipu_vqclut[7].g, g_ipu_vqclut[7].b,

		g_ipu_vqclut[8].r, g_ipu_vqclut[8].g, g_ipu_vqclut[8].b,
		g_ipu_vqclut[9].r, g_ipu_vqclut[9].g, g_ipu_vqclut[9].b,
		g_ipu_vqclut[10].r, g_ipu_vqclut[10].g, g_ipu_vqclut[10].b,
		g_ipu_vqclut[11].r, g_ipu_vqclut[11].g, g_ipu_vqclut[11].b,

		g_ipu_vqclut[12].r, g_ipu_vqclut[12].g, g_ipu_vqclut[12].b,
		g_ipu_vqclut[13].r, g_ipu_vqclut[13].g, g_ipu_vqclut[13].b,
		g_ipu_vqclut[14].r, g_ipu_vqclut[14].g, g_ipu_vqclut[14].b,
		g_ipu_vqclut[15].r, g_ipu_vqclut[15].g, g_ipu_vqclut[15].b);

	return true;
}
|
||||
|
||||
// IPU Transfers are split into 8Qwords so we need to send ALL the data
|
||||
static __ri bool ipuCSC(tIPU_CMD_CSC csc)
|
||||
{
|
||||
csc.log_from_YCbCr();
|
||||
|
||||
for (;ipu_cmd.index < (int)csc.MBC; ipu_cmd.index++)
|
||||
{
|
||||
for(;ipu_cmd.pos[0] < 48; ipu_cmd.pos[0]++)
|
||||
{
|
||||
if (!getBits64((u8*)&decoder.mb8 + 8 * ipu_cmd.pos[0], 1)) return false;
|
||||
}
|
||||
|
||||
ipu_csc(decoder.mb8, decoder.rgb32, 0);
|
||||
if (csc.OFM) ipu_dither(decoder.rgb32, decoder.rgb16, csc.DTE);
|
||||
|
||||
if (csc.OFM)
|
||||
{
|
||||
ipu_cmd.pos[1] += ipu_fifo.out.write(((u32*) & decoder.rgb16) + 4 * ipu_cmd.pos[1], 32 - ipu_cmd.pos[1]);
|
||||
if (ipu_cmd.pos[1] < 32) return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
ipu_cmd.pos[1] += ipu_fifo.out.write(((u32*) & decoder.rgb32) + 4 * ipu_cmd.pos[1], 64 - ipu_cmd.pos[1]);
|
||||
if (ipu_cmd.pos[1] < 64) return false;
|
||||
}
|
||||
|
||||
ipu_cmd.pos[0] = 0;
|
||||
ipu_cmd.pos[1] = 0;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// PACK command: reduces csc.MBC RGB32 macroblocks to a smaller format and pushes
// them to the output FIFO.
//
// Per macroblock:
//   1. read sizeof(macroblock_rgb32) / 8 64-bit words into decoder.rgb32,
//   2. dither to decoder.rgb16 (always),
//   3. OFM set   -> write decoder.rgb16 as 32 units,
//      OFM clear -> vector-quantize into g_ipu_indx4 and write it as 8 units
//      (a unit is 4 u32 values, going by the pointer arithmetic below).
//
// ipu_cmd.index / pos[0] / pos[1] hold the macroblock, input and output progress
// so the command can be resumed; false is returned whenever it has to be.
static __ri bool ipuPACK(tIPU_CMD_CSC csc)
{
	csc.log_from_RGB32();

	while (ipu_cmd.index < (int)csc.MBC)
	{
		while (ipu_cmd.pos[0] < (int)sizeof(macroblock_rgb32) / 8)
		{
			if (!getBits64((u8*)&decoder.rgb32 + 8 * ipu_cmd.pos[0], 1))
				return false;
			ipu_cmd.pos[0]++;
		}

		ipu_dither(decoder.rgb32, decoder.rgb16, csc.DTE);

		if (csc.OFM)
		{
			ipu_cmd.pos[1] += ipu_fifo.out.write(((u32*) & decoder.rgb16) + 4 * ipu_cmd.pos[1], 32 - ipu_cmd.pos[1]);
			if (ipu_cmd.pos[1] < 32)
				return false;
		}
		else
		{
			ipu_vq(decoder.rgb16, g_ipu_indx4);

			ipu_cmd.pos[1] += ipu_fifo.out.write(((u32*)g_ipu_indx4) + 4 * ipu_cmd.pos[1], 8 - ipu_cmd.pos[1]);
			if (ipu_cmd.pos[1] < 8)
				return false;
		}

		// Macroblock fully delivered: reset the cursors and move on.
		ipu_cmd.pos[0] = 0;
		ipu_cmd.pos[1] = 0;
		ipu_cmd.index++;
	}

	return true;
}
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
// CORE Functions (referenced from MPEG library)
|
||||
// --------------------------------------------------------------------------------------
|
||||
|
||||
// Colour-space conversion of one 16x16 macroblock plus IPU post-processing.
//
// yuv2rgb() performs the actual conversion; it takes no arguments, so `mb8` is
// not read here. NOTE(review): yuv2rgb() presumably works on decoder.mb8 /
// decoder.rgb32, which is what the caller in this file passes - confirm.
//
// After conversion the 256 four-byte pixels of `rgb32` are post-processed:
//   * g_ipu_thresh[0] > 0: a pixel whose first three bytes are all below
//     thresh[0] is zeroed entirely; otherwise, if they are all below thresh[1],
//     its fourth byte is set to 0x40.
//   * else g_ipu_thresh[1] > 0: only the 0x40 rule is applied.
//   * sgn != 0: every pixel is XORed with 0x808080.
//
// Fix: each pass now starts again from the first pixel. Previously one cursor was
// shared by all loops, so after a threshold pass the `sgn` pass started one full
// macroblock past `rgb32`, wrote out of bounds and left the macroblock unflipped.
__fi void ipu_csc(macroblock_8& mb8, macroblock_rgb32& rgb32, int sgn)
{
	constexpr int kPixels = 16 * 16;
	u8* const first_pixel = (u8*)&rgb32;

	yuv2rgb();

	// True when the first three bytes of the pixel are all below `limit`.
	const auto all_below = [](const u8* px, u16 limit) {
		return (px[0] < limit) && (px[1] < limit) && (px[2] < limit);
	};

	const u16 th_clear = g_ipu_thresh[0];
	const u16 th_alpha = g_ipu_thresh[1];

	if (th_clear > 0)
	{
		u8* px = first_pixel;
		for (int i = 0; i < kPixels; i++, px += 4)
		{
			if (all_below(px, th_clear))
				*(u32*)px = 0;
			else if (all_below(px, th_alpha))
				px[3] = 0x40;
		}
	}
	else if (th_alpha > 0)
	{
		u8* px = first_pixel;
		for (int i = 0; i < kPixels; i++, px += 4)
		{
			if (all_below(px, th_alpha))
				px[3] = 0x40;
		}
	}

	if (sgn)
	{
		// Rewind: this pass must cover the same 256 pixels as the passes above.
		u8* px = first_pixel;
		for (int i = 0; i < kPixels; i++, px += 4)
		{
			*(u32*)px ^= 0x808080;
		}
	}
}
|
||||
|
||||
// Vector quantization of a 16x16 RGB16 macroblock against g_ipu_vqclut.
//
// Every pixel is replaced by the index (0-15) of the table colour with the
// smallest squared r/g/b distance. Two indices are packed per output byte: the
// even column goes in the low nibble and the odd column in the high nibble, so
// `indx4` receives 16 * 8 = 128 bytes.
//
// Ties: the comparison is strict, so of several equally close table entries the
// one with the lowest index is kept.
__fi void ipu_vq(macroblock_rgb16& rgb16, u8* indx4)
{
	const auto nearest_clut_entry = [&rgb16](int row, int col) -> u8 {
		u8 best = 0;
		int best_distance = std::numeric_limits<int>::max();

		for (u8 k = 0; k < 16; ++k)
		{
			const int dr = rgb16.c[row][col].r - g_ipu_vqclut[k].r;
			const int dg = rgb16.c[row][col].g - g_ipu_vqclut[k].g;
			const int db = rgb16.c[row][col].b - g_ipu_vqclut[k].b;
			const int distance = dr * dr + dg * dg + db * db;

			if (best_distance > distance)
			{
				best = k;
				best_distance = distance;
			}
		}

		return best;
	};

	for (int row = 0; row < 16; ++row)
	{
		for (int pair = 0; pair < 8; ++pair)
		{
			const u8 high = nearest_clut_entry(row, 2 * pair + 1);
			const u8 low = nearest_clut_entry(row, 2 * pair);
			indx4[row * 8 + pair] = (high << 4) | low;
		}
	}
}
|
||||
|
||||
// Runs (or resumes) the IPU command recorded in ipu_cmd.CMD.
//
// Each handler reports whether the command ran to completion. If it did not, the
// function returns immediately and leaves ctrl.BUSY set so that a later call can
// continue. On completion the decode-type commands (IDEC, BDEC, VDEC, FDEC) also
// clear ipuRegs.topbusy and ipuRegs.cmd.BUSY; every completed command then clears
// ctrl.BUSY and raises INTC_IPU.
__noinline void IPUWorker()
{
	pxAssert(ipuRegs.ctrl.BUSY);

	// `completed` starts out true so that the default case (declared unreachable
	// by jNO_DEFAULT) ends up on the completion path below.
	bool completed = true;
	bool clears_cmd_busy = false;

	switch (ipu_cmd.CMD)
	{
		// SCE_IPU_BCLR and SCE_IPU_SETTH have no case here: they are unreachable
		// (BUSY will always be 0 for them).

		case SCE_IPU_IDEC:
			completed = mpeg2sliceIDEC();
			clears_cmd_busy = true;
			break;

		case SCE_IPU_BDEC:
			completed = mpeg2_slice();
			clears_cmd_busy = true;
			break;

		case SCE_IPU_VDEC:
			completed = ipuVDEC(ipu_cmd.current);
			clears_cmd_busy = true;
			break;

		case SCE_IPU_FDEC:
			completed = ipuFDEC(ipu_cmd.current);
			clears_cmd_busy = true;
			break;

		case SCE_IPU_SETIQ:
			completed = ipuSETIQ(ipu_cmd.current);
			break;

		case SCE_IPU_SETVQ:
			completed = ipuSETVQ(ipu_cmd.current);
			break;

		case SCE_IPU_CSC:
			completed = ipuCSC(ipu_cmd.current);
			break;

		case SCE_IPU_PACK:
			completed = ipuPACK(ipu_cmd.current);
			break;

		jNO_DEFAULT
	}

	if (!completed)
		return;

	if (clears_cmd_busy)
	{
		ipuRegs.topbusy = 0;
		ipuRegs.cmd.BUSY = 0;
	}

	// success
	IPU_LOG("IPU Command finished");
	ipuRegs.ctrl.BUSY = 0;
	hwIntcIrq(INTC_IPU);
}
|
||||
|
||||
MULTI_ISA_UNSHARED_END
|
|
@ -0,0 +1,27 @@
|
|||
/* PCSX2 - PS2 Emulator for PCs
|
||||
* Copyright (C) 2002-2022 PCSX2 Dev Team
|
||||
*
|
||||
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
|
||||
* of the GNU Lesser General Public License as published by the Free Software Found-
|
||||
* ation, either version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
||||
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
* PURPOSE. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with PCSX2.
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "GS/MultiISA.h"
|
||||
#include "mpeg2lib/Mpeg.h"
|
||||
|
||||
MULTI_ISA_DEF(void IPUWorker();)
|
||||
|
||||
// Quantization matrix
|
||||
extern rgb16_t g_ipu_vqclut[16]; //clut conversion table
|
||||
extern u16 g_ipu_thresh[2]; //thresholds for color conversions
|
||||
|
||||
alignas(16) extern u8 g_ipu_indx4[16*16/2];
|
|
@ -21,6 +21,10 @@
|
|||
#include "yuv2rgb.h"
|
||||
#include "mpeg2lib/Mpeg.h"
|
||||
|
||||
#include "GS/MultiISA.h"
|
||||
|
||||
MULTI_ISA_UNSHARED_START
|
||||
|
||||
void ipu_dither_reference(const macroblock_rgb32 &rgb32, macroblock_rgb16 &rgb16, int dte);
|
||||
void ipu_dither_sse2(const macroblock_rgb32 &rgb32, macroblock_rgb16 &rgb16, int dte);
|
||||
|
||||
|
@ -120,3 +124,5 @@ __ri void ipu_dither_sse2(const macroblock_rgb32 &rgb32, macroblock_rgb16 &rgb16
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
MULTI_ISA_UNSHARED_END
|
||||
|
|
|
@ -32,6 +32,8 @@
|
|||
#include "IPU/IPU.h"
|
||||
#include "Mpeg.h"
|
||||
|
||||
#include <array>
|
||||
|
||||
#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
|
||||
#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
|
||||
#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
|
||||
|
@ -45,9 +47,11 @@
|
|||
* to +-3826 - this is the worst case for a column IDCT where the
|
||||
* column inputs are 16-bit values.
|
||||
*/
|
||||
alignas(16) static u8 clip_lut[1024];
|
||||
alignas(16) extern const std::array<u8, 1024> g_idct_clip_lut;
|
||||
|
||||
#define CLIP(i) ((clip_lut+384)[(i)])
|
||||
#define CLIP(i) ((g_idct_clip_lut.data()+384)[(i)])
|
||||
|
||||
MULTI_ISA_UNSHARED_START
|
||||
|
||||
static __fi void BUTTERFLY(int& t0, int& t1, int w0, int w1, int d0, int d1)
|
||||
{
|
||||
|
@ -219,9 +223,21 @@ __ri void mpeg2_idct_add (const int last, s16 * block, s16 * dest, const int str
|
|||
}
|
||||
}
|
||||
|
||||
mpeg2_scan_pack::mpeg2_scan_pack()
|
||||
MULTI_ISA_UNSHARED_END
|
||||
|
||||
#if MULTI_ISA_COMPILE_ONCE
|
||||
|
||||
static constexpr std::array<u8, 1024> make_clip_lut()
|
||||
{
|
||||
static const u8 mpeg2_scan_norm[64] = {
|
||||
std::array<u8, 1024> lut = {};
|
||||
for (int i = -384; i < 640; i++)
|
||||
lut[i+384] = (i < 0) ? 0 : ((i > 255) ? 255 : i);
|
||||
return lut;
|
||||
}
|
||||
|
||||
static constexpr mpeg2_scan_pack make_scan_pack()
|
||||
{
|
||||
constexpr u8 mpeg2_scan_norm[64] = {
|
||||
/* Zig-Zag scan pattern */
|
||||
0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5,
|
||||
12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28,
|
||||
|
@ -229,7 +245,7 @@ mpeg2_scan_pack::mpeg2_scan_pack()
|
|||
58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63
|
||||
};
|
||||
|
||||
static const u8 mpeg2_scan_alt[64] = {
|
||||
constexpr u8 mpeg2_scan_alt[64] = {
|
||||
/* Alternate scan pattern */
|
||||
0, 8, 16, 24, 1, 9, 2, 10, 17, 25, 32, 40, 48, 56, 57, 49,
|
||||
41, 33, 26, 18, 3, 11, 4, 12, 19, 27, 34, 42, 50, 58, 35, 43,
|
||||
|
@ -237,15 +253,19 @@ mpeg2_scan_pack::mpeg2_scan_pack()
|
|||
53, 61, 22, 30, 7, 15, 23, 31, 38, 46, 54, 62, 39, 47, 55, 63
|
||||
};
|
||||
|
||||
for (int i = -384; i < 640; i++)
|
||||
clip_lut[i+384] = (i < 0) ? 0 : ((i > 255) ? 255 : i);
|
||||
mpeg2_scan_pack pack = {};
|
||||
|
||||
for (int i = 0; i < 64; i++) {
|
||||
int j = mpeg2_scan_norm[i];
|
||||
norm[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2);
|
||||
pack.norm[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2);
|
||||
j = mpeg2_scan_alt[i];
|
||||
alt[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2);
|
||||
pack.alt[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2);
|
||||
}
|
||||
|
||||
return pack;
|
||||
}
|
||||
|
||||
alignas(16) const mpeg2_scan_pack mpeg2_scan;
|
||||
alignas(16) constexpr std::array<u8, 1024> g_idct_clip_lut = make_clip_lut();
|
||||
alignas(16) constexpr mpeg2_scan_pack mpeg2_scan = make_scan_pack();
|
||||
|
||||
#endif
|
||||
|
|
|
@ -33,8 +33,12 @@
|
|||
#include "Mpeg.h"
|
||||
#include "Vlc.h"
|
||||
|
||||
#include "GS/MultiISA.h"
|
||||
|
||||
#include "common/MemsetFast.inl"
|
||||
|
||||
#if MULTI_ISA_COMPILE_ONCE
|
||||
|
||||
const int non_linear_quantizer_scale [] =
|
||||
{
|
||||
0, 1, 2, 3, 4, 5, 6, 7,
|
||||
|
@ -43,6 +47,10 @@ const int non_linear_quantizer_scale [] =
|
|||
56, 64, 72, 80, 88, 96, 104, 112
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
MULTI_ISA_UNSHARED_START
|
||||
|
||||
/* Bitstream and buffer needs to be reallocated in order for successful
|
||||
reading of the old data. Here the old data stored in the 2nd slot
|
||||
of the internal buffer is copied to 1st slot, and the new data read
|
||||
|
@ -1272,4 +1280,6 @@ __fi bool mpeg2_slice()
|
|||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
MULTI_ISA_UNSHARED_END
|
||||
|
|
|
@ -24,6 +24,12 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "IPU/IPU.h"
|
||||
|
||||
#include "GS/MultiISA.h"
|
||||
|
||||
#include "common/Assertions.h"
|
||||
|
||||
// the IPU is fixed to 16 byte strides (128-bit / QWC resolution):
|
||||
static const uint decoder_stride = 16;
|
||||
|
||||
|
@ -184,30 +190,31 @@ struct mpeg2_scan_pack
|
|||
{
|
||||
u8 norm[64];
|
||||
u8 alt[64];
|
||||
|
||||
mpeg2_scan_pack();
|
||||
};
|
||||
|
||||
extern int bitstream_init ();
|
||||
extern u32 UBITS(uint bits);
|
||||
extern s32 SBITS(uint bits);
|
||||
|
||||
extern void mpeg2_idct_copy(s16 * block, u8* dest, int stride);
|
||||
extern void mpeg2_idct_add(int last, s16 * block, s16* dest, int stride);
|
||||
MULTI_ISA_DEF(
|
||||
extern int bitstream_init();
|
||||
|
||||
extern bool mpeg2sliceIDEC();
|
||||
extern bool mpeg2_slice();
|
||||
extern int get_macroblock_address_increment();
|
||||
extern int get_macroblock_modes();
|
||||
extern void mpeg2_idct_copy(s16 * block, u8* dest, int stride);
|
||||
extern void mpeg2_idct_add(int last, s16 * block, s16* dest, int stride);
|
||||
|
||||
extern int get_motion_delta(const int f_code);
|
||||
extern int get_dmv();
|
||||
extern bool mpeg2sliceIDEC();
|
||||
extern bool mpeg2_slice();
|
||||
extern int get_macroblock_address_increment();
|
||||
extern int get_macroblock_modes();
|
||||
|
||||
extern void ipu_csc(macroblock_8& mb8, macroblock_rgb32& rgb32, int sgn);
|
||||
extern void ipu_dither(const macroblock_rgb32& rgb32, macroblock_rgb16& rgb16, int dte);
|
||||
extern void ipu_vq(macroblock_rgb16& rgb16, u8* indx4);
|
||||
extern int get_motion_delta(const int f_code);
|
||||
extern int get_dmv();
|
||||
|
||||
extern int slice (u8 * buffer);
|
||||
extern void ipu_csc(macroblock_8& mb8, macroblock_rgb32& rgb32, int sgn);
|
||||
extern void ipu_dither(const macroblock_rgb32& rgb32, macroblock_rgb16& rgb16, int dte);
|
||||
extern void ipu_vq(macroblock_rgb16& rgb16, u8* indx4);
|
||||
|
||||
extern int slice (u8 * buffer);
|
||||
)
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#define BigEndian(in) _byteswap_ulong(in)
|
||||
|
|
|
@ -36,6 +36,8 @@
|
|||
#define IPU_RCR_COEFF 0xcc // 1.59375
|
||||
#define IPU_BCB_COEFF 0x102 // 2.015625
|
||||
|
||||
MULTI_ISA_UNSHARED_START
|
||||
|
||||
// conforming implementation for reference, do not optimise
|
||||
void yuv2rgb_reference(void)
|
||||
{
|
||||
|
@ -149,3 +151,5 @@ __ri void yuv2rgb_sse2()
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
MULTI_ISA_UNSHARED_END
|
||||
|
|
|
@ -15,7 +15,9 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
extern void yuv2rgb_reference();
|
||||
#include "GS/MultiISA.h"
|
||||
|
||||
MULTI_ISA_DEF(extern void yuv2rgb_reference();)
|
||||
|
||||
#define yuv2rgb yuv2rgb_sse2
|
||||
extern void yuv2rgb_sse2();
|
||||
MULTI_ISA_DEF(extern void yuv2rgb_sse2();)
|
||||
|
|
|
@ -576,6 +576,7 @@
|
|||
<ClCompile Include="CDVD\CDVDisoReader.cpp" />
|
||||
<ClCompile Include="Ipu\IPU.cpp" />
|
||||
<ClCompile Include="Ipu\IPU_Fifo.cpp" />
|
||||
<ClCompile Include="Ipu\IPU_MultiISA.cpp" />
|
||||
<ClCompile Include="Ipu\yuv2rgb.cpp" />
|
||||
<ClCompile Include="Ipu\mpeg2lib\Idct.cpp" />
|
||||
<ClCompile Include="Ipu\mpeg2lib\Mpeg.cpp" />
|
||||
|
@ -1017,6 +1018,7 @@
|
|||
<ClInclude Include="CDVD\CDVDisoReader.h" />
|
||||
<ClInclude Include="Ipu\IPU.h" />
|
||||
<ClInclude Include="Ipu\IPU_Fifo.h" />
|
||||
<ClInclude Include="Ipu\IPU_MultiISA.h" />
|
||||
<ClInclude Include="Ipu\yuv2rgb.h" />
|
||||
<ClInclude Include="Ipu\mpeg2lib\Mpeg.h" />
|
||||
<ClInclude Include="Ipu\mpeg2lib\Vlc.h" />
|
||||
|
|
|
@ -692,6 +692,9 @@
|
|||
<ClCompile Include="IPU\IPU_Fifo.cpp">
|
||||
<Filter>System\Ps2\IPU</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="IPU\IPU_MultiISA.cpp">
|
||||
<Filter>System\Ps2\IPU</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="IPU\yuv2rgb.cpp">
|
||||
<Filter>System\Ps2\IPU</Filter>
|
||||
</ClCompile>
|
||||
|
@ -2011,6 +2014,9 @@
|
|||
<ClInclude Include="IPU\IPU_Fifo.h">
|
||||
<Filter>System\Ps2\IPU</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="IPU\IPU_MultiISA.h">
|
||||
<Filter>System\Ps2\IPU</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="IPU\yuv2rgb.h">
|
||||
<Filter>System\Ps2\IPU</Filter>
|
||||
</ClInclude>
|
||||
|
|
|
@ -430,6 +430,7 @@
|
|||
<ClCompile Include="CDVD\CDVDisoReader.cpp" />
|
||||
<ClCompile Include="Ipu\IPU.cpp" />
|
||||
<ClCompile Include="Ipu\IPU_Fifo.cpp" />
|
||||
<ClCompile Include="Ipu\IPU_MultiISA.cpp" />
|
||||
<ClCompile Include="Ipu\yuv2rgb.cpp" />
|
||||
<ClCompile Include="Ipu\mpeg2lib\Idct.cpp" />
|
||||
<ClCompile Include="Ipu\mpeg2lib\Mpeg.cpp" />
|
||||
|
@ -739,6 +740,7 @@
|
|||
<ClInclude Include="CDVD\CDVDcommon.h" />
|
||||
<ClInclude Include="Ipu\IPU.h" />
|
||||
<ClInclude Include="Ipu\IPU_Fifo.h" />
|
||||
<ClInclude Include="Ipu\IPU_MultiISA.h" />
|
||||
<ClInclude Include="Ipu\yuv2rgb.h" />
|
||||
<ClInclude Include="Ipu\mpeg2lib\Mpeg.h" />
|
||||
<ClInclude Include="Ipu\mpeg2lib\Vlc.h" />
|
||||
|
|
|
@ -617,6 +617,9 @@
|
|||
<ClCompile Include="IPU\IPU_Fifo.cpp">
|
||||
<Filter>System\Ps2\IPU</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="IPU\IPU_MultiISA.cpp">
|
||||
<Filter>System\Ps2\IPU</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="IPU\yuv2rgb.cpp">
|
||||
<Filter>System\Ps2\IPU</Filter>
|
||||
</ClCompile>
|
||||
|
@ -1532,6 +1535,9 @@
|
|||
<ClInclude Include="IPU\IPU_Fifo.h">
|
||||
<Filter>System\Ps2\IPU</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="IPU\IPU_MultiISA.h">
|
||||
<Filter>System\Ps2\IPU</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="IPU\yuv2rgb.h">
|
||||
<Filter>System\Ps2\IPU</Filter>
|
||||
</ClInclude>
|
||||
|
|
Loading…
Reference in New Issue