Added some compiler hints to help the IPU optimize a little better. It had several candidates for forced inlining.

git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@315 a6443dda-0b58-4228-96e9-037be469359c
This commit is contained in:
Jake.Stine 2008-11-11 13:43:41 +00:00 committed by Gregory Hainaut
parent 01b03d3707
commit dc12877b8b
4 changed files with 55 additions and 87 deletions

View File

@ -1,6 +1,4 @@
@echo off
echo %1 %2 %3\svnrev_template.h %2\svnrev.h
%1 %2 %3\svnrev_template.h %2\svnrev.h
if not ERRORLEVEL 0 (
echo Automatic revision update unavailable, using generic template instead.

View File

@ -232,11 +232,9 @@ u32 ipuRead32(u32 mem)
//ipuRegs->ctrl.OFC = min(g_nIPU0Data, 8); // check if transfering to ipu0
ipuRegs->ctrl.CBP = coded_block_pattern;
#ifdef IPU_LOG
if( !ipuRegs->ctrl.BUSY ) {
if( !ipuRegs->ctrl.BUSY )
IPU_LOG("Ipu read32: IPU_CTRL=0x%08X %x\n", ipuRegs->ctrl._u32, cpuRegs.pc);
}
#endif
return ipuRegs->ctrl._u32;
case 0x10002020: // IPU_BP
@ -245,9 +243,7 @@ u32 ipuRead32(u32 mem)
ipuRegs->ipubp |= g_BP.IFC<<8;
ipuRegs->ipubp |= (g_BP.FP+g_BP.bufferhasnew) << 16;
#ifdef IPU_LOG
IPU_LOG("Ipu read32: IPU_BP=0x%08X\n", *(u32*)&g_BP);
#endif
return ipuRegs->ipubp;
}
@ -269,27 +265,21 @@ u64 ipuRead64(u32 mem)
switch (mem){
case 0x10002000: // IPU_CMD
#ifdef IPU_LOG
//if(!ipuRegs->cmd.BUSY){
if( ipuRegs->cmd.DATA&0xffffff ) {
if( ipuRegs->cmd.DATA&0xffffff )
IPU_LOG("Ipu read64: IPU_CMD=BUSY=%x, DATA=%08X\n", ipuRegs->cmd.BUSY?1:0,ipuRegs->cmd.DATA);
}
#endif
//return *(u64*)&ipuRegs->cmd;
break;
case 0x10002030: // IPU_TOP
#ifdef IPU_LOG
IPU_LOG("Ipu read64: IPU_TOP=%x, bp = %d\n",ipuRegs->top,g_BP.BP);
#endif
//return *(u64*)&ipuRegs->top;
break;
default:
#ifdef IPU_LOG
IPU_LOG("Ipu read64: Unknown=%x\n", mem);
#endif
break;
}
@ -463,9 +453,7 @@ void ipuWrite32(u32 mem,u32 value)
switch (mem){
case 0x10002000: // IPU_CMD
#ifdef IPU_LOG
IPU_LOG("Ipu write32: IPU_CMD=0x%08X\n",value);
#endif
IPUCMD_WRITE(value);
break;
case 0x10002010: // IPU_CTRL
@ -478,15 +466,11 @@ void ipuWrite32(u32 mem,u32 value)
ipuSoftReset();
}
#ifdef IPU_LOG
IPU_LOG("Ipu write32: IPU_CTRL=0x%08X\n",value);
#endif
break;
default:
#ifdef IPU_LOG
IPU_LOG("Ipu write32: Unknown=%x\n", mem);
#endif
*(u32*)((u8*)ipuRegs + (mem&0xfff)) = value;
break;
}
@ -498,16 +482,12 @@ void ipuWrite64(u32 mem, u64 value)
switch (mem){
case 0x10002000:
#ifdef IPU_LOG
IPU_LOG("Ipu write64: IPU_CMD=0x%08X\n",value);
#endif
IPUCMD_WRITE((u32)value);
break;
default:
#ifdef IPU_LOG
IPU_LOG("Ipu write64: Unknown=%x\n", mem);
#endif
*(u64*)((u8*)ipuRegs + (mem&0xfff)) = value;
break;
}
@ -638,6 +618,7 @@ static BOOL ipuIDEC(u32 val)
}else{ IPU_LOG(" Output format is RGB16.");}
IPU_LOG("\n");
#endif
g_BP.BP+= idec.FB;//skip FB bits
//from IPU_CTRL
ipuRegs->ctrl.PCT = I_TYPE; //Intra DECoding;)
@ -827,31 +808,30 @@ static BOOL ipuSETVQ(u32 val)
{
g_nCmdPos[0] += getBits((u8*)vqclut+g_nCmdPos[0], 256-8*g_nCmdPos[0], 1); // 16*2*8
if( g_nCmdPos[0] == 32 ) {
#ifdef IPU_LOG
IPU_LOG("IPU SETVQ command.\nRead VQCLUT table from IPU FIFO.\n");
IPU_LOG(
"%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d "
"%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d\n"
"%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d "
"%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d\n",
vqclut[0] >> 10, (vqclut[0] >> 5) & 0x1F, vqclut[0] & 0x1F,
vqclut[1] >> 10, (vqclut[1] >> 5) & 0x1F, vqclut[1] & 0x1F,
vqclut[2] >> 10, (vqclut[2] >> 5) & 0x1F, vqclut[2] & 0x1F,
vqclut[3] >> 10, (vqclut[3] >> 5) & 0x1F, vqclut[3] & 0x1F,
vqclut[4] >> 10, (vqclut[4] >> 5) & 0x1F, vqclut[4] & 0x1F,
vqclut[5] >> 10, (vqclut[5] >> 5) & 0x1F, vqclut[5] & 0x1F,
vqclut[6] >> 10, (vqclut[6] >> 5) & 0x1F, vqclut[6] & 0x1F,
vqclut[7] >> 10, (vqclut[7] >> 5) & 0x1F, vqclut[7] & 0x1F,
vqclut[8] >> 10, (vqclut[8] >> 5) & 0x1F, vqclut[8] & 0x1F,
vqclut[9] >> 10, (vqclut[9] >> 5) & 0x1F, vqclut[9] & 0x1F,
vqclut[10] >> 10, (vqclut[10] >> 5) & 0x1F, vqclut[10] & 0x1F,
vqclut[11] >> 10, (vqclut[11] >> 5) & 0x1F, vqclut[11] & 0x1F,
vqclut[12] >> 10, (vqclut[12] >> 5) & 0x1F, vqclut[12] & 0x1F,
vqclut[13] >> 10, (vqclut[13] >> 5) & 0x1F, vqclut[13] & 0x1F,
vqclut[14] >> 10, (vqclut[14] >> 5) & 0x1F, vqclut[14] & 0x1F,
vqclut[15] >> 10, (vqclut[15] >> 5) & 0x1F, vqclut[15] & 0x1F);
#endif
if( g_nCmdPos[0] == 32 )
{
IPU_LOG("IPU SETVQ command.\nRead VQCLUT table from IPU FIFO.\n");
IPU_LOG(
"%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d "
"%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d\n"
"%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d "
"%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d\n",
vqclut[0] >> 10, (vqclut[0] >> 5) & 0x1F, vqclut[0] & 0x1F,
vqclut[1] >> 10, (vqclut[1] >> 5) & 0x1F, vqclut[1] & 0x1F,
vqclut[2] >> 10, (vqclut[2] >> 5) & 0x1F, vqclut[2] & 0x1F,
vqclut[3] >> 10, (vqclut[3] >> 5) & 0x1F, vqclut[3] & 0x1F,
vqclut[4] >> 10, (vqclut[4] >> 5) & 0x1F, vqclut[4] & 0x1F,
vqclut[5] >> 10, (vqclut[5] >> 5) & 0x1F, vqclut[5] & 0x1F,
vqclut[6] >> 10, (vqclut[6] >> 5) & 0x1F, vqclut[6] & 0x1F,
vqclut[7] >> 10, (vqclut[7] >> 5) & 0x1F, vqclut[7] & 0x1F,
vqclut[8] >> 10, (vqclut[8] >> 5) & 0x1F, vqclut[8] & 0x1F,
vqclut[9] >> 10, (vqclut[9] >> 5) & 0x1F, vqclut[9] & 0x1F,
vqclut[10] >> 10, (vqclut[10] >> 5) & 0x1F, vqclut[10] & 0x1F,
vqclut[11] >> 10, (vqclut[11] >> 5) & 0x1F, vqclut[11] & 0x1F,
vqclut[12] >> 10, (vqclut[12] >> 5) & 0x1F, vqclut[12] & 0x1F,
vqclut[13] >> 10, (vqclut[13] >> 5) & 0x1F, vqclut[13] & 0x1F,
vqclut[14] >> 10, (vqclut[14] >> 5) & 0x1F, vqclut[14] & 0x1F,
vqclut[15] >> 10, (vqclut[15] >> 5) & 0x1F, vqclut[15] & 0x1F);
}
return g_nCmdPos[0] == 32;
@ -1029,14 +1009,11 @@ void IPUCMD_WRITE(u32 val) {
return;
case SCE_IPU_SETIQ:
#ifdef IPU_LOG
IPU_LOG("IPU SETIQ command.\n");
#endif
#ifdef IPU_LOG
if (val & 0x3f){
if (val & 0x3f)
IPU_LOG("Skip %d bits.\n", val & 0x3f);
}
#endif
g_BP.BP+= val & 0x3F;
if( ipuSETIQ(ipuRegs->cmd.DATA) ) {
@ -1689,9 +1666,7 @@ int IPU1dma()
}
ipu1dma->chcr = (ipu1dma->chcr & 0xFFFF) | ( (*ptag) & 0xFFFF0000 );
#ifdef IPU_LOG
IPU_LOG("dmaIrq Set\n");
#endif
IPU_LOG("IPU dmaIrq Set\n");
INT(DMAC_TO_IPU, totalqwc*BIAS);
g_nDMATransfer |= IPU_DMA_TIE1;
return totalqwc;
@ -1720,10 +1695,8 @@ int IPU1dma()
// Transfer Dn_QWC from Dn_MADR to GIF
if ((ipu1dma->chcr & 0xc) == 0 || ipu1dma->qwc > 0) { // Normal Mode
#ifdef IPU_LOG
IPU_LOG("dmaIPU1 Normal size=%d, addr=%lx, fifosize=%x\n",
ipu1dma->qwc, ipu1dma->madr, 8 - g_BP.IFC);
#endif
IPU_LOG("dmaIPU1 Normal size=%d, addr=%lx, fifosize=%x\n",
ipu1dma->qwc, ipu1dma->madr, 8 - g_BP.IFC);
IPU1chain();
INT(DMAC_TO_IPU, (ipu1cycles+totalqwc)*BIAS);
return totalqwc;
@ -1776,16 +1749,12 @@ int IPU1dma()
break;
default:
#ifdef IPU_LOG
IPU_LOG("ERROR: different transfer mode!, Please report to PCSX2 Team\n");
#endif
SysPrintf("IPU ERROR: different transfer mode!, Please report to PCSX2 Team\n");
break;
}
#ifdef IPU_LOG
IPU_LOG("dmaIPU1 dmaChain %8.8x_%8.8x size=%d, addr=%lx, fifosize=%x\n",
ptag[1], ptag[0], ipu1dma->qwc, ipu1dma->madr, 8 - g_BP.IFC);
#endif
if( (ipu1dma->chcr & 0x80) && ptag[0] & 0x80000000 )
g_nDMATransfer |= IPU_DMA_DOTIE1;
@ -1967,9 +1936,7 @@ void dmaIPU1() // toIPU
extern void GIFdma();
void ipu0Interrupt() {
#ifdef IPU_LOG
IPU_LOG("ipu0Interrupt: %x\n", cpuRegs.cycle);
#endif
if( g_nDMATransfer & IPU_DMA_FIREINT0 ) {
hwIntcIrq(INTC_IPU);
@ -2000,9 +1967,7 @@ void ipu0Interrupt() {
}
void ipu1Interrupt() {
#ifdef IPU_LOG
IPU_LOG("ipu1Interrupt %x:\n", cpuRegs.cycle);
#endif
if( g_nDMATransfer & IPU_DMA_FIREINT1 ) {
hwIntcIrq(INTC_IPU);

View File

@ -22,6 +22,10 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
// [Air] Note: many functions in this module are large and only used once, so they
// have been forced to inline since it won't bloat the program and gets rid of
// some call overhead.
#include "Mpeg.h"
#include "Vlc.h"
#include "coroutine.h"
@ -161,7 +165,7 @@ int get_macroblock_modes (decoder_t * const decoder)
#undef bit_ptr
}
static int get_quantizer_scale (decoder_t * const decoder)
static FORCEINLINE int get_quantizer_scale (decoder_t * const decoder)
{
int quantizer_scale_code;
@ -172,7 +176,7 @@ static int get_quantizer_scale (decoder_t * const decoder)
else return quantizer_scale_code << 1;
}
static int get_coded_block_pattern (decoder_t * const decoder)
static FORCEINLINE int get_coded_block_pattern (decoder_t * const decoder)
{
const CBPtab * tab;
@ -189,7 +193,7 @@ static int get_coded_block_pattern (decoder_t * const decoder)
return tab->cbp;
}
static int get_luma_dc_dct_diff (decoder_t * const decoder)
static FORCEINLINE int get_luma_dc_dct_diff (decoder_t * const decoder)
{
#define bit_buf (decoder->bitstream_buf)
#define bits (decoder->bitstream_bits)
@ -227,7 +231,7 @@ static int get_luma_dc_dct_diff (decoder_t * const decoder)
#undef bit_ptr
}
static int get_chroma_dc_dct_diff (decoder_t * const decoder)
static FORCEINLINE int get_chroma_dc_dct_diff (decoder_t * const decoder)
{
#define bit_buf (decoder->bitstream_buf)
#define bits (decoder->bitstream_bits)
@ -271,7 +275,7 @@ do { \
val = SBITS (val, 1) ^ 2047; \
} while (0)
static void get_intra_block_B14 (decoder_t * const decoder)
static FORCEINLINE void get_intra_block_B14 (decoder_t * const decoder)
{
int i;
int j;
@ -380,7 +384,7 @@ static void get_intra_block_B14 (decoder_t * const decoder)
decoder->bitstream_bits = bits;
}
static void get_intra_block_B15 (decoder_t * const decoder)
static FORCEINLINE void get_intra_block_B15 (decoder_t * const decoder)
{
int i;
int j;
@ -485,7 +489,7 @@ static void get_intra_block_B15 (decoder_t * const decoder)
decoder->bitstream_bits = bits;
}
static int get_non_intra_block (decoder_t * const decoder)
static FORCEINLINE int get_non_intra_block (decoder_t * const decoder)
{
#define bit_buf (decoder->bitstream_buf)
#define bits (decoder->bitstream_bits)
@ -613,7 +617,7 @@ static int get_non_intra_block (decoder_t * const decoder)
#undef bit_ptr
}
static void get_mpeg1_intra_block (decoder_t * const decoder)
static FORCEINLINE void get_mpeg1_intra_block (decoder_t * const decoder)
{
int i;
int j;
@ -736,7 +740,7 @@ static void get_mpeg1_intra_block (decoder_t * const decoder)
decoder->bitstream_bits = bits;
}
static int get_mpeg1_non_intra_block (decoder_t * const decoder)
static FORCEINLINE int get_mpeg1_non_intra_block (decoder_t * const decoder)
{
int i;
int j;
@ -1283,7 +1287,7 @@ void mpeg2_slice(void* pdone)
so_exit();
}
int get_motion_delta (decoder_t * const decoder,
int FORCEINLINE get_motion_delta (decoder_t * const decoder,
const int f_code)
{
#define bit_buf (decoder->bitstream_buf)
@ -1337,7 +1341,7 @@ int get_motion_delta (decoder_t * const decoder,
#undef bit_ptr
}
int get_dmv (decoder_t * const decoder)
int FORCEINLINE get_dmv (decoder_t * const decoder)
{
#define bit_buf (decoder->bitstream_buf)
#define bits (decoder->bitstream_bits)

View File

@ -168,9 +168,10 @@ void mpeg2sliceIDEC(void* pdone);
void mpeg2_slice(void* pdone);
int get_macroblock_address_increment(decoder_t * const decoder);
int get_macroblock_modes (decoder_t * const decoder);
int get_motion_delta (decoder_t * const decoder,
const int f_code);
int get_dmv (decoder_t * const decoder);
extern int get_motion_delta (decoder_t * const decoder, const int f_code);
extern int get_dmv (decoder_t * const decoder);
extern int non_linear_quantizer_scale[]; // JayteeMaster: it is needed in Ipu.c
void ipu_csc(struct macroblock_8 *mb8, struct macroblock_rgb32 *rgb32, int sgn);