GregMiscellaneous: Sync against trunk. (3768:3804)

git-svn-id: http://pcsx2.googlecode.com/svn/branches/GregMiscellaneous@3805 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
arcum42 2010-09-19 08:01:48 +00:00
parent 28819c2098
commit 0a832acabb
36 changed files with 1176 additions and 1302 deletions

View File

@ -20,9 +20,10 @@
u32 s_iLastCOP0Cycle = 0;
u32 s_iLastPERFCycle[2] = { 0, 0 };
__ri void UpdateCP0Status() {
//currently the 2 memory modes are not implemented. Given this function is called so much,
//it's commented out for now. Only the interrupt test is needed. (rama)
// Updates the CPU's mode of operation (either Kernel, Supervisor, or User mode).
// Currently the different modes are not implemented.
// Given this function is called so much, it's commented out for now. (rama)
__ri void cpuUpdateOperationMode() {
//u32 value = cpuRegs.CP0.n.Status.val;
@ -32,7 +33,6 @@ __ri void UpdateCP0Status() {
//} else { // User Mode
// memSetUserMode();
//}
cpuTestHwInts();
}
void __fastcall WriteCP0Status(u32 value) {

View File

@ -50,8 +50,7 @@ void hwReset()
{
hwInit();
memzero_ptr<Ps2MemSize::Hardware>( eeHw );
//memset(eeHw+0x2000, 0, 0x0000e000);
memzero( eeHw );
psHu32(SBUS_F260) = 0x1D000060;
@ -73,16 +72,16 @@ void hwReset()
ipuDmaReset();
}
__fi void intcInterrupt()
__fi uint intcInterrupt()
{
if ((psHu32(INTC_STAT)) == 0) {
//DevCon.Warning("*PCSX2*: intcInterrupt already cleared");
return;
return 0;
}
if ((psHu32(INTC_STAT) & psHu32(INTC_MASK)) == 0)
{
//DevCon.Warning("*PCSX2*: No valid interrupt INTC_MASK: %x INTC_STAT: %x", psHu32(INTC_MASK), psHu32(INTC_STAT));
return;
return 0;
}
HW_LOG("intcInterrupt %x", psHu32(INTC_STAT) & psHu32(INTC_MASK));
@ -91,27 +90,29 @@ __fi void intcInterrupt()
counters[1].hold = rcntRcount(1);
}
cpuException(0x400, cpuRegs.branch);
//cpuException(0x400, cpuRegs.branch);
return 0x400;
}
__fi void dmacInterrupt()
__fi uint dmacInterrupt()
{
if( ((psHu16(DMAC_STAT + 2) & psHu16(DMAC_STAT)) == 0 ) &&
( psHu16(DMAC_STAT) & 0x8000) == 0 )
{
//DevCon.Warning("No valid DMAC interrupt MASK %x STAT %x", psHu16(DMAC_STAT+2), psHu16(DMAC_STAT));
return;
return 0;
}
if (!(dmacRegs.ctrl.DMAE) || psHu8(DMAC_ENABLER+2) == 1)
if (!dmacRegs.ctrl.DMAE || psHu8(DMAC_ENABLER+2) == 1)
{
//DevCon.Warning("DMAC Suspended or Disabled on interrupt");
return;
return 0;
}
HW_LOG("dmacInterrupt %x", (psHu16(DMAC_STAT + 2) & psHu16(DMAC_STAT) |
psHu16(DMAC_STAT) & 0x8000));
psHu16(DMAC_STAT) & 0x8000));
cpuException(0x800, cpuRegs.branch);
//cpuException(0x800, cpuRegs.branch);
return 0x800;
}
void hwIntcIrq(int n)

View File

@ -55,9 +55,6 @@ int coded_block_pattern = 0;
u8 indx4[16*16/2];
__aligned16 decoder_t decoder;
__aligned16 u8 _readbits[80]; //local buffer (ring buffer)
u8* readbits = _readbits; // always can decrement by one 1qw
__fi void IPUProcessInterrupt()
{
if (ipuRegs.ctrl.BUSY && g_BP.IFC) IPUWorker();
@ -96,8 +93,6 @@ void ReportIPU()
Console.WriteLn("g_decoder = 0x%x.", &decoder);
Console.WriteLn("mpeg2_scan = 0x%x.", &mpeg2_scan);
Console.WriteLn(ipu_cmd.desc());
Console.WriteLn("_readbits = 0x%x. readbits - _readbits, which is also frozen, is 0x%x.",
_readbits, readbits - _readbits);
Console.Newline();
}
@ -114,15 +109,6 @@ void SaveStateBase::ipuFreeze()
Freeze(coded_block_pattern);
Freeze(decoder);
Freeze(ipu_cmd);
Freeze(_readbits);
int temp = readbits - _readbits;
Freeze(temp);
if (IsLoading())
{
readbits = _readbits;
}
}
void tIPU_CMD_IDEC::log() const
@ -213,21 +199,27 @@ __fi u32 ipuRead32(u32 mem)
switch (mem)
{
ipucase(IPU_CTRL): // IPU_CTRL
{
ipuRegs.ctrl.IFC = g_BP.IFC;
ipuRegs.ctrl.CBP = coded_block_pattern;
if (!ipuRegs.ctrl.BUSY)
IPU_LOG("read32: IPU_CTRL=0x%08X", ipuRegs.ctrl._u32);
return ipuRegs.ctrl._u32;
return ipuRegs.ctrl._u32;
}
ipucase(IPU_BP): // IPU_BP
{
pxAssume(g_BP.FP <= 2);
ipuRegs.ipubp = g_BP.BP & 0x7f;
ipuRegs.ipubp |= g_BP.IFC << 8;
ipuRegs.ipubp |= (g_BP.FP /*+ g_BP.bufferhasnew*/) << 16;
ipuRegs.ipubp |= g_BP.FP << 16;
IPU_LOG("read32: IPU_BP=0x%08X", ipuRegs.ipubp);
return ipuRegs.ipubp;
return ipuRegs.ipubp;
}
default:
IPU_LOG("read32: Addr=0x%08X Value = 0x%08X", mem, psHu32(IPU_CMD + mem));
@ -283,9 +275,7 @@ void ipuSoftReset()
ipu_cmd.clear();
ipuRegs.cmd.BUSY = 0;
g_BP.BP = 0;
g_BP.FP = 0;
//g_BP.bufferhasnew = 0;
memzero(g_BP);
}
__fi bool ipuWrite32(u32 mem, u32 value)
@ -354,12 +344,11 @@ static void ipuBCLR(u32 val)
{
ipu_fifo.in.clear();
memzero(g_BP);
g_BP.BP = val & 0x7F;
g_BP.FP = 0;
//g_BP.bufferhasnew = 0;
ipuRegs.ctrl.BUSY = 0;
ipuRegs.cmd.BUSY = 0;
memzero(_readbits);
IPU_LOG("Clear IPU input FIFO. Set Bit offset=0x%X", g_BP.BP);
}
@ -370,7 +359,7 @@ static bool ipuIDEC(u32 val, bool resume)
if (!resume)
{
idec.log();
g_BP.BP += idec.FB;//skip FB bits
g_BP.Advance(idec.FB);
//from IPU_CTRL
ipuRegs.ctrl.PCT = I_TYPE; //Intra DECoding;)
@ -407,7 +396,7 @@ static __fi bool ipuBDEC(u32 val, bool resume)
bdec.log(s_bdec);
if (IsDebugBuild) s_bdec++;
g_BP.BP += bdec.FB;//skip FB bits
g_BP.Advance(bdec.FB);
decoder.coding_type = I_TYPE;
decoder.mpeg1 = ipuRegs.ctrl.MP1;
decoder.q_scale_type = ipuRegs.ctrl.QST;
@ -433,11 +422,7 @@ static bool __fastcall ipuVDEC(u32 val)
switch (ipu_cmd.pos[0])
{
case 0:
ipuRegs.cmd.DATA = 0;
if (!getBits32((u8*)&decoder.bitstream_buf, 0)) return false;
decoder.bitstream_bits = -16;
BigEndian(decoder.bitstream_buf, decoder.bitstream_buf);
if (!bitstream_init()) return false;
switch ((val >> 26) & 3)
{
@ -459,17 +444,14 @@ static bool __fastcall ipuVDEC(u32 val)
case 3://DMVector
ipuRegs.cmd.DATA = get_dmv();
break;
jNO_DEFAULT
}
g_BP.BP += (int)decoder.bitstream_bits + 16;
ipuRegs.cmd.DATA &= 0xFFFF;
ipuRegs.cmd.DATA |= 0x10000;
if ((int)g_BP.BP < 0)
{
g_BP.BP += 128;
ReorderBitstream();
}
ipuRegs.cmd.DATA = (ipuRegs.cmd.DATA & 0xFFFF) | ((decoder.bitstream_bits + 16) << 16);
//ipuRegs.cmd.DATA = (ipuRegs.cmd.DATA & 0xFFFF) | ((decoder.bitstream_bits + 16) << 16);
ipuRegs.ctrl.ECD = (ipuRegs.cmd.DATA == 0);
case 1:
@ -479,14 +461,14 @@ static bool __fastcall ipuVDEC(u32 val)
return false;
}
BigEndian(ipuRegs.top, ipuRegs.top);
ipuRegs.top = BigEndian(ipuRegs.top);
IPU_LOG("VDEC command data 0x%x(0x%x). Skip 0x%X bits/Table=%d (%s), pct %d",
ipuRegs.cmd.DATA, ipuRegs.cmd.DATA >> 16, val & 0x3f, (val >> 26) & 3, (val >> 26) & 1 ?
((val >> 26) & 2 ? "DMV" : "MBT") : (((val >> 26) & 2 ? "MC" : "MBAI")), ipuRegs.ctrl.PCT);
return true;
jNO_DEFAULT
jNO_DEFAULT
}
return false;
@ -496,7 +478,7 @@ static __fi bool ipuFDEC(u32 val)
{
if (!getBits32((u8*)&ipuRegs.cmd.DATA, 0)) return false;
BigEndian(ipuRegs.cmd.DATA, ipuRegs.cmd.DATA);
ipuRegs.cmd.DATA = BigEndian(ipuRegs.cmd.DATA);
ipuRegs.top = ipuRegs.cmd.DATA;
IPU_LOG("FDEC read: 0x%08x", ipuRegs.top);
@ -553,11 +535,10 @@ static bool ipuSETVQ(u32 val)
if (!getBits64(((u8*)vqclut) + 8 * ipu_cmd.pos[0], 1)) return false;
}
IPU_LOG("SETVQ command.\nRead VQCLUT table from FIFO.");
IPU_LOG(
"%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d "
"%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d"
"%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d "
IPU_LOG("SETVQ command. Read VQCLUT table from FIFO.\n"
"%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d\n"
"%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d\n"
"%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d\n"
"%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d",
vqclut[0] >> 10, (vqclut[0] >> 5) & 0x1F, vqclut[0] & 0x1F,
vqclut[1] >> 10, (vqclut[1] >> 5) & 0x1F, vqclut[1] & 0x1F,
@ -723,148 +704,48 @@ __fi void ipu_vq(macroblock_rgb16& rgb16, u8* indx4)
Console.Error("IPU: VQ not implemented");
}
__fi void ipu_copy(const macroblock_8& mb8, macroblock_16& mb16)
{
const u8 *s = (const u8*)&mb8;
s16 *d = (s16*)&mb16;
int i;
for (i = 0; i < 256; i++) *d++ = *s++; //Y bias - 16
for (i = 0; i < 64; i++) *d++ = *s++; //Cr bias - 128
for (i = 0; i < 64; i++) *d++ = *s++; //Cb bias - 128
}
// --------------------------------------------------------------------------------------
// Buffer reader
// --------------------------------------------------------------------------------------
// move the readbits queue
__fi void inc_readbits()
__ri u32 UBITS(uint bits)
{
readbits += 16;
if (readbits >= _readbits + 64)
{
// move back
*(u64*)(_readbits) = *(u64*)(_readbits + 64);
*(u64*)(_readbits + 8) = *(u64*)(_readbits + 72);
readbits = _readbits;
}
uint readpos8 = g_BP.BP/8;
uint result = BigEndian(*(u32*)( (u8*)g_BP.internal_qwc + readpos8 ));
uint bp7 = (g_BP.BP & 7);
result <<= bp7;
result >>= (32 - bits);
return result;
}
// returns the pointer of readbits moved by 1 qword
__fi u8* next_readbits()
__ri s32 SBITS(uint bits)
{
return readbits + 16;
}
// Read an unaligned 32 bit value and then shift the bits up and then back down.
// returns the pointer of readbits moved by 1 qword
u8* prev_readbits()
{
if (readbits < _readbits + 16) return _readbits + 48 - (readbits - _readbits);
uint readpos8 = g_BP.BP/8;
return readbits - 16;
}
int result = BigEndian(*(s32*)( (s8*)g_BP.internal_qwc + readpos8 ));
uint bp7 = (g_BP.BP & 7);
result <<= bp7;
result >>= (32 - bits);
void ReorderBitstream()
{
readbits = prev_readbits();
g_BP.FP = 2;
}
// IPU has a 2qword internal buffer whose status is pointed by FP.
// If FP is 1, there's 1 qword in buffer. Second qword is only loaded
// incase there are less than 32bits available in the first qword.
// \return Number of bits available (clamps at 16 bits)
u16 __fastcall FillInternalBuffer(u32 * pointer, u32 advance, u32 size)
{
if (g_BP.FP == 0)
{
if (ipu_fifo.in.read(next_readbits()) == 0) return 0;
inc_readbits();
g_BP.FP = 1;
}
if ((g_BP.FP < 2) && ((*(int*)pointer + size) >= 128))
{
if (ipu_fifo.in.read(next_readbits())) g_BP.FP += 1;
}
if (*(int*)pointer >= 128)
{
pxAssert(g_BP.FP >= 1);
if (g_BP.FP > 1) inc_readbits();
if (advance)
{
g_BP.FP--;
*pointer &= 127;
}
}
return (g_BP.FP >= 1) ? g_BP.FP * 128 - (*(int*)pointer) : 0;
return result;
}
// whenever reading fractions of bytes. The low bits always come from the next byte
// while the high bits come from the current byte
u8 __fastcall getBits128(u8 *address, u32 advance)
u8 getBits64(u8 *address, bool advance)
{
u64 mask2;
u128 mask;
u8* readpos;
if (!g_BP.FillBuffer(64)) return 0;
// Check if the current BP has exceeded or reached the limit of 128
if (FillInternalBuffer(&g_BP.BP, 1, 128) < 128) return 0;
readpos = readbits + (int)g_BP.BP / 8;
const u8* readpos = &g_BP.internal_qwc[0]._u8[g_BP.BP/8];
if (uint shift = (g_BP.BP & 7))
{
mask2 = 0xff >> shift;
mask.lo = mask2 | (mask2 << 8) | (mask2 << 16) | (mask2 << 24) | (mask2 << 32) | (mask2 << 40) | (mask2 << 48) | (mask2 << 56);
mask.hi = mask2 | (mask2 << 8) | (mask2 << 16) | (mask2 << 24) | (mask2 << 32) | (mask2 << 40) | (mask2 << 48) | (mask2 << 56);
u128 notMask;
u128 data = *(u128*)(readpos + 1);
notMask.lo = ~mask.lo & data.lo;
notMask.hi = ~mask.hi & data.hi;
notMask.lo >>= 8 - shift;
notMask.lo |= (notMask.hi & (ULLONG_MAX >> (64 - shift))) << (64 - shift);
notMask.hi >>= 8 - shift;
mask.hi = (((*(u128*)readpos).hi & mask.hi) << shift) | (((*(u128*)readpos).lo & mask.lo) >> (64 - shift));
mask.lo = ((*(u128*)readpos).lo & mask.lo) << shift;
notMask.lo |= mask.lo;
notMask.hi |= mask.hi;
*(u128*)address = notMask;
}
else
{
*(u128*)address = *(u128*)readpos;
}
if (advance) g_BP.BP += 128;
return 1;
}
// whenever reading fractions of bytes. The low bits always come from the next byte
// while the high bits come from the current byte
u8 __fastcall getBits64(u8 *address, u32 advance)
{
register u64 mask = 0;
u8* readpos;
// Check if the current BP has exceeded or reached the limit of 128
if (FillInternalBuffer(&g_BP.BP, 1, 64) < 64) return 0;
readpos = readbits + (int)g_BP.BP / 8;
if (uint shift = (g_BP.BP & 7))
{
mask = (0xff >> shift);
u64 mask = (0xff >> shift);
mask = mask | (mask << 8) | (mask << 16) | (mask << 24) | (mask << 32) | (mask << 40) | (mask << 48) | (mask << 56);
*(u64*)address = ((~mask & *(u64*)(readpos + 1)) >> (8 - shift)) | (((mask) & *(u64*)readpos) << shift);
@ -874,89 +755,76 @@ u8 __fastcall getBits64(u8 *address, u32 advance)
*(u64*)address = *(u64*)readpos;
}
if (advance) g_BP.BP += 64;
if (advance) g_BP.Advance(64);
return 1;
}
// whenever reading fractions of bytes. The low bits always come from the next byte
// while the high bits come from the current byte
u8 __fastcall getBits32(u8 *address, u32 advance)
__fi u8 getBits32(u8 *address, bool advance)
{
u32 mask;
u8* readpos;
if (!g_BP.FillBuffer(32)) return 0;
// Check if the current BP has exceeded or reached the limit of 128
if (FillInternalBuffer(&g_BP.BP, 1, 32) < 32) return 0;
readpos = readbits + (int)g_BP.BP / 8;
if (uint shift = (g_BP.BP & 7))
const u8* readpos = &g_BP.internal_qwc->_u8[g_BP.BP/8];
if(uint shift = (g_BP.BP & 7))
{
mask = (0xff >> shift);
u32 mask = (0xff >> shift);
mask = mask | (mask << 8) | (mask << 16) | (mask << 24);
*(u32*)address = ((~mask & *(u32*)(readpos + 1)) >> (8 - shift)) | (((mask) & *(u32*)readpos) << shift);
}
else
{
// Bit position-aligned -- no masking/shifting necessary
*(u32*)address = *(u32*)readpos;
}
if (advance) g_BP.BP += 32;
if (advance) g_BP.Advance(32);
return 1;
}
__fi u8 __fastcall getBits16(u8 *address, u32 advance)
__fi u8 getBits16(u8 *address, bool advance)
{
u32 mask;
u8* readpos;
if (!g_BP.FillBuffer(16)) return 0;
// Check if the current BP has exceeded or reached the limit of 128
if (FillInternalBuffer(&g_BP.BP, 1, 16) < 16) return 0;
readpos = readbits + (int)g_BP.BP / 8;
const u8* readpos = &g_BP.internal_qwc[0]._u8[g_BP.BP/8];
if (uint shift = (g_BP.BP & 7))
{
mask = (0xff >> shift);
uint mask = (0xff >> shift);
mask = mask | (mask << 8);
*(u16*)address = ((~mask & *(u16*)(readpos + 1)) >> (8 - shift)) | (((mask) & *(u16*)readpos) << shift);
}
}
else
{
*(u16*)address = *(u16*)readpos;
}
}
if (advance) g_BP.BP += 16;
if (advance) g_BP.Advance(16);
return 1;
}
u8 __fastcall getBits8(u8 *address, u32 advance)
u8 getBits8(u8 *address, bool advance)
{
u32 mask;
u8* readpos;
if (!g_BP.FillBuffer(8)) return 0;
// Check if the current BP has exceeded or reached the limit of 128
if (FillInternalBuffer(&g_BP.BP, 1, 8) < 8)
return 0;
readpos = readbits + (int)g_BP.BP / 8;
const u8* readpos = &g_BP.internal_qwc[0]._u8[g_BP.BP/8];
if (uint shift = (g_BP.BP & 7))
{
mask = (0xff >> shift);
{
uint mask = (0xff >> shift);
*(u8*)address = (((~mask) & readpos[1]) >> (8 - shift)) | (((mask) & *readpos) << shift);
}
}
else
{
*(u8*)address = *(u8*)readpos;
}
}
if (advance) g_BP.BP += 8;
if (advance) g_BP.Advance(8);
return 1;
}
@ -983,7 +851,7 @@ void IPUCMD_WRITE(u32 val)
case SCE_IPU_VDEC:
g_BP.BP += val & 0x3F;
g_BP.Advance(val & 0x3F);
// check if enough data in queue
if (ipuVDEC(val)) return;
@ -993,9 +861,11 @@ void IPUCMD_WRITE(u32 val)
break;
case SCE_IPU_FDEC:
IPU_LOG("FDEC command. Skip 0x%X bits, FIFO 0x%X qwords, BP 0x%X, FP %d, CHCR 0x%x",
val & 0x3f, g_BP.IFC, (int)g_BP.BP, g_BP.FP, ipu1dma.chcr._u32);
g_BP.BP += val & 0x3F;
IPU_LOG("FDEC command. Skip 0x%X bits, FIFO 0x%X qwords, BP 0x%X, CHCR 0x%x",
val & 0x3f, g_BP.IFC, (int)g_BP.BP, ipu1dma.chcr._u32);
g_BP.Advance(val & 0x3F);
if (ipuFDEC(val)) return;
ipuRegs.cmd.BUSY = 0x80000000;
ipuRegs.topbusy = 0x80000000;
@ -1009,7 +879,7 @@ void IPUCMD_WRITE(u32 val)
case SCE_IPU_SETIQ:
IPU_LOG("SETIQ command.");
if (val & 0x3f) IPU_LOG("Skip %d bits.", val & 0x3f);
g_BP.BP += val & 0x3F;
g_BP.Advance(val & 0x3F);
if (ipuSETIQ(val)) return;
break;

View File

@ -67,11 +67,66 @@ union tIPU_CTRL {
void reset() { _u32 = 0; }
};
struct tIPU_BP {
u32 BP; // Bit stream point
u16 IFC; // Input FIFO counter
u8 FP; // FIFO point
u8 bufferhasnew; // Always 0.
struct __aligned16 tIPU_BP {
__aligned16 u128 internal_qwc[2];
u32 BP; // Bit stream point (0 to 128*2)
u32 IFC; // Input FIFO counter (8QWC) (0 to 8)
u32 FP; // internal FIFO (2QWC) fill status (0 to 2)
// Moves the bit pointer forward to the next byte boundary (no-op when BP is
// already a multiple of 8).
__fi void Align()
{
	// Round BP up to the nearest multiple of 8.
	BP += 7;
	BP &= ~7;

	// A zero-bit advance adds nothing to BP, but it runs the same "crossed
	// into the second quadword" handling that every other advance goes through.
	Advance(0);
}
// Moves the bit pointer forward by `bits` and, when the pointer leaves the first
// quadword of internal_qwc, rebases it and updates the fill count (FP).
__fi void Advance(uint bits)
{
	BP += bits;
	pxAssume( BP <= 256 );

	// Still reading from the first quadword -- nothing else to do.
	if (BP <= 127) return;

	// BP now refers to data in the second quadword; rebase it onto the first.
	BP -= 128;

	if (FP != 2)
	{
		// FP == 1: the only buffered quadword has been completely drained.
		// FP == 0: an already-drained buffer is being advanced.
		// Either way the internal buffer is now empty.
		FP = 0;
		return;
	}

	// Both quadwords were filled: slide the second one into the front slot.
	// The vacated second slot is not reloaded here; FillBuffer() does that.
	CopyQWC(&internal_qwc[0], &internal_qwc[1]);
	FP = 1;
}
// Tries to top up the two-quadword internal buffer from the IPU input FIFO.
// Returns true when a read of `bits` bits starting at BP can be satisfied,
// false otherwise.
__fi bool FillBuffer(u32 bits)
{
	for (; FP < 2; ++FP)
	{
		// A non-zero result means a quadword was stored into slot FP.
		if (ipu_fifo.in.read(&internal_qwc[FP]) != 0) continue;

		// The FIFO had nothing to give. We always *try* to fill both slots, but
		// a failed fill is only an error if the request cannot be served from
		// what is already buffered.
		if (FP == 0) return false;		// nothing buffered at all
		return (BP + bits) <= 128;		// request must fit in the first quadword
	}

	return true;
}
wxString desc() const
{
return wxsFormat(L"Ipu BP: bp = 0x%x, IFC = 0x%x, FP = 0x%x.", BP, IFC, FP);
@ -217,10 +272,9 @@ extern void IPUCMD_WRITE(u32 val);
extern void ipuSoftReset();
extern void IPUProcessInterrupt();
extern u16 __fastcall FillInternalBuffer(u32 * pointer, u32 advance, u32 size);
extern u8 __fastcall getBits128(u8 *address, u32 advance);
extern u8 __fastcall getBits64(u8 *address, u32 advance);
extern u8 __fastcall getBits32(u8 *address, u32 advance);
extern u8 __fastcall getBits16(u8 *address, u32 advance);
extern u8 __fastcall getBits8(u8 *address, u32 advance);
extern u8 getBits128(u8 *address, bool advance);
extern u8 getBits64(u8 *address, bool advance);
extern u8 getBits32(u8 *address, bool advance);
extern u8 getBits16(u8 *address, bool advance);
extern u8 getBits8(u8 *address, bool advance);

View File

@ -19,7 +19,6 @@
#include "IPU/IPUdma.h"
#include "mpeg2lib/Mpeg.h"
__aligned16 IPU_Fifo ipu_fifo;
void IPU_Fifo::init()
@ -75,10 +74,7 @@ int IPU_Fifo_Input::write(u32* pMem, int size)
while (transsize-- > 0)
{
for (int i = 0; i <= 3; i++)
{
data[writepos + i] = pMem[i];
}
CopyQWC(&data[writepos], pMem);
writepos = (writepos + 4) & 31;
pMem += 4;
}
@ -86,118 +82,100 @@ int IPU_Fifo_Input::write(u32* pMem, int size)
return firsttrans;
}
int IPU_Fifo_Output::write(const u32 *value, int size)
{
int transsize, firsttrans;
if ((int)ipuRegs.ctrl.OFC >= 8) IPU0dma();
transsize = min(size, 8 - (int)ipuRegs.ctrl.OFC);
firsttrans = transsize;
while (transsize-- > 0)
{
for (int i = 0; i <= 3; i++)
{
data[writepos + i] = ((u32*)value)[i];
}
writepos = (writepos + 4) & 31;
value += 4;
}
ipuRegs.ctrl.OFC += firsttrans;
IPU0dma();
return firsttrans;
}
int IPU_Fifo_Input::read(void *value)
{
// wait until enough data to ensure proper streaming.
if (g_BP.IFC < 4)
if (g_BP.IFC < 3)
{
// IPU FIFO is empty and DMA is waiting so lets tell the DMA we are ready to put data in the FIFO
if(cpuRegs.eCycle[4] == 0x9999)
{
CPU_INT( DMAC_TO_IPU, 4 );
CPU_INT( DMAC_TO_IPU, 32 );
}
if (g_BP.IFC == 0) return 0;
pxAssert(g_BP.IFC > 0);
}
// transfer 1 qword, split into two transfers
for (int i = 0; i <= 3; i++)
{
((u32*)value)[i] = data[readpos + i];
data[readpos + i] = 0;
}
CopyQWC(value, &data[readpos]);
readpos = (readpos + 4) & 31;
g_BP.IFC--;
return 1;
}
void IPU_Fifo_Output::_readsingle(void *value)
int IPU_Fifo_Output::write(const u32 *value, uint size)
{
// transfer 1 qword, split into two transfers
for (int i = 0; i <= 3; i++)
pxAssumeMsg(size>0, "Invalid size==0 when calling IPU_Fifo_Output::write");
uint origsize = size;
do {
IPU0dma();
uint transsize = min(size, 8 - (uint)ipuRegs.ctrl.OFC);
if(!transsize) break;
ipuRegs.ctrl.OFC = transsize;
size -= transsize;
while (transsize > 0)
{
CopyQWC(&data[writepos], value);
writepos = (writepos + 4) & 31;
value += 4;
--transsize;
}
} while(true);
return origsize - size;
#if 0
if (ipuRegs.ctrl.OFC >= 8) IPU0dma();
uint transsize = min(size, 8 - (uint)ipuRegs.ctrl.OFC);
uint firsttrans = transsize;
while (transsize > 0)
{
((u32*)value)[i] = data[readpos + i];
data[readpos + i] = 0;
CopyQWC(&data[writepos], value);
writepos = (writepos + 4) & 31;
value += 4;
--transsize;
}
readpos = (readpos + 4) & 31;
ipuRegs.ctrl.OFC += firsttrans;
IPU0dma();
return firsttrans;
#endif
}
void IPU_Fifo_Output::read(void *value, int size)
void IPU_Fifo_Output::read(void *value, uint size)
{
pxAssume(ipuRegs.ctrl.OFC >= size);
ipuRegs.ctrl.OFC -= size;
// Zeroing the read data is not needed, since the ringbuffer design will never read back
// the zero'd data anyway. --air
//__m128 zeroreg = _mm_setzero_ps();
while (size > 0)
{
_readsingle(value);
value = (u32*)value + 4;
size--;
CopyQWC(value, &data[readpos]);
//_mm_store_ps((float*)&data[readpos], zeroreg);
readpos = (readpos + 4) & 31;
value = (u128*)value + 1;
--size;
}
}
void IPU_Fifo_Output::readsingle(void *value)
{
if (ipuRegs.ctrl.OFC > 0)
{
ipuRegs.ctrl.OFC--;
_readsingle(value);
}
}
// Copies one quadword of pending decoder output into `out`.
// Returns false (leaving `out` untouched) when no output data remains,
// true after a quadword has been copied and the counters advanced.
__fi bool decoder_t::ReadIpuData(u128* out)
{
	if (!ipu0_data)
	{
		IPU_LOG( "ReadFIFO/IPUout -> (fifo empty/no data available)" );
		return false;
	}

	// Fetch the current quadword before moving the read index forward.
	CopyQWC(out, GetIpuDataPtr());
	++ipu0_idx;
	--ipu0_data;

	IPU_LOG( "ReadFIFO/IPUout -> %ls", out->ToString().c_str() );
	return true;
}
void __fastcall ReadFIFO_IPUout(mem128_t* out)
{
// FIXME! When ReadIpuData() doesn't succeed (returns false), the EE should probably stall
// until a value becomes available. This isn't exactly easy to do since the virtualized EE
// in PCSX2 *has* to be running in order for the IPU DMA to upload new input data to allow
// IPUout's FIFO to fill. Thus if we implement an EE stall, PCSX2 deadlocks. Grr. --air
if (!pxAssertDev( ipuRegs.ctrl.OFC > 0, "Attempted read from IPUout's FIFO, but the FIFO is empty!" )) return;
ipu_fifo.out.read(out, 1);
if (decoder.ReadIpuData(out))
{
ipu_fifo.out.readpos = (ipu_fifo.out.readpos + 4) & 31;
}
// Games should always check the fifo before reading from it -- so if the FIFO has no data
// it's either some glitchy game or a bug in pcsx2.
}
void __fastcall WriteFIFO_IPUin(const mem128_t* value)

View File

@ -37,13 +37,10 @@ struct IPU_Fifo_Output
int readpos, writepos;
// returns number of qw read
int write(const u32 * value, int size);
void read(void *value,int size);
void readsingle(void *value);
int write(const u32 * value, uint size);
void read(void *value, uint size);
void clear();
wxString desc() const;
void _readsingle(void *value);
};
struct IPU_Fifo

View File

@ -189,7 +189,7 @@ int IPU1dma()
{
if(!WaitGSPaths())
{ // legacy WaitGSPaths() for now
IPU_INT_TO(4); //Give it a short wait.
IPU_INT_TO(32); //Give it a short wait.
return totalqwc;
}
IPU_LOG("Processing Normal QWC left %x Finished %d In Progress %d", ipu1dma.qwc, IPU1Status.DMAFinished, IPU1Status.InProgress);
@ -203,7 +203,7 @@ int IPU1dma()
{
if(!WaitGSPaths())
{ // legacy WaitGSPaths() for now
IPU_INT_TO(4); //Give it a short wait.
IPU_INT_TO(32); //Give it a short wait.
return totalqwc;
}
IPU_LOG("Processing Chain QWC left %x Finished %d In Progress %d", ipu1dma.qwc, IPU1Status.DMAFinished, IPU1Status.InProgress);
@ -283,7 +283,7 @@ int IPU1dma()
if(!WaitGSPaths() && ipu1dma.qwc > 0)
{ // legacy WaitGSPaths() for now
IPU_INT_TO(4); //Give it a short wait.
IPU_INT_TO(32); //Give it a short wait.
return totalqwc;
}
IPU_LOG("Processing Start Chain QWC left %x Finished %d In Progress %d", ipu1dma.qwc, IPU1Status.DMAFinished, IPU1Status.InProgress);
@ -312,8 +312,9 @@ int IPU1dma()
int IPU0dma()
{
if(!ipuRegs.ctrl.OFC) return 0;
int readsize;
static int totalsize = 0;
tDMA_TAG* pMem;
if ((!(ipu0dma.chcr.STR) || (cpuRegs.interrupt & (1 << DMAC_FROM_IPU))) || (ipu0dma.qwc == 0))
@ -329,7 +330,6 @@ int IPU0dma()
pMem = dmaGetAddr(ipu0dma.madr, true);
readsize = min(ipu0dma.qwc, (u16)ipuRegs.ctrl.OFC);
totalsize+=readsize;
ipu_fifo.out.read(pMem, readsize);
ipu0dma.madr += readsize << 4;
@ -363,7 +363,6 @@ int IPU0dma()
//This broke vids in Digital Devil Saga
//Note that interrupting based on totalsize is just guessing..
IPU_INT_FROM( readsize * BIAS );
totalsize = 0;
}
return readsize;

View File

@ -47,10 +47,14 @@ const int non_linear_quantizer_scale [] =
into 1st slot is copied to the 2nd slot. Which will later be copied
back to the 1st slot when 128bits have been read.
*/
extern void ReorderBitstream();
const DCTtab * tab;
int mbaCount = 0;
// Prepares the bitstream reader by making sure 32 bits can be read from the
// IPU's internal buffer. Returns non-zero on success, 0 when the data is not
// available yet.
int bitstream_init ()
{
	const bool ready = g_BP.FillBuffer(32);
	return ready ? 1 : 0;
}
int get_macroblock_modes()
{
int macroblock_modes;
@ -221,9 +225,7 @@ int __fi get_motion_delta(const int f_code)
int __fi get_dmv()
{
const DMVtab * tab;
tab = DMV_2 + UBITS(2);
const DMVtab* tab = DMV_2 + UBITS(2);
DUMPBITS(tab->len);
return tab->dmv;
}
@ -239,22 +241,21 @@ int get_macroblock_address_increment()
else if (code >= 768)
mba = MBA.mba11 + (UBITS(11) - 24);
else switch (UBITS(11))
{
{
case 8: /* macroblock_escape */
DUMPBITS(11);
return 0x23;
case 8: /* macroblock_escape */
case 15: /* macroblock_stuffing (MPEG1 only) */
if (decoder.mpeg1)
{
DUMPBITS(11);
return 0x23;
return 0x22;
}
case 15: /* macroblock_stuffing (MPEG1 only) */
if (decoder.mpeg1)
{
DUMPBITS(11);
return 0x22;
}
default:
return 0;//error
}
default:
return 0;//error
}
DUMPBITS(mba->len);
@ -336,11 +337,8 @@ do { \
val = (((s32)val) >> 31) ^ 2047; \
} while (0)
static __fi bool get_intra_block()
static bool get_intra_block()
{
int i;
int j;
int val;
const u8 * scan = decoder.scantype ? mpeg2_scan.alt : mpeg2_scan.norm;
const u8 (&quant_matrix)[64] = decoder.iq;
int quantizer_scale = decoder.quantizer_scale;
@ -348,7 +346,7 @@ static __fi bool get_intra_block()
u16 code;
/* decode AC coefficients */
for (i=1 + ipu_cmd.pos[4]; ; i++)
for (int i=1 + ipu_cmd.pos[4]; ; i++)
{
switch (ipu_cmd.pos[5])
{
@ -427,60 +425,65 @@ static __fi bool get_intra_block()
return true;
}
i+= tab->run == 65 ? GETBITS(6) : tab->run;
i += (tab->run == 65) ? GETBITS(6) : tab->run;
if (i >= 64)
{
ipu_cmd.pos[4] = 0;
return true;
}
case 1:
if (!GETWORD())
{
ipu_cmd.pos[4] = i - 1;
ipu_cmd.pos[5] = 1;
return false;
{
if (!GETWORD())
{
ipu_cmd.pos[4] = i - 1;
ipu_cmd.pos[5] = 1;
return false;
}
uint j = scan[i];
int val;
if (tab->run==65) /* escape */
{
if(!decoder.mpeg1)
{
val = (SBITS(12) * quantizer_scale * quant_matrix[i]) >> 4;
DUMPBITS(12);
}
else
{
val = SBITS(8);
DUMPBITS(8);
if (!(val & 0x7f))
{
val = GETBITS(8) + 2 * val;
}
val = (val * quantizer_scale * quant_matrix[i]) >> 4;
val = (val + ~ (((s32)val) >> 31)) | 1;
}
}
else
{
val = (tab->level * quantizer_scale * quant_matrix[i]) >> 4;
if(decoder.mpeg1)
{
/* oddification */
val = (val - 1) | 1;
}
/* if (bitstream_get (1)) val = -val; */
int bit1 = SBITS(1);
val = (val ^ bit1) - bit1;
DUMPBITS(1);
}
SATURATE(val);
dest[j] = val;
ipu_cmd.pos[5] = 0;
}
j = scan[i];
if (tab->run==65) /* escape */
{
if(!decoder.mpeg1)
{
val = (SBITS(12) * quantizer_scale * quant_matrix[i]) >> 4;
DUMPBITS(12);
}
else
{
val = SBITS(8);
DUMPBITS(8);
if (!(val & 0x7f))
{
val = GETBITS(8) + 2 * val;
}
val = (val * quantizer_scale * quant_matrix[i]) >> 4;
val = (val + ~ (((s32)val) >> 31)) | 1;
}
}
else
{
val = (tab->level * quantizer_scale * quant_matrix[i]) >> 4;
if(decoder.mpeg1)
{
/* oddification */
val = (val - 1) | 1;
}
/* if (bitstream_get (1)) val = -val; */
val = (val ^ SBITS(1)) - SBITS(1);
DUMPBITS(1);
}
SATURATE(val);
dest[j] = val;
ipu_cmd.pos[5] = 0;
}
}
@ -488,7 +491,7 @@ static __fi bool get_intra_block()
return true;
}
static __fi bool get_non_intra_block(int * last)
static bool get_non_intra_block(int * last)
{
int i;
int j;
@ -614,8 +617,9 @@ static __fi bool get_non_intra_block(int * last)
}
else
{
int bit1 = SBITS(1);
val = ((2 * tab->level + 1) * quantizer_scale * quant_matrix[i]) >> 5;
val = (val ^ SBITS(1)) - SBITS(1);
val = (val ^ bit1) - bit1;
DUMPBITS(1);
}
@ -682,25 +686,11 @@ void __fi finishmpeg2sliceIDEC()
{
ipuRegs.ctrl.SCD = 0;
coded_block_pattern = decoder.coded_block_pattern;
g_BP.BP += decoder.bitstream_bits - 16;
if ((int)g_BP.BP < 0)
{
g_BP.BP = 128 + (int)g_BP.BP;
// After BP is positioned correctly, we need to reload the old buffer
// so that reading may continue properly
ReorderBitstream();
}
FillInternalBuffer(&g_BP.BP, 1, 0);
}
bool mpeg2sliceIDEC()
{
u16 code;
u8 bit8;
switch (ipu_cmd.pos[0])
{
@ -798,6 +788,9 @@ bool mpeg2sliceIDEC()
ipu_cmd.pos[2] = 6;
return false;
}
break;
jNO_DEFAULT;
}
// Send The MacroBlock via DmaIpuFrom
@ -812,23 +805,23 @@ bool mpeg2sliceIDEC()
}
case 2:
while (decoder.ipu0_data > 0)
{
uint read = ipu_fifo.out.write((u32*)decoder.GetIpuDataPtr(), decoder.ipu0_data);
{
pxAssume(decoder.ipu0_data > 0);
if (read == 0)
{
ipu_cmd.pos[1] = 2;
return false;
}
else
{
decoder.AdvanceIpuDataBy(read);
}
uint read = ipu_fifo.out.write((u32*)decoder.GetIpuDataPtr(), decoder.ipu0_data);
decoder.AdvanceIpuDataBy(read);
if (decoder.ipu0_data != 0)
{
// IPU FIFO filled up -- Will have to finish transferring later.
ipu_cmd.pos[1] = 2;
return false;
}
decoder.mbc++;
mbaCount = 0;
}
case 3:
while (1)
{
@ -851,18 +844,18 @@ bool mpeg2sliceIDEC()
}
else switch (UBITS(11))
{
case 8: /* macroblock_escape */
mbaCount += 33;
/* pass through */
case 8: /* macroblock_escape */
mbaCount += 33;
/* pass through */
case 15: /* macroblock_stuffing (MPEG1 only) */
DUMPBITS(11);
continue;
case 15: /* macroblock_stuffing (MPEG1 only) */
DUMPBITS(11);
continue;
default: /* end of slice/frame, or error? */
{
goto finish_idec;
}
default: /* end of slice/frame, or error? */
{
goto finish_idec;
}
}
}
@ -886,17 +879,20 @@ bool mpeg2sliceIDEC()
}
break;
jNO_DEFAULT;
}
ipu_cmd.pos[1] = 0;
ipu_cmd.pos[2] = 0;
}
finish_idec:
finishmpeg2sliceIDEC();
case 3:
bit8 = 1;
{
u8 bit8;
if (!getBits8((u8*)&bit8, 0))
{
ipu_cmd.pos[0] = 3;
@ -905,10 +901,10 @@ finish_idec:
if (bit8 == 0)
{
if (g_BP.BP & 7) g_BP.BP += 8 - (g_BP.BP & 7);
g_BP.Align();
ipuRegs.ctrl.SCD = 1;
}
}
case 4:
if (!getBits32((u8*)&ipuRegs.top, 0))
@ -917,8 +913,10 @@ finish_idec:
return false;
}
BigEndian(ipuRegs.top, ipuRegs.top);
ipuRegs.top = BigEndian(ipuRegs.top);
break;
jNO_DEFAULT;
}
return true;
@ -927,7 +925,6 @@ finish_idec:
bool mpeg2_slice()
{
int DCT_offset, DCT_stride;
u8 bit8;
macroblock_8& mb8 = decoder.mb8;
macroblock_16& mb16 = decoder.mb16;
@ -1010,9 +1007,35 @@ bool mpeg2_slice()
return false;
}
break;
jNO_DEFAULT;
}
ipu_copy(mb8, mb16);
// Copy macroblock8 to macroblock16 - without sign extension.
// Manually inlined due to MSVC refusing to inline the SSE-optimized version.
{
const u8 *s = (const u8*)&mb8;
u16 *d = (u16*)&mb16;
//Y bias - 16 * 16
//Cr bias - 8 * 8
//Cb bias - 8 * 8
__m128i zeroreg = _mm_setzero_si128();
for (uint i = 0; i < (256+64+64) / 32; ++i)
{
//*d++ = *s++;
__m128i woot1 = _mm_load_si128((__m128i*)s);
__m128i woot2 = _mm_load_si128((__m128i*)s+1);
_mm_store_si128((__m128i*)d, _mm_unpacklo_epi8(woot1, zeroreg));
_mm_store_si128((__m128i*)d+1, _mm_unpackhi_epi8(woot1, zeroreg));
_mm_store_si128((__m128i*)d+2, _mm_unpacklo_epi8(woot2, zeroreg));
_mm_store_si128((__m128i*)d+3, _mm_unpackhi_epi8(woot2, zeroreg));
s += 32;
d += 32;
}
}
}
else
{
@ -1077,6 +1100,8 @@ bool mpeg2_slice()
}
}
break;
jNO_DEFAULT;
}
}
}
@ -1084,40 +1109,31 @@ bool mpeg2_slice()
// Send The MacroBlock via DmaIpuFrom
ipuRegs.ctrl.SCD = 0;
coded_block_pattern = decoder.coded_block_pattern;
g_BP.BP += (int)decoder.bitstream_bits - 16;
// BP goes from 0 to 128, so negative values mean to read old buffer
// so we minus from 128 to get the correct BP
if ((int)g_BP.BP < 0)
{
g_BP.BP = 128 + (int)g_BP.BP;
// After BP is positioned correctly, we need to reload the old buffer
// so that reading may continue properly
ReorderBitstream();
}
decoder.mbc = 1;
decoder.SetOutputTo(mb16);
case 3:
while (decoder.ipu0_data > 0)
{
uint size = ipu_fifo.out.write((u32*)decoder.GetIpuDataPtr(), decoder.ipu0_data);
{
pxAssume(decoder.ipu0_data > 0);
if (size == 0)
{
ipu_cmd.pos[0] = 3;
return false;
}
else
{
decoder.AdvanceIpuDataBy(size);
}
uint read = ipu_fifo.out.write((u32*)decoder.GetIpuDataPtr(), decoder.ipu0_data);
decoder.AdvanceIpuDataBy(read);
if (decoder.ipu0_data != 0)
{
// IPU FIFO filled up -- Will have to finish transferring later.
ipu_cmd.pos[0] = 3;
return false;
}
decoder.mbc++;
mbaCount = 0;
}
case 4:
bit8 = 1;
{
u8 bit8;
if (!getBits8((u8*)&bit8, 0))
{
ipu_cmd.pos[0] = 4;
@ -1126,11 +1142,11 @@ bool mpeg2_slice()
if (bit8 == 0)
{
if (g_BP.BP & 7) g_BP.BP += 8 - (g_BP.BP & 7);
g_BP.Align();
ipuRegs.ctrl.SCD = 1;
}
}
case 5:
if (!getBits32((u8*)&ipuRegs.top, 0))
{
@ -1138,8 +1154,7 @@ bool mpeg2_slice()
return false;
}
BigEndian(ipuRegs.top, ipuRegs.top);
decoder.bitstream_bits = 0;
ipuRegs.top = BigEndian(ipuRegs.top);
break;
}

View File

@ -148,12 +148,12 @@ struct decoder_t {
macroblock_rgb32 rgb32;
macroblock_rgb16 rgb16;
uint ipu0_data;
uint ipu0_data; // amount of data in the output macroblock (in QWC)
uint ipu0_idx;
/* bit parsing stuff */
u32 bitstream_buf; /* current 32 bit working set */
int bitstream_bits; /* used bits in working set */
//u32 bitstream_buf; /* current 32 bit working set */
//int bitstream_bits; /* used bits in working set */
int quantizer_scale; /* remove */
int dmv_offset; /* remove */
@ -230,7 +230,7 @@ struct decoder_t {
ipu0_data -= amt;
}
bool ReadIpuData(u128* out);
__fi bool ReadIpuData(u128* out);
};
struct mpeg2_scan_pack
@ -241,6 +241,10 @@ struct mpeg2_scan_pack
mpeg2_scan_pack();
};
extern int bitstream_init ();
extern u32 UBITS(uint bits);
extern s32 SBITS(uint bits);
extern void mpeg2_idct_copy(s16 * block, u8* dest, int stride);
extern void mpeg2_idct_add(int last, s16 * block, s16* dest, int stride);
@ -258,20 +262,19 @@ extern int get_dmv();
extern void ipu_csc(macroblock_8& mb8, macroblock_rgb32& rgb32, int sgn);
extern void ipu_dither(const macroblock_rgb32& rgb32, macroblock_rgb16& rgb16, int dte);
extern void ipu_vq(macroblock_rgb16& rgb16, u8* indx4);
extern void ipu_copy(const macroblock_8& mb8, macroblock_16& mb16);
extern int slice (u8 * buffer);
#ifdef _MSC_VER
#define BigEndian(out, in) out = _byteswap_ulong(in)
#define BigEndian(in) _byteswap_ulong(in)
#else
#define BigEndian(out, in) out = __builtin_bswap32(in) // or we could use the asm function bswap...
#define BigEndian(in) __builtin_bswap32(in) // or we could use the asm function bswap...
#endif
#ifdef _MSC_VER
#define BigEndian64(out, in) out = _byteswap_uint64(in)
#define BigEndian64(in) _byteswap_uint64(in)
#else
#define BigEndian64(out, in) out = __builtin_bswap64(in) // or we could use the asm function bswap...
#define BigEndian64(in) __builtin_bswap64(in) // or we could use the asm function bswap...
#endif
extern __aligned16 const mpeg2_scan_pack mpeg2_scan;

View File

@ -30,65 +30,24 @@
#ifndef __VLC_H__
#define __VLC_H__
//static u8 word[4];
//static u8 dword[8];
//static u8 qword[16];
static __fi int GETWORD()
{
static u8 data[2];
if (decoder.bitstream_bits > 0)
{
if(!getBits16(data,1))
{
return 0;
}
/*u32 data;
BigEndian(data, *(u32*)word);
decoder.bitstream_buf |= (u64)data << decoder.bitstream_bits;
decoder.bitstream_bits -= 32;*/
decoder.bitstream_buf |= (((u32)data[0] << 8) | data[1]) << decoder.bitstream_bits;
decoder.bitstream_bits -= 16;
}
return 1;
return g_BP.FillBuffer(16);
}
static __fi int bitstream_init ()
// Removes bits from the bitstream. This is done independently of UBITS/SBITS because a
// lot of mpeg streams have to read ahead and rewind bits and re-read them at different
// bit depths or sign'age.
static __fi void DUMPBITS(uint num)
{
if (!getBits32((u8*)&decoder.bitstream_buf, 1))
{
return 0;
}
decoder.bitstream_bits = -16;
BigEndian(decoder.bitstream_buf, decoder.bitstream_buf);
/*decoder.bitstream_buf = *(u64*)dword;
BigEndian64(decoder.bitstream_buf, decoder.bitstream_buf);*/
return 1;
g_BP.Advance(num);
//pxAssume(g_BP.FP != 0);
}
/* remove num valid bits from bit_buf */
static __fi void DUMPBITS(int num)
static __fi u32 GETBITS(uint num)
{
decoder.bitstream_buf <<= num;
decoder.bitstream_bits += num;
}
/* take num bits from the high part of bit_buf and zero extend them */
#define UBITS(num) (((u32)decoder.bitstream_buf) >> (32 - (num)))
/* take num bits from the high part of bit_buf and sign extend them */
#define SBITS(num) (((s32)decoder.bitstream_buf) >> (32 - (num)))
/* Get bits from bitstream */
static __fi u32 GETBITS(int num)
{
u16 retVal = UBITS(num);
DUMPBITS(num);
uint retVal = UBITS(num);
g_BP.Advance(num);
return retVal;
}

View File

@ -130,7 +130,7 @@ __ri void cpuException(u32 code, u32 bd)
//Reset / NMI
cpuRegs.pc = 0xBFC00000;
Console.Warning("Reset request");
UpdateCP0Status();
cpuUpdateOperationMode();
return;
}
else if((code & 0x38000) == 0x10000)
@ -167,7 +167,7 @@ __ri void cpuException(u32 code, u32 bd)
else
cpuRegs.pc = 0xBFC00200 + offset;
UpdateCP0Status();
cpuUpdateOperationMode();
}
void cpuTlbMiss(u32 addr, u32 bd, u32 excode)
@ -196,7 +196,7 @@ void cpuTlbMiss(u32 addr, u32 bd, u32 excode)
}
cpuRegs.CP0.n.Status.b.EXL = 1;
UpdateCP0Status();
cpuUpdateOperationMode();
// Log=1; varLog|= 0x40000000;
}
@ -208,33 +208,6 @@ void cpuTlbMissW(u32 addr, u32 bd) {
cpuTlbMiss(addr, bd, EXC_CODE_TLBS);
}
// Diagnostic check: logs a console error when an INTC interrupt looks like it
// should be pending but event 30 is not flagged in cpuRegs.interrupt.
__fi void _cpuTestMissingINTC() {
    // Status bit 0x400 clear: nothing to check.
    if ((cpuRegs.CP0.n.Status.val & 0x400) == 0) return;

    // No bit is set in both INTC_STAT and INTC_MASK, so nothing is pending.
    if ((psHu32(INTC_STAT) & psHu32(INTC_MASK)) == 0) return;

    // Bit 30 of cpuRegs.interrupt is already set, so nothing is missing.
    if ((cpuRegs.interrupt & (1 << 30)) != 0) return;

    Console.Error("*PCSX2*: Error, missing INTC Interrupt");
}
// Diagnostic check: logs a console error when a DMAC interrupt looks like it
// should be pending but event 31 is not flagged in cpuRegs.interrupt.
__fi void _cpuTestMissingDMAC() {
    // Status bit 0x800 clear: nothing to check.
    if ((cpuRegs.CP0.n.Status.val & 0x800) == 0) return;

    // NOTE(review): 0xe010 / 0xe012 are presumably the two 16-bit halves of
    // DMAC_STAT -- confirm against the hardware register map.
    // Either a bit is set in both halves, or bit 15 of the lower half is set.
    const bool pending = (psHu16(0xe012) & psHu16(0xe010)) ||
                         (psHu16(0xe010) & 0x8000);
    if (!pending) return;

    // Bit 31 of cpuRegs.interrupt is already set, so nothing is missing.
    if ((cpuRegs.interrupt & (1 << 31)) != 0) return;

    Console.Error("*PCSX2*: Error, missing DMAC Interrupt");
}
// Runs the "missing interrupt" diagnostics for INTC and DMAC, but only when
// the masked COP0 Status bits (0x10007) are exactly 0x10001.
// NOTE(review): that pattern presumably means "interrupts globally enabled and
// not inside an exception/error level" -- confirm against the Status bit layout.
void cpuTestMissingHwInts() {
    const bool statusMatches = (cpuRegs.CP0.n.Status.val & 0x10007) == 0x10001;
    if (!statusMatches) return;

    _cpuTestMissingINTC();
    _cpuTestMissingDMAC();
    // _cpuTestTIMR();
}
// sets a branch test to occur some time from an arbitrary starting point.
__fi void cpuSetNextEvent( u32 startCycle, s32 delta )
{
@ -253,7 +226,7 @@ __fi void cpuSetNextEventDelta( s32 delta )
cpuSetNextEvent( cpuRegs.cycle, delta );
}
// tests the cpu cycle agaisnt the given start and delta values.
// tests the cpu cycle against the given start and delta values.
// Returns true if the delta time has passed.
__fi int cpuTestCycle( u32 startCycle, s32 delta )
{
@ -361,8 +334,8 @@ static bool cpuIntsEnabled(int Interrupt)
{
bool IntType = !!(cpuRegs.CP0.n.Status.val & Interrupt); //Choose either INTC or DMAC, depending on what called it
return cpuRegs.CP0.n.Status.b.EIE && cpuRegs.CP0.n.Status.b.IE &&
!cpuRegs.CP0.n.Status.b.EXL && (cpuRegs.CP0.n.Status.b.ERL == 0) && IntType;
return IntType && cpuRegs.CP0.n.Status.b.EIE && cpuRegs.CP0.n.Status.b.IE &&
!cpuRegs.CP0.n.Status.b.EXL && (cpuRegs.CP0.n.Status.b.ERL == 0);
}
// if cpuRegs.cycle is greater than this cycle, should check cpuEventTest for updates
@ -375,10 +348,19 @@ __fi void _cpuEventTest_Shared()
ScopedBool etest(eeEventTestIsActive);
g_nextEventCycle = cpuRegs.cycle + eeWaitCycles;
// ---- INTC / DMAC (CPU-level Exceptions) -----------------
// Done first because exceptions raised during event tests need to be postponed a few
// cycles (fixes Grandia II [PAL], which does a spin loop on a vsync and expects to
// be able to read the value before the exception handler clears it).
uint mask = intcInterrupt() | dmacInterrupt();
if (cpuIntsEnabled(mask)) cpuException(mask, cpuRegs.branch);
// ---- Counters -------------
// Important: the vsync counter must be the first to be checked. It includes emulation
// escape/suspend hooks, and it's really a good idea to suspend/resume emulation before
// doing any actual meaninful branchtest logic.
// doing any actual meaningful branchtest logic.
if( cpuTestCycle( nextsCounter, nextCounter ) )
{
@ -391,10 +373,10 @@ __fi void _cpuEventTest_Shared()
_cpuTestTIMR();
// ---- Interrupts -------------
// Handles all interrupts except 30 and 31, which are handled later.
// These are basically just DMAC-related events, which also piggy-back the same bits as
// the PS2's own DMA channel IRQs and IRQ Masks.
if( cpuRegs.interrupt & ~(3<<30) )
_cpuTestInterrupts();
_cpuTestInterrupts();
// ---- IOP -------------
// * It's important to run a iopEventTest before calling ExecuteBlock. This
@ -418,11 +400,7 @@ __fi void _cpuEventTest_Shared()
//if( EEsCycle < -450 )
// Console.WriteLn( " IOP ahead by: %d cycles", -EEsCycle );
// Experimental and Probably Unnecessary Logic -->
// Check if the EE already has an exception pending, and if so we shouldn't
// waste too much time updating the IOP. Theory being that the EE and IOP should
// run closely in sync during raised exception events. But in practice it didn't
// seem to make much of a difference.
EEsCycle = psxCpu->ExecuteBlock( EEsCycle );
iopEventAction = false;
}
@ -456,22 +434,10 @@ __fi void _cpuEventTest_Shared()
// Apply vsync and other counter nextCycles
cpuSetNextEvent( nextsCounter, nextCounter );
// ---- INTC / DMAC Exceptions -----------------
// Raise the INTC and DMAC interrupts here, which usually throw exceptions.
// This should be done last since the IOP and the VU0 can raise several EE
// exceptions.
//if ((cpuRegs.CP0.n.Status.val & 0x10007) == 0x10001)
if( cpuIntsEnabled(0x400) ) TESTINT(30, intcInterrupt);
if( cpuIntsEnabled(0x800) ) TESTINT(31, dmacInterrupt);
}
__ri void cpuTestINTCInts()
{
// Check the internal Event System -- if one's already scheduled then don't bother:
if( cpuRegs.interrupt & (1 << 30) ) return;
// Check the COP0's Status register for general interrupt disables, and the 0x400
// bit (which is INTC master toggle).
if( !cpuIntsEnabled(0x400) ) return;
@ -488,9 +454,6 @@ __ri void cpuTestINTCInts()
__fi void cpuTestDMACInts()
{
// Check the internal Event System -- if one's already scheduled then don't bother:
if ( cpuRegs.interrupt & (1 << 31) ) return;
// Check the COP0's Status register for general interrupt disables, and the 0x800
// bit (which is the DMAC master toggle).
if( !cpuIntsEnabled(0x800) ) return;

View File

@ -403,8 +403,8 @@ enum EE_EventType
};
extern void CPU_INT( EE_EventType n, s32 ecycle );
extern void intcInterrupt();
extern void dmacInterrupt();
extern uint intcInterrupt();
extern uint dmacInterrupt();
extern void cpuInit();

View File

@ -24,7 +24,7 @@
// the lower 16 bit value. IF the change is breaking of all compatibility with old
// states, increment the upper 16 bit value, and clear the lower 16 bits to 0.
static const u32 g_SaveVersion = 0x8b4a0000;
static const u32 g_SaveVersion = 0x8b4b0000;
// this function is meant to be used in the place of GSfreeze, and provides a safe layer
// between the GS saving function and the MTGS's needs. :)

View File

@ -92,9 +92,9 @@ set(zzoglHeaders
Util.h
x86.h
zerogs.h
zerogsmath.h
zpipe.h
ZZoglCRTC.h
ZZoglMath.h
ZZoglShaders.h
ZZGl.h
ZZLog.h)

View File

@ -32,7 +32,6 @@ LRESULT WINAPI MsgProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam)
switch (msg)
{
case WM_DESTROY:
PostQuitMessage(0);
return 0;
@ -76,21 +75,21 @@ bool GLWindow::CreateWindow(void *pDisplay)
rc.bottom = conf.height;
WNDCLASSEX wc;
HINSTANCE hInstance = GetModuleHandle(NULL);
HINSTANCE hInstance = GetModuleHandle(NULL); // Grab An Instance For Our Window
DWORD dwExStyle, dwStyle;
wc.cbSize = sizeof(WNDCLASSEX);
wc.style = CS_CLASSDC;
wc.lpfnWndProc = (WNDPROC) MsgProc;
wc.cbClsExtra = 0;
wc.cbWndExtra = 0;
wc.hInstance = hInstance;
wc.hIcon = NULL;
wc.hIconSm = NULL;
wc.hCursor = NULL;
wc.hbrBackground = NULL;
wc.lpszMenuName = NULL;
wc.lpszClassName = "PS2EMU_ZEROGS";
wc.style = CS_HREDRAW | CS_VREDRAW | CS_OWNDC; // Redraw On Move, And Own DC For Window
wc.lpfnWndProc = (WNDPROC) MsgProc; // MsgProc Handles Messages
wc.cbClsExtra = 0; // No Extra Window Data
wc.cbWndExtra = 0; // No Extra Window Data
wc.hInstance = hInstance; // Set The Instance
wc.hIcon = NULL;
wc.hIconSm = NULL; // Load The Default Icon
wc.hCursor = LoadCursor(NULL, IDC_ARROW); // Load The Arrow Pointer
wc.hbrBackground = (HBRUSH)GetStockObject(BLACK_BRUSH); // No Background Required For GL
wc.lpszMenuName = NULL; // We Don't Want A Menu
wc.lpszClassName = "PS2EMU_ZEROGS"; // Set The Class Name
RegisterClassEx(&wc);
@ -102,26 +101,26 @@ bool GLWindow::CreateWindow(void *pDisplay)
else
{
dwExStyle = WS_EX_APPWINDOW | WS_EX_WINDOWEDGE;
dwStyle = WS_OVERLAPPEDWINDOW;
dwStyle = WS_OVERLAPPEDWINDOW | WS_BORDER;
}
dwStyle |= WS_CLIPSIBLINGS | WS_CLIPCHILDREN;
AdjustWindowRectEx(&rc, dwStyle, false, dwExStyle);
GetWindowRect(GetDesktopWindow(), &rcdesktop);
GShwnd = CreateWindowEx(
dwExStyle,
"PS2EMU_ZEROGS",
"ZeroGS",
dwStyle,
(rcdesktop.right - (rc.right - rc.left)) / 2,
(rcdesktop.bottom - (rc.bottom - rc.top)) / 2,
rc.right - rc.left,
rc.bottom - rc.top,
NULL,
NULL,
hInstance,
NULL);
GShwnd = CreateWindowEx( dwExStyle, // Extended Style For The Window
"PS2EMU_ZEROGS", // Class Name
"ZZOgl", // Window Title
dwStyle, // Selected Window Style
(rcdesktop.right - (rc.right - rc.left)) / 2, // Window Position
(rcdesktop.bottom - (rc.bottom - rc.top)) / 2, // Window Position
rc.right - rc.left, // Calculate Adjusted Window Width
rc.bottom - rc.top, // Calculate Adjusted Window Height
NULL, // No Parent Window
NULL, // No Menu
hInstance, // Instance
NULL); // Don't Pass Anything To WM_CREATE
if (GShwnd == NULL) return false;
@ -197,6 +196,7 @@ bool GLWindow::DisplayWindow(int _width, int _height)
dwExStyle = WS_EX_APPWINDOW | WS_EX_WINDOWEDGE;
dwStyle = WS_OVERLAPPEDWINDOW;
}
dwStyle |= WS_CLIPSIBLINGS | WS_CLIPCHILDREN;
RECT rc;

View File

@ -469,10 +469,10 @@ __forceinline void _TransferLocalLocal_4()
assert((gs.srcbuf.psm&0x7) == (gs.dstbuf.psm&0x7));
if (gs.trxpos.sx + gs.imageWnew > gs.srcbuf.bw)
ZZLog::Warn_Log("Transfer error, src width exceeded.");
ZZLog::Debug_Log("Transfer error, src width exceeded.");
if (gs.trxpos.dx + gs.imageWnew > gs.dstbuf.bw)
ZZLog::Warn_Log("Transfer error, dst width exceeded.");
ZZLog::Debug_Log("Transfer error, dst width exceeded.");
int srcstart, srcend, dststart, dstend;

View File

@ -267,7 +267,7 @@ void fill_block(BLOCK b, vector<char>& vBlockData, vector<char>& vBilinearData,
}
if (floatfmt) {
Vector* psrcv = (Vector*)&vBilinearData[0] + b.ox + b.oy * BLOCK_TEXWIDTH;
float4* psrcv = (float4*)&vBilinearData[0] + b.ox + b.oy * BLOCK_TEXWIDTH;
for(int i = 0; i < b.height; ++i)
{
@ -276,7 +276,7 @@ void fill_block(BLOCK b, vector<char>& vBlockData, vector<char>& vBilinearData,
for(int j = 0; j < b.width; ++j)
{
u32 temp = ((j + 1) % b.width);
Vector* pv = &psrcv[i_width + j];
float4* pv = &psrcv[i_width + j];
pv->x = psrcf[i_width + j];
pv->y = psrcf[i_width + temp];
pv->z = psrcf[i_width2 + j];
@ -291,7 +291,7 @@ void BLOCK::FillBlocks(vector<char>& vBlockData, vector<char>& vBilinearData, in
FUNCLOG
if (floatfmt) {
vBlockData.resize(BLOCK_TEXWIDTH * BLOCK_TEXHEIGHT * 4);
vBilinearData.resize(BLOCK_TEXWIDTH * BLOCK_TEXHEIGHT * sizeof(Vector));
vBilinearData.resize(BLOCK_TEXWIDTH * BLOCK_TEXHEIGHT * sizeof(float4));
} else {
vBlockData.resize(BLOCK_TEXWIDTH * BLOCK_TEXHEIGHT * 2);
}

View File

@ -120,8 +120,8 @@ struct BLOCK
BLOCK() { memset(this, 0, sizeof(BLOCK)); }
// shader constants for this block
Vector vTexBlock;
Vector vTexDims;
float4 vTexBlock;
float4 vTexDims;
int width, height; // dims of one page in pixels
int ox, oy, mult;
int bpp;
@ -147,8 +147,8 @@ struct BLOCK
ox = ox2;
oy = oy2;
mult = mult2;
vTexDims = Vector(BLOCK_TEXWIDTH/(float)(bw), BLOCK_TEXHEIGHT/(float)bh, 0, 0);
vTexBlock = Vector((float)bw/BLOCK_TEXWIDTH, (float)bh/BLOCK_TEXHEIGHT, ((float)ox+0.2f)/BLOCK_TEXWIDTH, ((float)oy+0.05f)/BLOCK_TEXHEIGHT);
vTexDims = float4(BLOCK_TEXWIDTH/(float)(bw), BLOCK_TEXHEIGHT/(float)bh, 0, 0);
vTexBlock = float4((float)bw/BLOCK_TEXWIDTH, (float)bh/BLOCK_TEXHEIGHT, ((float)ox+0.2f)/BLOCK_TEXWIDTH, ((float)oy+0.05f)/BLOCK_TEXHEIGHT);
width = bw;
height = bh;
colwidth = bh / 4;

View File

@ -638,7 +638,7 @@ void __gifCall GIFRegHandlerSCISSOR(const u32* data)
Flush();
}
m_env.CTXT[i].SCISSOR = (GSVector4i)r->SCISSOR;
m_env.CTXT[i].SCISSOR = (Vector4i)r->SCISSOR;
m_env.CTXT[i].UpdateScissor();*/
ZZLog::Greg_Log("SCISSOR%d", i);

View File

@ -55,7 +55,7 @@ inline bool NoHighlights(int i)
// if ( results[resultA] == 0 ) {
// results[resultA] = 1;
// ZZLog::ERROR_LOG("%x = %d %d %d %d %d %d %d %d \n", resultA, prim->iip, (prim->tme), (prim->fge), (prim->abe) , (prim->aa1) ,(prim->fst), (prim->ctxt), (prim->fix)) ;
// ZZLog::Error_Log("%x = %d %d %d %d %d %d %d %d \n", resultA, prim->iip, (prim->tme), (prim->fge), (prim->abe) , (prim->aa1) ,(prim->fst), (prim->ctxt), (prim->fix)) ;
// }
// if (resultA == 0xb && ZeroGS::vb[i].zbuf.zmsk ) return false; //ATF

View File

@ -52,7 +52,7 @@ extern "C" u32 CALLBACK PS2EgetLibType(void);
extern "C" u32 CALLBACK PS2EgetLibVersion2(u32 type);
extern "C" char* CALLBACK PS2EgetLibName(void);
#include "zerogsmath.h"
#include "ZZoglMath.h"
#include <vector>
#include <string>

View File

@ -37,62 +37,27 @@ void CALLBACK GSkeyEvent(keyEvent *ev)
#include "Win32/resource.h"
// Dialog procedure for the logging dialog.
//   WM_INITDIALOG : ticks the IDC_LOG checkbox when conf.log is non-zero.
//   IDCANCEL      : closes the dialog without touching the configuration.
//   IDOK          : copies the checkbox state into conf.log, saves, and closes.
// Returns true for handled messages, false for everything else.
BOOL CALLBACK LoggingDlgProc(HWND hW, UINT uMsg, WPARAM wParam, LPARAM lParam)
{
    if (uMsg == WM_INITDIALOG)
    {
        if (conf.log) CheckDlgButton(hW, IDC_LOG, true);
        return true;
    }

    if (uMsg == WM_COMMAND)
    {
        const int commandId = LOWORD(wParam);

        if (commandId == IDCANCEL)
        {
            EndDialog(hW, true);
            return true;
        }

        if (commandId == IDOK)
        {
            conf.log = IsDlgButtonChecked(hW, IDC_LOG) ? 1 : 0;

            SaveConfig();
            EndDialog(hW, false);
            return true;
        }
    }

    // Unhandled message or unhandled command id.
    return false;
}
map<int, int> mapConfOpts;
#define PUT_CONF(id) mapConfOpts[IDC_CONFOPT_##id] = 0x##id;
void OnInitDialog(HWND hW)
void OnAdvOK(HWND hW)
{
if (!(conf.zz_options.loaded)) LoadConfig();
conf.hacks._u32 = 0;
CheckDlgButton(hW, IDC_CONFIG_INTERLACE, conf.interlace);
CheckDlgButton(hW, IDC_CONFIG_BILINEAR, conf.bilinear);
CheckDlgButton(hW, IDC_CONFIG_DEPTHWRITE, conf.mrtdepth);
CheckRadioButton(hW, IDC_CONFIG_AANONE, IDC_CONFIG_AA4, IDC_CONFIG_AANONE + conf.aa);
CheckDlgButton(hW, IDC_CONFIG_WIREFRAME, (conf.wireframe()) ? 1 : 0);
CheckDlgButton(hW, IDC_CONFIG_CAPTUREAVI, (conf.captureAvi()) ? 1 : 0);
CheckDlgButton(hW, IDC_CONFIG_FULLSCREEN, (conf.fullscreen()) ? 1 : 0);
CheckDlgButton(hW, IDC_CONFIG_WIDESCREEN, (conf.widescreen()) ? 1 : 0);
CheckDlgButton(hW, IDC_CONFIG_BMPSS, (conf.zz_options.tga_snap) ? 1 : 0);
CheckRadioButton(hW, IDC_CONF_WIN640, IDC_CONF_WIN1280, IDC_CONF_WIN640 + conf.zz_options.dimensions);
for (map<int, int>::iterator it = mapConfOpts.begin(); it != mapConfOpts.end(); ++it)
{
if (IsDlgButtonChecked(hW, it->first)) conf.hacks._u32 |= it->second;
}
prevbilinearfilter = conf.bilinear;
GSsetGameCRC(g_LastCRC, conf.hacks._u32);
SaveConfig();
EndDialog(hW, false);
}
void OnInitAdvDialog(HWND hW)
{
mapConfOpts.clear();
PUT_CONF(00000001);
@ -129,45 +94,87 @@ void OnInitDialog(HWND hW)
}
}
void OnOK(HWND hW)
// Dialog procedure for the advanced options dialog.
//   WM_INITDIALOG : delegates to OnInitAdvDialog().
//   IDCANCEL      : closes the dialog.
//   IDOK          : delegates to OnAdvOK().
// Returns true for handled messages, false for everything else.
BOOL CALLBACK AdvancedDialogProc(HWND hW, UINT uMsg, WPARAM wParam, LPARAM lParam)
{
    if (uMsg == WM_INITDIALOG)
    {
        OnInitAdvDialog(hW);
        return true;
    }

    if (uMsg == WM_COMMAND)
    {
        const int commandId = LOWORD(wParam);

        if (commandId == IDCANCEL)
        {
            EndDialog(hW, true);
            return true;
        }

        if (commandId == IDOK)
        {
            OnAdvOK(hW);
            return true;
        }
    }

    // Unhandled message or unhandled command id.
    return false;
}
// Shows the modal advanced options dialog (IDD_ADV_OPTIONS), owned by the
// currently active window and driven by AdvancedDialogProc.
void CALLBACK AdvancedDialog()
{
    HWND owner = GetActiveWindow();

    DialogBox(hInst, MAKEINTRESOURCE(IDD_ADV_OPTIONS), owner, (DLGPROC)AdvancedDialogProc);
}
// Populates the main configuration dialog from the current `conf` values.
//
// Loads the configuration first if it has not been loaded yet, fills the
// anti-aliasing and window-size combo boxes, selects the entries matching the
// stored settings, and sets every checkbox. Also records the current bilinear
// setting in prevbilinearfilter.
//
// hW: handle of the configuration dialog being initialised.
void OnInitConfDialog(HWND hW)
{
    if (!(conf.zz_options.loaded)) LoadConfig();

    // The tables are read-only; entries are added in index order so that the
    // stored setting value is also the index of its label.
    static const TCHAR* const aaName[] = {TEXT("None"), TEXT("x2"), TEXT("x4"), TEXT("x8"), TEXT("x16")};
    static const TCHAR* const sizeName[] = {TEXT("640 x 480"), TEXT("800 x 600"), TEXT("1024 x 768"), TEXT("1280 x 960")};
    const unsigned int aaCount = sizeof(aaName) / sizeof(aaName[0]);
    const unsigned int sizeCount = sizeof(sizeName) / sizeof(sizeName[0]);

    // Look each combo box up once instead of on every call.
    const HWND aaCombo = GetDlgItem(hW, IDC_AA_COMBO);
    const HWND sizeCombo = GetDlgItem(hW, IDC_WIN_SIZE_COMBO);

    for (unsigned int i = 0; i < aaCount; i++)
    {
        ComboBox_AddString(aaCombo, aaName[i]);
    }

    // The stored values come from the config file; an out-of-range (or
    // negative) value must not index past the label tables, so fall back to
    // the first entry in that case.
    const unsigned int aaStored = static_cast<unsigned int>(conf.aa);
    const unsigned int aaIndex = (aaStored < aaCount) ? aaStored : 0;
    ComboBox_SelectString(aaCombo, -1, aaName[aaIndex]);

    for (unsigned int i = 0; i < sizeCount; i++)
    {
        ComboBox_AddString(sizeCombo, sizeName[i]);
    }

    const unsigned int sizeStored = static_cast<unsigned int>(conf.zz_options.dimensions);
    const unsigned int sizeIndex = (sizeStored < sizeCount) ? sizeStored : 0;
    ComboBox_SelectString(sizeCombo, -1, sizeName[sizeIndex]);

    CheckDlgButton(hW, IDC_CONFIG_INTERLACE, conf.interlace);
    CheckDlgButton(hW, IDC_CONFIG_BILINEAR, conf.bilinear);
    CheckDlgButton(hW, IDC_CONFIG_DEPTHWRITE, conf.mrtdepth);
    CheckDlgButton(hW, IDC_CONFIG_WIREFRAME, (conf.wireframe()) ? 1 : 0);
    CheckDlgButton(hW, IDC_CONFIG_CAPTUREAVI, (conf.captureAvi()) ? 1 : 0);
    CheckDlgButton(hW, IDC_CONFIG_FULLSCREEN, (conf.fullscreen()) ? 1 : 0);
    CheckDlgButton(hW, IDC_CONFIG_WIDESCREEN, (conf.widescreen()) ? 1 : 0);
    CheckDlgButton(hW, IDC_CONFIG_BMPSS, (conf.zz_options.tga_snap) ? 1 : 0);

    // Remember the bilinear setting the dialog was opened with.
    prevbilinearfilter = conf.bilinear;
}
void OnConfOK(HWND hW)
{
u32 newinterlace = IsDlgButtonChecked(hW, IDC_CONFIG_INTERLACE);
if (!conf.interlace) conf.interlace = newinterlace;
else if (!newinterlace) conf.interlace = 2; // off
if (!conf.interlace)
conf.interlace = newinterlace;
else if (!newinterlace)
conf.interlace = 2; // off
conf.bilinear = IsDlgButtonChecked(hW, IDC_CONFIG_BILINEAR);
// restore
if (conf.bilinear && prevbilinearfilter)
conf.bilinear = prevbilinearfilter;
if (conf.bilinear && prevbilinearfilter) conf.bilinear = prevbilinearfilter;
//conf.mrtdepth = 1;//IsDlgButtonChecked(hW, IDC_CONFIG_DEPTHWRITE);
if (SendDlgItemMessage(hW, IDC_CONFIG_AANONE, BM_GETCHECK, 0, 0))
{
conf.aa = 0;
}
else if (SendDlgItemMessage(hW, IDC_CONFIG_AA2, BM_GETCHECK, 0, 0))
{
conf.aa = 1;
}
else if (SendDlgItemMessage(hW, IDC_CONFIG_AA4, BM_GETCHECK, 0, 0))
{
conf.aa = 2;
}
else if (SendDlgItemMessage(hW, IDC_CONFIG_AA8, BM_GETCHECK, 0, 0))
{
conf.aa = 3;
}
else if (SendDlgItemMessage(hW, IDC_CONFIG_AA16, BM_GETCHECK, 0, 0))
{
conf.aa = 4;
}
else
{
conf.aa = 0;
}
if (ComboBox_GetCurSel(GetDlgItem(hW, IDC_AA_COMBO)) != -1)
conf.aa = ComboBox_GetCurSel(GetDlgItem(hW, IDC_AA_COMBO));
conf.zz_options._u32 = 0;
@ -177,22 +184,13 @@ void OnOK(HWND hW)
conf.zz_options.widescreen = IsDlgButtonChecked(hW, IDC_CONFIG_WIDESCREEN) ? 1 : 0;
conf.zz_options.tga_snap = IsDlgButtonChecked(hW, IDC_CONFIG_BMPSS) ? 1 : 0;
conf.hacks._u32 = 0;
for (map<int, int>::iterator it = mapConfOpts.begin(); it != mapConfOpts.end(); ++it)
{
if (IsDlgButtonChecked(hW, it->first)) conf.hacks._u32 |= it->second;
}
GSsetGameCRC(g_LastCRC, conf.hacks._u32);
if (SendDlgItemMessage(hW, IDC_CONF_WIN640, BM_GETCHECK, 0, 0))
if (ComboBox_GetCurSel(GetDlgItem(hW, IDC_WIN_SIZE_COMBO)) == 0)
conf.zz_options.dimensions = GSDim_640;
else if (SendDlgItemMessage(hW, IDC_CONF_WIN800, BM_GETCHECK, 0, 0))
else if (ComboBox_GetCurSel(GetDlgItem(hW, IDC_WIN_SIZE_COMBO)) == 1)
conf.zz_options.dimensions = GSDim_800;
else if (SendDlgItemMessage(hW, IDC_CONF_WIN1024, BM_GETCHECK, 0, 0))
else if (ComboBox_GetCurSel(GetDlgItem(hW, IDC_WIN_SIZE_COMBO)) == 2)
conf.zz_options.dimensions = GSDim_1024;
else if (SendDlgItemMessage(hW, IDC_CONF_WIN1280, BM_GETCHECK, 0, 0))
else if (ComboBox_GetCurSel(GetDlgItem(hW, IDC_WIN_SIZE_COMBO)) == 3)
conf.zz_options.dimensions = GSDim_1280;
SaveConfig();
@ -205,19 +203,26 @@ BOOL CALLBACK ConfigureDlgProc(HWND hW, UINT uMsg, WPARAM wParam, LPARAM lParam)
switch (uMsg)
{
case WM_INITDIALOG:
OnInitDialog(hW);
OnInitConfDialog(hW);
return true;
case WM_COMMAND:
switch (LOWORD(wParam))
{
case IDC_AA_COMBO:
break;
case IDC_ADV_BTN:
AdvancedDialog();
return true;
case IDCANCEL:
EndDialog(hW, true);
return true;
case IDOK:
OnOK(hW);
OnConfOK(hW);
return true;
}
}
@ -225,13 +230,26 @@ BOOL CALLBACK ConfigureDlgProc(HWND hW, UINT uMsg, WPARAM wParam, LPARAM lParam)
return false;
}
// Shows the modal configuration dialog (IDD_CONFIG2), owned by the currently
// active window. Afterwards, bilinear filtering is forced off when the
// reduced pixel-shader path is in use.
void CALLBACK GSconfigure()
{
    HWND owner = GetActiveWindow();

    DialogBox(hInst, MAKEINTRESOURCE(IDD_CONFIG2), owner, (DLGPROC)ConfigureDlgProc);

    if (g_nPixelShaderVer == SHADER_REDUCED)
    {
        conf.bilinear = 0;
    }
}
// Plugin self-test entry point. Performs no checks and always returns 0.
s32 CALLBACK GStest()
{
return 0;
}
BOOL CALLBACK AboutDlgProc(HWND hW, UINT uMsg, WPARAM wParam, LPARAM lParam)
{
switch (uMsg)
{
case WM_INITDIALOG:
//ZeroGS uses floating point render targets because A8R8G8B8 format is not sufficient for ps2 blending and this requires alpha blending on floating point render targets
//There might be a problem with pixel shader precision with older geforce models (textures will look blocky).
return true;
case WM_COMMAND:
@ -246,21 +264,6 @@ BOOL CALLBACK AboutDlgProc(HWND hW, UINT uMsg, WPARAM wParam, LPARAM lParam)
return false;
}
// Shows the modal configuration dialog (IDD_CONFIG), owned by the currently
// active window. Afterwards, bilinear filtering is forced off when the
// reduced pixel-shader path is in use.
void CALLBACK GSconfigure()
{
    HWND owner = GetActiveWindow();

    DialogBox(hInst, MAKEINTRESOURCE(IDD_CONFIG), owner, (DLGPROC)ConfigureDlgProc);

    if (g_nPixelShaderVer == SHADER_REDUCED)
    {
        conf.bilinear = 0;
    }
}
// Plugin self-test entry point. Performs no checks and always returns 0.
s32 CALLBACK GStest()
{
return 0;
}
void CALLBACK GSabout()
{
DialogBox(hInst,

View File

@ -5,7 +5,6 @@
#define IDC_CONF_DEFAULT 3
#define IDR_DATA1 112
#define IDD_ADV_OPTIONS 113
#define IDD_DIALOG1 114
#define IDD_CONFIG2 114
#define IDC_ABOUTTEXT 1015
#define IDC_CONFIG_AA 1016
@ -52,12 +51,15 @@
#define IDC_CONFOPT_00004000 1047
#define IDC_BUTTON1 1048
#define IDC_CONFOPT_COMPUTEOR 1048
#define IDC_ADV_BTN 1048
#define IDC_CONFOPT_4001 1049
#define IDC_CONFOPT_00000010 1049
#define IDC_CONFOPT_00008000 1050
#define IDC_CONFOPT_00010000 1052
#define IDC_CONFOPT_00020000 1054
#define IDC_AA_COMBO 1054
#define IDC_CONFOPT_00000002 1055
#define IDC_WIN_SIZE_COMBO 1055
#define IDC_CONFOPT_01000000 1056
#define IDC_CONFOPT_00800000 1057
#define IDC_CONFOPT_00000008 1058
@ -80,7 +82,7 @@
#ifndef APSTUDIO_READONLY_SYMBOLS
#define _APS_NEXT_RESOURCE_VALUE 116
#define _APS_NEXT_COMMAND_VALUE 40001
#define _APS_NEXT_CONTROL_VALUE 1051
#define _APS_NEXT_CONTROL_VALUE 1056
#define _APS_NEXT_SYMED_VALUE 101
#endif
#endif

View File

@ -206,32 +206,28 @@ BEGIN
"Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,14,266,365,8
END
IDD_CONFIG2 DIALOGEX 0, 0, 171, 217
IDD_CONFIG2 DIALOGEX 0, 0, 159, 160
STYLE DS_SETFONT | DS_MODALFRAME | DS_FIXEDSYS | WS_POPUP | WS_CAPTION | WS_SYSMENU
CAPTION "ZZOgl Options"
FONT 8, "MS Shell Dlg", 400, 0, 0x1
BEGIN
DEFPUSHBUTTON "OK",IDOK,55,192,50,14
PUSHBUTTON "Cancel",IDCANCEL,108,192,50,14
GROUPBOX "Static",IDC_STATIC,7,7,152,183
CONTROL "Logging (For Debugging)",1000,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,14,18,102,10
DEFPUSHBUTTON "OK",IDOK,37,138,50,14
PUSHBUTTON "Cancel",IDCANCEL,91,138,50,14
CONTROL "Logging (For Debugging)",1000,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,7,102,10
CONTROL "Interlace Enable (toggle with F5). There are 2 modes + interlace off",IDC_CONFIG_INTERLACE,
"Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,14,45,137,18
"Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,7,32,137,18
CONTROL "Bilinear Filtering (Shift+F5). Best quality is on, turn off for speed.",IDC_CONFIG_BILINEAR,
"Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,14,67,137,18
"Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,7,50,137,18
CONTROL "Capture Avi (zerogs.avi) (F12)",IDC_CONFIG_CAPTUREAVI,
"Button",BS_AUTOCHECKBOX | WS_TABSTOP,14,103,109,10
"Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,82,109,10
CONTROL "Save Snapshots as BMP(default is JPG)",IDC_CONFIG_BMPSS,
"Button",BS_AUTOCHECKBOX | WS_TABSTOP,14,116,141,10
CONTROL "Wide Screen",IDC_CONFIG_WIDESCREEN,"Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,14,90,109,10
CONTROL "640 x 480",IDC_CONF_WIN640,"Button",BS_AUTORADIOBUTTON | WS_GROUP,20,140,59,8
CONTROL "800 x 600",IDC_CONF_WIN800,"Button",BS_AUTORADIOBUTTON,21,152,59,8
CONTROL "1024 x 768",IDC_CONF_WIN1024,"Button",BS_AUTORADIOBUTTON,86,140,59,8
CONTROL "1280 x 960",IDC_CONF_WIN1280,"Button",BS_AUTORADIOBUTTON,86,151,53,8
GROUPBOX "Default Window Size (no speed impact)",IDC_STATIC,14,129,137,39
COMBOBOX IDC_COMBO1,59,31,48,30,CBS_DROPDOWNLIST | CBS_SORT | WS_VSCROLL | WS_TABSTOP
LTEXT "Anti-aliasing",IDC_STATIC,15,33,43,13
PUSHBUTTON "Advanced...",IDC_BUTTON1,17,170,134,14
"Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,93,141,10
CONTROL "Wide Screen",IDC_CONFIG_WIDESCREEN,"Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,7,69,109,10
LTEXT "Anti-aliasing",IDC_STATIC,7,20,43,13
PUSHBUTTON "Advanced...",IDC_ADV_BTN,7,118,134,14
COMBOBOX IDC_AA_COMBO,53,18,48,30,CBS_DROPDOWN | WS_VSCROLL | WS_TABSTOP
COMBOBOX IDC_WIN_SIZE_COMBO,78,104,62,30,CBS_DROPDOWN | WS_VSCROLL | WS_TABSTOP
LTEXT "Default Window Size",IDC_STATIC,7,106,68,8
END
@ -277,9 +273,9 @@ BEGIN
IDD_CONFIG2, DIALOG
BEGIN
LEFTMARGIN, 7
RIGHTMARGIN, 164
RIGHTMARGIN, 152
TOPMARGIN, 7
BOTTOMMARGIN, 210
BOTTOMMARGIN, 152
END
END
#endif // APSTUDIO_INVOKED
@ -311,27 +307,6 @@ END
#endif // APSTUDIO_INVOKED
/////////////////////////////////////////////////////////////////////////////
//
// Dialog Info
//
IDD_CONFIG2 DLGINIT
BEGIN
IDC_COMBO1, 0x403, 5, 0
0x6f4e, 0x656e, "\000"
IDC_COMBO1, 0x403, 3, 0
0x5832, "\000"
IDC_COMBO1, 0x403, 3, 0
0x5834, "\000"
IDC_COMBO1, 0x403, 3, 0
0x5838, "\000"
IDC_COMBO1, 0x403, 4, 0
0x3631, 0x0058,
0
END
#endif // English (U.S.) resources
/////////////////////////////////////////////////////////////////////////////

View File

@ -481,10 +481,6 @@
RelativePath="..\zerogs.h"
>
</File>
<File
RelativePath="..\zerogsmath.h"
>
</File>
<File
RelativePath="..\ZZGl.h"
>
@ -497,6 +493,10 @@
RelativePath="..\ZZoglFlushHack.h"
>
</File>
<File
RelativePath="..\ZZoglMath.h"
>
</File>
<File
RelativePath="..\ZZoglShaders.h"
>
@ -528,11 +528,11 @@
</File>
</Filter>
<File
RelativePath=".\ps2hw.dat"
RelativePath="..\ps2hw.dat"
>
</File>
<File
RelativePath="..\ps2hw.dat"
RelativePath=".\ps2hw.dat"
>
</File>
</Files>

View File

@ -50,7 +50,7 @@ extern bool g_bMakeSnapshot;
extern string strSnapshot;
// Adjusts vertex shader BitBltPos vector v to preserve aspect ratio. It used to emulate 4:3 or 16:9.
void ZeroGS::AdjustTransToAspect(Vector& v)
void ZeroGS::AdjustTransToAspect(float4& v)
{
double temp;
float f;
@ -242,11 +242,11 @@ inline void RenderStartHelper(u32 bInterlace)
// on image y coords. So if we write valpha.z * F + valpha.w + 0.5, it would be switching odd
// and even strings at each frame.
// valpha.x and y are used for image blending.
inline Vector RenderGetForClip(u32 bInterlace, int interlace, int psm, FRAGMENTSHADER* prog)
inline float4 RenderGetForClip(u32 bInterlace, int interlace, int psm, FRAGMENTSHADER* prog)
{
SetShaderCaller("RenderGetForClip");
Vector valpha;
float4 valpha;
// first render the current render targets, then from ptexMem
if (psm == 1)
@ -282,7 +282,7 @@ inline Vector RenderGetForClip(u32 bInterlace, int interlace, int psm, FRAGMENTS
valpha.w = 1;
}
ZZshSetParameter4fv(prog->sOneColor, valpha, "g_fOneColor");
ZZshSetParameter4fv(prog->prog, prog->sOneColor, valpha, "g_fOneColor");
return valpha;
}
@ -295,7 +295,7 @@ inline void RenderCreateInterlaceTex(u32 bInterlace, int th, FRAGMENTSHADER* pro
int interlacetex = CreateInterlaceTex(2 * th);
ZZshGLSetTextureParameter(prog->sInterlace, interlacetex, "Interlace");
ZZshGLSetTextureParameter(prog->prog, prog->sInterlace, interlacetex, "Interlace");
}
// Well, do blending setup prior to second pass of half-frame drawing
@ -396,10 +396,10 @@ inline int RenderGetOffsets(int* dby, int* movy, tex0Info& texframe, CRenderTarg
}
// BltBit shader calculate vertex (4 coord's pixel) position at the viewport.
inline Vector RenderSetTargetBitPos(int dh, int th, int movy, bool isInterlace)
inline float4 RenderSetTargetBitPos(int dh, int th, int movy, bool isInterlace)
{
SetShaderCaller("RenderSetTargetBitPos");
Vector v;
float4 v;
// dest rect
v.x = 1;
v.y = dh / (float)th;
@ -416,7 +416,7 @@ inline Vector RenderSetTargetBitPos(int dh, int th, int movy, bool isInterlace)
v.w += 1.0f / (float)dh ;
}
ZZshSetParameter4fv(pvsBitBlt.sBitBltPos, v, "g_fBitBltPos");
ZZshSetParameter4fv(pvsBitBlt.prog, pvsBitBlt.sBitBltPos, v, "g_fBitBltPos");
return v;
}
@ -425,12 +425,12 @@ inline Vector RenderSetTargetBitPos(int dh, int th, int movy, bool isInterlace)
// For example, use tw / X and tw / X magnify the viewport.
// Interlaced output is little out of VB, it could be seen as an evil blinking line on top
// and bottom, so we try to remove it.
inline Vector RenderSetTargetBitTex(float th, float tw, float dh, float dw, bool isInterlace)
inline float4 RenderSetTargetBitTex(float th, float tw, float dh, float dw, bool isInterlace)
{
SetShaderCaller("RenderSetTargetBitTex");
Vector v;
v = Vector(th, tw, dh, dw);
float4 v;
v = float4(th, tw, dh, dw);
// Incorrect Aspect ratio on interlaced frames
@ -440,28 +440,28 @@ inline Vector RenderSetTargetBitTex(float th, float tw, float dh, float dw, bool
v.w += 1.0f / conf.height;
}
ZZshSetParameter4fv(pvsBitBlt.sBitBltTex, v, "g_fBitBltTex");
ZZshSetParameter4fv(pvsBitBlt.prog, pvsBitBlt.sBitBltTex, v, "g_fBitBltTex");
return v;
}
// Translator for POSITION coordinates (-1.0f:+1.0f on the x axis, +1.0f:-1.0f on the y axis) into target frame ones.
// We don't need the x coordinate, because interlacing is y-axis only.
inline Vector RenderSetTargetBitTrans(int th)
inline float4 RenderSetTargetBitTrans(int th)
{
SetShaderCaller("RenderSetTargetBitTrans");
Vector v = Vector(float(th), -float(th), float(th), float(th));
ZZshSetParameter4fv(pvsBitBlt.fBitBltTrans, v, "g_fBitBltTrans");
float4 v = float4(float(th), -float(th), float(th), float(th));
ZZshSetParameter4fv(pvsBitBlt.prog, pvsBitBlt.fBitBltTrans, v, "g_fBitBltTrans");
return v;
}
// Use g_fInvTexDims to store the inverse texture dims.
// It seems that the Targ shader does not use it.
inline Vector RenderSetTargetInvTex(int bInterlace, int tw, int th, FRAGMENTSHADER* prog)
inline float4 RenderSetTargetInvTex(int bInterlace, int tw, int th, FRAGMENTSHADER* prog)
{
SetShaderCaller("RenderSetTargetInvTex");
Vector v = Vector(0, 0, 0, 0);
float4 v = float4(0, 0, 0, 0);
if (prog->sInvTexDims)
{
@ -469,7 +469,7 @@ inline Vector RenderSetTargetInvTex(int bInterlace, int tw, int th, FRAGMENTSHAD
v.y = 1.0f / (float)th;
v.z = (float)0.0;
v.w = -0.5f / (float)th;
ZZshSetParameter4fv(prog->sInvTexDims, v, "g_fInvTexDims");
ZZshSetParameter4fv(prog->prog, prog->sInvTexDims, v, "g_fInvTexDims");
}
return v;
@ -544,17 +544,17 @@ inline void RenderCheckForTargets(tex0Info& texframe, list<CRenderTarget*>& list
SetShaderCaller("RenderCheckForTargets");
// Texture
Vector v = RenderSetTargetBitTex((float)RW(texframe.tw), (float)RH(dh), (float)RW(pfb->DBX), (float)RH(dby), INTERLACE_COUNT);
float4 v = RenderSetTargetBitTex((float)RW(texframe.tw), (float)RH(dh), (float)RW(pfb->DBX), (float)RH(dby), INTERLACE_COUNT);
// dest rect
v = RenderSetTargetBitPos(dh, texframe.th, movy, INTERLACE_COUNT);
v = RenderSetTargetBitTrans(ptarg->fbh);
v = RenderSetTargetInvTex(bInterlace, texframe.tbw, ptarg->fbh, &ppsCRTCTarg[bInterlace]) ; // FIXME. This is no use
Vector valpha = RenderGetForClip(bInterlace, interlace, texframe.psm, &ppsCRTCTarg[bInterlace]);
float4 valpha = RenderGetForClip(bInterlace, interlace, texframe.psm, &ppsCRTCTarg[bInterlace]);
// inside vb[0]'s target area, so render that region only
ZZshGLSetTextureParameter(ppsCRTCTarg[bInterlace].sFinal, ptarg->ptex, "CRTC target");
ZZshGLSetTextureParameter(ppsCRTCTarg[bInterlace].prog, ppsCRTCTarg[bInterlace].sFinal, ptarg->ptex, "CRTC target");
RenderCreateInterlaceTex(bInterlace, texframe.th, &ppsCRTCTarg[bInterlace]);
ZZshSetPixelShader(ppsCRTCTarg[bInterlace].prog);
@ -582,7 +582,7 @@ inline void RenderCheckForTargets(tex0Info& texframe, list<CRenderTarget*>& list
// this is the function that does it.
inline void RenderCheckForMemory(tex0Info& texframe, list<CRenderTarget*>& listTargs, int i, bool* bUsingStencil, int interlace, int bInterlace)
{
Vector v;
float4 v;
for (list<CRenderTarget*>::iterator it = listTargs.begin(); it != listTargs.end(); ++it)
{
@ -624,9 +624,9 @@ inline void RenderCheckForMemory(tex0Info& texframe, list<CRenderTarget*>& listT
v = RenderSetTargetBitPos(1, 1, 0, INTERLACE_COUNT);
v = RenderSetTargetBitTrans(texframe.th);
v = RenderSetTargetInvTex(bInterlace, texframe.tw, texframe.th, &ppsCRTC[bInterlace]);
Vector valpha = RenderGetForClip(bInterlace, interlace, texframe.psm, &ppsCRTC[bInterlace]);
float4 valpha = RenderGetForClip(bInterlace, interlace, texframe.psm, &ppsCRTC[bInterlace]);
ZZshGLSetTextureParameter(ppsCRTC[bInterlace].sMemory, vb[0].pmemtarg->ptex->tex, "CRTC memory");
ZZshGLSetTextureParameter(ppsCRTC[bInterlace].prog, ppsCRTC[bInterlace].sMemory, vb[0].pmemtarg->ptex->tex, "CRTC memory");
RenderCreateInterlaceTex(bInterlace, texframe.th, &ppsCRTC[bInterlace]);
ZZshSetPixelShader(ppsCRTC[bInterlace].prog);

View File

@ -63,7 +63,7 @@ extern int s_nNewWidth, s_nNewHeight;
extern CRangeManager s_RangeMngr; // manages overwritten memory
extern void FlushTransferRanges(const tex0Info* ptex);
extern void ProcessMessages();
void AdjustTransToAspect(Vector& v);
void AdjustTransToAspect(float4& v);
// Interlace texture is a lazy 1*(height) array of 1 and 0.
// If its height (named s_nInterlaceTexWidth here) is changing we must redo

View File

@ -82,8 +82,8 @@ extern void KickTriangleFan();
extern void KickSprite();
extern void KickDummy();
extern bool LoadEffects();
extern bool LoadExtraEffects();
extern FRAGMENTSHADER* LoadShadeEffect(int type, int texfilter, int fog, int testaem, int exactcolor, const clampInfo& clamp, int context, bool* pbFailed);
extern bool ZZshLoadExtraEffects();
extern FRAGMENTSHADER* ZZshLoadShadeEffect(int type, int texfilter, int fog, int testaem, int exactcolor, const clampInfo& clamp, int context, bool* pbFailed);
GLuint vboRect = 0;
vector<GLuint> g_vboBuffers; // VBOs for all drawing commands
@ -127,7 +127,6 @@ void (APIENTRY *zgsBlendFuncSeparateEXT)(GLenum, GLenum, GLenum, GLenum) = NULL;
// State parameters
extern u8* s_lpShaderResources;
ZZshProgram pvs[16] = {NULL};
// String's for shader file in developer mode
#ifdef DEVBUILD

View File

@ -156,7 +156,7 @@ int s_nWriteDestAlphaTest = 0; // ZZ
////////////////////
// State parameters
static Vector vAlphaBlendColor; // used for GPU_COLOR
static float4 vAlphaBlendColor; // used for GPU_COLOR
static bool bNeedBlendFactorInAlpha; // set if the output source alpha is different from the real source alpha (only when blend factor > 0x80)
static u32 s_dwColorWrite = 0xf; // the color write mask of the current target
@ -310,7 +310,7 @@ void ZeroGS::ReloadEffects()
memset(ppsTexture, 0, sizeof(ppsTexture));
LoadExtraEffects();
ZZshLoadExtraEffects();
#endif
}
@ -830,11 +830,11 @@ inline int FlushGetShaderType(VB& curvb, CRenderTarget* ptextarg, GLuint& ptexcl
//Set page offsets depending on shader type.
inline Vector FlushSetPageOffset(FRAGMENTSHADER* pfragment, int shadertype, CRenderTarget* ptextarg)
inline float4 FlushSetPageOffset(FRAGMENTSHADER* pfragment, int shadertype, CRenderTarget* ptextarg)
{
SetShaderCaller("FlushSetPageOffset");
Vector vpageoffset;
float4 vpageoffset;
vpageoffset.w = 0;
switch (shadertype)
@ -863,14 +863,14 @@ inline Vector FlushSetPageOffset(FRAGMENTSHADER* pfragment, int shadertype, CRen
}
//Set texture offsets depending on shader type.
inline Vector FlushSetTexOffset(FRAGMENTSHADER* pfragment, int shadertype, VB& curvb, CRenderTarget* ptextarg)
inline float4 FlushSetTexOffset(FRAGMENTSHADER* pfragment, int shadertype, VB& curvb, CRenderTarget* ptextarg)
{
SetShaderCaller("FlushSetTexOffset");
Vector v;
float4 v;
if (shadertype == 3)
{
Vector v;
float4 v;
v.x = 16.0f / (float)curvb.tex0.tw;
v.y = 16.0f / (float)curvb.tex0.th;
v.z = 0.5f * v.x;
@ -879,7 +879,7 @@ inline Vector FlushSetTexOffset(FRAGMENTSHADER* pfragment, int shadertype, VB& c
}
else if (shadertype == 4)
{
Vector v;
float4 v;
v.x = 16.0f / (float)ptextarg->fbw;
v.y = 16.0f / (float)ptextarg->fbh;
v.z = -1;
@ -891,10 +891,10 @@ inline Vector FlushSetTexOffset(FRAGMENTSHADER* pfragment, int shadertype, VB& c
}
// Set dimension (Real!) of texture. z and w
inline Vector FlushTextureDims(FRAGMENTSHADER* pfragment, int shadertype, VB& curvb, CRenderTarget* ptextarg)
inline float4 FlushTextureDims(FRAGMENTSHADER* pfragment, int shadertype, VB& curvb, CRenderTarget* ptextarg)
{
SetShaderCaller("FlushTextureDims");
Vector vTexDims;
float4 vTexDims;
vTexDims.x = (float)RW(curvb.tex0.tw) ;
vTexDims.y = (float)RH(curvb.tex0.th) ;
@ -958,14 +958,14 @@ inline FRAGMENTSHADER* FlushUseExistRenderTarget(VB& curvb, CRenderTarget* ptext
//int psm = PIXEL_STORAGE_FORMAT(curvb.tex0);
int shadertype = FlushGetShaderType(curvb, ptextarg, ptexclut);
FRAGMENTSHADER* pfragment = LoadShadeEffect(shadertype, 0, curvb.curprim.fge,
FRAGMENTSHADER* pfragment = ZZshLoadShadeEffect(shadertype, 0, curvb.curprim.fge,
IsAlphaTestExpansion(curvb.tex0), exactcolor, curvb.clamp, context, NULL);
Vector vpageoffset = FlushSetPageOffset(pfragment, shadertype, ptextarg);
float4 vpageoffset = FlushSetPageOffset(pfragment, shadertype, ptextarg);
Vector v = FlushSetTexOffset(pfragment, shadertype, curvb, ptextarg);
float4 v = FlushSetTexOffset(pfragment, shadertype, curvb, ptextarg);
Vector vTexDims = FlushTextureDims(pfragment, shadertype, curvb, ptextarg);
float4 vTexDims = FlushTextureDims(pfragment, shadertype, curvb, ptextarg);
if (pfragment->sCLUT != NULL && ptexclut != 0)
ZZshGLSetTextureParameter(pfragment->sCLUT, ptexclut, "CLUT");
@ -997,7 +997,7 @@ inline FRAGMENTSHADER* FlushMadeNewTarget(VB& curvb, int exactcolor, int context
}
}
FRAGMENTSHADER* pfragment = LoadShadeEffect(0, GetTexFilter(curvb.tex1), curvb.curprim.fge,
FRAGMENTSHADER* pfragment = ZZshLoadShadeEffect(0, GetTexFilter(curvb.tex1), curvb.curprim.fge,
IsAlphaTestExpansion(curvb.tex0), exactcolor, curvb.clamp, context, NULL);
if (pfragment == NULL)
@ -1160,7 +1160,7 @@ inline u32 AlphaRenderAlpha(VB& curvb, const pixTest curtest, FRAGMENTSHADER* pf
}
// harvest fishing
Vector v = vAlphaBlendColor;
float4 v = vAlphaBlendColor;
if (exactcolor)
{
@ -1173,7 +1173,7 @@ inline u32 AlphaRenderAlpha(VB& curvb, const pixTest curtest, FRAGMENTSHADER* pf
else
{
// not using blending so set to defaults
Vector v = exactcolor ? Vector(1, 510 * 255.0f / 256.0f, 0, 0) : Vector(1, 2 * 255.0f / 256.0f, 0, 0);
float4 v = exactcolor ? float4(1, 510 * 255.0f / 256.0f, 0, 0) : float4(1, 2 * 255.0f / 256.0f, 0, 0);
ZZshSetParameter4fv(pfragment->sOneColor, v, "g_fOneColor");
}
@ -1257,7 +1257,7 @@ inline void AlphaPabe(VB& curvb, FRAGMENTSHADER* pfragment, int exactcolor)
glDisable(GL_BLEND);
GL_STENCILFUNC_SET();
Vector v;
float4 v;
v.x = 1;
v.y = 2;
v.z = 0;
@ -1330,7 +1330,7 @@ inline void AlphaFailureTestJob(VB& curvb, const pixTest curtest, FRAGMENTSHADE
if (gs.pabe && bCanRenderStencil)
{
// only render the pixels with alpha values >= 0x80
Vector v = vAlphaBlendColor;
float4 v = vAlphaBlendColor;
if (exactcolor) { v.y *= 255; v.w *= 255; }
@ -1350,7 +1350,7 @@ inline void AlphaFailureTestJob(VB& curvb, const pixTest curtest, FRAGMENTSHADE
glDisable(GL_BLEND);
GL_STENCILFUNC_SET();
Vector v;
float4 v;
v.x = 1;
v.y = 2;
v.z = 0;
@ -1409,7 +1409,7 @@ inline void AlphaSpecialTesting(VB& curvb, FRAGMENTSHADER* pfragment, u32 dwUsin
glStencilFunc(GL_EQUAL, STENCIL_SPECIAL | STENCIL_PIXELWRITE, STENCIL_SPECIAL | STENCIL_PIXELWRITE);
glStencilOp(GL_KEEP, GL_KEEP, GL_KEEP);
Vector v = Vector(0, exactcolor ? 510.0f : 2.0f, 0, 0);
float4 v = float4(0, exactcolor ? 510.0f : 2.0f, 0, 0);
ZZshSetParameter4fv(pfragment->sOneColor, v, "g_fOneColor");
Draw(curvb);
@ -1560,7 +1560,7 @@ inline void ZeroGS::RenderFBA(const VB& curvb, ZZshParameter sOneColor)
glAlphaFunc(GL_GEQUAL, 1);
Vector v(1,2,0,0);
float4 v(1,2,0,0);
ZZshSetParameter4fv(sOneColor, v, "g_fOneColor");
@ -1599,7 +1599,7 @@ __forceinline void ZeroGS::RenderAlphaTest(const VB& curvb, ZZshParameter sOneCo
SetShaderCaller("RenderAlphaTest");
Vector v(1,2,0,0);
float4 v(1,2,0,0);
ZZshSetParameter4fv(sOneColor, v, "g_fOneColor");
@ -1624,7 +1624,7 @@ __forceinline void ZeroGS::RenderAlphaTest(const VB& curvb, ZZshParameter sOneCo
if (curvb.test.ate && curvb.test.atst > 1 && curvb.test.aref > 0x80)
{
v = Vector(1,1,0,0);
v = float4(1,1,0,0);
ZZshSetParameter4fv(sOneColor, v, "g_fOneColor");
glAlphaFunc(g_dwAlphaCmp[curvb.test.atst], AlphaReferedValue(curvb.test.aref));
}
@ -1925,12 +1925,12 @@ void ZeroGS::SetTexInt(int context, FRAGMENTSHADER* pfragment, int settexint)
}
// clamp relies on texture width
void ZeroGS::SetTexClamping(int context, FRAGMENTSHADER* pfragment)
void SetTexClamping(int context, FRAGMENTSHADER* pfragment)
{
FUNCLOG
SetShaderCaller("SetTexClamping");
clampInfo* pclamp = &ZeroGS::vb[context].clamp;
Vector v, v2;
float4 v, v2;
v.x = v.y = 0;
u32* ptex = ZeroGS::vb[context].ptexClamp;
ptex[0] = ptex[1] = 0;
@ -2015,8 +2015,8 @@ void ZeroGS::SetTexClamping(int context, FRAGMENTSHADER* pfragment)
}
// Fixme should be in Vector lib
inline bool equal_vectors(Vector a, Vector b)
// Fixme should be in float4 lib
inline bool equal_vectors(float4 a, float4 b)
{
if (abs(a.x - b.x) + abs(a.y - b.y) + abs(a.z - b.z) + abs(a.w - b.w) < 0.01)
return true;
@ -2033,7 +2033,7 @@ void ZeroGS::SetTexVariables(int context, FRAGMENTSHADER* pfragment)
assert(!vb[context].bNeedTexCheck);
Vector v, v2;
float4 v, v2;
tex0Info& tex0 = vb[context].tex0;
@ -2045,14 +2045,14 @@ void ZeroGS::SetTexVariables(int context, FRAGMENTSHADER* pfragment)
SetShaderCaller("SetTexVariables");
// alpha and texture highlighting
Vector valpha, valpha2 ;
float4 valpha, valpha2 ;
// if clut, use the frame format
int psm = PIXEL_STORAGE_FORMAT(tex0);
// ZZLog::Error_Log( "A %d psm, is-clut %d. cpsm %d | %d %d", psm, PSMT_ISCLUT(psm), tex0.cpsm, tex0.tfx, tex0.tcc );
Vector vblack;
float4 vblack;
vblack.x = vblack.y = vblack.z = vblack.w = 10;
/* tcc -- Texture Color Component 0=RGB, 1=RGBA + use Alpha from TEXA reg when not in PSM
@ -2096,7 +2096,7 @@ void ZeroGS::SetTexVariables(int context, FRAGMENTSHADER* pfragment)
/*
// Test, old code.
Vector valpha3, valpha4;
float4 valpha3, valpha4;
switch(tex0.tfx) {
case 0:
valpha3.z = 0; valpha3.w = 0;
@ -2206,7 +2206,7 @@ void ZeroGS::SetTexVariables(int context, FRAGMENTSHADER* pfragment)
void ZeroGS::SetTexVariablesInt(int context, int bilinear, const tex0Info& tex0, bool CheckVB, FRAGMENTSHADER* pfragment, int force)
{
FUNCLOG
Vector v;
float4 v;
CMemoryTarget* pmemtarg = g_MemTargs.GetMemoryTarget(tex0, 1);
assert( pmemtarg != NULL && pfragment != NULL && pmemtarg->ptex != NULL);
@ -2248,7 +2248,7 @@ void ZeroGS::SetTexVariablesInt(int context, int bilinear, const tex0Info& tex0,
float fbw = (float)tex0.tbw;
Vector vTexDims;
float4 vTexDims;
vTexDims.x = b.vTexDims.x * (fw);
vTexDims.y = b.vTexDims.y * (fh);
@ -2291,7 +2291,7 @@ void ZeroGS::SetTexVariablesInt(int context, int bilinear, const tex0Info& tex0,
ZZshSetParameter4fv(pfragment->fTexDims, vTexDims, "g_fTexDims");
// ZZshSetParameter4fv(pfragment->fTexBlock, b.vTexBlock, "g_fTexBlock"); // I change it, and it's working. Seems casting from Vector to float[4] is ok.
// ZZshSetParameter4fv(pfragment->fTexBlock, b.vTexBlock, "g_fTexBlock"); // I change it, and it's working. Seems casting from float4 to float[4] is ok.
ZZshSetParameter4fv(pfragment->fTexBlock, &b.vTexBlock.x, "g_fTexBlock");
ZZshSetParameter4fv(pfragment->fTexOffset, v, "g_fTexOffset");
@ -2403,7 +2403,7 @@ void ZeroGS::SetAlphaVariables(const alphaInfo& a)
s_rgbeq = 1;
// s_alphaInfo = a;
vAlphaBlendColor = Vector(1, 2 * 255.0f / 256.0f, 0, 0);
vAlphaBlendColor = float4(1, 2 * 255.0f / 256.0f, 0, 0);
u32 usec = a.c;

View File

@ -2,12 +2,15 @@
*
* Zerofrog's ZeroGS KOSMOS (c)2005-2008
*
* Zerofrog forgot to write any copyright notice after release the plugin into GPLv2
* Zerofrog forgot to write any copyright notice after releasing the plugin into GPLv2
* If someone can contact him successfully to clarify this matter that would be great.
*/
#ifndef ZEROGS_MATH_H
#define ZEROGS_MATH_H
// Now that it's down to 82 lines, and most of it's fairly obvious, perhaps it'd be easier to
// just reimplement it... -arcum42
#ifndef ZZOGLMATH_H_INCLUDED
#define ZZOGLMATH_H_INCLUDED
#ifndef _WIN32
#include <alloca.h>
@ -22,16 +25,16 @@ typedef float dReal;
// class used for 3 and 4 dim vectors and quaternions
// It is better to use this for a 3 dim vector because it is 16byte aligned and SIMD instructions can be used
class Vector
class float4
{
public:
dReal x, y, z, w;
Vector() : x(0), y(0), z(0), w(0) {}
Vector(dReal x, dReal y, dReal z) : x(x), y(y), z(z), w(0) {}
Vector(dReal x, dReal y, dReal z, dReal w) : x(x), y(y), z(z), w(w) {}
Vector(const Vector &vec) : x(vec.x), y(vec.y), z(vec.z), w(vec.w) {}
Vector(const dReal* pf) { assert(pf != NULL); x = pf[0]; y = pf[1]; z = pf[2]; w = 0; }
float4() : x(0), y(0), z(0), w(0) {}
float4(dReal x, dReal y, dReal z) : x(x), y(y), z(z), w(0) {}
float4(dReal x, dReal y, dReal z, dReal w) : x(x), y(y), z(z), w(w) {}
float4(const float4 &vec) : x(vec.x), y(vec.y), z(vec.z), w(vec.w) {}
float4(const dReal* pf) { assert(pf != NULL); x = pf[0]; y = pf[1]; z = pf[2]; w = 0; }
dReal operator[](int i) const { return (&x)[i]; }
dReal& operator[](int i) { return (&x)[i]; }
@ -40,7 +43,7 @@ class Vector
operator const dReal*() const { return (const dReal*)&x; }
// SCALAR FUNCTIONS
inline dReal dot(const Vector &v) const { return x*v.x + y*v.y + z*v.z + w*v.w; }
inline dReal dot(const float4 &v) const { return x*v.x + y*v.y + z*v.z + w*v.w; }
inline void Set3(const float* pvals) { x = pvals[0]; y = pvals[1]; z = pvals[2]; }
inline void Set4(const float* pvals) { x = pvals[0]; y = pvals[1]; z = pvals[2]; w = pvals[3]; }
inline void SetColor(u32 color)
@ -53,28 +56,28 @@ class Vector
// 3 dim cross product, w is not touched
/// this = this x v
/// this = u x v
inline Vector operator-() const { Vector v; v.x = -x; v.y = -y; v.z = -z; v.w = -w; return v; }
inline Vector operator+(const Vector &r) const { Vector v; v.x = x + r.x; v.y = y + r.y; v.z = z + r.z; v.w = w + r.w; return v; }
inline Vector operator-(const Vector &r) const { Vector v; v.x = x - r.x; v.y = y - r.y; v.z = z - r.z; v.w = w - r.w; return v; }
inline Vector operator*(const Vector &r) const { Vector v; v.x = r.x * x; v.y = r.y * y; v.z = r.z * z; v.w = r.w * w; return v; }
inline Vector operator*(dReal k) const { Vector v; v.x = k * x; v.y = k * y; v.z = k * z; v.w = k * w; return v; }
inline Vector& operator += (const Vector& r) { x += r.x; y += r.y; z += r.z; w += r.w; return *this; }
inline Vector& operator -= (const Vector& r) { x -= r.x; y -= r.y; z -= r.z; w -= r.w; return *this; }
inline Vector& operator *= (const Vector& r) { x *= r.x; y *= r.y; z *= r.z; w *= r.w; return *this; }
inline Vector& operator *= (const dReal k) { x *= k; y *= k; z *= k; w *= k; return *this; }
inline Vector& operator /= (const dReal _k) { dReal k = 1 / _k; x *= k; y *= k; z *= k; w *= k; return *this; }
friend Vector operator*(float f, const Vector& v);
//friend ostream& operator<<(ostream& O, const Vector& v);
//friend istream& operator>>(istream& I, Vector& v);
inline float4 operator-() const { float4 v; v.x = -x; v.y = -y; v.z = -z; v.w = -w; return v; }
inline float4 operator+(const float4 &r) const { float4 v; v.x = x + r.x; v.y = y + r.y; v.z = z + r.z; v.w = w + r.w; return v; }
inline float4 operator-(const float4 &r) const { float4 v; v.x = x - r.x; v.y = y - r.y; v.z = z - r.z; v.w = w - r.w; return v; }
inline float4 operator*(const float4 &r) const { float4 v; v.x = r.x * x; v.y = r.y * y; v.z = r.z * z; v.w = r.w * w; return v; }
inline float4 operator*(dReal k) const { float4 v; v.x = k * x; v.y = k * y; v.z = k * z; v.w = k * w; return v; }
inline float4& operator += (const float4& r) { x += r.x; y += r.y; z += r.z; w += r.w; return *this; }
inline float4& operator -= (const float4& r) { x -= r.x; y -= r.y; z -= r.z; w -= r.w; return *this; }
inline float4& operator *= (const float4& r) { x *= r.x; y *= r.y; z *= r.z; w *= r.w; return *this; }
inline float4& operator *= (const dReal k) { x *= k; y *= k; z *= k; w *= k; return *this; }
inline float4& operator /= (const dReal _k) { dReal k = 1 / _k; x *= k; y *= k; z *= k; w *= k; return *this; }
friend float4 operator*(float f, const float4& v);
//friend ostream& operator<<(ostream& O, const float4& v);
//friend istream& operator>>(istream& I, float4& v);
};
// Scalar * vector: returns a vector whose x, y and z are f times those of `left`.
// NOTE(review): w is never assigned here, so it keeps the 0 set by the default
// constructor, whereas the member operator*(dReal) scales all four components.
// Confirm whether leaving w at 0 is intentional before relying on f * v == v * f.
inline Vector operator*(float f, const Vector& left)
inline float4 operator*(float f, const float4& left)
{
Vector v;
float4 v;
v.x = f * left.x;
v.y = f * left.y;
v.z = f * left.z;
// v.w is left at its default-constructed value (0).
return v;
}
#endif
}
#endif // ZZOGLMATH_H_INCLUDED

File diff suppressed because it is too large Load Diff

View File

@ -55,16 +55,16 @@ inline bool ZZshActiveParameter(ZZshParameter param) {return (param !=NULL); }
#endif // end NVIDIA cg-toolkit API
const static char* g_pPsTexWrap[] = { "-DREPEAT", "-DCLAMP", "-DREGION_REPEAT", NULL };
const static char* g_pTexTypes[] = { "32", "tex32", "clut32", "tex32to16", "tex16to8h" };
enum ZZshShaderType {ZZ_SH_ZERO, ZZ_SH_REGULAR, ZZ_SH_REGULAR_FOG, ZZ_SH_TEXTURE, ZZ_SH_TEXTURE_FOG, ZZ_SH_CRTC};
// We have "compatible" shaders, as RegularFogVS and RegularFogPS, if we don't need to worry about incompatible shaders.
// It's used only in GLSL mode.
// We have "compatible" shaders, such as RegularFogVS and RegularFogPS, if we don't need to worry about incompatible shaders.
// It is used only in GLSL mode.
// ------------------------- Variables -------------------------------
extern int g_nPixelShaderVer;
extern ZZshShaderLink pvs[16], g_vsprog, g_psprog;
extern ZZshParameter g_vparamPosXY[2], g_fparamFogColor;
extern int g_nPixelShaderVer;
extern ZZshShaderLink pvs[16], g_vsprog, g_psprog;
extern ZZshParameter g_vparamPosXY[2], g_fparamFogColor;
#define MAX_ACTIVE_UNIFORMS 600
#define MAX_ACTIVE_SHADERS 400
@ -73,18 +73,18 @@ struct FRAGMENTSHADER
{
FRAGMENTSHADER() : prog(sZero), Shader(0), sMemory(pZero), sFinal(pZero), sBitwiseANDX(pZero), sBitwiseANDY(pZero), sInterlace(pZero), sCLUT(pZero), sOneColor(pZero), sBitBltZ(pZero),
fTexAlpha2(pZero), fTexOffset(pZero), fTexDims(pZero), fTexBlock(pZero), fClampExts(pZero), fTexWrapMode(pZero),
fRealTexDims(pZero), fTestBlack(pZero), fPageOffset(pZero), fTexAlpha(pZero) {}
ZZshShaderLink prog; // it links to the FRAGMENTSHADER structure, for compatibility between GLSL and CG.
ZZshShader Shader; // GLSL store shaders not as ready programs, but as shader compiled objects. VS and PS should be linked together to
// make a program.
fRealTexDims(pZero), fTestBlack(pZero), fPageOffset(pZero), fTexAlpha(pZero) {}
ZZshShaderLink prog; // it link to FRAGMENTSHADER structure, for compability between GLSL and CG
ZZshShader Shader; // GLSL store shader's not as ready programs, but as shaders compilated object. VS and PS should be linked together to
// made a program.
ZZshShaderType ShaderType; // Not every PS and VS are used together, only compatible ones.
ZZshParameter sMemory, sFinal, sBitwiseANDX, sBitwiseANDY, sInterlace, sCLUT;
ZZshParameter sOneColor, sBitBltZ, sInvTexDims;
ZZshParameter fTexAlpha2, fTexOffset, fTexDims, fTexBlock, fClampExts, fTexWrapMode, fRealTexDims, fTestBlack, fPageOffset, fTexAlpha;
int ParametersStart, ParametersFinish; // this is part of UniformsIndex array in which parameters of this shader asre stored. The last one is ParametersFinish-1
int ParametersStart, ParametersFinish; // this is part of UniformsIndex array in which parameters of this shader stored. Last one is ParametersFinish-1
#ifdef _DEBUG
string filename;
@ -145,7 +145,7 @@ struct FRAGMENTSHADER
return false;
}
bool set_shader_const(Vector v, const char *name)
bool set_shader_const(float4 v, const char *name)
{
ZZshParameter p;
@ -174,29 +174,17 @@ struct VERTEXSHADER
int ParametersStart, ParametersFinish;
};
namespace ZeroGS {
// Shaders variables
extern Vector g_vdepth;
extern Vector vlogz;
namespace ZeroGS {
extern float4 g_vdepth;
extern float4 vlogz;
extern VERTEXSHADER pvsBitBlt;
extern FRAGMENTSHADER ppsBitBlt[2], ppsBitBltDepth, ppsOne; // ppsOne used to stop using shaders for draw
extern FRAGMENTSHADER ppsBaseTexture, ppsConvert16to32, ppsConvert32to16;
bool LoadEffects();
bool LoadExtraEffects();
FRAGMENTSHADER* LoadShadeEffect(int type, int texfilter, int fog, int testaem, int exactcolor, const clampInfo& clamp, int context, bool* pbFailed);
// only sets a limited amount of state (for Update)
void SetTexClamping(int context, FRAGMENTSHADER* pfragment);
void SetTexVariablesInt(int context, int bilinear, const tex0Info& tex0, bool CheckVB, FRAGMENTSHADER* pfragment, int force);
extern FRAGMENTSHADER ppsRegular[4], ppsTexture[NUM_SHADERS];
extern FRAGMENTSHADER ppsCRTC[2], ppsCRTC24[2], ppsCRTCTarg[2];
}
// ------------------------- Variables -------------------------------
extern u8* s_lpShaderResources;
extern ZZshProfile cgvProf, cgfProf;
extern FRAGMENTSHADER ppsRegular[4], ppsTexture[NUM_SHADERS];
extern FRAGMENTSHADER ppsCRTC[2], ppsCRTC24[2], ppsCRTCTarg[2];
// ------------------------- Functions -------------------------------
#ifdef NVIDIA_CG_API
@ -208,7 +196,7 @@ inline bool ZZshExistProgram(ZZshShaderLink prog) {return (prog != NULL); };
extern const char* ShaderCallerName;
extern const char* ShaderHandleName;
inline void SetShaderCaller(const char* Name) {
inline void SetShaderCaller(const char* Name) {
ShaderCallerName = Name;
}
@ -222,22 +210,23 @@ inline void ResetShaderCounters() {
extern bool ZZshCheckProfilesSupport();
extern bool ZZshStartUsingShaders();
extern bool ZZshCreateOpenShadersFile();
extern void ZZshGLDisableProfile();
extern void ZZshGLEnableProfile();
extern void ZZshSetParameter4fv(ZZshShaderLink prog, ZZshParameter param, const float* v, const char* name);
extern void ZZshSetParameter4fv(ZZshParameter param, const float* v, const char* name);
extern void ZZshSetParameter4fvWithRetry(ZZshParameter* param, ZZshShaderLink prog, const float* v, const char* name);
extern void ZZshGLSetTextureParameter(ZZshShaderLink prog, ZZshParameter param, GLuint texobj, const char* name);
extern void ZZshGLSetTextureParameter(ZZshParameter param, GLuint texobj, const char* name);
extern void ZZshDefaultOneColor( FRAGMENTSHADER ptr );
extern void ZZshSetVertexShader(ZZshShader prog);
extern void ZZshSetPixelShader(ZZshShader prog);
extern void ZZshSetVertexShader(ZZshShaderLink prog);
extern void ZZshSetPixelShader(ZZshShaderLink prog);
extern bool ZZshLoadExtraEffects();
inline int GET_SHADER_INDEX(int type, int texfilter, int texwrap, int fog, int writedepth, int testaem, int exactcolor, int context, int ps)
{
return type + texfilter*NUM_TYPES + NUM_FILTERS*NUM_TYPES*texwrap + NUM_TEXWRAPS*NUM_FILTERS*NUM_TYPES*(fog+2*writedepth+4*testaem+8*exactcolor+16*context+32*ps);
extern FRAGMENTSHADER* ZZshLoadShadeEffect(int type, int texfilter, int fog, int testaem, int exactcolor, const clampInfo& clamp, int context, bool* pbFailed);
namespace ZeroGS {
// only sets a limited amount of state (for Update)
void SetTexVariablesInt(int context, int bilinear, const tex0Info& tex0, bool CheckVB, FRAGMENTSHADER* pfragment, int force);
}
struct SHADERHEADER
{
unsigned int index, offset, size; // if highest bit of index is set, pixel shader
};
#endif

View File

@ -122,22 +122,22 @@ inline void FillOnlyStencilBuffer()
// used for transformation from vertex position in GS window.coords (I hope)
// to view coordinates (in range 0, 1).
inline Vector ZeroGS::CRenderTarget::DefaultBitBltPos()
inline float4 ZeroGS::CRenderTarget::DefaultBitBltPos()
{
Vector v = Vector(1, -1, 0.5f / (float)RW(fbw), 0.5f / (float)RH(fbh));
float4 v = float4(1, -1, 0.5f / (float)RW(fbw), 0.5f / (float)RH(fbh));
v *= 1.0f / 32767.0f;
ZZshSetParameter4fv(pvsBitBlt.sBitBltPos, v, "g_sBitBltPos");
ZZshSetParameter4fv(pvsBitBlt.prog, pvsBitBlt.sBitBltPos, v, "g_sBitBltPos");
return v;
}
// Used to transform texture coordinates from GS (when 0,0 is upper left) to
// OpenGL (0,0 - lower left).
inline Vector ZeroGS::CRenderTarget::DefaultBitBltTex()
inline float4 ZeroGS::CRenderTarget::DefaultBitBltTex()
{
// I'm really sure that -0.5 is correct, because OpenGL has no half-offset
// issue, which DirectX is known for.
Vector v = Vector(1, -1, 0.5f / (float)RW(fbw), -0.5f / (float)RH(fbh));
ZZshSetParameter4fv(pvsBitBlt.sBitBltTex, v, "g_sBitBltTex");
float4 v = float4(1, -1, 0.5f / (float)RW(fbw), -0.5f / (float)RH(fbh));
ZZshSetParameter4fv(pvsBitBlt.prog, pvsBitBlt.sBitBltTex, v, "g_sBitBltTex");
return v;
}
@ -222,7 +222,7 @@ void ZeroGS::CRenderTarget::SetTarget(int fbplocal, const Rect2& scissor, int co
if (fbplocal != fbp)
{
Vector v;
float4 v;
// will be rendering to a subregion
u32 bpp = PSMT_ISHALF(psm) ? 2 : 4;
@ -401,7 +401,7 @@ void ZeroGS::CRenderTarget::Update(int context, ZeroGS::CRenderTarget* pdepth)
((CDepthTarget*)pdepth)->SetDepthStencilSurface();
SetShaderCaller("CRenderTarget::Update");
Vector v = DefaultBitBltPos();
float4 v = DefaultBitBltPos();
CRenderTargetMngr::MAPTARGETS::iterator ittarg;
@ -432,7 +432,7 @@ void ZeroGS::CRenderTarget::Update(int context, ZeroGS::CRenderTarget* pdepth)
if (nUpdateTarg)
{
ZZshGLSetTextureParameter(ppsBaseTexture.sFinal, ittarg->second->ptex, "BaseTexture.final");
ZZshGLSetTextureParameter(ppsBaseTexture.prog, ppsBaseTexture.sFinal, ittarg->second->ptex, "BaseTexture.final");
//assert( ittarg->second->fbw == fbw );
int offset = (fbp - ittarg->second->fbp) * 64 / fbw;
@ -445,7 +445,7 @@ void ZeroGS::CRenderTarget::Update(int context, ZeroGS::CRenderTarget* pdepth)
v.z = 0.25f;
v.w = (float)RH(offset) + 0.25f;
ZZshSetParameter4fv(pvsBitBlt.sBitBltTex, v, "g_fBitBltTex");
ZZshSetParameter4fv(pvsBitBlt.prog, pvsBitBlt.sBitBltTex, v, "g_fBitBltTex");
// v = DefaultBitBltTex(); Maybe?
ZZshDefaultOneColor ( ppsBaseTexture );
@ -472,14 +472,14 @@ void ZeroGS::CRenderTarget::Update(int context, ZeroGS::CRenderTarget* pdepth)
// Fix in r133 -- FFX movies and Gust backgrounds!
//SetTexVariablesInt(0, 0*(AA.x || AA.y) ? 2 : 0, texframe, false, &ppsBitBlt[!!s_AAx], 1);
SetTexVariablesInt(0, 0, texframe, false, &ppsBitBlt[bit_idx], 1);
ZZshGLSetTextureParameter(ppsBitBlt[bit_idx].sMemory, vb[0].pmemtarg->ptex->tex, "BitBlt.memory");
ZZshGLSetTextureParameter(ppsBitBlt[bit_idx].prog, ppsBitBlt[bit_idx].sMemory, vb[0].pmemtarg->ptex->tex, "BitBlt.memory");
v = Vector(1, 1, 0.0f, 0.0f);
ZZshSetParameter4fv(pvsBitBlt.sBitBltTex, v, "g_fBitBltTex");
v = float4(1, 1, 0.0f, 0.0f);
ZZshSetParameter4fv(pvsBitBlt.prog, pvsBitBlt.sBitBltTex, v, "g_fBitBltTex");
v.x = 1;
v.y = 2;
ZZshSetParameter4fv(ppsBitBlt[bit_idx].sOneColor, v, "g_fOneColor");
ZZshSetParameter4fv(ppsBitBlt[bit_idx].prog, ppsBitBlt[bit_idx].sOneColor, v, "g_fOneColor");
assert(ptex != 0);
@ -536,26 +536,26 @@ void ZeroGS::CRenderTarget::ConvertTo32()
SetShaderCaller("CRenderTarget::ConvertTo32");
// tex coords, test ffx bikanel island when changing these
Vector v = DefaultBitBltPos();
float4 v = DefaultBitBltPos();
v = DefaultBitBltTex();
v.x = (float)RW(16);
v.y = (float)RH(16);
v.z = -(float)RW(fbw);
v.w = (float)RH(8);
ZZshSetParameter4fv(ppsConvert16to32.fTexOffset, v, "g_fTexOffset");
ZZshSetParameter4fv(ppsConvert16to32.prog, ppsConvert16to32.fTexOffset, v, "g_fTexOffset");
v.x = (float)RW(8);
v.y = 0;
v.z = 0;
v.w = 0.25f;
ZZshSetParameter4fv(ppsConvert16to32.fPageOffset, v, "g_fPageOffset");
ZZshSetParameter4fv(ppsConvert16to32.prog, ppsConvert16to32.fPageOffset, v, "g_fPageOffset");
v.x = (float)RW(2 * fbw);
v.y = (float)RH(fbh);
v.z = 0;
v.w = 0.0001f * (float)RH(fbh);
ZZshSetParameter4fv(ppsConvert16to32.fTexDims, v, "g_fTexDims");
ZZshSetParameter4fv(ppsConvert16to32.prog, ppsConvert16to32.fTexDims, v, "g_fTexDims");
// v.x = 0;
// ZZshSetParameter4fv(ppsConvert16to32.fTexBlock, v, "g_fTexBlock");
@ -568,7 +568,7 @@ void ZeroGS::CRenderTarget::ConvertTo32()
ZeroGS::ResetRenderTarget(1);
BindToSample(&ptex);
ZZshGLSetTextureParameter(ppsConvert16to32.sFinal, ptex, "Convert 16 to 32.Final");
ZZshGLSetTextureParameter(ppsConvert16to32.prog, ppsConvert16to32.sFinal, ptex, "Convert 16 to 32.Final");
fbh /= 2; // have 16 bit surfaces are usually 2x higher
SetViewport();
@ -640,26 +640,26 @@ void ZeroGS::CRenderTarget::ConvertTo16()
SetShaderCaller("CRenderTarget::ConvertTo16");
// tex coords, test ffx bikanel island when changing these
Vector v = DefaultBitBltPos();
float4 v = DefaultBitBltPos();
v = DefaultBitBltTex();
v.x = 16.0f / (float)fbw;
v.y = 8.0f / (float)fbh;
v.z = 0.5f * v.x;
v.w = 0.5f * v.y;
ZZshSetParameter4fv(ppsConvert32to16.fTexOffset, v, "g_fTexOffset");
ZZshSetParameter4fv(ppsConvert32to16.prog, ppsConvert32to16.fTexOffset, v, "g_fTexOffset");
v.x = 256.0f / 255.0f;
v.y = 256.0f / 255.0f;
v.z = 0.05f / 256.0f;
v.w = -0.001f / 256.0f;
ZZshSetParameter4fv(ppsConvert32to16.fPageOffset, v, "g_fPageOffset");
ZZshSetParameter4fv(ppsConvert32to16.prog, ppsConvert32to16.fPageOffset, v, "g_fPageOffset");
v.x = (float)RW(fbw);
v.y = (float)RH(2 * fbh);
v.z = 0;
v.w = -0.1f / RH(fbh);
ZZshSetParameter4fv(ppsConvert32to16.fTexDims, v, "g_fTexDims");
ZZshSetParameter4fv(ppsConvert32to16.prog, ppsConvert32to16.fTexDims, v, "g_fTexDims");
glBindBuffer(GL_ARRAY_BUFFER, vboRect);
SET_STREAM();
@ -671,7 +671,7 @@ void ZeroGS::CRenderTarget::ConvertTo16()
BindToSample(&ptex);
ZZshGLSetTextureParameter(ppsConvert32to16.sFinal, ptex, "Convert 32 to 16");
ZZshGLSetTextureParameter(ppsConvert32to16.prog, ppsConvert32to16.sFinal, ptex, "Convert 32 to 16");
// fbh *= 2; // have 16 bit surfaces are usually 2x higher
@ -748,22 +748,22 @@ void ZeroGS::CRenderTarget::_CreateFeedback()
ResetRenderTarget(1);
// tex coords, test ffx bikanel island when changing these
/* Vector v = DefaultBitBltPos();
v = Vector ((float)(RW(fbw+4)), (float)(RH(fbh+4)), +0.25f, -0.25f);
ZZshSetParameter4fv(pvsBitBlt.sBitBltTex, v, "BitBltTex");*/
/* float4 v = DefaultBitBltPos();
v = float4 ((float)(RW(fbw+4)), (float)(RH(fbh+4)), +0.25f, -0.25f);
ZZshSetParameter4fv(pvsBitBlt.prog, pvsBitBlt.sBitBltTex, v, "BitBltTex");*/
// tex coords, test ffx bikanel island when changing these
// Vector v = Vector(1, -1, 0.5f / (fbw << AA.x), 0.5f / (fbh << AA.y));
// float4 v = float4(1, -1, 0.5f / (fbw << AA.x), 0.5f / (fbh << AA.y));
// v *= 1/32767.0f;
// cgGLSetParameter4fv(pvsBitBlt.sBitBltPos, v);
Vector v = DefaultBitBltPos();
float4 v = DefaultBitBltPos();
v.x = (float)(RW(fbw));
v.y = (float)(RH(fbh));
v.z = 0.0f;
v.w = 0.0f;
ZZshSetParameter4fv(pvsBitBlt.sBitBltTex, v, "BitBlt.Feedback");
ZZshSetParameter4fv(pvsBitBlt.prog, pvsBitBlt.sBitBltTex, v, "BitBlt.Feedback");
ZZshDefaultOneColor(ppsBaseTexture);
glBindBuffer(GL_ARRAY_BUFFER, vboRect);
@ -773,7 +773,7 @@ void ZeroGS::CRenderTarget::_CreateFeedback()
glBindTexture(GL_TEXTURE_RECTANGLE_NV, ptex);
GL_REPORT_ERRORD();
ZZshGLSetTextureParameter(ppsBaseTexture.sFinal, ptex, "BaseTexture.Feedback");
ZZshGLSetTextureParameter(ppsBaseTexture.prog, ppsBaseTexture.sFinal, ptex, "BaseTexture.Feedback");
SetViewport();
@ -976,9 +976,9 @@ void ZeroGS::CDepthTarget::Update(int context, ZeroGS::CRenderTarget* prndr)
// write color and zero out stencil buf, always 0 context!
SetTexVariablesInt(0, 0, texframe, false, &ppsBitBltDepth, 1);
ZZshGLSetTextureParameter(ppsBitBltDepth.sMemory, vb[0].pmemtarg->ptex->tex, "BitBltDepth");
ZZshGLSetTextureParameter(ppsBitBltDepth.prog, ppsBitBltDepth.sMemory, vb[0].pmemtarg->ptex->tex, "BitBltDepth");
Vector v = DefaultBitBltPos();
float4 v = DefaultBitBltPos();
v = DefaultBitBltTex();
@ -986,9 +986,9 @@ void ZeroGS::CDepthTarget::Update(int context, ZeroGS::CRenderTarget* prndr)
v.y = 2;
v.z = PSMT_IS16Z(psm) ? 1.0f : 0.0f;
v.w = g_filog32;
ZZshSetParameter4fv(ppsBitBltDepth.sOneColor, v, "g_fOneColor");
ZZshSetParameter4fv(ppsBitBltDepth.prog, ppsBitBltDepth.sOneColor, v, "g_fOneColor");
Vector vdepth = g_vdepth;
float4 vdepth = g_vdepth;
if (psm == PSMT24Z)
{
@ -1001,7 +1001,7 @@ void ZeroGS::CDepthTarget::Update(int context, ZeroGS::CRenderTarget* prndr)
assert(ppsBitBltDepth.sBitBltZ != 0);
ZZshSetParameter4fv(ppsBitBltDepth.sBitBltZ, ((255.0f / 256.0f)*vdepth), "g_fBitBltZ");
ZZshSetParameter4fv(ppsBitBltDepth.prog, ppsBitBltDepth.sBitBltZ, ((255.0f / 256.0f)*vdepth), "g_fBitBltZ");
assert(pdepth != 0);
//GLint w1 = 0;

View File

@ -228,7 +228,6 @@ inline list<ZeroGS::CRenderTarget*> CreateTargetsList(int start, int end)
return listTargs;
}
extern Vector g_vdepth;
extern int icurctx;
extern GLuint vboRect;

View File

@ -29,7 +29,6 @@
#include "Mem.h"
#include "x86.h"
#include "zerogs.h"
#include "zpipe.h"
#include "targets.h"
#include "GLWin.h"
#include "ZZoglShaders.h"
@ -51,7 +50,6 @@ extern int g_nFrame, g_nRealFrame;
//-------------------------- Variables
primInfo *prim;
ZZshProgram g_vsprog = 0, g_psprog = 0; // 2 -- ZZ
// Returns the bits of the float `f` read back as a u32 (no numeric conversion is performed).
// NOTE(review): this is pointer-cast type punning -- confirm the build tolerates it
// (e.g. strict-aliasing optimizations are not relied upon here).
inline u32 FtoDW(float f)
{
	return *reinterpret_cast<u32*>(&f);
}
@ -82,7 +80,6 @@ PFNGLDRAWBUFFERSPROC glDrawBuffers = NULL;
/////////////////////
// graphics resources
ZZshParameter g_vparamPosXY[2] = {0}, g_fparamFogColor = 0;
bool s_bTexFlush = false;
int s_nLastResolveReset = 0;
@ -94,10 +91,8 @@ int nBackbufferWidth, nBackbufferHeight; // ZZ
namespace ZeroGS
{
Vector g_vdepth, vlogz;
// = Vector( 255.0 /256.0f, 255.0/65536.0f, 255.0f/(65535.0f*256.0f), 1.0f/(65536.0f*65536.0f));
// Vector g_vdepth = Vector( 65536.0f*65536.0f, 256.0f*65536.0f, 65536.0f, 256.0f);
// = float4( 255.0 /256.0f, 255.0/65536.0f, 255.0f/(65535.0f*256.0f), 1.0f/(65536.0f*65536.0f));
// float4 g_vdepth = float4( 65536.0f*65536.0f, 256.0f*65536.0f, 65536.0f, 256.0f);
extern CRangeManager s_RangeMngr; // manages overwritten memory
@ -341,7 +336,7 @@ void ZeroGS::DrawText(const char* pstr, int left, int top, u32 color)
FUNCLOG
ZZshGLDisableProfile();
Vector v;
float4 v;
v.SetColor(color);
glColor3f(v.z, v.y, v.x);
//glColor3f(((color >> 16) & 0xff) / 255.0f, ((color >> 8) & 0xff)/ 255.0f, (color & 0xff) / 255.0f);
@ -490,19 +485,19 @@ void ZeroGS::RenderCustom(float fAlpha)
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
// tex coords
Vector v = Vector(1 / 32767.0f, 1 / 32767.0f, 0, 0);
ZZshSetParameter4fv(pvsBitBlt.sBitBltPos, v, "g_fBitBltPos");
float4 v = float4(1 / 32767.0f, 1 / 32767.0f, 0, 0);
ZZshSetParameter4fv(pvsBitBlt.prog, pvsBitBlt.sBitBltPos, v, "g_fBitBltPos");
v.x = (float)nLogoWidth;
v.y = (float)nLogoHeight;
ZZshSetParameter4fv(pvsBitBlt.sBitBltTex, v, "g_fBitBltTex");
ZZshSetParameter4fv(pvsBitBlt.prog, pvsBitBlt.sBitBltTex, v, "g_fBitBltTex");
v.x = v.y = v.z = v.w = fAlpha;
ZZshSetParameter4fv(ppsBaseTexture.sOneColor, v, "g_fOneColor");
ZZshSetParameter4fv(ppsBaseTexture.prog, ppsBaseTexture.sOneColor, v, "g_fOneColor");
if (conf.wireframe()) glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
// inside vhDCb[0]'s target area, so render that region only
ZZshGLSetTextureParameter(ppsBaseTexture.sFinal, ptexLogo, "Logo");
ZZshGLSetTextureParameter(ppsBaseTexture.prog, ppsBaseTexture.sFinal, ptexLogo, "Logo");
glBindBuffer(GL_ARRAY_BUFFER, vboRect);
SET_STREAM();
@ -781,7 +776,7 @@ void ZeroGS::SetFogColor(u32 fog)
ZeroGS::FlushBoth();
SetShaderCaller("SetFogColor");
Vector v;
float4 v;
// set it immediately
v.SetColor(gs.fogcol);
@ -795,7 +790,7 @@ void ZeroGS::SetFogColor(GIFRegFOGCOL* fog)
FUNCLOG
SetShaderCaller("SetFogColor");
Vector v;
float4 v;
v.x = fog->FCR / 255.0f;
v.y = fog->FCG / 255.0f;

View File

@ -66,7 +66,10 @@ extern float g_fiGPU_TEXWIDTH;
#define MASKDIVISOR 0 // Used for decrement bitwise mask texture size if 1024 is too big
#define GPU_TEXMASKWIDTH (1024 >> MASKDIVISOR) // bitwise mask width for region repeat mode
extern u32 ptexBlocks; // holds information on block tiling. It's the texture number in OpenGL -- if 0, then no such texture
extern u32 ptexConv16to32; // exists. These textures should be created on start and released on finish.
extern u32 ptexBilinearBlocks;
extern u32 ptexConv32to16;
// this is currently *not* used as a bool, in spite of its moniker --air
// Actually, the only thing written to it is 1 or 0, which makes the (g_bSaveFlushedFrame & 0x80000000) check rather bizarre.
@ -136,7 +139,7 @@ class CRenderTarget
int fbp, fbw, fbh, fbhCalc; // if fbp is negative, virtual target (not mapped to any real addr)
int start, end; // in bytes
u32 lastused; // time stamp since last used
Vector vposxy;
float4 vposxy;
u32 fbm;
u16 status;
@ -161,8 +164,8 @@ class CRenderTarget
TS_NeedConvert32 = 16,
TS_NeedConvert16 = 32,
};
inline Vector DefaultBitBltPos() ;
inline Vector DefaultBitBltTex() ;
inline float4 DefaultBitBltPos();
inline float4 DefaultBitBltTex();
private:
void _CreateFeedback();