diff --git a/pcsx2/COP0.cpp b/pcsx2/COP0.cpp index 0911eefde6..40f2bd7ae5 100644 --- a/pcsx2/COP0.cpp +++ b/pcsx2/COP0.cpp @@ -20,9 +20,10 @@ u32 s_iLastCOP0Cycle = 0; u32 s_iLastPERFCycle[2] = { 0, 0 }; -__ri void UpdateCP0Status() { - //currently the 2 memory modes are not implemented. Given this function is called so much, - //it's commented out for now. Only the interrupt test is needed. (rama) +// Updates the CPU's mode of operation (either, Kernel, Supervisor, or User modes). +// Currently the different modes are not implemented. +// Given this function is called so much, it's commented out for now. (rama) +__ri void cpuUpdateOperationMode() { //u32 value = cpuRegs.CP0.n.Status.val; @@ -32,7 +33,6 @@ __ri void UpdateCP0Status() { //} else { // User Mode // memSetUserMode(); //} - cpuTestHwInts(); } void __fastcall WriteCP0Status(u32 value) { diff --git a/pcsx2/Hw.cpp b/pcsx2/Hw.cpp index 44a0003d6a..1202a1911e 100644 --- a/pcsx2/Hw.cpp +++ b/pcsx2/Hw.cpp @@ -50,8 +50,7 @@ void hwReset() { hwInit(); - memzero_ptr( eeHw ); - //memset(eeHw+0x2000, 0, 0x0000e000); + memzero( eeHw ); psHu32(SBUS_F260) = 0x1D000060; @@ -73,16 +72,16 @@ void hwReset() ipuDmaReset(); } -__fi void intcInterrupt() +__fi uint intcInterrupt() { if ((psHu32(INTC_STAT)) == 0) { //DevCon.Warning("*PCSX2*: intcInterrupt already cleared"); - return; + return 0; } if ((psHu32(INTC_STAT) & psHu32(INTC_MASK)) == 0) { //DevCon.Warning("*PCSX2*: No valid interrupt INTC_MASK: %x INTC_STAT: %x", psHu32(INTC_MASK), psHu32(INTC_STAT)); - return; + return 0; } HW_LOG("intcInterrupt %x", psHu32(INTC_STAT) & psHu32(INTC_MASK)); @@ -91,27 +90,29 @@ __fi void intcInterrupt() counters[1].hold = rcntRcount(1); } - cpuException(0x400, cpuRegs.branch); + //cpuException(0x400, cpuRegs.branch); + return 0x400; } -__fi void dmacInterrupt() +__fi uint dmacInterrupt() { if( ((psHu16(DMAC_STAT + 2) & psHu16(DMAC_STAT)) == 0 ) && ( psHu16(DMAC_STAT) & 0x8000) == 0 ) { //DevCon.Warning("No valid DMAC interrupt MASK %x STAT %x", psHu16(DMAC_STAT+2), psHu16(DMAC_STAT)); - return; + return 0; } - if (!(dmacRegs.ctrl.DMAE) || psHu8(DMAC_ENABLER+2) == 1) + if (!dmacRegs.ctrl.DMAE || psHu8(DMAC_ENABLER+2) == 1) { //DevCon.Warning("DMAC Suspended or Disabled on interrupt"); - return; + return 0; } HW_LOG("dmacInterrupt %x", (psHu16(DMAC_STAT + 2) & psHu16(DMAC_STAT) | - psHu16(DMAC_STAT) & 0x8000)); + psHu16(DMAC_STAT) & 0x8000)); - cpuException(0x800, cpuRegs.branch); + //cpuException(0x800, cpuRegs.branch); + return 0x800; } void hwIntcIrq(int n) diff --git a/pcsx2/IPU/IPU.cpp b/pcsx2/IPU/IPU.cpp index 165e2a0b40..5e19cd9124 100644 --- a/pcsx2/IPU/IPU.cpp +++ b/pcsx2/IPU/IPU.cpp @@ -55,9 +55,6 @@ int coded_block_pattern = 0; u8 indx4[16*16/2]; __aligned16 decoder_t decoder; -__aligned16 u8 _readbits[80]; //local buffer (ring buffer) -u8* readbits = _readbits; // always can decrement by one 1qw - __fi void IPUProcessInterrupt() { if (ipuRegs.ctrl.BUSY && g_BP.IFC) IPUWorker(); @@ -96,8 +93,6 @@ void ReportIPU() Console.WriteLn("g_decoder = 0x%x.", &decoder); Console.WriteLn("mpeg2_scan = 0x%x.", &mpeg2_scan); Console.WriteLn(ipu_cmd.desc()); - Console.WriteLn("_readbits = 0x%x. readbits - _readbits, which is also frozen, is 0x%x.", - _readbits, readbits - _readbits); Console.Newline(); } @@ -114,15 +109,6 @@ void SaveStateBase::ipuFreeze() Freeze(coded_block_pattern); Freeze(decoder); Freeze(ipu_cmd); - Freeze(_readbits); - - int temp = readbits - _readbits; - Freeze(temp); - - if (IsLoading()) - { - readbits = _readbits; - } } void tIPU_CMD_IDEC::log() const @@ -213,21 +199,27 @@ __fi u32 ipuRead32(u32 mem) switch (mem) { ipucase(IPU_CTRL): // IPU_CTRL + { ipuRegs.ctrl.IFC = g_BP.IFC; ipuRegs.ctrl.CBP = coded_block_pattern; if (!ipuRegs.ctrl.BUSY) IPU_LOG("read32: IPU_CTRL=0x%08X", ipuRegs.ctrl._u32); - return ipuRegs.ctrl._u32; + return ipuRegs.ctrl._u32; + } ipucase(IPU_BP): // IPU_BP + { + pxAssume(g_BP.FP <= 2); + ipuRegs.ipubp = g_BP.BP & 0x7f; ipuRegs.ipubp |= g_BP.IFC << 8; - ipuRegs.ipubp |= (g_BP.FP /*+ g_BP.bufferhasnew*/) << 16; + ipuRegs.ipubp |= g_BP.FP << 16; IPU_LOG("read32: IPU_BP=0x%08X", ipuRegs.ipubp); - return ipuRegs.ipubp; + return ipuRegs.ipubp; + } default: IPU_LOG("read32: Addr=0x%08X Value = 0x%08X", mem, psHu32(IPU_CMD + mem)); @@ -283,9 +275,7 @@ void ipuSoftReset() ipu_cmd.clear(); ipuRegs.cmd.BUSY = 0; - g_BP.BP = 0; - g_BP.FP = 0; - //g_BP.bufferhasnew = 0; + memzero(g_BP); } __fi bool ipuWrite32(u32 mem, u32 value) @@ -354,12 +344,11 @@ static void ipuBCLR(u32 val) { ipu_fifo.in.clear(); + memzero(g_BP); g_BP.BP = val & 0x7F; - g_BP.FP = 0; - //g_BP.bufferhasnew = 0; + ipuRegs.ctrl.BUSY = 0; ipuRegs.cmd.BUSY = 0; - memzero(_readbits); IPU_LOG("Clear IPU input FIFO. Set Bit offset=0x%X", g_BP.BP); } @@ -370,7 +359,7 @@ static bool ipuIDEC(u32 val, bool resume) if (!resume) { idec.log(); - g_BP.BP += idec.FB;//skip FB bits + g_BP.Advance(idec.FB); //from IPU_CTRL ipuRegs.ctrl.PCT = I_TYPE; //Intra DECoding;) @@ -407,7 +396,7 @@ static __fi bool ipuBDEC(u32 val, bool resume) bdec.log(s_bdec); if (IsDebugBuild) s_bdec++; - g_BP.BP += bdec.FB;//skip FB bits + g_BP.Advance(bdec.FB); decoder.coding_type = I_TYPE; decoder.mpeg1 = ipuRegs.ctrl.MP1; decoder.q_scale_type = ipuRegs.ctrl.QST; @@ -433,11 +422,7 @@ static bool __fastcall ipuVDEC(u32 val) switch (ipu_cmd.pos[0]) { case 0: - ipuRegs.cmd.DATA = 0; - if (!getBits32((u8*)&decoder.bitstream_buf, 0)) return false; - - decoder.bitstream_bits = -16; - BigEndian(decoder.bitstream_buf, decoder.bitstream_buf); + if (!bitstream_init()) return false; switch ((val >> 26) & 3) { @@ -459,17 +444,14 @@ static bool __fastcall ipuVDEC(u32 val) case 3://DMVector ipuRegs.cmd.DATA = get_dmv(); break; + + jNO_DEFAULT } - g_BP.BP += (int)decoder.bitstream_bits + 16; + ipuRegs.cmd.DATA &= 0xFFFF; + ipuRegs.cmd.DATA |= 0x10000; - if ((int)g_BP.BP < 0) - { - g_BP.BP += 128; - ReorderBitstream(); - } - - ipuRegs.cmd.DATA = (ipuRegs.cmd.DATA & 0xFFFF) | ((decoder.bitstream_bits + 16) << 16); + //ipuRegs.cmd.DATA = (ipuRegs.cmd.DATA & 0xFFFF) | ((decoder.bitstream_bits + 16) << 16); ipuRegs.ctrl.ECD = (ipuRegs.cmd.DATA == 0); case 1: @@ -479,14 +461,14 @@ static bool __fastcall ipuVDEC(u32 val) return false; } - BigEndian(ipuRegs.top, ipuRegs.top); + ipuRegs.top = BigEndian(ipuRegs.top); IPU_LOG("VDEC command data 0x%x(0x%x). Skip 0x%X bits/Table=%d (%s), pct %d", ipuRegs.cmd.DATA, ipuRegs.cmd.DATA >> 16, val & 0x3f, (val >> 26) & 3, (val >> 26) & 1 ? ((val >> 26) & 2 ? "DMV" : "MBT") : (((val >> 26) & 2 ? "MC" : "MBAI")), ipuRegs.ctrl.PCT); return true; - jNO_DEFAULT + jNO_DEFAULT } return false; @@ -496,7 +478,7 @@ static __fi bool ipuFDEC(u32 val) { if (!getBits32((u8*)&ipuRegs.cmd.DATA, 0)) return false; - BigEndian(ipuRegs.cmd.DATA, ipuRegs.cmd.DATA); + ipuRegs.cmd.DATA = BigEndian(ipuRegs.cmd.DATA); ipuRegs.top = ipuRegs.cmd.DATA; IPU_LOG("FDEC read: 0x%08x", ipuRegs.top); @@ -553,11 +535,10 @@ static bool ipuSETVQ(u32 val) if (!getBits64(((u8*)vqclut) + 8 * ipu_cmd.pos[0], 1)) return false; } - IPU_LOG("SETVQ command.\nRead VQCLUT table from FIFO."); - IPU_LOG( - "%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d " - "%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d" - "%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d " + IPU_LOG("SETVQ command. Read VQCLUT table from FIFO.\n" + "%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d\n" + "%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d\n" + "%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d\n" "%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d", vqclut[0] >> 10, (vqclut[0] >> 5) & 0x1F, vqclut[0] & 0x1F, vqclut[1] >> 10, (vqclut[1] >> 5) & 0x1F, vqclut[1] & 0x1F, @@ -723,148 +704,48 @@ __fi void ipu_vq(macroblock_rgb16& rgb16, u8* indx4) Console.Error("IPU: VQ not implemented"); } -__fi void ipu_copy(const macroblock_8& mb8, macroblock_16& mb16) -{ - const u8 *s = (const u8*)&mb8; - s16 *d = (s16*)&mb16; - int i; - for (i = 0; i < 256; i++) *d++ = *s++; //Y bias - 16 - for (i = 0; i < 64; i++) *d++ = *s++; //Cr bias - 128 - for (i = 0; i < 64; i++) *d++ = *s++; //Cb bias - 128 -} - // -------------------------------------------------------------------------------------- // Buffer reader // -------------------------------------------------------------------------------------- -// move the readbits queue -__fi void inc_readbits() +__ri u32 UBITS(uint bits) { - readbits += 16; - if (readbits >= _readbits + 64) - { - // move back - *(u64*)(_readbits) = *(u64*)(_readbits + 64); - *(u64*)(_readbits + 8) = *(u64*)(_readbits + 72); - readbits = _readbits; - } + uint readpos8 = g_BP.BP/8; + + uint result = BigEndian(*(u32*)( (u8*)g_BP.internal_qwc + readpos8 )); + uint bp7 = (g_BP.BP & 7); + result <<= bp7; + result >>= (32 - bits); + + return result; } -// returns the pointer of readbits moved by 1 qword -__fi u8* next_readbits() +__ri s32 SBITS(uint bits) { - return readbits + 16; -} + // Read an unaligned 32 bit value and then shift the bits up and then back down. -// returns the pointer of readbits moved by 1 qword -u8* prev_readbits() -{ - if (readbits < _readbits + 16) return _readbits + 48 - (readbits - _readbits); + uint readpos8 = g_BP.BP/8; - return readbits - 16; -} + int result = BigEndian(*(s32*)( (s8*)g_BP.internal_qwc + readpos8 )); + uint bp7 = (g_BP.BP & 7); + result <<= bp7; + result >>= (32 - bits); -void ReorderBitstream() -{ - readbits = prev_readbits(); - g_BP.FP = 2; -} - -// IPU has a 2qword internal buffer whose status is pointed by FP. -// If FP is 1, there's 1 qword in buffer. Second qword is only loaded -// incase there are less than 32bits available in the first qword. -// \return Number of bits available (clamps at 16 bits) -u16 __fastcall FillInternalBuffer(u32 * pointer, u32 advance, u32 size) -{ - if (g_BP.FP == 0) - { - if (ipu_fifo.in.read(next_readbits()) == 0) return 0; - - inc_readbits(); - g_BP.FP = 1; - } - - if ((g_BP.FP < 2) && ((*(int*)pointer + size) >= 128)) - { - if (ipu_fifo.in.read(next_readbits())) g_BP.FP += 1; - } - - if (*(int*)pointer >= 128) - { - pxAssert(g_BP.FP >= 1); - - if (g_BP.FP > 1) inc_readbits(); - - if (advance) - { - g_BP.FP--; - *pointer &= 127; - } - } - - return (g_BP.FP >= 1) ? g_BP.FP * 128 - (*(int*)pointer) : 0; + return result; } // whenever reading fractions of bytes. The low bits always come from the next byte // while the high bits come from the current byte -u8 __fastcall getBits128(u8 *address, u32 advance) +u8 getBits64(u8 *address, bool advance) { - u64 mask2; - u128 mask; - u8* readpos; + if (!g_BP.FillBuffer(64)) return 0; - // Check if the current BP has exceeded or reached the limit of 128 - if (FillInternalBuffer(&g_BP.BP, 1, 128) < 128) return 0; - - readpos = readbits + (int)g_BP.BP / 8; + const u8* readpos = &g_BP.internal_qwc[0]._u8[g_BP.BP/8]; if (uint shift = (g_BP.BP & 7)) { - mask2 = 0xff >> shift; - mask.lo = mask2 | (mask2 << 8) | (mask2 << 16) | (mask2 << 24) | (mask2 << 32) | (mask2 << 40) | (mask2 << 48) | (mask2 << 56); - mask.hi = mask2 | (mask2 << 8) | (mask2 << 16) | (mask2 << 24) | (mask2 << 32) | (mask2 << 40) | (mask2 << 48) | (mask2 << 56); - - u128 notMask; - u128 data = *(u128*)(readpos + 1); - notMask.lo = ~mask.lo & data.lo; - notMask.hi = ~mask.hi & data.hi; - notMask.lo >>= 8 - shift; - notMask.lo |= (notMask.hi & (ULLONG_MAX >> (64 - shift))) << (64 - shift); - notMask.hi >>= 8 - shift; - - mask.hi = (((*(u128*)readpos).hi & mask.hi) << shift) | (((*(u128*)readpos).lo & mask.lo) >> (64 - shift)); - mask.lo = ((*(u128*)readpos).lo & mask.lo) << shift; - - notMask.lo |= mask.lo; - notMask.hi |= mask.hi; - *(u128*)address = notMask; - } - else - { - *(u128*)address = *(u128*)readpos; - } - - if (advance) g_BP.BP += 128; - - return 1; -} - -// whenever reading fractions of bytes. The low bits always come from the next byte -// while the high bits come from the current byte -u8 __fastcall getBits64(u8 *address, u32 advance) -{ - register u64 mask = 0; - u8* readpos; - - // Check if the current BP has exceeded or reached the limit of 128 - if (FillInternalBuffer(&g_BP.BP, 1, 64) < 64) return 0; - - readpos = readbits + (int)g_BP.BP / 8; - - if (uint shift = (g_BP.BP & 7)) - { - mask = (0xff >> shift); + u64 mask = (0xff >> shift); mask = mask | (mask << 8) | (mask << 16) | (mask << 24) | (mask << 32) | (mask << 40) | (mask << 48) | (mask << 56); *(u64*)address = ((~mask & *(u64*)(readpos + 1)) >> (8 - shift)) | (((mask) & *(u64*)readpos) << shift); @@ -874,89 +755,76 @@ u8 __fastcall getBits64(u8 *address, u32 advance) *(u64*)address = *(u64*)readpos; } - if (advance) g_BP.BP += 64; + if (advance) g_BP.Advance(64); return 1; } // whenever reading fractions of bytes. The low bits always come from the next byte // while the high bits come from the current byte -u8 __fastcall getBits32(u8 *address, u32 advance) +__fi u8 getBits32(u8 *address, bool advance) { - u32 mask; - u8* readpos; + if (!g_BP.FillBuffer(32)) return 0; - // Check if the current BP has exceeded or reached the limit of 128 - if (FillInternalBuffer(&g_BP.BP, 1, 32) < 32) return 0; - - readpos = readbits + (int)g_BP.BP / 8; - - if (uint shift = (g_BP.BP & 7)) + const u8* readpos = &g_BP.internal_qwc->_u8[g_BP.BP/8]; + + if(uint shift = (g_BP.BP & 7)) { - mask = (0xff >> shift); + u32 mask = (0xff >> shift); mask = mask | (mask << 8) | (mask << 16) | (mask << 24); *(u32*)address = ((~mask & *(u32*)(readpos + 1)) >> (8 - shift)) | (((mask) & *(u32*)readpos) << shift); } else { + // Bit position-aligned -- no masking/shifting necessary *(u32*)address = *(u32*)readpos; } - if (advance) g_BP.BP += 32; + if (advance) g_BP.Advance(32); return 1; } -__fi u8 __fastcall getBits16(u8 *address, u32 advance) +__fi u8 getBits16(u8 *address, bool advance) { - u32 mask; - u8* readpos; + if (!g_BP.FillBuffer(16)) return 0; - // Check if the current BP has exceeded or reached the limit of 128 - if (FillInternalBuffer(&g_BP.BP, 1, 16) < 16) return 0; - - readpos = readbits + (int)g_BP.BP / 8; + const u8* readpos = &g_BP.internal_qwc[0]._u8[g_BP.BP/8]; if (uint shift = (g_BP.BP & 7)) { - mask = (0xff >> shift); + uint mask = (0xff >> shift); mask = mask | (mask << 8); - *(u16*)address = ((~mask & *(u16*)(readpos + 1)) >> (8 - shift)) | (((mask) & *(u16*)readpos) << shift); - } + } else { *(u16*)address = *(u16*)readpos; - } + } - if (advance) g_BP.BP += 16; + if (advance) g_BP.Advance(16); return 1; } -u8 __fastcall getBits8(u8 *address, u32 advance) +u8 getBits8(u8 *address, bool advance) { - u32 mask; - u8* readpos; + if (!g_BP.FillBuffer(8)) return 0; - // Check if the current BP has exceeded or reached the limit of 128 - if (FillInternalBuffer(&g_BP.BP, 1, 8) < 8) - return 0; - - readpos = readbits + (int)g_BP.BP / 8; + const u8* readpos = &g_BP.internal_qwc[0]._u8[g_BP.BP/8]; if (uint shift = (g_BP.BP & 7)) - { - mask = (0xff >> shift); + { + uint mask = (0xff >> shift); *(u8*)address = (((~mask) & readpos[1]) >> (8 - shift)) | (((mask) & *readpos) << shift); - } + } else { *(u8*)address = *(u8*)readpos; - } + } - if (advance) g_BP.BP += 8; + if (advance) g_BP.Advance(8); return 1; } @@ -983,7 +851,7 @@ void IPUCMD_WRITE(u32 val) case SCE_IPU_VDEC: - g_BP.BP += val & 0x3F; + g_BP.Advance(val & 0x3F); // check if enough data in queue if (ipuVDEC(val)) return; @@ -993,9 +861,11 @@ void IPUCMD_WRITE(u32 val) break; case SCE_IPU_FDEC: - IPU_LOG("FDEC command. Skip 0x%X bits, FIFO 0x%X qwords, BP 0x%X, FP %d, CHCR 0x%x", - val & 0x3f, g_BP.IFC, (int)g_BP.BP, g_BP.FP, ipu1dma.chcr._u32); - g_BP.BP += val & 0x3F; + IPU_LOG("FDEC command. Skip 0x%X bits, FIFO 0x%X qwords, BP 0x%X, CHCR 0x%x", + val & 0x3f, g_BP.IFC, (int)g_BP.BP, ipu1dma.chcr._u32); + + g_BP.Advance(val & 0x3F); + if (ipuFDEC(val)) return; ipuRegs.cmd.BUSY = 0x80000000; ipuRegs.topbusy = 0x80000000; @@ -1009,7 +879,7 @@ void IPUCMD_WRITE(u32 val) case SCE_IPU_SETIQ: IPU_LOG("SETIQ command."); if (val & 0x3f) IPU_LOG("Skip %d bits.", val & 0x3f); - g_BP.BP += val & 0x3F; + g_BP.Advance(val & 0x3F); if (ipuSETIQ(val)) return; break; diff --git a/pcsx2/IPU/IPU.h b/pcsx2/IPU/IPU.h index e33c211b3e..6759e547db 100644 --- a/pcsx2/IPU/IPU.h +++ b/pcsx2/IPU/IPU.h @@ -67,11 +67,66 @@ union tIPU_CTRL { void reset() { _u32 = 0; } }; -struct tIPU_BP { - u32 BP; // Bit stream point - u16 IFC; // Input FIFO counter - u8 FP; // FIFO point - u8 bufferhasnew; // Always 0. +struct __aligned16 tIPU_BP { + __aligned16 u128 internal_qwc[2]; + + u32 BP; // Bit stream point (0 to 128*2) + u32 IFC; // Input FIFO counter (8QWC) (0 to 8) + u32 FP; // internal FIFO (2QWC) fill status (0 to 2) + + __fi void Align() + { + BP = (BP + 7) & ~7; + Advance(0); + } + + __fi void Advance(uint bits) + { + BP += bits; + pxAssume( BP <= 256 ); + + if (BP > 127) + { + BP -= 128; + + if (FP == 2) + { + // when BP is over 128 it means we're reading data from the second quadword. Shift that one + // to the front and load the new quadword into the second QWC (its a manualized ringbuffer!) + + CopyQWC(&internal_qwc[0], &internal_qwc[1]); + FP = 1; + } + else + { + // if FP == 1 then the buffer has been completely drained. + // if FP == 0 then an already-drained buffer is being advanced. + // In either case we just assign FP to 0. + + FP = 0; + } + } + } + + __fi bool FillBuffer(u32 bits) + { + while (FP < 2) + { + if (ipu_fifo.in.read(&internal_qwc[FP]) == 0) + { + // Here we *try* to fill the entire internal QWC buffer; however that may not necessarily + // be possible -- so if the fill fails we'll only return 0 if we don't have enough + // remaining bits in the FIFO to fill the request. + + return ((FP!=0) && (BP + bits) <= 128); + } + + ++FP; + } + + return true; + } + wxString desc() const { return wxsFormat(L"Ipu BP: bp = 0x%x, IFC = 0x%x, FP = 0x%x.", BP, IFC, FP); @@ -217,10 +272,9 @@ extern void IPUCMD_WRITE(u32 val); extern void ipuSoftReset(); extern void IPUProcessInterrupt(); -extern u16 __fastcall FillInternalBuffer(u32 * pointer, u32 advance, u32 size); -extern u8 __fastcall getBits128(u8 *address, u32 advance); -extern u8 __fastcall getBits64(u8 *address, u32 advance); -extern u8 __fastcall getBits32(u8 *address, u32 advance); -extern u8 __fastcall getBits16(u8 *address, u32 advance); -extern u8 __fastcall getBits8(u8 *address, u32 advance); +extern u8 getBits128(u8 *address, bool advance); +extern u8 getBits64(u8 *address, bool advance); +extern u8 getBits32(u8 *address, bool advance); +extern u8 getBits16(u8 *address, bool advance); +extern u8 getBits8(u8 *address, bool advance); diff --git a/pcsx2/IPU/IPU_Fifo.cpp b/pcsx2/IPU/IPU_Fifo.cpp index 2c2902cf6f..25b0aad6f5 100644 --- a/pcsx2/IPU/IPU_Fifo.cpp +++ b/pcsx2/IPU/IPU_Fifo.cpp @@ -19,7 +19,6 @@ #include "IPU/IPUdma.h" #include "mpeg2lib/Mpeg.h" - __aligned16 IPU_Fifo ipu_fifo; void IPU_Fifo::init() @@ -75,10 +74,7 @@ int IPU_Fifo_Input::write(u32* pMem, int size) while (transsize-- > 0) { - for (int i = 0; i <= 3; i++) - { - data[writepos + i] = pMem[i]; - } + CopyQWC(&data[writepos], pMem); writepos = (writepos + 4) & 31; pMem += 4; } @@ -86,118 +82,100 @@ int IPU_Fifo_Input::write(u32* pMem, int size) return firsttrans; } -int IPU_Fifo_Output::write(const u32 *value, int size) -{ - int transsize, firsttrans; - - if ((int)ipuRegs.ctrl.OFC >= 8) IPU0dma(); - - transsize = min(size, 8 - (int)ipuRegs.ctrl.OFC); - firsttrans = transsize; - - while (transsize-- > 0) - { - for (int i = 0; i <= 3; i++) - { - data[writepos + i] = ((u32*)value)[i]; - } - writepos = (writepos + 4) & 31; - value += 4; - } - - ipuRegs.ctrl.OFC += firsttrans; - IPU0dma(); - - return firsttrans; -} - int IPU_Fifo_Input::read(void *value) { // wait until enough data to ensure proper streaming. - if (g_BP.IFC < 4) + if (g_BP.IFC < 3) { // IPU FIFO is empty and DMA is waiting so lets tell the DMA we are ready to put data in the FIFO if(cpuRegs.eCycle[4] == 0x9999) { - CPU_INT( DMAC_TO_IPU, 4 ); + CPU_INT( DMAC_TO_IPU, 32 ); } - + if (g_BP.IFC == 0) return 0; pxAssert(g_BP.IFC > 0); } - // transfer 1 qword, split into two transfers - for (int i = 0; i <= 3; i++) - { - ((u32*)value)[i] = data[readpos + i]; - data[readpos + i] = 0; - } + CopyQWC(value, &data[readpos]); readpos = (readpos + 4) & 31; g_BP.IFC--; return 1; } -void IPU_Fifo_Output::_readsingle(void *value) +int IPU_Fifo_Output::write(const u32 *value, uint size) { - // transfer 1 qword, split into two transfers - for (int i = 0; i <= 3; i++) + pxAssumeMsg(size>0, "Invalid size==0 when calling IPU_Fifo_Output::write"); + + uint origsize = size; + do { + IPU0dma(); + + uint transsize = min(size, 8 - (uint)ipuRegs.ctrl.OFC); + if(!transsize) break; + + ipuRegs.ctrl.OFC = transsize; + size -= transsize; + while (transsize > 0) + { + CopyQWC(&data[writepos], value); + writepos = (writepos + 4) & 31; + value += 4; + --transsize; + } + } while(true); + + return origsize - size; + +#if 0 + if (ipuRegs.ctrl.OFC >= 8) IPU0dma(); + + uint transsize = min(size, 8 - (uint)ipuRegs.ctrl.OFC); + uint firsttrans = transsize; + + while (transsize > 0) { - ((u32*)value)[i] = data[readpos + i]; - data[readpos + i] = 0; + CopyQWC(&data[writepos], value); + writepos = (writepos + 4) & 31; + value += 4; + --transsize; } - readpos = (readpos + 4) & 31; + + ipuRegs.ctrl.OFC += firsttrans; + IPU0dma(); + + return firsttrans; +#endif } -void IPU_Fifo_Output::read(void *value, int size) +void IPU_Fifo_Output::read(void *value, uint size) { + pxAssume(ipuRegs.ctrl.OFC >= size); ipuRegs.ctrl.OFC -= size; + + // Zeroing the read data is not needed, since the ringbuffer design will never read back + // the zero'd data anyway. --air + + //__m128 zeroreg = _mm_setzero_ps(); while (size > 0) { - _readsingle(value); - value = (u32*)value + 4; - size--; + CopyQWC(value, &data[readpos]); + //_mm_store_ps((float*)&data[readpos], zeroreg); + + readpos = (readpos + 4) & 31; + value = (u128*)value + 1; + --size; } } -void IPU_Fifo_Output::readsingle(void *value) -{ - if (ipuRegs.ctrl.OFC > 0) - { - ipuRegs.ctrl.OFC--; - _readsingle(value); - } -} - -__fi bool decoder_t::ReadIpuData(u128* out) -{ - if(ipu0_data == 0) - { - IPU_LOG( "ReadFIFO/IPUout -> (fifo empty/no data available)" ); - return false; - } - - CopyQWC(out, GetIpuDataPtr()); - - --ipu0_data; - ++ipu0_idx; - - IPU_LOG( "ReadFIFO/IPUout -> %ls", out->ToString().c_str() ); - - return true; -} - void __fastcall ReadFIFO_IPUout(mem128_t* out) { - // FIXME! When ReadIpuData() doesn't succeed (returns false), the EE should probably stall - // until a value becomes available. This isn't exactly easy to do since the virtualized EE - // in PCSX2 *has* to be running in order for the IPU DMA to upload new input data to allow - // IPUout's FIFO to fill. Thus if we implement an EE stall, PCSX2 deadlocks. Grr. --air + if (!pxAssertDev( ipuRegs.ctrl.OFC > 0, "Attempted read from IPUout's FIFO, but the FIFO is empty!" )) return; + ipu_fifo.out.read(out, 1); - if (decoder.ReadIpuData(out)) - { - ipu_fifo.out.readpos = (ipu_fifo.out.readpos + 4) & 31; - } + // Games should always check the fifo before reading from it -- so if the FIFO has no data + // its either some glitchy game or a bug in pcsx2. } void __fastcall WriteFIFO_IPUin(const mem128_t* value) diff --git a/pcsx2/IPU/IPU_Fifo.h b/pcsx2/IPU/IPU_Fifo.h index 10a1e940d3..69d2eab597 100644 --- a/pcsx2/IPU/IPU_Fifo.h +++ b/pcsx2/IPU/IPU_Fifo.h @@ -37,13 +37,10 @@ struct IPU_Fifo_Output int readpos, writepos; // returns number of qw read - int write(const u32 * value, int size); - void read(void *value,int size); - void readsingle(void *value); + int write(const u32 * value, uint size); + void read(void *value, uint size); void clear(); wxString desc() const; - - void _readsingle(void *value); }; struct IPU_Fifo diff --git a/pcsx2/IPU/IPUdma.cpp b/pcsx2/IPU/IPUdma.cpp index 5a5949e0b0..26e9e80a6a 100644 --- a/pcsx2/IPU/IPUdma.cpp +++ b/pcsx2/IPU/IPUdma.cpp @@ -189,7 +189,7 @@ int IPU1dma() { if(!WaitGSPaths()) { // legacy WaitGSPaths() for now - IPU_INT_TO(4); //Give it a short wait. + IPU_INT_TO(32); //Give it a short wait. return totalqwc; } IPU_LOG("Processing Normal QWC left %x Finished %d In Progress %d", ipu1dma.qwc, IPU1Status.DMAFinished, IPU1Status.InProgress); @@ -203,7 +203,7 @@ int IPU1dma() { if(!WaitGSPaths()) { // legacy WaitGSPaths() for now - IPU_INT_TO(4); //Give it a short wait. + IPU_INT_TO(32); //Give it a short wait. return totalqwc; } IPU_LOG("Processing Chain QWC left %x Finished %d In Progress %d", ipu1dma.qwc, IPU1Status.DMAFinished, IPU1Status.InProgress); @@ -283,7 +283,7 @@ int IPU1dma() if(!WaitGSPaths() && ipu1dma.qwc > 0) { // legacy WaitGSPaths() for now - IPU_INT_TO(4); //Give it a short wait. + IPU_INT_TO(32); //Give it a short wait. return totalqwc; } IPU_LOG("Processing Start Chain QWC left %x Finished %d In Progress %d", ipu1dma.qwc, IPU1Status.DMAFinished, IPU1Status.InProgress); @@ -312,8 +312,9 @@ int IPU1dma() int IPU0dma() { + if(!ipuRegs.ctrl.OFC) return 0; + int readsize; - static int totalsize = 0; tDMA_TAG* pMem; if ((!(ipu0dma.chcr.STR) || (cpuRegs.interrupt & (1 << DMAC_FROM_IPU))) || (ipu0dma.qwc == 0)) @@ -329,7 +330,6 @@ int IPU0dma() pMem = dmaGetAddr(ipu0dma.madr, true); readsize = min(ipu0dma.qwc, (u16)ipuRegs.ctrl.OFC); - totalsize+=readsize; ipu_fifo.out.read(pMem, readsize); ipu0dma.madr += readsize << 4; @@ -363,7 +363,6 @@ int IPU0dma() //This broke vids in Digital Devil Saga //Note that interrupting based on totalsize is just guessing.. IPU_INT_FROM( readsize * BIAS ); - totalsize = 0; } return readsize; diff --git a/pcsx2/IPU/mpeg2lib/Mpeg.cpp b/pcsx2/IPU/mpeg2lib/Mpeg.cpp index 27edd38f89..bdef4b2d49 100644 --- a/pcsx2/IPU/mpeg2lib/Mpeg.cpp +++ b/pcsx2/IPU/mpeg2lib/Mpeg.cpp @@ -47,10 +47,14 @@ const int non_linear_quantizer_scale [] = into 1st slot is copied to the 2nd slot. Which will later be copied back to the 1st slot when 128bits have been read. */ -extern void ReorderBitstream(); const DCTtab * tab; int mbaCount = 0; +int bitstream_init () +{ + return g_BP.FillBuffer(32); +} + int get_macroblock_modes() { int macroblock_modes; @@ -221,9 +225,7 @@ int __fi get_motion_delta(const int f_code) int __fi get_dmv() { - const DMVtab * tab; - - tab = DMV_2 + UBITS(2); + const DMVtab* tab = DMV_2 + UBITS(2); DUMPBITS(tab->len); return tab->dmv; } @@ -239,22 +241,21 @@ int get_macroblock_address_increment() else if (code >= 768) mba = MBA.mba11 + (UBITS(11) - 24); else switch (UBITS(11)) - { + { + case 8: /* macroblock_escape */ + DUMPBITS(11); + return 0x23; - case 8: /* macroblock_escape */ + case 15: /* macroblock_stuffing (MPEG1 only) */ + if (decoder.mpeg1) + { DUMPBITS(11); - return 0x23; + return 0x22; + } - case 15: /* macroblock_stuffing (MPEG1 only) */ - if (decoder.mpeg1) - { - DUMPBITS(11); - return 0x22; - } - - default: - return 0;//error - } + default: + return 0;//error + } DUMPBITS(mba->len); @@ -336,11 +337,8 @@ do { \ val = (((s32)val) >> 31) ^ 2047; \ } while (0) -static __fi bool get_intra_block() +static bool get_intra_block() { - int i; - int j; - int val; const u8 * scan = decoder.scantype ? mpeg2_scan.alt : mpeg2_scan.norm; const u8 (&quant_matrix)[64] = decoder.iq; int quantizer_scale = decoder.quantizer_scale; @@ -348,7 +346,7 @@ static __fi bool get_intra_block() u16 code; /* decode AC coefficients */ - for (i=1 + ipu_cmd.pos[4]; ; i++) + for (int i=1 + ipu_cmd.pos[4]; ; i++) { switch (ipu_cmd.pos[5]) { @@ -427,60 +425,65 @@ static __fi bool get_intra_block() return true; } - i+= tab->run == 65 ? GETBITS(6) : tab->run; + i += (tab->run == 65) ? GETBITS(6) : tab->run; if (i >= 64) { ipu_cmd.pos[4] = 0; return true; } + case 1: - if (!GETWORD()) - { - ipu_cmd.pos[4] = i - 1; - ipu_cmd.pos[5] = 1; - return false; + { + if (!GETWORD()) + { + ipu_cmd.pos[4] = i - 1; + ipu_cmd.pos[5] = 1; + return false; + } + + uint j = scan[i]; + int val; + + if (tab->run==65) /* escape */ + { + if(!decoder.mpeg1) + { + val = (SBITS(12) * quantizer_scale * quant_matrix[i]) >> 4; + DUMPBITS(12); + } + else + { + val = SBITS(8); + DUMPBITS(8); + + if (!(val & 0x7f)) + { + val = GETBITS(8) + 2 * val; + } + + val = (val * quantizer_scale * quant_matrix[i]) >> 4; + val = (val + ~ (((s32)val) >> 31)) | 1; + } + } + else + { + val = (tab->level * quantizer_scale * quant_matrix[i]) >> 4; + if(decoder.mpeg1) + { + /* oddification */ + val = (val - 1) | 1; + } + + /* if (bitstream_get (1)) val = -val; */ + int bit1 = SBITS(1); + val = (val ^ bit1) - bit1; + DUMPBITS(1); + } + + SATURATE(val); + dest[j] = val; + ipu_cmd.pos[5] = 0; } - - j = scan[i]; - - if (tab->run==65) /* escape */ - { - if(!decoder.mpeg1) - { - val = (SBITS(12) * quantizer_scale * quant_matrix[i]) >> 4; - DUMPBITS(12); - } - else - { - val = SBITS(8); - DUMPBITS(8); - - if (!(val & 0x7f)) - { - val = GETBITS(8) + 2 * val; - } - - val = (val * quantizer_scale * quant_matrix[i]) >> 4; - val = (val + ~ (((s32)val) >> 31)) | 1; - } - } - else - { - val = (tab->level * quantizer_scale * quant_matrix[i]) >> 4; - if(decoder.mpeg1) - { - /* oddification */ - val = (val - 1) | 1; - } - - /* if (bitstream_get (1)) val = -val; */ - val = (val ^ SBITS(1)) - SBITS(1); - DUMPBITS(1); - } - - SATURATE(val); - dest[j] = val; - ipu_cmd.pos[5] = 0; } } @@ -488,7 +491,7 @@ static __fi bool get_intra_block() return true; } -static __fi bool get_non_intra_block(int * last) +static bool get_non_intra_block(int * last) { int i; int j; @@ -614,8 +617,9 @@ static __fi bool get_non_intra_block(int * last) } else { + int bit1 = SBITS(1); val = ((2 * tab->level + 1) * quantizer_scale * quant_matrix[i]) >> 5; - val = (val ^ SBITS(1)) - SBITS(1); + val = (val ^ bit1) - bit1; DUMPBITS(1); } @@ -682,25 +686,11 @@ void __fi finishmpeg2sliceIDEC() { ipuRegs.ctrl.SCD = 0; coded_block_pattern = decoder.coded_block_pattern; - - g_BP.BP += decoder.bitstream_bits - 16; - - if ((int)g_BP.BP < 0) - { - g_BP.BP = 128 + (int)g_BP.BP; - - // After BP is positioned correctly, we need to reload the old buffer - // so that reading may continue properly - ReorderBitstream(); - } - - FillInternalBuffer(&g_BP.BP, 1, 0); } bool mpeg2sliceIDEC() { u16 code; - u8 bit8; switch (ipu_cmd.pos[0]) { @@ -798,6 +788,9 @@ bool mpeg2sliceIDEC() ipu_cmd.pos[2] = 6; return false; } + break; + + jNO_DEFAULT; } // Send The MacroBlock via DmaIpuFrom @@ -812,23 +805,23 @@ bool mpeg2sliceIDEC() } case 2: - while (decoder.ipu0_data > 0) - { - uint read = ipu_fifo.out.write((u32*)decoder.GetIpuDataPtr(), decoder.ipu0_data); + { + pxAssume(decoder.ipu0_data > 0); - if (read == 0) - { - ipu_cmd.pos[1] = 2; - return false; - } - else - { - decoder.AdvanceIpuDataBy(read); - } + uint read = ipu_fifo.out.write((u32*)decoder.GetIpuDataPtr(), decoder.ipu0_data); + decoder.AdvanceIpuDataBy(read); + + if (decoder.ipu0_data != 0) + { + // IPU FIFO filled up -- Will have to finish transferring later. + ipu_cmd.pos[1] = 2; + return false; } decoder.mbc++; mbaCount = 0; + } + case 3: while (1) { @@ -851,18 +844,18 @@ bool mpeg2sliceIDEC() } else switch (UBITS(11)) { - case 8: /* macroblock_escape */ - mbaCount += 33; - /* pass through */ + case 8: /* macroblock_escape */ + mbaCount += 33; + /* pass through */ - case 15: /* macroblock_stuffing (MPEG1 only) */ - DUMPBITS(11); - continue; + case 15: /* macroblock_stuffing (MPEG1 only) */ + DUMPBITS(11); + continue; - default: /* end of slice/frame, or error? */ - { - goto finish_idec; - } + default: /* end of slice/frame, or error? */ + { + goto finish_idec; + } } } @@ -886,17 +879,20 @@ bool mpeg2sliceIDEC() } break; + + jNO_DEFAULT; } ipu_cmd.pos[1] = 0; ipu_cmd.pos[2] = 0; } - + finish_idec: finishmpeg2sliceIDEC(); case 3: - bit8 = 1; + { + u8 bit8; if (!getBits8((u8*)&bit8, 0)) { ipu_cmd.pos[0] = 3; @@ -905,10 +901,10 @@ finish_idec: if (bit8 == 0) { - if (g_BP.BP & 7) g_BP.BP += 8 - (g_BP.BP & 7); - + g_BP.Align(); ipuRegs.ctrl.SCD = 1; } + } case 4: if (!getBits32((u8*)&ipuRegs.top, 0)) @@ -917,8 +913,10 @@ finish_idec: return false; } - BigEndian(ipuRegs.top, ipuRegs.top); + ipuRegs.top = BigEndian(ipuRegs.top); break; + + jNO_DEFAULT; } return true; @@ -927,7 +925,6 @@ finish_idec: bool mpeg2_slice() { int DCT_offset, DCT_stride; - u8 bit8; macroblock_8& mb8 = decoder.mb8; macroblock_16& mb16 = decoder.mb16; @@ -1010,9 +1007,35 @@ bool mpeg2_slice() return false; } break; + + jNO_DEFAULT; } - ipu_copy(mb8, mb16); + // Copy macroblock8 to macroblock16 - without sign extension. + // Manually inlined due to MSVC refusing to inline the SSE-optimized version. + { + const u8 *s = (const u8*)&mb8; + u16 *d = (u16*)&mb16; + + //Y bias - 16 * 16 + //Cr bias - 8 * 8 + //Cb bias - 8 * 8 + + __m128i zeroreg = _mm_setzero_si128(); + + for (uint i = 0; i < (256+64+64) / 32; ++i) + { + //*d++ = *s++; + __m128i woot1 = _mm_load_si128((__m128i*)s); + __m128i woot2 = _mm_load_si128((__m128i*)s+1); + _mm_store_si128((__m128i*)d, _mm_unpacklo_epi8(woot1, zeroreg)); + _mm_store_si128((__m128i*)d+1, _mm_unpackhi_epi8(woot1, zeroreg)); + _mm_store_si128((__m128i*)d+2, _mm_unpacklo_epi8(woot2, zeroreg)); + _mm_store_si128((__m128i*)d+3, _mm_unpackhi_epi8(woot2, zeroreg)); + s += 32; + d += 32; + } + } } else { @@ -1077,6 +1100,8 @@ bool mpeg2_slice() } } break; + + jNO_DEFAULT; } } } @@ -1084,40 +1109,31 @@ bool mpeg2_slice() // Send The MacroBlock via DmaIpuFrom ipuRegs.ctrl.SCD = 0; coded_block_pattern = decoder.coded_block_pattern; - g_BP.BP += (int)decoder.bitstream_bits - 16; - - // BP goes from 0 to 128, so negative values mean to read old buffer - // so we minus from 128 to get the correct BP - if ((int)g_BP.BP < 0) - { - g_BP.BP = 128 + (int)g_BP.BP; - - // After BP is positioned correctly, we need to reload the old buffer - // so that reading may continue properly - ReorderBitstream(); - } decoder.mbc = 1; decoder.SetOutputTo(mb16); case 3: - while (decoder.ipu0_data > 0) - { - uint size = ipu_fifo.out.write((u32*)decoder.GetIpuDataPtr(), decoder.ipu0_data); + { + pxAssume(decoder.ipu0_data > 0); - if (size == 0) - { - ipu_cmd.pos[0] = 3; - return false; - } - else - { - decoder.AdvanceIpuDataBy(size); - } + uint read = ipu_fifo.out.write((u32*)decoder.GetIpuDataPtr(), decoder.ipu0_data); + decoder.AdvanceIpuDataBy(read); + + if (decoder.ipu0_data != 0) + { + // IPU FIFO filled up -- Will have to finish transferring later. + ipu_cmd.pos[0] = 3; + return false; } + decoder.mbc++; + mbaCount = 0; + } + case 4: - bit8 = 1; + { + u8 bit8; if (!getBits8((u8*)&bit8, 0)) { ipu_cmd.pos[0] = 4; @@ -1126,11 +1142,11 @@ bool mpeg2_slice() if (bit8 == 0) { - if (g_BP.BP & 7) g_BP.BP += 8 - (g_BP.BP & 7); - + g_BP.Align(); ipuRegs.ctrl.SCD = 1; } - + } + case 5: if (!getBits32((u8*)&ipuRegs.top, 0)) { @@ -1138,8 +1154,7 @@ bool mpeg2_slice() return false; } - BigEndian(ipuRegs.top, ipuRegs.top); - decoder.bitstream_bits = 0; + ipuRegs.top = BigEndian(ipuRegs.top); break; } diff --git a/pcsx2/IPU/mpeg2lib/Mpeg.h b/pcsx2/IPU/mpeg2lib/Mpeg.h index 13431eb396..5ea46631e7 100644 --- a/pcsx2/IPU/mpeg2lib/Mpeg.h +++ b/pcsx2/IPU/mpeg2lib/Mpeg.h @@ -148,12 +148,12 @@ struct decoder_t { macroblock_rgb32 rgb32; macroblock_rgb16 rgb16; - uint ipu0_data; + uint ipu0_data; // amount of data in the output macroblock (in QWC) uint ipu0_idx; /* bit parsing stuff */ - u32 bitstream_buf; /* current 32 bit working set */ - int bitstream_bits; /* used bits in working set */ + //u32 bitstream_buf; /* current 32 bit working set */ + //int bitstream_bits; /* used bits in working set */ int quantizer_scale; /* remove */ int dmv_offset; /* remove */ @@ -230,7 +230,7 @@ struct decoder_t { ipu0_data -= amt; } - bool ReadIpuData(u128* out); + __fi bool ReadIpuData(u128* out); }; struct mpeg2_scan_pack @@ -241,6 +241,10 @@ struct mpeg2_scan_pack mpeg2_scan_pack(); }; +extern int bitstream_init (); +extern u32 UBITS(uint bits); +extern s32 SBITS(uint bits); + extern void mpeg2_idct_copy(s16 * block, u8* dest, int stride); extern void mpeg2_idct_add(int last, s16 * block, s16* dest, int stride); @@ -258,20 +262,19 @@ extern int get_dmv(); extern void ipu_csc(macroblock_8& mb8, macroblock_rgb32& rgb32, int sgn); extern void ipu_dither(const macroblock_rgb32& rgb32, macroblock_rgb16& rgb16, int dte); extern void ipu_vq(macroblock_rgb16& rgb16, u8* indx4); -extern void ipu_copy(const macroblock_8& mb8, macroblock_16& mb16); extern int slice (u8 * buffer); #ifdef _MSC_VER -#define BigEndian(out, in) out = _byteswap_ulong(in) +#define BigEndian(in) _byteswap_ulong(in) #else -#define BigEndian(out, in) out = __builtin_bswap32(in) // or we could use the asm function bswap... +#define BigEndian(in) __builtin_bswap32(in) // or we could use the asm function bswap... #endif #ifdef _MSC_VER -#define BigEndian64(out, in) out = _byteswap_uint64(in) +#define BigEndian64(in) _byteswap_uint64(in) #else -#define BigEndian64(out, in) out = __builtin_bswap64(in) // or we could use the asm function bswap... +#define BigEndian64(in) __builtin_bswap64(in) // or we could use the asm function bswap... #endif extern __aligned16 const mpeg2_scan_pack mpeg2_scan; diff --git a/pcsx2/IPU/mpeg2lib/Vlc.h b/pcsx2/IPU/mpeg2lib/Vlc.h index 0b30d1b8bb..cac61dd40c 100644 --- a/pcsx2/IPU/mpeg2lib/Vlc.h +++ b/pcsx2/IPU/mpeg2lib/Vlc.h @@ -30,65 +30,24 @@ #ifndef __VLC_H__ #define __VLC_H__ -//static u8 word[4]; -//static u8 dword[8]; -//static u8 qword[16]; - static __fi int GETWORD() { - static u8 data[2]; - - if (decoder.bitstream_bits > 0) - { - if(!getBits16(data,1)) - { - return 0; - } - - /*u32 data; - BigEndian(data, *(u32*)word); - decoder.bitstream_buf |= (u64)data << decoder.bitstream_bits; - decoder.bitstream_bits -= 32;*/ - decoder.bitstream_buf |= (((u32)data[0] << 8) | data[1]) << decoder.bitstream_bits; - decoder.bitstream_bits -= 16; - } - - return 1; + return g_BP.FillBuffer(16); } -static __fi int bitstream_init () +// Removes bits from the bitstream. This is done independently of UBITS/SBITS because a +// lot of mpeg streams have to read ahead and rewind bits and re-read them at different +// bit depths or sign'age. +static __fi void DUMPBITS(uint num) { - if (!getBits32((u8*)&decoder.bitstream_buf, 1)) - { - return 0; - } - - decoder.bitstream_bits = -16; - BigEndian(decoder.bitstream_buf, decoder.bitstream_buf); - /*decoder.bitstream_buf = *(u64*)dword; - BigEndian64(decoder.bitstream_buf, decoder.bitstream_buf);*/ - - return 1; + g_BP.Advance(num); + //pxAssume(g_BP.FP != 0); } -/* remove num valid bits from bit_buf */ -static __fi void DUMPBITS(int num) +static __fi u32 GETBITS(uint num) { - decoder.bitstream_buf <<= num; - decoder.bitstream_bits += num; -} - -/* take num bits from the high part of bit_buf and zero extend them */ -#define UBITS(num) (((u32)decoder.bitstream_buf) >> (32 - (num))) - -/* take num bits from the high part of bit_buf and sign extend them */ -#define SBITS(num) (((s32)decoder.bitstream_buf) >> (32 - (num))) - -/* Get bits from bitstream */ -static __fi u32 GETBITS(int num) -{ - u16 retVal = UBITS(num); - DUMPBITS(num); + uint retVal = UBITS(num); + g_BP.Advance(num); return retVal; } diff --git a/pcsx2/R5900.cpp b/pcsx2/R5900.cpp index 1138594ecd..01223d6bd5 100644 --- a/pcsx2/R5900.cpp +++ b/pcsx2/R5900.cpp @@ -130,7 +130,7 @@ __ri void cpuException(u32 code, u32 bd) //Reset / NMI cpuRegs.pc = 0xBFC00000; Console.Warning("Reset request"); - UpdateCP0Status(); + cpuUpdateOperationMode(); return; } else if((code & 0x38000) == 0x10000) @@ -167,7 +167,7 @@ __ri void cpuException(u32 code, u32 bd) else cpuRegs.pc = 0xBFC00200 + offset; - UpdateCP0Status(); + cpuUpdateOperationMode(); } void cpuTlbMiss(u32 addr, u32 bd, u32 excode) @@ -196,7 +196,7 @@ void cpuTlbMiss(u32 addr, u32 bd, u32 excode) } cpuRegs.CP0.n.Status.b.EXL = 1; - UpdateCP0Status(); + cpuUpdateOperationMode(); // Log=1; varLog|= 0x40000000; } @@ -208,33 +208,6 @@ void cpuTlbMissW(u32 addr, u32 bd) { cpuTlbMiss(addr, bd, EXC_CODE_TLBS); } -__fi void _cpuTestMissingINTC() { - if (cpuRegs.CP0.n.Status.val & 0x400 && - psHu32(INTC_STAT) & psHu32(INTC_MASK)) { - if ((cpuRegs.interrupt & (1 << 30)) == 0) { - Console.Error("*PCSX2*: Error, missing INTC Interrupt"); - } - } -} - -__fi void _cpuTestMissingDMAC() { - if (cpuRegs.CP0.n.Status.val & 0x800 && - (psHu16(0xe012) & psHu16(0xe010) || - psHu16(0xe010) & 0x8000)) { - if ((cpuRegs.interrupt & (1 << 31)) == 0) { - Console.Error("*PCSX2*: Error, missing DMAC Interrupt"); - } - } -} - -void cpuTestMissingHwInts() { - if ((cpuRegs.CP0.n.Status.val & 0x10007) == 0x10001) { - _cpuTestMissingINTC(); - _cpuTestMissingDMAC(); -// _cpuTestTIMR(); - } -} - // sets a branch test to occur some time from an arbitrary starting point. __fi void cpuSetNextEvent( u32 startCycle, s32 delta ) { @@ -253,7 +226,7 @@ __fi void cpuSetNextEventDelta( s32 delta ) cpuSetNextEvent( cpuRegs.cycle, delta ); } -// tests the cpu cycle agaisnt the given start and delta values. +// tests the cpu cycle against the given start and delta values. // Returns true if the delta time has passed. __fi int cpuTestCycle( u32 startCycle, s32 delta ) { @@ -361,8 +334,8 @@ static bool cpuIntsEnabled(int Interrupt) { bool IntType = !!(cpuRegs.CP0.n.Status.val & Interrupt); //Choose either INTC or DMAC, depending on what called it - return cpuRegs.CP0.n.Status.b.EIE && cpuRegs.CP0.n.Status.b.IE && - !cpuRegs.CP0.n.Status.b.EXL && (cpuRegs.CP0.n.Status.b.ERL == 0) && IntType; + return IntType && cpuRegs.CP0.n.Status.b.EIE && cpuRegs.CP0.n.Status.b.IE && + !cpuRegs.CP0.n.Status.b.EXL && (cpuRegs.CP0.n.Status.b.ERL == 0); } // if cpuRegs.cycle is greater than this cycle, should check cpuEventTest for updates @@ -375,10 +348,19 @@ __fi void _cpuEventTest_Shared() ScopedBool etest(eeEventTestIsActive); g_nextEventCycle = cpuRegs.cycle + eeWaitCycles; + // ---- INTC / DMAC (CPU-level Exceptions) ----------------- + // Done first because exceptions raised during event tests need to be postponed a few + // cycles (fixes Grandia II [PAL], which does a spin loop on a vsync and expects to + // be able to read the value before the exception handler clears it). + + uint mask = intcInterrupt() | dmacInterrupt(); + if (cpuIntsEnabled(mask)) cpuException(mask, cpuRegs.branch); + + // ---- Counters ------------- // Important: the vsync counter must be the first to be checked. It includes emulation // escape/suspend hooks, and it's really a good idea to suspend/resume emulation before - // doing any actual meaninful branchtest logic. + // doing any actual meaningful branchtest logic. if( cpuTestCycle( nextsCounter, nextCounter ) ) { @@ -391,10 +373,10 @@ __fi void _cpuEventTest_Shared() _cpuTestTIMR(); // ---- Interrupts ------------- - // Handles all interrupts except 30 and 31, which are handled later. + // These are basically just DMAC-related events, which also piggy-back the same bits as + // the PS2's own DMA channel IRQs and IRQ Masks. - if( cpuRegs.interrupt & ~(3<<30) ) - _cpuTestInterrupts(); + _cpuTestInterrupts(); // ---- IOP ------------- // * It's important to run a iopEventTest before calling ExecuteBlock. This @@ -418,11 +400,7 @@ __fi void _cpuEventTest_Shared() //if( EEsCycle < -450 ) // Console.WriteLn( " IOP ahead by: %d cycles", -EEsCycle ); - // Experimental and Probably Unnecessary Logic --> - // Check if the EE already has an exception pending, and if so we shouldn't - // waste too much time updating the IOP. Theory being that the EE and IOP should - // run closely in sync during raised exception events. But in practice it didn't - // seem to make much of a difference. + EEsCycle = psxCpu->ExecuteBlock( EEsCycle ); iopEventAction = false; } @@ -456,22 +434,10 @@ __fi void _cpuEventTest_Shared() // Apply vsync and other counter nextCycles cpuSetNextEvent( nextsCounter, nextCounter ); - - // ---- INTC / DMAC Exceptions ----------------- - // Raise the INTC and DMAC interrupts here, which usually throw exceptions. - // This should be done last since the IOP and the VU0 can raise several EE - // exceptions. - - //if ((cpuRegs.CP0.n.Status.val & 0x10007) == 0x10001) - if( cpuIntsEnabled(0x400) ) TESTINT(30, intcInterrupt); - if( cpuIntsEnabled(0x800) ) TESTINT(31, dmacInterrupt); } __ri void cpuTestINTCInts() { - // Check the internal Event System -- if one's already scheduled then don't bother: - if( cpuRegs.interrupt & (1 << 30) ) return; - // Check the COP0's Status register for general interrupt disables, and the 0x400 // bit (which is INTC master toggle). if( !cpuIntsEnabled(0x400) ) return; @@ -488,9 +454,6 @@ __ri void cpuTestINTCInts() __fi void cpuTestDMACInts() { - // Check the internal Event System -- if one's already scheduled then don't bother: - if ( cpuRegs.interrupt & (1 << 31) ) return; - // Check the COP0's Status register for general interrupt disables, and the 0x800 // bit (which is the DMAC master toggle). if( !cpuIntsEnabled(0x800) ) return; diff --git a/pcsx2/R5900.h b/pcsx2/R5900.h index eac96559b3..d865a3b2a8 100644 --- a/pcsx2/R5900.h +++ b/pcsx2/R5900.h @@ -403,8 +403,8 @@ enum EE_EventType }; extern void CPU_INT( EE_EventType n, s32 ecycle ); -extern void intcInterrupt(); -extern void dmacInterrupt(); +extern uint intcInterrupt(); +extern uint dmacInterrupt(); extern void cpuInit(); diff --git a/pcsx2/SaveState.h b/pcsx2/SaveState.h index bf506a3d58..a981837907 100644 --- a/pcsx2/SaveState.h +++ b/pcsx2/SaveState.h @@ -24,7 +24,7 @@ // the lower 16 bit value. IF the change is breaking of all compatibility with old // states, increment the upper 16 bit value, and clear the lower 16 bits to 0. -static const u32 g_SaveVersion = 0x8b4a0000; +static const u32 g_SaveVersion = 0x8b4b0000; // this function is meant to be used in the place of GSfreeze, and provides a safe layer // between the GS saving function and the MTGS's needs. :) diff --git a/plugins/zzogl-pg/opengl/CMakeLists.txt b/plugins/zzogl-pg/opengl/CMakeLists.txt index 3d9c4e913b..1f83451cb4 100644 --- a/plugins/zzogl-pg/opengl/CMakeLists.txt +++ b/plugins/zzogl-pg/opengl/CMakeLists.txt @@ -92,9 +92,9 @@ set(zzoglHeaders Util.h x86.h zerogs.h - zerogsmath.h zpipe.h ZZoglCRTC.h + ZZoglMath.h ZZoglShaders.h ZZGl.h ZZLog.h) diff --git a/plugins/zzogl-pg/opengl/GLWin32.cpp b/plugins/zzogl-pg/opengl/GLWin32.cpp index da64f16fe8..0c66206fda 100644 --- a/plugins/zzogl-pg/opengl/GLWin32.cpp +++ b/plugins/zzogl-pg/opengl/GLWin32.cpp @@ -32,7 +32,6 @@ LRESULT WINAPI MsgProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam) switch (msg) { - case WM_DESTROY: PostQuitMessage(0); return 0; @@ -76,21 +75,21 @@ bool GLWindow::CreateWindow(void *pDisplay) rc.bottom = conf.height; WNDCLASSEX wc; - HINSTANCE hInstance = GetModuleHandle(NULL); + HINSTANCE hInstance = GetModuleHandle(NULL); // Grab An Instance For Our Window DWORD dwExStyle, dwStyle; wc.cbSize = sizeof(WNDCLASSEX); - wc.style = CS_CLASSDC; - wc.lpfnWndProc = (WNDPROC) MsgProc; - wc.cbClsExtra = 0; - wc.cbWndExtra = 0; - wc.hInstance = hInstance; - wc.hIcon = NULL; - wc.hIconSm = NULL; - wc.hCursor = NULL; - wc.hbrBackground = NULL; - wc.lpszMenuName = NULL; - wc.lpszClassName = "PS2EMU_ZEROGS"; + wc.style = CS_HREDRAW | CS_VREDRAW | CS_OWNDC; // Redraw On Move, And Own DC For Window + wc.lpfnWndProc = (WNDPROC) MsgProc; // MsgProc Handles Messages + wc.cbClsExtra = 0; // No Extra Window Data + wc.cbWndExtra = 0; // No Extra Window Data + wc.hInstance = hInstance; // Set The Instance + wc.hIcon = NULL; + wc.hIconSm = NULL; // Load The Default Icon + wc.hCursor = LoadCursor(NULL, IDC_ARROW); // Load The Arrow Pointer + wc.hbrBackground = (HBRUSH)GetStockObject(BLACK_BRUSH); // No Background Required For GL + wc.lpszMenuName = NULL; // We Don't Want A Menu + wc.lpszClassName = "PS2EMU_ZEROGS"; // Set The Class Name RegisterClassEx(&wc); @@ -102,26 +101,26 @@ bool GLWindow::CreateWindow(void *pDisplay) else { dwExStyle = WS_EX_APPWINDOW | WS_EX_WINDOWEDGE; - dwStyle = WS_OVERLAPPEDWINDOW; + dwStyle = WS_OVERLAPPEDWINDOW | WS_BORDER; } + dwStyle |= WS_CLIPSIBLINGS | WS_CLIPCHILDREN; AdjustWindowRectEx(&rc, dwStyle, false, dwExStyle); GetWindowRect(GetDesktopWindow(), &rcdesktop); - GShwnd = CreateWindowEx( - dwExStyle, - "PS2EMU_ZEROGS", - "ZeroGS", - dwStyle, - (rcdesktop.right - (rc.right - rc.left)) / 2, - (rcdesktop.bottom - (rc.bottom - rc.top)) / 2, - rc.right - rc.left, - rc.bottom - rc.top, - NULL, - NULL, - hInstance, - NULL); + GShwnd = CreateWindowEx( dwExStyle, // Extended Style For The Window + "PS2EMU_ZEROGS", // Class Name + "ZZOgl", // Window Title + dwStyle, // Selected Window Style + (rcdesktop.right - (rc.right - rc.left)) / 2, // Window Position + (rcdesktop.bottom - (rc.bottom - rc.top)) / 2, // Window Position + rc.right - rc.left, // Calculate Adjusted Window Width + rc.bottom - rc.top, // Calculate Adjusted Window Height + NULL, // No Parent Window + NULL, // No Menu + hInstance, // Instance + NULL); // Don't Pass Anything To WM_CREATE if (GShwnd == NULL) return false; @@ -197,6 +196,7 @@ bool GLWindow::DisplayWindow(int _width, int _height) dwExStyle = WS_EX_APPWINDOW | WS_EX_WINDOWEDGE; dwStyle = WS_OVERLAPPEDWINDOW; } + dwStyle |= WS_CLIPSIBLINGS | WS_CLIPCHILDREN; RECT rc; diff --git a/plugins/zzogl-pg/opengl/HostMemory.cpp b/plugins/zzogl-pg/opengl/HostMemory.cpp index 889423bd53..419ad6ed8d 100644 --- a/plugins/zzogl-pg/opengl/HostMemory.cpp +++ b/plugins/zzogl-pg/opengl/HostMemory.cpp @@ -469,10 +469,10 @@ __forceinline void _TransferLocalLocal_4() assert((gs.srcbuf.psm&0x7) == (gs.dstbuf.psm&0x7)); if (gs.trxpos.sx + gs.imageWnew > gs.srcbuf.bw) - ZZLog::Warn_Log("Transfer error, src width exceeded."); + ZZLog::Debug_Log("Transfer error, src width exceeded."); if (gs.trxpos.dx + gs.imageWnew > gs.dstbuf.bw) - ZZLog::Warn_Log("Transfer error, dst width exceeded."); + ZZLog::Debug_Log("Transfer error, dst width exceeded."); int srcstart, srcend, dststart, dstend; diff --git a/plugins/zzogl-pg/opengl/Mem.cpp b/plugins/zzogl-pg/opengl/Mem.cpp index 82a46bab47..1a65a91e2e 100644 --- a/plugins/zzogl-pg/opengl/Mem.cpp +++ b/plugins/zzogl-pg/opengl/Mem.cpp @@ -267,7 +267,7 @@ void fill_block(BLOCK b, vector& vBlockData, vector& vBilinearData, } if (floatfmt) { - Vector* psrcv = (Vector*)&vBilinearData[0] + b.ox + b.oy * BLOCK_TEXWIDTH; + float4* psrcv = (float4*)&vBilinearData[0] + b.ox + b.oy * BLOCK_TEXWIDTH; for(int i = 0; i < b.height; ++i) { @@ -276,7 +276,7 @@ void fill_block(BLOCK b, vector& vBlockData, vector& vBilinearData, for(int j = 0; j < b.width; ++j) { u32 temp = ((j + 1) % b.width); - Vector* pv = &psrcv[i_width + j]; + float4* pv = &psrcv[i_width + j]; pv->x = psrcf[i_width + j]; pv->y = psrcf[i_width + temp]; pv->z = psrcf[i_width2 + j]; @@ -291,7 +291,7 @@ void BLOCK::FillBlocks(vector& vBlockData, vector& vBilinearData, in FUNCLOG if (floatfmt) { vBlockData.resize(BLOCK_TEXWIDTH * BLOCK_TEXHEIGHT * 4); - vBilinearData.resize(BLOCK_TEXWIDTH * BLOCK_TEXHEIGHT * sizeof(Vector)); + vBilinearData.resize(BLOCK_TEXWIDTH * BLOCK_TEXHEIGHT * sizeof(float4)); } else { vBlockData.resize(BLOCK_TEXWIDTH * BLOCK_TEXHEIGHT * 2); } diff --git a/plugins/zzogl-pg/opengl/Mem.h b/plugins/zzogl-pg/opengl/Mem.h index 95689692cf..19ee8e5d07 100644 --- a/plugins/zzogl-pg/opengl/Mem.h +++ b/plugins/zzogl-pg/opengl/Mem.h @@ -120,8 +120,8 @@ struct BLOCK BLOCK() { memset(this, 0, sizeof(BLOCK)); } // shader constants for this block - Vector vTexBlock; - Vector vTexDims; + float4 vTexBlock; + float4 vTexDims; int width, height; // dims of one page in pixels int ox, oy, mult; int bpp; @@ -147,8 +147,8 @@ struct BLOCK ox = ox2; oy = oy2; mult = mult2; - vTexDims = Vector(BLOCK_TEXWIDTH/(float)(bw), BLOCK_TEXHEIGHT/(float)bh, 0, 0); - vTexBlock = Vector((float)bw/BLOCK_TEXWIDTH, (float)bh/BLOCK_TEXHEIGHT, ((float)ox+0.2f)/BLOCK_TEXWIDTH, ((float)oy+0.05f)/BLOCK_TEXHEIGHT); + vTexDims = float4(BLOCK_TEXWIDTH/(float)(bw), BLOCK_TEXHEIGHT/(float)bh, 0, 0); + vTexBlock = float4((float)bw/BLOCK_TEXWIDTH, (float)bh/BLOCK_TEXHEIGHT, ((float)ox+0.2f)/BLOCK_TEXWIDTH, ((float)oy+0.05f)/BLOCK_TEXHEIGHT); width = bw; height = bh; colwidth = bh / 4; diff --git a/plugins/zzogl-pg/opengl/NewRegs.cpp b/plugins/zzogl-pg/opengl/NewRegs.cpp index c258d4f475..3c51353c8d 100644 --- a/plugins/zzogl-pg/opengl/NewRegs.cpp +++ b/plugins/zzogl-pg/opengl/NewRegs.cpp @@ -638,7 +638,7 @@ void __gifCall GIFRegHandlerSCISSOR(const u32* data) Flush(); } - m_env.CTXT[i].SCISSOR = (GSVector4i)r->SCISSOR; + m_env.CTXT[i].SCISSOR = (Vector4i)r->SCISSOR; m_env.CTXT[i].UpdateScissor();*/ ZZLog::Greg_Log("SCISSOR%d", i); diff --git a/plugins/zzogl-pg/opengl/Regs.cpp b/plugins/zzogl-pg/opengl/Regs.cpp index aafd648ab5..8ca39fe749 100644 --- a/plugins/zzogl-pg/opengl/Regs.cpp +++ b/plugins/zzogl-pg/opengl/Regs.cpp @@ -55,7 +55,7 @@ inline bool NoHighlights(int i) // if ( results[resultA] == 0 ) { // results[resultA] = 1; -// ZZLog::ERROR_LOG("%x = %d %d %d %d %d %d %d %d \n", resultA, prim->iip, (prim->tme), (prim->fge), (prim->abe) , (prim->aa1) ,(prim->fst), (prim->ctxt), (prim->fix)) ; +// ZZLog::Error_Log("%x = %d %d %d %d %d %d %d %d \n", resultA, prim->iip, (prim->tme), (prim->fge), (prim->abe) , (prim->aa1) ,(prim->fst), (prim->ctxt), (prim->fix)) ; // } // if (resultA == 0xb && ZeroGS::vb[i].zbuf.zmsk ) return false; //ATF diff --git a/plugins/zzogl-pg/opengl/Util.h b/plugins/zzogl-pg/opengl/Util.h index 75fa384167..15964f6e93 100644 --- a/plugins/zzogl-pg/opengl/Util.h +++ b/plugins/zzogl-pg/opengl/Util.h @@ -52,7 +52,7 @@ extern "C" u32 CALLBACK PS2EgetLibType(void); extern "C" u32 CALLBACK PS2EgetLibVersion2(u32 type); extern "C" char* CALLBACK PS2EgetLibName(void); -#include "zerogsmath.h" +#include "ZZoglMath.h" #include #include diff --git a/plugins/zzogl-pg/opengl/Win32/Win32.cpp b/plugins/zzogl-pg/opengl/Win32/Win32.cpp index 03ee157668..e52c6909b1 100644 --- a/plugins/zzogl-pg/opengl/Win32/Win32.cpp +++ b/plugins/zzogl-pg/opengl/Win32/Win32.cpp @@ -37,62 +37,27 @@ void CALLBACK GSkeyEvent(keyEvent *ev) #include "Win32/resource.h" -BOOL CALLBACK LoggingDlgProc(HWND hW, UINT uMsg, WPARAM wParam, LPARAM lParam) -{ - switch (uMsg) - { - - case WM_INITDIALOG: - - if (conf.log) CheckDlgButton(hW, IDC_LOG, true); - - return true; - - case WM_COMMAND: - switch (LOWORD(wParam)) - { - case IDCANCEL: - EndDialog(hW, true); - return true; - - case IDOK: - - if (IsDlgButtonChecked(hW, IDC_LOG)) - conf.log = 1; - else - conf.log = 0; - - SaveConfig(); - - EndDialog(hW, false); - - return true; - } - } - - return false; -} - map mapConfOpts; #define PUT_CONF(id) mapConfOpts[IDC_CONFOPT_##id] = 0x##id; -void OnInitDialog(HWND hW) +void OnAdvOK(HWND hW) { - if (!(conf.zz_options.loaded)) LoadConfig(); + conf.hacks._u32 = 0; - CheckDlgButton(hW, IDC_CONFIG_INTERLACE, conf.interlace); - CheckDlgButton(hW, IDC_CONFIG_BILINEAR, conf.bilinear); - CheckDlgButton(hW, IDC_CONFIG_DEPTHWRITE, conf.mrtdepth); - CheckRadioButton(hW, IDC_CONFIG_AANONE, IDC_CONFIG_AA4, IDC_CONFIG_AANONE + conf.aa); - CheckDlgButton(hW, IDC_CONFIG_WIREFRAME, (conf.wireframe()) ? 1 : 0); - CheckDlgButton(hW, IDC_CONFIG_CAPTUREAVI, (conf.captureAvi()) ? 1 : 0); - CheckDlgButton(hW, IDC_CONFIG_FULLSCREEN, (conf.fullscreen()) ? 1 : 0); - CheckDlgButton(hW, IDC_CONFIG_WIDESCREEN, (conf.widescreen()) ? 1 : 0); - CheckDlgButton(hW, IDC_CONFIG_BMPSS, (conf.zz_options.tga_snap) ? 1 : 0); - CheckRadioButton(hW, IDC_CONF_WIN640, IDC_CONF_WIN1280, IDC_CONF_WIN640 + conf.zz_options.dimensions); + for (map::iterator it = mapConfOpts.begin(); it != mapConfOpts.end(); ++it) + { + if (IsDlgButtonChecked(hW, it->first)) conf.hacks._u32 |= it->second; + } - prevbilinearfilter = conf.bilinear; + GSsetGameCRC(g_LastCRC, conf.hacks._u32); + SaveConfig(); + + EndDialog(hW, false); +} + +void OnInitAdvDialog(HWND hW) +{ mapConfOpts.clear(); PUT_CONF(00000001); @@ -129,45 +94,87 @@ void OnInitDialog(HWND hW) } } -void OnOK(HWND hW) +BOOL CALLBACK AdvancedDialogProc(HWND hW, UINT uMsg, WPARAM wParam, LPARAM lParam) +{ + switch (uMsg) + { + case WM_INITDIALOG: + OnInitAdvDialog(hW); + return true; + + case WM_COMMAND: + + switch (LOWORD(wParam)) + { + case IDCANCEL: + EndDialog(hW, true); + return true; + + case IDOK: + OnAdvOK(hW); + return true; + } + } + + return false; +} + +void CALLBACK AdvancedDialog() +{ + DialogBox(hInst, + MAKEINTRESOURCE(IDD_ADV_OPTIONS), + GetActiveWindow(), + (DLGPROC)AdvancedDialogProc); +} + +void OnInitConfDialog(HWND hW) +{ + if (!(conf.zz_options.loaded)) LoadConfig(); + + TCHAR *aaName[] = {"None", "x2", "x4", "x8", "x16"}; + + for(int i=0; i<5; i++) + { + ComboBox_AddString(GetDlgItem(hW, IDC_AA_COMBO), (LPARAM)aaName[i]); + } + ComboBox_SelectString(GetDlgItem(hW, IDC_AA_COMBO), -1, (LPARAM)aaName[conf.aa]); + + TCHAR *sizeName[] = {"640 x 480", "800 x 600", "1024 x 768", "1280 x 960"}; + + for(int i=0; i<4; i++) + { + ComboBox_AddString(GetDlgItem(hW, IDC_WIN_SIZE_COMBO), (LPARAM)sizeName[i]); + } + ComboBox_SelectString(GetDlgItem(hW, IDC_WIN_SIZE_COMBO), -1, (LPARAM)sizeName[conf.zz_options.dimensions]); + + CheckDlgButton(hW, IDC_CONFIG_INTERLACE, conf.interlace); + CheckDlgButton(hW, IDC_CONFIG_BILINEAR, conf.bilinear); + CheckDlgButton(hW, IDC_CONFIG_DEPTHWRITE, conf.mrtdepth); + CheckDlgButton(hW, IDC_CONFIG_WIREFRAME, (conf.wireframe()) ? 1 : 0); + CheckDlgButton(hW, IDC_CONFIG_CAPTUREAVI, (conf.captureAvi()) ? 1 : 0); + CheckDlgButton(hW, IDC_CONFIG_FULLSCREEN, (conf.fullscreen()) ? 1 : 0); + CheckDlgButton(hW, IDC_CONFIG_WIDESCREEN, (conf.widescreen()) ? 1 : 0); + CheckDlgButton(hW, IDC_CONFIG_BMPSS, (conf.zz_options.tga_snap) ? 1 : 0); + + prevbilinearfilter = conf.bilinear; +} + +void OnConfOK(HWND hW) { u32 newinterlace = IsDlgButtonChecked(hW, IDC_CONFIG_INTERLACE); - if (!conf.interlace) conf.interlace = newinterlace; - else if (!newinterlace) conf.interlace = 2; // off + if (!conf.interlace) + conf.interlace = newinterlace; + else if (!newinterlace) + conf.interlace = 2; // off conf.bilinear = IsDlgButtonChecked(hW, IDC_CONFIG_BILINEAR); // restore - if (conf.bilinear && prevbilinearfilter) - conf.bilinear = prevbilinearfilter; + if (conf.bilinear && prevbilinearfilter) conf.bilinear = prevbilinearfilter; - //conf.mrtdepth = 1;//IsDlgButtonChecked(hW, IDC_CONFIG_DEPTHWRITE); - - if (SendDlgItemMessage(hW, IDC_CONFIG_AANONE, BM_GETCHECK, 0, 0)) - { - conf.aa = 0; - } - else if (SendDlgItemMessage(hW, IDC_CONFIG_AA2, BM_GETCHECK, 0, 0)) - { - conf.aa = 1; - } - else if (SendDlgItemMessage(hW, IDC_CONFIG_AA4, BM_GETCHECK, 0, 0)) - { - conf.aa = 2; - } - else if (SendDlgItemMessage(hW, IDC_CONFIG_AA8, BM_GETCHECK, 0, 0)) - { - conf.aa = 3; - } - else if (SendDlgItemMessage(hW, IDC_CONFIG_AA16, BM_GETCHECK, 0, 0)) - { - conf.aa = 4; - } - else - { - conf.aa = 0; - } + if (ComboBox_GetCurSel(GetDlgItem(hW, IDC_AA_COMBO)) != -1) + conf.aa = ComboBox_GetCurSel(GetDlgItem(hW, IDC_AA_COMBO)); conf.zz_options._u32 = 0; @@ -177,22 +184,13 @@ void OnOK(HWND hW) conf.zz_options.widescreen = IsDlgButtonChecked(hW, IDC_CONFIG_WIDESCREEN) ? 1 : 0; conf.zz_options.tga_snap = IsDlgButtonChecked(hW, IDC_CONFIG_BMPSS) ? 1 : 0; - conf.hacks._u32 = 0; - - for (map::iterator it = mapConfOpts.begin(); it != mapConfOpts.end(); ++it) - { - if (IsDlgButtonChecked(hW, it->first)) conf.hacks._u32 |= it->second; - } - - GSsetGameCRC(g_LastCRC, conf.hacks._u32); - - if (SendDlgItemMessage(hW, IDC_CONF_WIN640, BM_GETCHECK, 0, 0)) + if (ComboBox_GetCurSel(GetDlgItem(hW, IDC_WIN_SIZE_COMBO)) == 0) conf.zz_options.dimensions = GSDim_640; - else if (SendDlgItemMessage(hW, IDC_CONF_WIN800, BM_GETCHECK, 0, 0)) + else if (ComboBox_GetCurSel(GetDlgItem(hW, IDC_WIN_SIZE_COMBO)) == 1) conf.zz_options.dimensions = GSDim_800; - else if (SendDlgItemMessage(hW, IDC_CONF_WIN1024, BM_GETCHECK, 0, 0)) + else if (ComboBox_GetCurSel(GetDlgItem(hW, IDC_WIN_SIZE_COMBO)) == 2) conf.zz_options.dimensions = GSDim_1024; - else if (SendDlgItemMessage(hW, IDC_CONF_WIN1280, BM_GETCHECK, 0, 0)) + else if (ComboBox_GetCurSel(GetDlgItem(hW, IDC_WIN_SIZE_COMBO)) == 3) conf.zz_options.dimensions = GSDim_1280; SaveConfig(); @@ -205,19 +203,26 @@ BOOL CALLBACK ConfigureDlgProc(HWND hW, UINT uMsg, WPARAM wParam, LPARAM lParam) switch (uMsg) { case WM_INITDIALOG: - OnInitDialog(hW); + OnInitConfDialog(hW); return true; case WM_COMMAND: switch (LOWORD(wParam)) { + case IDC_AA_COMBO: + break; + + case IDC_ADV_BTN: + AdvancedDialog(); + return true; + case IDCANCEL: EndDialog(hW, true); return true; case IDOK: - OnOK(hW); + OnConfOK(hW); return true; } } @@ -225,13 +230,26 @@ BOOL CALLBACK ConfigureDlgProc(HWND hW, UINT uMsg, WPARAM wParam, LPARAM lParam) return false; } +void CALLBACK GSconfigure() +{ + DialogBox(hInst, + MAKEINTRESOURCE(IDD_CONFIG2), + GetActiveWindow(), + (DLGPROC)ConfigureDlgProc); + + if (g_nPixelShaderVer == SHADER_REDUCED) conf.bilinear = 0; +} + +s32 CALLBACK GStest() +{ + return 0; +} + BOOL CALLBACK AboutDlgProc(HWND hW, UINT uMsg, WPARAM wParam, LPARAM lParam) { switch (uMsg) { case WM_INITDIALOG: - //ZeroGS uses floating point render targets because A8R8G8B8 format is not sufficient for ps2 blending and this requires alpha blending on floating point render targets - //There might be a problem with pixel shader precision with older geforce models (textures will look blocky). return true; case WM_COMMAND: @@ -246,21 +264,6 @@ BOOL CALLBACK AboutDlgProc(HWND hW, UINT uMsg, WPARAM wParam, LPARAM lParam) return false; } -void CALLBACK GSconfigure() -{ - DialogBox(hInst, - MAKEINTRESOURCE(IDD_CONFIG), - GetActiveWindow(), - (DLGPROC)ConfigureDlgProc); - - if (g_nPixelShaderVer == SHADER_REDUCED) conf.bilinear = 0; -} - -s32 CALLBACK GStest() -{ - return 0; -} - void CALLBACK GSabout() { DialogBox(hInst, diff --git a/plugins/zzogl-pg/opengl/Win32/resrc1.h b/plugins/zzogl-pg/opengl/Win32/resrc1.h index c4259633ae..0c2e913e95 100644 --- a/plugins/zzogl-pg/opengl/Win32/resrc1.h +++ b/plugins/zzogl-pg/opengl/Win32/resrc1.h @@ -5,7 +5,6 @@ #define IDC_CONF_DEFAULT 3 #define IDR_DATA1 112 #define IDD_ADV_OPTIONS 113 -#define IDD_DIALOG1 114 #define IDD_CONFIG2 114 #define IDC_ABOUTTEXT 1015 #define IDC_CONFIG_AA 1016 @@ -52,12 +51,15 @@ #define IDC_CONFOPT_00004000 1047 #define IDC_BUTTON1 1048 #define IDC_CONFOPT_COMPUTEOR 1048 +#define IDC_ADV_BTN 1048 #define IDC_CONFOPT_4001 1049 #define IDC_CONFOPT_00000010 1049 #define IDC_CONFOPT_00008000 1050 #define IDC_CONFOPT_00010000 1052 #define IDC_CONFOPT_00020000 1054 +#define IDC_AA_COMBO 1054 #define IDC_CONFOPT_00000002 1055 +#define IDC_WIN_SIZE_COMBO 1055 #define IDC_CONFOPT_01000000 1056 #define IDC_CONFOPT_00800000 1057 #define IDC_CONFOPT_00000008 1058 @@ -80,7 +82,7 @@ #ifndef APSTUDIO_READONLY_SYMBOLS #define _APS_NEXT_RESOURCE_VALUE 116 #define _APS_NEXT_COMMAND_VALUE 40001 -#define _APS_NEXT_CONTROL_VALUE 1051 +#define _APS_NEXT_CONTROL_VALUE 1056 #define _APS_NEXT_SYMED_VALUE 101 #endif #endif diff --git a/plugins/zzogl-pg/opengl/Win32/zerogs.rc b/plugins/zzogl-pg/opengl/Win32/zerogs.rc index 5655a1bbbd..2aab0fd2d6 100644 --- a/plugins/zzogl-pg/opengl/Win32/zerogs.rc +++ b/plugins/zzogl-pg/opengl/Win32/zerogs.rc @@ -206,32 +206,28 @@ BEGIN "Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,14,266,365,8 END -IDD_CONFIG2 DIALOGEX 0, 0, 171, 217 +IDD_CONFIG2 DIALOGEX 0, 0, 159, 160 STYLE DS_SETFONT | DS_MODALFRAME | DS_FIXEDSYS | WS_POPUP | WS_CAPTION | WS_SYSMENU CAPTION "ZZOgl Options" FONT 8, "MS Shell Dlg", 400, 0, 0x1 BEGIN - DEFPUSHBUTTON "OK",IDOK,55,192,50,14 - PUSHBUTTON "Cancel",IDCANCEL,108,192,50,14 - GROUPBOX "Static",IDC_STATIC,7,7,152,183 - CONTROL "Logging (For Debugging)",1000,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,14,18,102,10 + DEFPUSHBUTTON "OK",IDOK,37,138,50,14 + PUSHBUTTON "Cancel",IDCANCEL,91,138,50,14 + CONTROL "Logging (For Debugging)",1000,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,7,102,10 CONTROL "Interlace Enable (toggle with F5). There are 2 modes + interlace off",IDC_CONFIG_INTERLACE, - "Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,14,45,137,18 + "Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,7,32,137,18 CONTROL "Bilinear Filtering (Shift+F5). Best quality is on, turn off for speed.",IDC_CONFIG_BILINEAR, - "Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,14,67,137,18 + "Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,7,50,137,18 CONTROL "Capture Avi (zerogs.avi) (F12)",IDC_CONFIG_CAPTUREAVI, - "Button",BS_AUTOCHECKBOX | WS_TABSTOP,14,103,109,10 + "Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,82,109,10 CONTROL "Save Snapshots as BMP(default is JPG)",IDC_CONFIG_BMPSS, - "Button",BS_AUTOCHECKBOX | WS_TABSTOP,14,116,141,10 - CONTROL "Wide Screen",IDC_CONFIG_WIDESCREEN,"Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,14,90,109,10 - CONTROL "640 x 480",IDC_CONF_WIN640,"Button",BS_AUTORADIOBUTTON | WS_GROUP,20,140,59,8 - CONTROL "800 x 600",IDC_CONF_WIN800,"Button",BS_AUTORADIOBUTTON,21,152,59,8 - CONTROL "1024 x 768",IDC_CONF_WIN1024,"Button",BS_AUTORADIOBUTTON,86,140,59,8 - CONTROL "1280 x 960",IDC_CONF_WIN1280,"Button",BS_AUTORADIOBUTTON,86,151,53,8 - GROUPBOX "Default Window Size (no speed impact)",IDC_STATIC,14,129,137,39 - COMBOBOX IDC_COMBO1,59,31,48,30,CBS_DROPDOWNLIST | CBS_SORT | WS_VSCROLL | WS_TABSTOP - LTEXT "Anti-aliasing",IDC_STATIC,15,33,43,13 - PUSHBUTTON "Advanced...",IDC_BUTTON1,17,170,134,14 + "Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,93,141,10 + CONTROL "Wide Screen",IDC_CONFIG_WIDESCREEN,"Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,7,69,109,10 + LTEXT "Anti-aliasing",IDC_STATIC,7,20,43,13 + PUSHBUTTON "Advanced...",IDC_ADV_BTN,7,118,134,14 + COMBOBOX IDC_AA_COMBO,53,18,48,30,CBS_DROPDOWN | WS_VSCROLL | WS_TABSTOP + COMBOBOX IDC_WIN_SIZE_COMBO,78,104,62,30,CBS_DROPDOWN | WS_VSCROLL | WS_TABSTOP + LTEXT "Default Window Size",IDC_STATIC,7,106,68,8 END @@ -277,9 +273,9 @@ BEGIN IDD_CONFIG2, DIALOG BEGIN LEFTMARGIN, 7 - RIGHTMARGIN, 164 + RIGHTMARGIN, 152 TOPMARGIN, 7 - BOTTOMMARGIN, 210 + BOTTOMMARGIN, 152 END END #endif // APSTUDIO_INVOKED @@ -311,27 +307,6 @@ END #endif // APSTUDIO_INVOKED - -///////////////////////////////////////////////////////////////////////////// -// -// Dialog Info -// - -IDD_CONFIG2 DLGINIT -BEGIN - IDC_COMBO1, 0x403, 5, 0 -0x6f4e, 0x656e, "\000" - IDC_COMBO1, 0x403, 3, 0 -0x5832, "\000" - IDC_COMBO1, 0x403, 3, 0 -0x5834, "\000" - IDC_COMBO1, 0x403, 3, 0 -0x5838, "\000" - IDC_COMBO1, 0x403, 4, 0 -0x3631, 0x0058, - 0 -END - #endif // English (U.S.) resources ///////////////////////////////////////////////////////////////////////////// diff --git a/plugins/zzogl-pg/opengl/Win32/zerogsogl_2008.vcproj b/plugins/zzogl-pg/opengl/Win32/zerogsogl_2008.vcproj index 2862c0a2da..9b2502170d 100644 --- a/plugins/zzogl-pg/opengl/Win32/zerogsogl_2008.vcproj +++ b/plugins/zzogl-pg/opengl/Win32/zerogsogl_2008.vcproj @@ -481,10 +481,6 @@ RelativePath="..\zerogs.h" > - - @@ -497,6 +493,10 @@ RelativePath="..\ZZoglFlushHack.h" > + + @@ -528,11 +528,11 @@ diff --git a/plugins/zzogl-pg/opengl/ZZoglCRTC.cpp b/plugins/zzogl-pg/opengl/ZZoglCRTC.cpp index 64a6b31374..768eb2d1f4 100644 --- a/plugins/zzogl-pg/opengl/ZZoglCRTC.cpp +++ b/plugins/zzogl-pg/opengl/ZZoglCRTC.cpp @@ -50,7 +50,7 @@ extern bool g_bMakeSnapshot; extern string strSnapshot; // Adjusts vertex shader BitBltPos vector v to preserve aspect ratio. It used to emulate 4:3 or 16:9. -void ZeroGS::AdjustTransToAspect(Vector& v) +void ZeroGS::AdjustTransToAspect(float4& v) { double temp; float f; @@ -242,11 +242,11 @@ inline void RenderStartHelper(u32 bInterlace) // on image y coords. So if we write valpha.z * F + valpha.w + 0.5, it would be switching odd // and even strings at each frame. // valpha.x and y are used for image blending. -inline Vector RenderGetForClip(u32 bInterlace, int interlace, int psm, FRAGMENTSHADER* prog) +inline float4 RenderGetForClip(u32 bInterlace, int interlace, int psm, FRAGMENTSHADER* prog) { SetShaderCaller("RenderGetForClip"); - Vector valpha; + float4 valpha; // first render the current render targets, then from ptexMem if (psm == 1) @@ -282,7 +282,7 @@ inline Vector RenderGetForClip(u32 bInterlace, int interlace, int psm, FRAGMENTS valpha.w = 1; } - ZZshSetParameter4fv(prog->sOneColor, valpha, "g_fOneColor"); + ZZshSetParameter4fv(prog->prog, prog->sOneColor, valpha, "g_fOneColor"); return valpha; } @@ -295,7 +295,7 @@ inline void RenderCreateInterlaceTex(u32 bInterlace, int th, FRAGMENTSHADER* pro int interlacetex = CreateInterlaceTex(2 * th); - ZZshGLSetTextureParameter(prog->sInterlace, interlacetex, "Interlace"); + ZZshGLSetTextureParameter(prog->prog, prog->sInterlace, interlacetex, "Interlace"); } // Well, do blending setup prior to second pass of half-frame drawing @@ -396,10 +396,10 @@ inline int RenderGetOffsets(int* dby, int* movy, tex0Info& texframe, CRenderTarg } // BltBit shader calculate vertex (4 coord's pixel) position at the viewport. -inline Vector RenderSetTargetBitPos(int dh, int th, int movy, bool isInterlace) +inline float4 RenderSetTargetBitPos(int dh, int th, int movy, bool isInterlace) { SetShaderCaller("RenderSetTargetBitPos"); - Vector v; + float4 v; // dest rect v.x = 1; v.y = dh / (float)th; @@ -416,7 +416,7 @@ inline Vector RenderSetTargetBitPos(int dh, int th, int movy, bool isInterlace) v.w += 1.0f / (float)dh ; } - ZZshSetParameter4fv(pvsBitBlt.sBitBltPos, v, "g_fBitBltPos"); + ZZshSetParameter4fv(pvsBitBlt.prog, pvsBitBlt.sBitBltPos, v, "g_fBitBltPos"); return v; } @@ -425,12 +425,12 @@ inline Vector RenderSetTargetBitPos(int dh, int th, int movy, bool isInterlace) // For example, use tw / X and tw / X magnify the viewport. // Interlaced output is little out of VB, it could be seen as an evil blinking line on top // and bottom, so we try to remove it. -inline Vector RenderSetTargetBitTex(float th, float tw, float dh, float dw, bool isInterlace) +inline float4 RenderSetTargetBitTex(float th, float tw, float dh, float dw, bool isInterlace) { SetShaderCaller("RenderSetTargetBitTex"); - Vector v; - v = Vector(th, tw, dh, dw); + float4 v; + v = float4(th, tw, dh, dw); // Incorrect Aspect ratio on interlaced frames @@ -440,28 +440,28 @@ inline Vector RenderSetTargetBitTex(float th, float tw, float dh, float dw, bool v.w += 1.0f / conf.height; } - ZZshSetParameter4fv(pvsBitBlt.sBitBltTex, v, "g_fBitBltTex"); + ZZshSetParameter4fv(pvsBitBlt.prog, pvsBitBlt.sBitBltTex, v, "g_fBitBltTex"); return v; } // Translator for POSITION coordinates (-1.0:+1.0f at x axis, +1.0f:-1.0y at y) into target frame ones. // We don't need x coordinate, because interlacing is y-axis only. -inline Vector RenderSetTargetBitTrans(int th) +inline float4 RenderSetTargetBitTrans(int th) { SetShaderCaller("RenderSetTargetBitTrans"); - Vector v = Vector(float(th), -float(th), float(th), float(th)); - ZZshSetParameter4fv(pvsBitBlt.fBitBltTrans, v, "g_fBitBltTrans"); + float4 v = float4(float(th), -float(th), float(th), float(th)); + ZZshSetParameter4fv(pvsBitBlt.prog, pvsBitBlt.fBitBltTrans, v, "g_fBitBltTrans"); return v; } // use g_fInvTexDims to store inverse texture dims // Seems, that Targ shader does not use it -inline Vector RenderSetTargetInvTex(int bInterlace, int tw, int th, FRAGMENTSHADER* prog) +inline float4 RenderSetTargetInvTex(int bInterlace, int tw, int th, FRAGMENTSHADER* prog) { SetShaderCaller("RenderSetTargetInvTex"); - Vector v = Vector(0, 0, 0, 0); + float4 v = float4(0, 0, 0, 0); if (prog->sInvTexDims) { @@ -469,7 +469,7 @@ inline Vector RenderSetTargetInvTex(int bInterlace, int tw, int th, FRAGMENTSHAD v.y = 1.0f / (float)th; v.z = (float)0.0; v.w = -0.5f / (float)th; - ZZshSetParameter4fv(prog->sInvTexDims, v, "g_fInvTexDims"); + ZZshSetParameter4fv(prog->prog, prog->sInvTexDims, v, "g_fInvTexDims"); } return v; @@ -544,17 +544,17 @@ inline void RenderCheckForTargets(tex0Info& texframe, list& list SetShaderCaller("RenderCheckForTargets"); // Texture - Vector v = RenderSetTargetBitTex((float)RW(texframe.tw), (float)RH(dh), (float)RW(pfb->DBX), (float)RH(dby), INTERLACE_COUNT); + float4 v = RenderSetTargetBitTex((float)RW(texframe.tw), (float)RH(dh), (float)RW(pfb->DBX), (float)RH(dby), INTERLACE_COUNT); // dest rect v = RenderSetTargetBitPos(dh, texframe.th, movy, INTERLACE_COUNT); v = RenderSetTargetBitTrans(ptarg->fbh); v = RenderSetTargetInvTex(bInterlace, texframe.tbw, ptarg->fbh, &ppsCRTCTarg[bInterlace]) ; // FIXME. This is no use - Vector valpha = RenderGetForClip(bInterlace, interlace, texframe.psm, &ppsCRTCTarg[bInterlace]); + float4 valpha = RenderGetForClip(bInterlace, interlace, texframe.psm, &ppsCRTCTarg[bInterlace]); // inside vb[0]'s target area, so render that region only - ZZshGLSetTextureParameter(ppsCRTCTarg[bInterlace].sFinal, ptarg->ptex, "CRTC target"); + ZZshGLSetTextureParameter(ppsCRTCTarg[bInterlace].prog, ppsCRTCTarg[bInterlace].sFinal, ptarg->ptex, "CRTC target"); RenderCreateInterlaceTex(bInterlace, texframe.th, &ppsCRTCTarg[bInterlace]); ZZshSetPixelShader(ppsCRTCTarg[bInterlace].prog); @@ -582,7 +582,7 @@ inline void RenderCheckForTargets(tex0Info& texframe, list& list // this is the function that does it. inline void RenderCheckForMemory(tex0Info& texframe, list& listTargs, int i, bool* bUsingStencil, int interlace, int bInterlace) { - Vector v; + float4 v; for (list::iterator it = listTargs.begin(); it != listTargs.end(); ++it) { @@ -624,9 +624,9 @@ inline void RenderCheckForMemory(tex0Info& texframe, list& listT v = RenderSetTargetBitPos(1, 1, 0, INTERLACE_COUNT); v = RenderSetTargetBitTrans(texframe.th); v = RenderSetTargetInvTex(bInterlace, texframe.tw, texframe.th, &ppsCRTC[bInterlace]); - Vector valpha = RenderGetForClip(bInterlace, interlace, texframe.psm, &ppsCRTC[bInterlace]); + float4 valpha = RenderGetForClip(bInterlace, interlace, texframe.psm, &ppsCRTC[bInterlace]); - ZZshGLSetTextureParameter(ppsCRTC[bInterlace].sMemory, vb[0].pmemtarg->ptex->tex, "CRTC memory"); + ZZshGLSetTextureParameter(ppsCRTC[bInterlace].prog, ppsCRTC[bInterlace].sMemory, vb[0].pmemtarg->ptex->tex, "CRTC memory"); RenderCreateInterlaceTex(bInterlace, texframe.th, &ppsCRTC[bInterlace]); ZZshSetPixelShader(ppsCRTC[bInterlace].prog); diff --git a/plugins/zzogl-pg/opengl/ZZoglCRTC.h b/plugins/zzogl-pg/opengl/ZZoglCRTC.h index 3b617fca1e..7a1cb474fd 100644 --- a/plugins/zzogl-pg/opengl/ZZoglCRTC.h +++ b/plugins/zzogl-pg/opengl/ZZoglCRTC.h @@ -63,7 +63,7 @@ extern int s_nNewWidth, s_nNewHeight; extern CRangeManager s_RangeMngr; // manages overwritten memory extern void FlushTransferRanges(const tex0Info* ptex); extern void ProcessMessages(); -void AdjustTransToAspect(Vector& v); +void AdjustTransToAspect(float4& v); // Interlace texture is lazy 1*(height) array of 1 and 0. // If its height (named s_nInterlaceTexWidth here) is hanging we must redo diff --git a/plugins/zzogl-pg/opengl/ZZoglCreate.cpp b/plugins/zzogl-pg/opengl/ZZoglCreate.cpp index 941a0f4ddf..81d6040284 100644 --- a/plugins/zzogl-pg/opengl/ZZoglCreate.cpp +++ b/plugins/zzogl-pg/opengl/ZZoglCreate.cpp @@ -82,8 +82,8 @@ extern void KickTriangleFan(); extern void KickSprite(); extern void KickDummy(); extern bool LoadEffects(); -extern bool LoadExtraEffects(); -extern FRAGMENTSHADER* LoadShadeEffect(int type, int texfilter, int fog, int testaem, int exactcolor, const clampInfo& clamp, int context, bool* pbFailed); +extern bool ZZshLoadExtraEffects(); +extern FRAGMENTSHADER* ZZshLoadShadeEffect(int type, int texfilter, int fog, int testaem, int exactcolor, const clampInfo& clamp, int context, bool* pbFailed); GLuint vboRect = 0; vector g_vboBuffers; // VBOs for all drawing commands @@ -127,7 +127,6 @@ void (APIENTRY *zgsBlendFuncSeparateEXT)(GLenum, GLenum, GLenum, GLenum) = NULL; // State parameters extern u8* s_lpShaderResources; -ZZshProgram pvs[16] = {NULL}; // String's for shader file in developer mode #ifdef DEVBUILD diff --git a/plugins/zzogl-pg/opengl/ZZoglFlush.cpp b/plugins/zzogl-pg/opengl/ZZoglFlush.cpp index c47b4cc2d0..a457cebd8a 100644 --- a/plugins/zzogl-pg/opengl/ZZoglFlush.cpp +++ b/plugins/zzogl-pg/opengl/ZZoglFlush.cpp @@ -156,7 +156,7 @@ int s_nWriteDestAlphaTest = 0; // ZZ //////////////////// // State parameters -static Vector vAlphaBlendColor; // used for GPU_COLOR +static float4 vAlphaBlendColor; // used for GPU_COLOR static bool bNeedBlendFactorInAlpha; // set if the output source alpha is different from the real source alpha (only when blend factor > 0x80) static u32 s_dwColorWrite = 0xf; // the color write mask of the current target @@ -310,7 +310,7 @@ void ZeroGS::ReloadEffects() memset(ppsTexture, 0, sizeof(ppsTexture)); - LoadExtraEffects(); + ZZshLoadExtraEffects(); #endif } @@ -830,11 +830,11 @@ inline int FlushGetShaderType(VB& curvb, CRenderTarget* ptextarg, GLuint& ptexcl //Set page offsets depends on shader type. -inline Vector FlushSetPageOffset(FRAGMENTSHADER* pfragment, int shadertype, CRenderTarget* ptextarg) +inline float4 FlushSetPageOffset(FRAGMENTSHADER* pfragment, int shadertype, CRenderTarget* ptextarg) { SetShaderCaller("FlushSetPageOffset"); - Vector vpageoffset; + float4 vpageoffset; vpageoffset.w = 0; switch (shadertype) @@ -863,14 +863,14 @@ inline Vector FlushSetPageOffset(FRAGMENTSHADER* pfragment, int shadertype, CRen } //Set texture offsets depends omn shader type. -inline Vector FlushSetTexOffset(FRAGMENTSHADER* pfragment, int shadertype, VB& curvb, CRenderTarget* ptextarg) +inline float4 FlushSetTexOffset(FRAGMENTSHADER* pfragment, int shadertype, VB& curvb, CRenderTarget* ptextarg) { SetShaderCaller("FlushSetTexOffset"); - Vector v; + float4 v; if (shadertype == 3) { - Vector v; + float4 v; v.x = 16.0f / (float)curvb.tex0.tw; v.y = 16.0f / (float)curvb.tex0.th; v.z = 0.5f * v.x; @@ -879,7 +879,7 @@ inline Vector FlushSetTexOffset(FRAGMENTSHADER* pfragment, int shadertype, VB& c } else if (shadertype == 4) { - Vector v; + float4 v; v.x = 16.0f / (float)ptextarg->fbw; v.y = 16.0f / (float)ptextarg->fbh; v.z = -1; @@ -891,10 +891,10 @@ inline Vector FlushSetTexOffset(FRAGMENTSHADER* pfragment, int shadertype, VB& c } // Set dimension (Real!) of texture. z and w -inline Vector FlushTextureDims(FRAGMENTSHADER* pfragment, int shadertype, VB& curvb, CRenderTarget* ptextarg) +inline float4 FlushTextureDims(FRAGMENTSHADER* pfragment, int shadertype, VB& curvb, CRenderTarget* ptextarg) { SetShaderCaller("FlushTextureDims"); - Vector vTexDims; + float4 vTexDims; vTexDims.x = (float)RW(curvb.tex0.tw) ; vTexDims.y = (float)RH(curvb.tex0.th) ; @@ -958,14 +958,14 @@ inline FRAGMENTSHADER* FlushUseExistRenderTarget(VB& curvb, CRenderTarget* ptext //int psm = PIXEL_STORAGE_FORMAT(curvb.tex0); int shadertype = FlushGetShaderType(curvb, ptextarg, ptexclut); - FRAGMENTSHADER* pfragment = LoadShadeEffect(shadertype, 0, curvb.curprim.fge, + FRAGMENTSHADER* pfragment = ZZshLoadShadeEffect(shadertype, 0, curvb.curprim.fge, IsAlphaTestExpansion(curvb.tex0), exactcolor, curvb.clamp, context, NULL); - Vector vpageoffset = FlushSetPageOffset(pfragment, shadertype, ptextarg); + float4 vpageoffset = FlushSetPageOffset(pfragment, shadertype, ptextarg); - Vector v = FlushSetTexOffset(pfragment, shadertype, curvb, ptextarg); + float4 v = FlushSetTexOffset(pfragment, shadertype, curvb, ptextarg); - Vector vTexDims = FlushTextureDims(pfragment, shadertype, curvb, ptextarg); + float4 vTexDims = FlushTextureDims(pfragment, shadertype, curvb, ptextarg); if (pfragment->sCLUT != NULL && ptexclut != 0) ZZshGLSetTextureParameter(pfragment->sCLUT, ptexclut, "CLUT"); @@ -997,7 +997,7 @@ inline FRAGMENTSHADER* FlushMadeNewTarget(VB& curvb, int exactcolor, int context } } - FRAGMENTSHADER* pfragment = LoadShadeEffect(0, GetTexFilter(curvb.tex1), curvb.curprim.fge, + FRAGMENTSHADER* pfragment = ZZshLoadShadeEffect(0, GetTexFilter(curvb.tex1), curvb.curprim.fge, IsAlphaTestExpansion(curvb.tex0), exactcolor, curvb.clamp, context, NULL); if (pfragment == NULL) @@ -1160,7 +1160,7 @@ inline u32 AlphaRenderAlpha(VB& curvb, const pixTest curtest, FRAGMENTSHADER* pf } // harvest fishing - Vector v = vAlphaBlendColor; + float4 v = vAlphaBlendColor; if (exactcolor) { @@ -1173,7 +1173,7 @@ inline u32 AlphaRenderAlpha(VB& curvb, const pixTest curtest, FRAGMENTSHADER* pf else { // not using blending so set to defaults - Vector v = exactcolor ? Vector(1, 510 * 255.0f / 256.0f, 0, 0) : Vector(1, 2 * 255.0f / 256.0f, 0, 0); + float4 v = exactcolor ? float4(1, 510 * 255.0f / 256.0f, 0, 0) : float4(1, 2 * 255.0f / 256.0f, 0, 0); ZZshSetParameter4fv(pfragment->sOneColor, v, "g_fOneColor"); } @@ -1257,7 +1257,7 @@ inline void AlphaPabe(VB& curvb, FRAGMENTSHADER* pfragment, int exactcolor) glDisable(GL_BLEND); GL_STENCILFUNC_SET(); - Vector v; + float4 v; v.x = 1; v.y = 2; v.z = 0; @@ -1330,7 +1330,7 @@ inline void AlphaFailureTestJob(VB& curvb, const pixTest curtest, FRAGMENTSHADE if (gs.pabe && bCanRenderStencil) { // only render the pixels with alpha values >= 0x80 - Vector v = vAlphaBlendColor; + float4 v = vAlphaBlendColor; if (exactcolor) { v.y *= 255; v.w *= 255; } @@ -1350,7 +1350,7 @@ inline void AlphaFailureTestJob(VB& curvb, const pixTest curtest, FRAGMENTSHADE glDisable(GL_BLEND); GL_STENCILFUNC_SET(); - Vector v; + float4 v; v.x = 1; v.y = 2; v.z = 0; @@ -1409,7 +1409,7 @@ inline void AlphaSpecialTesting(VB& curvb, FRAGMENTSHADER* pfragment, u32 dwUsin glStencilFunc(GL_EQUAL, STENCIL_SPECIAL | STENCIL_PIXELWRITE, STENCIL_SPECIAL | STENCIL_PIXELWRITE); glStencilOp(GL_KEEP, GL_KEEP, GL_KEEP); - Vector v = Vector(0, exactcolor ? 510.0f : 2.0f, 0, 0); + float4 v = float4(0, exactcolor ? 510.0f : 2.0f, 0, 0); ZZshSetParameter4fv(pfragment->sOneColor, v, "g_fOneColor"); Draw(curvb); @@ -1560,7 +1560,7 @@ inline void ZeroGS::RenderFBA(const VB& curvb, ZZshParameter sOneColor) glAlphaFunc(GL_GEQUAL, 1); - Vector v(1,2,0,0); + float4 v(1,2,0,0); ZZshSetParameter4fv(sOneColor, v, "g_fOneColor"); @@ -1599,7 +1599,7 @@ __forceinline void ZeroGS::RenderAlphaTest(const VB& curvb, ZZshParameter sOneCo SetShaderCaller("RenderAlphaTest"); - Vector v(1,2,0,0); + float4 v(1,2,0,0); ZZshSetParameter4fv(sOneColor, v, "g_fOneColor"); @@ -1624,7 +1624,7 @@ __forceinline void ZeroGS::RenderAlphaTest(const VB& curvb, ZZshParameter sOneCo if (curvb.test.ate && curvb.test.atst > 1 && curvb.test.aref > 0x80) { - v = Vector(1,1,0,0); + v = float4(1,1,0,0); ZZshSetParameter4fv(sOneColor, v, "g_fOneColor"); glAlphaFunc(g_dwAlphaCmp[curvb.test.atst], AlphaReferedValue(curvb.test.aref)); } @@ -1925,12 +1925,12 @@ void ZeroGS::SetTexInt(int context, FRAGMENTSHADER* pfragment, int settexint) } // clamp relies on texture width -void ZeroGS::SetTexClamping(int context, FRAGMENTSHADER* pfragment) +void SetTexClamping(int context, FRAGMENTSHADER* pfragment) { FUNCLOG SetShaderCaller("SetTexClamping"); clampInfo* pclamp = &ZeroGS::vb[context].clamp; - Vector v, v2; + float4 v, v2; v.x = v.y = 0; u32* ptex = ZeroGS::vb[context].ptexClamp; ptex[0] = ptex[1] = 0; @@ -2015,8 +2015,8 @@ void ZeroGS::SetTexClamping(int context, FRAGMENTSHADER* pfragment) } -// Fixme should be in Vector lib -inline bool equal_vectors(Vector a, Vector b) +// Fixme should be in float4 lib +inline bool equal_vectors(float4 a, float4 b) { if (abs(a.x - b.x) + abs(a.y - b.y) + abs(a.z - b.z) + abs(a.w - b.w) < 0.01) return true; @@ -2033,7 +2033,7 @@ void ZeroGS::SetTexVariables(int context, FRAGMENTSHADER* pfragment) assert(!vb[context].bNeedTexCheck); - Vector v, v2; + float4 v, v2; tex0Info& tex0 = vb[context].tex0; @@ -2045,14 +2045,14 @@ void ZeroGS::SetTexVariables(int context, FRAGMENTSHADER* pfragment) SetShaderCaller("SetTexVariables"); // alpha and texture highlighting - Vector valpha, valpha2 ; + float4 valpha, valpha2 ; // if clut, use the frame format int psm = PIXEL_STORAGE_FORMAT(tex0); // ZZLog::Error_Log( "A %d psm, is-clut %d. cpsm %d | %d %d", psm, PSMT_ISCLUT(psm), tex0.cpsm, tex0.tfx, tex0.tcc ); - Vector vblack; + float4 vblack; vblack.x = vblack.y = vblack.z = vblack.w = 10; /* tcc -- Tecture Color Component 0=RGB, 1=RGBA + use Alpha from TEXA reg when not in PSM @@ -2096,7 +2096,7 @@ void ZeroGS::SetTexVariables(int context, FRAGMENTSHADER* pfragment) /* // Test, old code. - Vector valpha3, valpha4; + float4 valpha3, valpha4; switch(tex0.tfx) { case 0: valpha3.z = 0; valpha3.w = 0; @@ -2206,7 +2206,7 @@ void ZeroGS::SetTexVariables(int context, FRAGMENTSHADER* pfragment) void ZeroGS::SetTexVariablesInt(int context, int bilinear, const tex0Info& tex0, bool CheckVB, FRAGMENTSHADER* pfragment, int force) { FUNCLOG - Vector v; + float4 v; CMemoryTarget* pmemtarg = g_MemTargs.GetMemoryTarget(tex0, 1); assert( pmemtarg != NULL && pfragment != NULL && pmemtarg->ptex != NULL); @@ -2248,7 +2248,7 @@ void ZeroGS::SetTexVariablesInt(int context, int bilinear, const tex0Info& tex0, float fbw = (float)tex0.tbw; - Vector vTexDims; + float4 vTexDims; vTexDims.x = b.vTexDims.x * (fw); vTexDims.y = b.vTexDims.y * (fh); @@ -2291,7 +2291,7 @@ void ZeroGS::SetTexVariablesInt(int context, int bilinear, const tex0Info& tex0, ZZshSetParameter4fv(pfragment->fTexDims, vTexDims, "g_fTexDims"); -// ZZshSetParameter4fv(pfragment->fTexBlock, b.vTexBlock, "g_fTexBlock"); // I change it, and it's working. Seems casting from Vector to float[4] is ok. +// ZZshSetParameter4fv(pfragment->fTexBlock, b.vTexBlock, "g_fTexBlock"); // I change it, and it's working. Seems casting from float4 to float[4] is ok. ZZshSetParameter4fv(pfragment->fTexBlock, &b.vTexBlock.x, "g_fTexBlock"); ZZshSetParameter4fv(pfragment->fTexOffset, v, "g_fTexOffset"); @@ -2403,7 +2403,7 @@ void ZeroGS::SetAlphaVariables(const alphaInfo& a) s_rgbeq = 1; // s_alphaInfo = a; - vAlphaBlendColor = Vector(1, 2 * 255.0f / 256.0f, 0, 0); + vAlphaBlendColor = float4(1, 2 * 255.0f / 256.0f, 0, 0); u32 usec = a.c; diff --git a/plugins/zzogl-pg/opengl/zerogsmath.h b/plugins/zzogl-pg/opengl/ZZoglMath.h similarity index 53% rename from plugins/zzogl-pg/opengl/zerogsmath.h rename to plugins/zzogl-pg/opengl/ZZoglMath.h index 3177e710db..dab2440566 100644 --- a/plugins/zzogl-pg/opengl/zerogsmath.h +++ b/plugins/zzogl-pg/opengl/ZZoglMath.h @@ -2,12 +2,15 @@ * * Zerofrog's ZeroGS KOSMOS (c)2005-2008 * - * Zerofrog forgot to write any copyright notice after release the plugin into GPLv2 + * Zerofrog forgot to write any copyright notice after releasing the plugin into GPLv2 * If someone can contact him successfully to clarify this matter that would be great. */ -#ifndef ZEROGS_MATH_H -#define ZEROGS_MATH_H +// Now that it's down to 82 lines, and most of it's fairly obvious, perhaps it'd be easier to +// just reimplement it... -arcum42 + +#ifndef ZZOGLMATH_H_INCLUDED +#define ZZOGLMATH_H_INCLUDED #ifndef _WIN32 #include @@ -22,16 +25,16 @@ typedef float dReal; // class used for 3 and 4 dim vectors and quaternions // It is better to use this for a 3 dim vector because it is 16byte aligned and SIMD instructions can be used -class Vector +class float4 { public: dReal x, y, z, w; - Vector() : x(0), y(0), z(0), w(0) {} - Vector(dReal x, dReal y, dReal z) : x(x), y(y), z(z), w(0) {} - Vector(dReal x, dReal y, dReal z, dReal w) : x(x), y(y), z(z), w(w) {} - Vector(const Vector &vec) : x(vec.x), y(vec.y), z(vec.z), w(vec.w) {} - Vector(const dReal* pf) { assert(pf != NULL); x = pf[0]; y = pf[1]; z = pf[2]; w = 0; } + float4() : x(0), y(0), z(0), w(0) {} + float4(dReal x, dReal y, dReal z) : x(x), y(y), z(z), w(0) {} + float4(dReal x, dReal y, dReal z, dReal w) : x(x), y(y), z(z), w(w) {} + float4(const float4 &vec) : x(vec.x), y(vec.y), z(vec.z), w(vec.w) {} + float4(const dReal* pf) { assert(pf != NULL); x = pf[0]; y = pf[1]; z = pf[2]; w = 0; } dReal operator[](int i) const { return (&x)[i]; } dReal& operator[](int i) { return (&x)[i]; } @@ -40,7 +43,7 @@ class Vector operator const dReal*() const { return (const dReal*)&x; } // SCALAR FUNCTIONS - inline dReal dot(const Vector &v) const { return x*v.x + y*v.y + z*v.z + w*v.w; } + inline dReal dot(const float4 &v) const { return x*v.x + y*v.y + z*v.z + w*v.w; } inline void Set3(const float* pvals) { x = pvals[0]; y = pvals[1]; z = pvals[2]; } inline void Set4(const float* pvals) { x = pvals[0]; y = pvals[1]; z = pvals[2]; w = pvals[3]; } inline void SetColor(u32 color) @@ -53,28 +56,28 @@ class Vector // 3 dim cross product, w is not touched /// this = this x v /// this = u x v - inline Vector operator-() const { Vector v; v.x = -x; v.y = -y; v.z = -z; v.w = -w; return v; } - inline Vector operator+(const Vector &r) const { Vector v; v.x = x + r.x; v.y = y + r.y; v.z = z + r.z; v.w = w + r.w; return v; } - inline Vector operator-(const Vector &r) const { Vector v; v.x = x - r.x; v.y = y - r.y; v.z = z - r.z; v.w = w - r.w; return v; } - inline Vector operator*(const Vector &r) const { Vector v; v.x = r.x * x; v.y = r.y * y; v.z = r.z * z; v.w = r.w * w; return v; } - inline Vector operator*(dReal k) const { Vector v; v.x = k * x; v.y = k * y; v.z = k * z; v.w = k * w; return v; } - inline Vector& operator += (const Vector& r) { x += r.x; y += r.y; z += r.z; w += r.w; return *this; } - inline Vector& operator -= (const Vector& r) { x -= r.x; y -= r.y; z -= r.z; w -= r.w; return *this; } - inline Vector& operator *= (const Vector& r) { x *= r.x; y *= r.y; z *= r.z; w *= r.w; return *this; } - inline Vector& operator *= (const dReal k) { x *= k; y *= k; z *= k; w *= k; return *this; } - inline Vector& operator /= (const dReal _k) { dReal k = 1 / _k; x *= k; y *= k; z *= k; w *= k; return *this; } - friend Vector operator*(float f, const Vector& v); - //friend ostream& operator<<(ostream& O, const Vector& v); - //friend istream& operator>>(istream& I, Vector& v); + inline float4 operator-() const { float4 v; v.x = -x; v.y = -y; v.z = -z; v.w = -w; return v; } + inline float4 operator+(const float4 &r) const { float4 v; v.x = x + r.x; v.y = y + r.y; v.z = z + r.z; v.w = w + r.w; return v; } + inline float4 operator-(const float4 &r) const { float4 v; v.x = x - r.x; v.y = y - r.y; v.z = z - r.z; v.w = w - r.w; return v; } + inline float4 operator*(const float4 &r) const { float4 v; v.x = r.x * x; v.y = r.y * y; v.z = r.z * z; v.w = r.w * w; return v; } + inline float4 operator*(dReal k) const { float4 v; v.x = k * x; v.y = k * y; v.z = k * z; v.w = k * w; return v; } + inline float4& operator += (const float4& r) { x += r.x; y += r.y; z += r.z; w += r.w; return *this; } + inline float4& operator -= (const float4& r) { x -= r.x; y -= r.y; z -= r.z; w -= r.w; return *this; } + inline float4& operator *= (const float4& r) { x *= r.x; y *= r.y; z *= r.z; w *= r.w; return *this; } + inline float4& operator *= (const dReal k) { x *= k; y *= k; z *= k; w *= k; return *this; } + inline float4& operator /= (const dReal _k) { dReal k = 1 / _k; x *= k; y *= k; z *= k; w *= k; return *this; } + friend float4 operator*(float f, const float4& v); + //friend ostream& operator<<(ostream& O, const float4& v); + //friend istream& operator>>(istream& I, float4& v); }; -inline Vector operator*(float f, const Vector& left) +inline float4 operator*(float f, const float4& left) { - Vector v; + float4 v; v.x = f * left.x; v.y = f * left.y; v.z = f * left.z; return v; -} - -#endif +} + +#endif // ZZOGLMATH_H_INCLUDED diff --git a/plugins/zzogl-pg/opengl/ZZoglShaders.cpp b/plugins/zzogl-pg/opengl/ZZoglShaders.cpp index 658d763de5..136606cbf6 100644 --- a/plugins/zzogl-pg/opengl/ZZoglShaders.cpp +++ b/plugins/zzogl-pg/opengl/ZZoglShaders.cpp @@ -1,6 +1,6 @@ /* ZZ Open GL graphics plugin - * Copyright (c)2009-2010 zeydlitz@gmail.com, arcum42@gmail.com - * Based on Zerofrog's ZeroGS KOSMOS (c)2005-2008 + * Copyright (c)2009 zeydlitz@gmail.com + * Based on Zerofrog's ZeroGS KOSMOS (c)2005-2006 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -14,18 +14,27 @@ * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +//#ifdef NVIDIA_CG_API // This code is only for NVIDIA cg-toolkit API // ZZogl Shader manipulation functions. //------------------- Includes #include "zerogs.h" #include "ZZoglShaders.h" #include "zpipe.h" + + +#ifdef _WIN32 +# include "Win32.h" +extern HINSTANCE hInst; +#endif // ----------------- Defines +using namespace ZeroGS; + #define TEXWRAP_REPEAT 0 #define TEXWRAP_CLAMP 1 #define TEXWRAP_REGION_REPEAT 2 @@ -33,7 +42,6 @@ #define SH_WRITEDEPTH 0x2000 // depth is written #define SH_CONTEXT1 0x1000 // context1 is used - #define SH_REGULARVS 0x8000 #define SH_TEXTUREVS 0x8001 #define SH_REGULARFOGVS 0x8002 @@ -58,35 +66,54 @@ #define SH_CRTC_NEARESTPS 0x8022 #define SH_CRTCINTER_NEARESTPS 0x8023 - -using namespace ZeroGS; //------------------ Constants -// ----------------- Global Variables +const static char* g_pTexTypes[] = { "32", "tex32", "clut32", "tex32to16", "tex16to8h" }; -namespace ZeroGS -{ -FRAGMENTSHADER ppsBitBlt[2], ppsBitBltDepth, ppsOne; -FRAGMENTSHADER ppsBaseTexture, ppsConvert16to32, ppsConvert32to16; -VERTEXSHADER pvsBitBlt; +// ----------------- Global Variables + +ZZshContext g_cgcontext; +ZZshProfile cgvProf, cgfProf; +int g_nPixelShaderVer = 0; // default +u8* s_lpShaderResources = NULL; +ZZshProgram pvs[16] = {NULL}; +ZZshProgram g_vsprog = 0, g_psprog = 0; // 2 -- ZZ +ZZshParameter g_vparamPosXY[2] = {0}, g_fparamFogColor = 0; + +#ifdef DEVBUILD +char* EFFECT_NAME; // All this variables used for testing and set manually +char* EFFECT_DIR; +#endif + +bool g_bCRTCBilinear = true; + +namespace ZeroGS { + float4 g_vdepth, vlogz; + FRAGMENTSHADER ppsBitBlt[2], ppsBitBltDepth, ppsOne; + FRAGMENTSHADER ppsBaseTexture, ppsConvert16to32, ppsConvert32to16; + FRAGMENTSHADER ppsRegular[4], ppsTexture[NUM_SHADERS]; + FRAGMENTSHADER ppsCRTC[2], ppsCRTC24[2], ppsCRTCTarg[2]; + VERTEXSHADER pvsBitBlt; + + inline bool LoadEffects(); } +struct SHADERHEADER +{ + unsigned int index, offset, size; // if highest bit of index is set, pixel shader +}; +map mapShaderResources; + // Debug variable, store name of the function that call the shader. const char* ShaderCallerName = ""; const char* ShaderHandleName = ""; -extern u32 ptexBlocks; // holds information on block tiling. Its texture number in OpenGL -- if 0 than such texture -extern u32 ptexConv16to32; // does not exist. This textures should be created on start and released on finish. -extern u32 ptexConv32to16; -bool g_bCRTCBilinear = true; -u8* s_lpShaderResources = NULL; -map mapShaderResources; -ZZshContext g_cgcontext; -ZZshProfile cgvProf, cgfProf; -int g_nPixelShaderVer = 0; // default - //------------------ Code +inline int GET_SHADER_INDEX(int type, int texfilter, int texwrap, int fog, int writedepth, int testaem, int exactcolor, int context, int ps) { + return type + texfilter*NUM_TYPES + NUM_FILTERS*NUM_TYPES*texwrap + NUM_TEXWRAPS*NUM_FILTERS*NUM_TYPES*(fog+2*writedepth+4*testaem+8*exactcolor+16*context+32*ps) ; +} + bool ZZshCheckProfilesSupport() { // load the effect, find the best profiles (if any) if (cgGLIsProfileSupported(CG_PROFILE_ARBVP1) != CG_TRUE) { @@ -103,10 +130,10 @@ bool ZZshCheckProfilesSupport() { // Error handler. Setup in ZZogl_Create once. void HandleCgError(ZZshContext ctx, ZZshError err, void* appdata) { - ZZLog::Error_Log("%s->%s: %s", ShaderCallerName, ShaderHandleName, cgGetErrorString(err)); + ZZLog::Error_Log("%s->%s: %s\n", ShaderCallerName, ShaderHandleName, cgGetErrorString(err)); const char* listing = cgGetLastListing(g_cgcontext); - - if (listing != NULL) ZZLog::Debug_Log(" Last listing: %s", listing); + if (listing != NULL) + ZZLog::Debug_Log(" last listing: %s\n", listing); } bool ZZshStartUsingShaders() { @@ -128,7 +155,7 @@ bool ZZshStartUsingShaders() { g_vparamPosXY[1] = cgCreateParameter(g_cgcontext, CG_FLOAT4); - ZZLog::Debug_Log("Creating effects."); + ZZLog::GS_Log("Creating effects."); B_G(LoadEffects(), return false); // create a sample shader @@ -139,11 +166,11 @@ bool ZZshStartUsingShaders() { g_nPixelShaderVer = 0;//SHADER_ACCURATE; // test bool bFailed; - FRAGMENTSHADER* pfrag = LoadShadeEffect(0, 1, 1, 1, 1, temp, 0, &bFailed); + FRAGMENTSHADER* pfrag = ZZshLoadShadeEffect(0, 1, 1, 1, 1, temp, 0, &bFailed); if( bFailed || pfrag == NULL ) { g_nPixelShaderVer = SHADER_ACCURATE|SHADER_REDUCED; - pfrag = LoadShadeEffect(0, 0, 1, 1, 0, temp, 0, &bFailed); + pfrag = ZZshLoadShadeEffect(0, 0, 1, 1, 0, temp, 0, &bFailed); if( pfrag != NULL ) cgGLLoadProgram(pfrag->prog); if( bFailed || pfrag == NULL || cgGetError() != CG_NO_ERROR ) { @@ -155,10 +182,65 @@ bool ZZshStartUsingShaders() { if (g_nPixelShaderVer & SHADER_REDUCED) conf.bilinear = 0; - ZZLog::Debug_Log("Creating extra effects."); - B_G(LoadExtraEffects(), return false); + ZZLog::GS_Log("Creating extra effects."); + B_G(ZZshLoadExtraEffects(), return false); - ZZLog::Debug_Log("using %s shaders.", g_pShaders[g_nPixelShaderVer]); + ZZLog::GS_Log("using %s shaders\n", g_pShaders[g_nPixelShaderVer]); + return true; +} + +// open shader file according to build target +bool ZZshCreateOpenShadersFile() { +#ifndef DEVBUILD +# ifdef _WIN32 + HRSRC hShaderSrc = FindResource(hInst, MAKEINTRESOURCE(IDR_SHADERS), RT_RCDATA); + assert( hShaderSrc != NULL ); + HGLOBAL hShaderGlob = LoadResource(hInst, hShaderSrc); + assert( hShaderGlob != NULL ); + s_lpShaderResources = (u8*)LockResource(hShaderGlob); +# else // not _WIN32 + FILE* fres = fopen("ps2hw.dat", "rb"); + if( fres == NULL ) { + fres = fopen("plugins/ps2hw.dat", "rb"); + if( fres == NULL ) { + ZZLog::Error_Log("Cannot find ps2hw.dat in working directory. Exiting."); + return false; + } + } + fseek(fres, 0, SEEK_END); + size_t s = ftell(fres); + s_lpShaderResources = new u8[s+1]; + fseek(fres, 0, SEEK_SET); + fread(s_lpShaderResources, s, 1, fres); + s_lpShaderResources[s] = 0; +# endif // _WIN32 +#else // NOT RELEASE_TO_PUBLIC +# ifndef _WIN32 // NOT WINDOWS + // test if ps2hw.fx exists + char tempstr[255]; + char curwd[255]; + getcwd(curwd, ARRAY_SIZE(curwd)); + + strcpy(tempstr, "/plugins/"); + sprintf(EFFECT_NAME, "%sps2hw.fx", tempstr); + FILE* f = fopen(EFFECT_NAME, "r"); + if( f == NULL ) { + + strcpy(tempstr, "../../plugins/zzogl-pg/opengl/"); + sprintf(EFFECT_NAME, "%sps2hw.fx", tempstr); + f = fopen(EFFECT_NAME, "r"); + + if( f == NULL ) { + ZZLog::Error_Log("Failed to find %s, try compiling a non-devbuild\n", EFFECT_NAME); + return false; + } + } + fclose(f); + + sprintf(EFFECT_DIR, "%s/%s", curwd, tempstr); + sprintf(EFFECT_NAME, "%sps2hw.fx", EFFECT_DIR); + #endif +#endif // RELEASE_TO_PUBLIC return true; } @@ -173,37 +255,61 @@ void ZZshGLEnableProfile() { cgGLEnableProfile(cgfProf); } -// This is a helper of cgGLSetParameter4fv, made for debugging purposes. -// The name could be any string. We must use it on compilation time, because the erronious handler does not -// return it. -void ZZshSetParameter4fv(ZZshParameter param, const float* v, const char* name) -{ +// This is helper of cgGLSetParameter4fv, made for debug purpose. +// Name could be any string. We must use it on compilation time, because erroneus handler does not +// return name +void ZZshSetParameter4fv(ZZshParameter param, const float* v, const char* name) { ShaderHandleName = name; cgGLSetParameter4fv(param, v); } - -// The same function for texture, also to cgGLEnable + +void ZZshSetParameter4fv(ZZshProgram prog, ZZshParameter param, const float* v, const char* name) { + ShaderHandleName = name; + cgGLSetParameter4fv(param, v); +} + +// The same stuff, but also with retry of param, name should be USED name of param for prog. +void ZZshSetParameter4fvWithRetry(ZZshParameter* param, ZZshProgram prog, const float* v, const char* name) { + if (param != NULL) + ZZshSetParameter4fv(prog, param[0], v, name); + else + ZZshSetParameter4fv(prog, cgGetNamedParameter(prog, name), v, name); +} + void ZZshGLSetTextureParameter(ZZshParameter param, GLuint texobj, const char* name) { ShaderHandleName = name; cgGLSetTextureParameter(param, texobj); cgGLEnableTextureParameter(param); } +// The same function for texture, also to cgGLEnable +void ZZshGLSetTextureParameter(ZZshProgram prog, ZZshParameter param, GLuint texobj, const char* name) { + ShaderHandleName = name; + cgGLSetTextureParameter(param, texobj); + cgGLEnableTextureParameter(param); +} + // Used sometimes for color 1. void ZZshDefaultOneColor( FRAGMENTSHADER ptr ) { ShaderHandleName = "Set Default One color"; - Vector v = Vector ( 1, 1, 1, 1 ); - ZZshSetParameter4fv( ptr.sOneColor, v, "DefaultOne"); + float4 v = float4 ( 1, 1, 1, 1 ); + ZZshSetParameter4fv( ptr.prog, ptr.sOneColor, v, "DefaultOne"); } -void ZZshSetVertexShader(ZZshShader prog) { +#define SET_UNIFORMPARAM(var, name) { \ + p = cgGetNamedParameter(pf->prog, name); \ + if( p != NULL && cgIsParameterUsed(p, pf->prog) == CG_TRUE ) \ + pf->var = p; \ +} \ + +void ZZshSetVertexShader(ZZshProgram prog) { if ((prog) != g_vsprog) { cgGLBindProgram(prog); g_vsprog = prog; } } -void ZZshSetPixelShader(ZZshShader prog) { +void ZZshSetPixelShader(ZZshProgram prog) { if ((prog) != g_psprog) { cgGLBindProgram(prog); g_psprog = prog; @@ -213,125 +319,158 @@ void ZZshSetPixelShader(ZZshShader prog) { void SetupFragmentProgramParameters(FRAGMENTSHADER* pf, int context, int type) { // uniform parameters - pf->connect(g_fparamFogColor, "g_fFogColor"); + ZZshParameter p; - pf->set_uniform_param(pf->sOneColor, "g_fOneColor"); - pf->set_uniform_param(pf->sBitBltZ, "g_fBitBltZ"); - pf->set_uniform_param(pf->sInvTexDims, "g_fInvTexDims"); - pf->set_uniform_param(pf->fTexAlpha2, "fTexAlpha2"); - pf->set_uniform_param(pf->fTexOffset, "g_fTexOffset"); - pf->set_uniform_param(pf->fTexDims, "g_fTexDims"); - pf->set_uniform_param(pf->fTexBlock, "g_fTexBlock"); - pf->set_uniform_param(pf->fClampExts, "g_fClampExts"); - pf->set_uniform_param(pf->fTexWrapMode, "TexWrapMode"); - pf->set_uniform_param(pf->fRealTexDims, "g_fRealTexDims"); - pf->set_uniform_param(pf->fTestBlack, "g_fTestBlack"); - pf->set_uniform_param(pf->fPageOffset, "g_fPageOffset"); - pf->set_uniform_param(pf->fTexAlpha, "fTexAlpha"); - - // textures - pf->set_texture(ptexBlocks, "g_sBlocks"); - - // cg parameter usage is wrong, so do it manually - - switch (type) - { - case 3: - pf->set_texture(ptexConv16to32, "g_sConv16to32"); - break; - - case 4: - pf->set_texture(ptexConv32to16, "g_sConv32to16"); - break; - - default: - pf->set_texture(ptexBilinearBlocks, "g_sBilinearBlocks"); - break; + p = cgGetNamedParameter(pf->prog, "g_fFogColor"); + if( p != NULL && cgIsParameterUsed(p, pf->prog) == CG_TRUE ) { + cgConnectParameter(g_fparamFogColor, p); } - pf->set_texture(pf->sMemory, "g_sMemory"); + SET_UNIFORMPARAM(sOneColor, "g_fOneColor"); + SET_UNIFORMPARAM(sBitBltZ, "g_fBitBltZ"); + SET_UNIFORMPARAM(sInvTexDims, "g_fInvTexDims"); + SET_UNIFORMPARAM(fTexAlpha2, "fTexAlpha2"); + SET_UNIFORMPARAM(fTexOffset, "g_fTexOffset"); + SET_UNIFORMPARAM(fTexDims, "g_fTexDims"); + SET_UNIFORMPARAM(fTexBlock, "g_fTexBlock"); + SET_UNIFORMPARAM(fClampExts, "g_fClampExts"); + SET_UNIFORMPARAM(fTexWrapMode, "TexWrapMode"); + SET_UNIFORMPARAM(fRealTexDims, "g_fRealTexDims"); + SET_UNIFORMPARAM(fTestBlack, "g_fTestBlack"); + SET_UNIFORMPARAM(fPageOffset, "g_fPageOffset"); + SET_UNIFORMPARAM(fTexAlpha, "fTexAlpha"); - pf->set_texture(pf->sFinal, "g_sSrcFinal"); - pf->set_texture(pf->sBitwiseANDX, "g_sBitwiseANDX"); - pf->set_texture(pf->sBitwiseANDY, "g_sBitwiseANDY"); - pf->set_texture(pf->sCLUT, "g_sCLUT"); - pf->set_texture(pf->sInterlace, "g_sInterlace"); + // textures + p = cgGetNamedParameter(pf->prog, "g_sBlocks"); + if( p != NULL && cgIsParameterUsed(p, pf->prog) == CG_TRUE ) { + cgGLSetTextureParameter(p, ptexBlocks); + cgGLEnableTextureParameter(p); + } + + // cg parameter usage is wrong, so do it manually + if( type == 3 ) { + p = cgGetNamedParameter(pf->prog, "g_sConv16to32"); + if( p != NULL && cgIsParameterUsed(p, pf->prog) == CG_TRUE ) { + cgGLSetTextureParameter(p, ptexConv16to32); + cgGLEnableTextureParameter(p); + } + } + else if( type == 4 ) { + p = cgGetNamedParameter(pf->prog, "g_sConv32to16"); + if( p != NULL && cgIsParameterUsed(p, pf->prog) == CG_TRUE ) { + cgGLSetTextureParameter(p, ptexConv32to16); + cgGLEnableTextureParameter(p); + } + } + else { + p = cgGetNamedParameter(pf->prog, "g_sBilinearBlocks"); + if( p != NULL && cgIsParameterUsed(p, pf->prog) == CG_TRUE ) { + cgGLSetTextureParameter(p, ptexBilinearBlocks); + cgGLEnableTextureParameter(p); + } + } + + p = cgGetNamedParameter(pf->prog, "g_sMemory"); + if( p != NULL && cgIsParameterUsed(p, pf->prog) == CG_TRUE ) { + //cgGLEnableTextureParameter(p); + pf->sMemory = p; + } + p = cgGetNamedParameter(pf->prog, "g_sSrcFinal"); + if( p != NULL && cgIsParameterUsed(p, pf->prog) == CG_TRUE ) { + //cgGLEnableTextureParameter(p); + pf->sFinal = p; + } + p = cgGetNamedParameter(pf->prog, "g_sBitwiseANDX"); + if( p != NULL && cgIsParameterUsed(p, pf->prog) == CG_TRUE ) { + //cgGLEnableTextureParameter(p); + pf->sBitwiseANDX = p; + } + p = cgGetNamedParameter(pf->prog, "g_sBitwiseANDY"); + if( p != NULL && cgIsParameterUsed(p, pf->prog) == CG_TRUE ) { + //cgGLEnableTextureParameter(p); + pf->sBitwiseANDY = p; + } + p = cgGetNamedParameter(pf->prog, "g_sCLUT"); + if( p != NULL && cgIsParameterUsed(p, pf->prog) == CG_TRUE ) { + //cgGLEnableTextureParameter(p); + pf->sCLUT = p; + } + p = cgGetNamedParameter(pf->prog, "g_sInterlace"); + if( p != NULL && cgIsParameterUsed(p, pf->prog) == CG_TRUE ) { + //cgGLEnableTextureParameter(p); + pf->sInterlace = p; + } // set global shader constants - pf->set_shader_const(Vector(0.5f, (conf.settings().exact_color) ? 0.9f / 256.0f : 0.5f / 256.0f, 0, 1 / 255.0f), "g_fExactColor"); - pf->set_shader_const(Vector(-0.2f, -0.65f, 0.9f, 1.0f / 32767.0f), "g_fBilinear"); - pf->set_shader_const(Vector(1.0f / 256.0f, 1.0004f, 1, 0.5f), "g_fZBias"); - pf->set_shader_const(Vector(0, 1, 0.001f, 0.5f), "g_fc0"); - pf->set_shader_const(Vector(1 / 1024.0f, 0.2f / 1024.0f, 1 / 128.0f, 1 / 512.0f), "g_fMult"); -} + p = cgGetNamedParameter(pf->prog, "g_fExactColor"); + if( p != NULL && cgIsParameterUsed(p, pf->prog) == CG_TRUE ) { + cgGLSetParameter4fv(p, float4(0.5f, (conf.settings().exact_color)?0.9f/256.0f:0.5f/256.0f, 0,1/255.0f)); + } -static bool outdated_shaders = false; + p = cgGetNamedParameter(pf->prog, "g_fBilinear"); + if( p != NULL && cgIsParameterUsed(p, pf->prog) == CG_TRUE ) + cgGLSetParameter4fv(p, float4(-0.2f, -0.65f, 0.9f, 1.0f / 32767.0f )); + + p = cgGetNamedParameter(pf->prog, "g_fZBias"); + if( p != NULL && cgIsParameterUsed(p, pf->prog) == CG_TRUE ) + cgGLSetParameter4fv(p, float4(1.0f/256.0f, 1.0004f, 1, 0.5f)); + + p = cgGetNamedParameter(pf->prog, "g_fc0"); + if( p != NULL && cgIsParameterUsed(p, pf->prog) == CG_TRUE ) + cgGLSetParameter4fv(p, float4(0,1, 0.001f, 0.5f)); + + p = cgGetNamedParameter(pf->prog, "g_fMult"); + if( p != NULL && cgIsParameterUsed(p, pf->prog) == CG_TRUE ) + cgGLSetParameter4fv(p, float4(1/1024.0f, 0.2f/1024.0f, 1/128.0f, 1/512.0f)); +} void SetupVertexProgramParameters(ZZshProgram prog, int context) { ZZshParameter p; p = cgGetNamedParameter(prog, "g_fPosXY"); - - if (p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE) + if( p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE ) cgConnectParameter(g_vparamPosXY[context], p); // Set Z-test, log or no log; - if (conf.settings().no_logz) - { - g_vdepth = Vector(255.0 / 256.0f, 255.0 / 65536.0f, 255.0f / (65535.0f * 256.0f), 1.0f / (65536.0f * 65536.0f)); - vlogz = Vector(1.0f, 0.0f, 0.0f, 0.0f); + if (conf.settings().no_logz) { + g_vdepth = float4( 255.0 /256.0f, 255.0/65536.0f, 255.0f/(65535.0f*256.0f), 1.0f/(65536.0f*65536.0f)); + vlogz = float4( 1.0f, 0.0f, 0.0f, 0.0f); } - else - { - g_vdepth = Vector(256.0f * 65536.0f, 65536.0f, 256.0f, 65536.0f * 65536.0f); - vlogz = Vector(0.0f, 1.0f, 0.0f, 0.0f); + else { + g_vdepth = float4( 256.0f*65536.0f, 65536.0f, 256.0f, 65536.0f*65536.0f); + vlogz = float4( 0.0f, 1.0f, 0.0f, 0.0f); } p = cgGetNamedParameter(prog, "g_fZ"); - - if (p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE) - { + if( p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE ) { cgGLSetParameter4fv(p, g_vdepth); p = cgGetNamedParameter(prog, "g_fZMin"); // Switch to flat-z when needed - - if (p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE) - { - //ZZLog::Error_Log("Use flat-z"); - cgGLSetParameter4fv(p, vlogz); + if( p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE ) { + //ZZLog::Error_Log("Use flat-z\n"); + cgGLSetParameter4fv(p, vlogz); } else - { - if (!outdated_shaders) - { - outdated_shaders = true; - ZZLog::Error_Log("Shader file version is outdated! Only log-Z is possible."); - } - } + ZZLog::Error_Log("Shader file version is outdated! Only log-Z is possible."); } - Vector vnorm = Vector(g_filog32, 0, 0, 0); - + float4 vnorm = float4(g_filog32, 0, 0,0); p = cgGetNamedParameter(prog, "g_fZNorm"); - - if (p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE) + if( p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE ) cgGLSetParameter4fv(p, vnorm); p = cgGetNamedParameter(prog, "g_fBilinear"); - - if (p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE) - cgGLSetParameter4fv(p, Vector(-0.2f, -0.65f, 0.9f, 1.0f / 32767.0f)); + if( p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE ) + cgGLSetParameter4fv(p, float4(-0.2f, -0.65f, 0.9f, 1.0f / 32767.0f )); p = cgGetNamedParameter(prog, "g_fZBias"); - - if (p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE) - cgGLSetParameter4fv(p, Vector(1.0f / 256.0f, 1.0004f, 1, 0.5f)); + if( p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE ) + cgGLSetParameter4fv(p, float4(1.0f/256.0f, 1.0004f, 1, 0.5f)); p = cgGetNamedParameter(prog, "g_fc0"); - - if (p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE) - cgGLSetParameter4fv(p, Vector(0, 1, 0.001f, 0.5f)); + if( p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE ) + cgGLSetParameter4fv(p, float4(0,1, 0.001f, 0.5f)); } #ifndef DEVBUILD @@ -342,21 +481,21 @@ void SetupVertexProgramParameters(ZZshProgram prog, int context) assert( (header) != NULL && (header)->index == (Index) ); \ prog = cgCreateProgram(g_cgcontext, CG_OBJECT, (char*)(s_lpShaderResources + (header)->offset), cgvProf, NULL, NULL); \ if( !cgIsProgram(prog) ) { \ - ZZLog::Error_Log("Failed to load vs %d: \n%s.", Index, cgGetLastListing(g_cgcontext)); \ + ZZLog::Error_Log("Failed to load vs %d: \n%s", Index, cgGetLastListing(g_cgcontext)); \ return false; \ } \ cgGLLoadProgram(prog); \ - if( cgGetError() != CG_NO_ERROR ) ZZLog::Error_Log("failed to load program %d.", Index); \ + if( cgGetError() != CG_NO_ERROR ) ZZLog::Error_Log("Failed to load program %d.", Index); \ SetupVertexProgramParameters(prog, !!(Index&SH_CONTEXT1)); \ } \ - + #define LOAD_PS(Index, fragment) { \ bLoadSuccess = true; \ assert( mapShaderResources.find(Index) != mapShaderResources.end() ); \ header = mapShaderResources[Index]; \ fragment.prog = cgCreateProgram(g_cgcontext, CG_OBJECT, (char*)(s_lpShaderResources + (header)->offset), cgfProf, NULL, NULL); \ if( !cgIsProgram(fragment.prog) ) { \ - ZZLog::Error_Log("Failed to load ps %d: \n%s.", Index, cgGetLastListing(g_cgcontext)); \ + ZZLog::Error_Log("Failed to load ps %d: \n%s", Index, cgGetLastListing(g_cgcontext)); \ return false; \ } \ cgGLLoadProgram(fragment.prog); \ @@ -366,70 +505,63 @@ void SetupVertexProgramParameters(ZZshProgram prog, int context) } \ SetupFragmentProgramParameters(&fragment, !!(Index&SH_CONTEXT1), 0); \ } \ - -bool ZeroGS::LoadEffects() + +inline bool ZeroGS::LoadEffects() { - assert(s_lpShaderResources != NULL); + assert( s_lpShaderResources != NULL ); // process the header u32 num = *(u32*)s_lpShaderResources; - int compressed_size = *(int*)(s_lpShaderResources + 4); - int real_size = *(int*)(s_lpShaderResources + 8); + int compressed_size = *(int*)(s_lpShaderResources+4); + int real_size = *(int*)(s_lpShaderResources+8); int out; char* pbuffer = (char*)malloc(real_size); - inf((char*)s_lpShaderResources + 12, &pbuffer[0], compressed_size, real_size, &out); + inf((char*)s_lpShaderResources+12, &pbuffer[0], compressed_size, real_size, &out); assert(out == real_size); s_lpShaderResources = (u8*)pbuffer; SHADERHEADER* header = (SHADERHEADER*)s_lpShaderResources; mapShaderResources.clear(); - - while (num-- > 0) - { + while(num-- > 0 ) { mapShaderResources[header->index] = header; ++header; } // clear the textures - for (int i = 0; i < ARRAY_SIZE(ppsTexture); ++i) - { + for(u16 i = 0; i < ARRAY_SIZE(ppsTexture); ++i) { SAFE_RELEASE_PROG(ppsTexture[i].prog); ppsTexture[i].prog = NULL; } - #ifndef _DEBUG memset(ppsTexture, 0, sizeof(ppsTexture)); - #endif return true; } // called -bool ZeroGS::LoadExtraEffects() +bool ZZshLoadExtraEffects() { SHADERHEADER* header; bool bLoadSuccess = true; const int vsshaders[4] = { SH_REGULARVS, SH_TEXTUREVS, SH_REGULARFOGVS, SH_TEXTUREFOGVS }; - for (int i = 0; i < 4; ++i) - { + for(int i = 0; i < 4; ++i) { LOAD_VS(vsshaders[i], pvs[2*i]); LOAD_VS((vsshaders[i] | SH_CONTEXT1), pvs[2*i+1]); //if( conf.mrtdepth ) { - LOAD_VS((vsshaders[i] | SH_WRITEDEPTH), pvs[2*i+8]); - LOAD_VS((vsshaders[i] | SH_WRITEDEPTH | SH_CONTEXT1), pvs[2*i+8+1]); + LOAD_VS((vsshaders[i] | SH_WRITEDEPTH), pvs[2*i+8]); + LOAD_VS((vsshaders[i] | SH_WRITEDEPTH | SH_CONTEXT1), pvs[2*i+8+1]); // } // else { // pvs[2*i+8] = pvs[2*i+8+1] = NULL; // } } - + LOAD_VS(SH_BITBLTVS, pvsBitBlt.prog); - pvsBitBlt.sBitBltPos = cgGetNamedParameter(pvsBitBlt.prog, "g_fBitBltPos"); pvsBitBlt.sBitBltTex = cgGetNamedParameter(pvsBitBlt.prog, "g_fBitBltTex"); pvsBitBlt.fBitBltTrans = cgGetNamedParameter(pvsBitBlt.prog, "g_fBitBltTrans"); @@ -437,52 +569,40 @@ bool ZeroGS::LoadExtraEffects() LOAD_PS(SH_REGULARPS, ppsRegular[0]); LOAD_PS(SH_REGULARFOGPS, ppsRegular[1]); - if (conf.mrtdepth) - { + if( conf.mrtdepth ) { LOAD_PS(SH_REGULARPS, ppsRegular[2]); - - if (!bLoadSuccess) + if( !bLoadSuccess ) conf.mrtdepth = 0; - LOAD_PS(SH_REGULARFOGPS, ppsRegular[3]); - - if (!bLoadSuccess) + if( !bLoadSuccess ) conf.mrtdepth = 0; } LOAD_PS(SH_BITBLTPS, ppsBitBlt[0]); - LOAD_PS(SH_BITBLTAAPS, ppsBitBlt[1]); - - if (!bLoadSuccess) - { + if( !bLoadSuccess ) { ZZLog::Error_Log("Failed to load BitBltAAPS, using BitBltPS."); LOAD_PS(SH_BITBLTPS, ppsBitBlt[1]); } - LOAD_PS(SH_BITBLTDEPTHPS, ppsBitBltDepth); - LOAD_PS(SH_CRTCTARGPS, ppsCRTCTarg[0]); LOAD_PS(SH_CRTCTARGINTERPS, ppsCRTCTarg[1]); - + g_bCRTCBilinear = true; LOAD_PS(SH_CRTCPS, ppsCRTC[0]); - - if (!bLoadSuccess) - { + if( !bLoadSuccess ) { // switch to simpler g_bCRTCBilinear = false; LOAD_PS(SH_CRTC_NEARESTPS, ppsCRTC[0]); LOAD_PS(SH_CRTCINTER_NEARESTPS, ppsCRTC[0]); } - else - { + else { LOAD_PS(SH_CRTCINTERPS, ppsCRTC[1]); } - if (!bLoadSuccess) + if( !bLoadSuccess ) ZZLog::Error_Log("Failed to create CRTC shaders."); - + LOAD_PS(SH_CRTC24PS, ppsCRTC24[0]); LOAD_PS(SH_CRTC24INTERPS, ppsCRTC24[1]); LOAD_PS(SH_ZEROPS, ppsOne); @@ -493,105 +613,82 @@ bool ZeroGS::LoadExtraEffects() return true; } -FRAGMENTSHADER* ZeroGS::LoadShadeEffect(int type, int texfilter, int fog, int testaem, int exactcolor, const clampInfo& clamp, int context, bool* pbFailed) +FRAGMENTSHADER* ZZshLoadShadeEffect(int type, int texfilter, int fog, int testaem, int exactcolor, const clampInfo& clamp, int context, bool* pbFailed) { int texwrap; - assert(texfilter < NUM_FILTERS); + assert( texfilter < NUM_FILTERS ); - if (g_nPixelShaderVer & SHADER_REDUCED) texfilter = 0; + if(g_nPixelShaderVer&SHADER_REDUCED) + texfilter = 0; + assert(!(g_nPixelShaderVer&SHADER_REDUCED) || !exactcolor); - assert(!(g_nPixelShaderVer & SHADER_REDUCED) || !exactcolor); - - if (clamp.wms == clamp.wmt) - { - switch (clamp.wms) - { - case 0: - texwrap = TEXWRAP_REPEAT; - break; - - case 1: - texwrap = TEXWRAP_CLAMP; - break; - - case 2: - texwrap = TEXWRAP_CLAMP; - break; - - default: - texwrap = TEXWRAP_REGION_REPEAT; - break; + if( clamp.wms == clamp.wmt ) { + switch( clamp.wms ) { + case 0: texwrap = TEXWRAP_REPEAT; break; + case 1: texwrap = TEXWRAP_CLAMP; break; + case 2: texwrap = TEXWRAP_CLAMP; break; + default: texwrap = TEXWRAP_REGION_REPEAT; break; } } - else if (clamp.wms == 3 || clamp.wmt == 3) + else if( clamp.wms==3||clamp.wmt==3) texwrap = TEXWRAP_REGION_REPEAT; else texwrap = TEXWRAP_REPEAT_CLAMP; int index = GET_SHADER_INDEX(type, texfilter, texwrap, fog, s_bWriteDepth, testaem, exactcolor, context, 0); + + assert( index < ARRAY_SIZE(ppsTexture) ); + FRAGMENTSHADER* pf = ppsTexture+index; + + if( pbFailed != NULL ) *pbFailed = false; - assert(index < ARRAY_SIZE(ppsTexture)); + if( pf->prog != NULL ) + return pf; - FRAGMENTSHADER* pf = ppsTexture + index; - - if (pbFailed != NULL) *pbFailed = false; - - if (pf->prog != NULL) return pf; - - if ((g_nPixelShaderVer & SHADER_ACCURATE) && mapShaderResources.find(index + NUM_SHADERS*SHADER_ACCURATE) != mapShaderResources.end()) - index += NUM_SHADERS * SHADER_ACCURATE; - - assert(mapShaderResources.find(index) != mapShaderResources.end()); + if( (g_nPixelShaderVer & SHADER_ACCURATE) && mapShaderResources.find(index+NUM_SHADERS*SHADER_ACCURATE) != mapShaderResources.end() ) + index += NUM_SHADERS*SHADER_ACCURATE; + assert( mapShaderResources.find(index) != mapShaderResources.end() ); SHADERHEADER* header = mapShaderResources[index]; + if( header == NULL ) + ZZLog::Error_Log("%d %d", index, g_nPixelShaderVer); + assert( header != NULL ); - if (header == NULL) ZZLog::Error_Log("%d %d", index, g_nPixelShaderVer); - - assert(header != NULL); - - //ZZLog::Debug_Log("Shader:\n%s.", (char*)(s_lpShaderResources + (header)->offset)); + //DEBUG_LOG("shader:\n%s\n", (char*)(s_lpShaderResources + (header)->offset)); pf->prog = cgCreateProgram(g_cgcontext, CG_OBJECT, (char*)(s_lpShaderResources + (header)->offset), cgfProf, NULL, NULL); - - if (pf->prog != NULL && cgIsProgram(pf->prog) && cgGetError() == CG_NO_ERROR) - { + if( pf->prog != NULL && cgIsProgram(pf->prog) && cgGetError() == CG_NO_ERROR ) { SetupFragmentProgramParameters(pf, context, type); cgGLLoadProgram(pf->prog); - - if (cgGetError() != CG_NO_ERROR) - { + if( cgGetError() != CG_NO_ERROR ) { // cgGLLoadProgram(pf->prog); // if( cgGetError() != CG_NO_ERROR ) { - ZZLog::Error_Log("Failed to load shader %d,%d,%d,%d.", type, fog, texfilter, 4*clamp.wms + clamp.wmt); - - if (pbFailed != NULL) *pbFailed = true; - - return pf; - + ZZLog::Error_Log("Failed to load shader %d,%d,%d,%d.", type, fog, texfilter, 4*clamp.wms+clamp.wmt); + if( pbFailed != NULL ) *pbFailed = true; + return pf; // } } return pf; } - ZZLog::Error_Log("Failed to create shader %d,%d,%d,%d", type, fog, texfilter, 4*clamp.wms + clamp.wmt); - - if (pbFailed != NULL) *pbFailed = true; + ZZLog::Error_Log("Failed to create shader %d,%d,%d,%d", type, fog, texfilter, 4*clamp.wms+clamp.wmt); + if( pbFailed != NULL ) *pbFailed = true; return NULL; } - -#else // defined(ZEROGS_DEVBUILD) + +#else // not RELEASE_TO_PUBLIC #define LOAD_VS(name, prog, shaderver) { \ prog = cgCreateProgramFromFile(g_cgcontext, CG_SOURCE, EFFECT_NAME, shaderver, name, args); \ if( !cgIsProgram(prog) ) { \ - ZZLog::Error_Log("Failed to load vs %s: \n%s.", name, cgGetLastListing(g_cgcontext)); \ + ZZLog::Error_Log("Failed to load vs %s: \n%s", name, cgGetLastListing(g_cgcontext)); \ return false; \ } \ cgGLLoadProgram(prog); \ - if( cgGetError() != CG_NO_ERROR ) ZZLog::Error_Log("failed to load program %s.", name); \ + if( cgGetError() != CG_NO_ERROR ) ZZLog::Error_Log("failed to load program %s", name); \ SetupVertexProgramParameters(prog, args[0]==context1); \ } \ - + #ifdef _DEBUG #define SET_PSFILENAME(frag, name) frag.filename = name #else @@ -602,35 +699,33 @@ FRAGMENTSHADER* ZeroGS::LoadShadeEffect(int type, int texfilter, int fog, int te bLoadSuccess = true; \ fragment.prog = cgCreateProgramFromFile(g_cgcontext, CG_SOURCE, EFFECT_NAME, shaderver, name, args); \ if( !cgIsProgram(fragment.prog) ) { \ - ZZLog::Error_Log("Failed to load ps %s: \n%s.", name, cgGetLastListing(g_cgcontext)); \ + ZZLog::Error_Log("Failed to load ps %s: \n%s", name, cgGetLastListing(g_cgcontext)); \ return false; \ } \ cgGLLoadProgram(fragment.prog); \ if( cgGetError() != CG_NO_ERROR ) { \ - ZZLog::Error_Log("failed to load program %s.", name); \ + ZZLog::Error_Log("failed to load program %s", name); \ bLoadSuccess = false; \ } \ SetupFragmentProgramParameters(&fragment, args[0]==context1, 0); \ SET_PSFILENAME(fragment, name); \ } \ - -bool ZeroGS::LoadEffects() + +inline bool ZeroGS::LoadEffects() { // clear the textures - for (int i = 0; i < ARRAY_SIZE(ppsTexture); ++i) - { + for(int i = 0; i < ARRAY_SIZE(ppsTexture); ++i) { SAFE_RELEASE_PROG(ppsTexture[i].prog); } #ifndef _DEBUG memset(ppsTexture, 0, sizeof(ppsTexture)); - #endif return true; } -bool ZeroGS::LoadExtraEffects() +bool ZZshLoadExtraEffects() { const char* args[] = { NULL , NULL, NULL, NULL }; char context0[255], context1[255]; @@ -641,8 +736,7 @@ bool ZeroGS::LoadExtraEffects() const char* pvsshaders[4] = { "RegularVS", "TextureVS", "RegularFogVS", "TextureFogVS" }; - for (int i = 0; i < 4; ++i) - { + for(int i = 0; i < 4; ++i) { args[0] = context0; args[1] = NULL; LOAD_VS(pvsshaders[i], pvs[2*i], cgvProf); @@ -650,11 +744,11 @@ bool ZeroGS::LoadExtraEffects() LOAD_VS(pvsshaders[i], pvs[2*i+1], cgvProf); //if( conf.mrtdepth ) { - args[0] = context0; - args[1] = write_depth; - LOAD_VS(pvsshaders[i], pvs[2*i+8], cgvProf); - args[0] = context1; - LOAD_VS(pvsshaders[i], pvs[2*i+8+1], cgvProf); + args[0] = context0; + args[1] = write_depth; + LOAD_VS(pvsshaders[i], pvs[2*i+8], cgvProf); + args[0] = context1; + LOAD_VS(pvsshaders[i], pvs[2*i+8+1], cgvProf); // } // else { // pvs[2*i+8] = pvs[2*i+8+1] = NULL; @@ -662,7 +756,6 @@ bool ZeroGS::LoadExtraEffects() } args[0] = context0; - args[1] = NULL; LOAD_VS("BitBltVS", pvsBitBlt.prog, cgvProf); pvsBitBlt.sBitBltPos = cgGetNamedParameter(pvsBitBlt.prog, "g_fBitBltPos"); @@ -672,142 +765,116 @@ bool ZeroGS::LoadExtraEffects() LOAD_PS("RegularPS", ppsRegular[0], cgfProf); LOAD_PS("RegularFogPS", ppsRegular[1], cgfProf); - if (conf.mrtdepth) - { + if( conf.mrtdepth ) { args[0] = context0; args[1] = write_depth; LOAD_PS("RegularPS", ppsRegular[2], cgfProf); - - if (!bLoadSuccess) conf.mrtdepth = 0; - + if( !bLoadSuccess ) + conf.mrtdepth = 0; LOAD_PS("RegularFogPS", ppsRegular[3], cgfProf); - - if (!bLoadSuccess) conf.mrtdepth = 0; + if( !bLoadSuccess ) + conf.mrtdepth = 0; } LOAD_PS("BitBltPS", ppsBitBlt[0], cgfProf); LOAD_PS("BitBltAAPS", ppsBitBlt[1], cgfProf); - - if (!bLoadSuccess) - { + if( !bLoadSuccess ) { ZZLog::Error_Log("Failed to load BitBltAAPS, using BitBltPS."); LOAD_PS("BitBltPS", ppsBitBlt[1], cgfProf); } LOAD_PS("BitBltDepthPS", ppsBitBltDepth, cgfProf); - LOAD_PS("CRTCTargPS", ppsCRTCTarg[0], cgfProf); + LOAD_PS("CRTCTargPS", ppsCRTCTarg[0], cgfProf); LOAD_PS("CRTCTargInterPS", ppsCRTCTarg[1], cgfProf); - + g_bCRTCBilinear = true; LOAD_PS("CRTCPS", ppsCRTC[0], cgfProf); - - if (!bLoadSuccess) - { + if( !bLoadSuccess ) { // switch to simpler g_bCRTCBilinear = false; LOAD_PS("CRTCPS_Nearest", ppsCRTC[0], cgfProf); LOAD_PS("CRTCInterPS_Nearest", ppsCRTC[0], cgfProf); } - else - { + else { LOAD_PS("CRTCInterPS", ppsCRTC[1], cgfProf); } - if (!bLoadSuccess) ZZLog::Error_Log("Failed to create CRTC shaders."); - - LOAD_PS("CRTC24PS", ppsCRTC24[0], cgfProf); - LOAD_PS("CRTC24InterPS", ppsCRTC24[1], cgfProf); + if( !bLoadSuccess ) + ZZLog::Error_Log("Failed to create CRTC shaders."); + + LOAD_PS("CRTC24PS", ppsCRTC24[0], cgfProf); LOAD_PS("CRTC24InterPS", ppsCRTC24[1], cgfProf); LOAD_PS("ZeroPS", ppsOne, cgfProf); LOAD_PS("BaseTexturePS", ppsBaseTexture, cgfProf); LOAD_PS("Convert16to32PS", ppsConvert16to32, cgfProf); LOAD_PS("Convert32to16PS", ppsConvert32to16, cgfProf); // if( !conf.mrtdepth ) { -// ZZLog::Error_Log("Disabling MRT depth writing."); -// s_bWriteDepth = false; +// ZZLog::Error_Log("Disabling MRT depth writing,"); +// s_bWriteDepth = FALSE; // } return true; } -FRAGMENTSHADER* ZeroGS::LoadShadeEffect(int type, int texfilter, int fog, int testaem, int exactcolor, const clampInfo& clamp, int context, bool* pbFailed) +FRAGMENTSHADER* ZZshLoadShadeEffect(int type, int texfilter, int fog, int testaem, int exactcolor, const clampInfo& clamp, int context, bool* pbFailed) { int texwrap; - - assert(texfilter < NUM_FILTERS); + + assert( texfilter < NUM_FILTERS ); //assert( g_nPixelShaderVer == SHADER_30 ); - - if (clamp.wms == clamp.wmt) - { - switch (clamp.wms) - { - case 0: - texwrap = TEXWRAP_REPEAT; - break; - - case 1: - texwrap = TEXWRAP_CLAMP; - break; - - case 2: - texwrap = TEXWRAP_CLAMP; - break; - + if( clamp.wms == clamp.wmt ) { + switch( clamp.wms ) { + case 0: texwrap = TEXWRAP_REPEAT; break; + case 1: texwrap = TEXWRAP_CLAMP; break; + case 2: texwrap = TEXWRAP_CLAMP; break; default: - texwrap = TEXWRAP_REGION_REPEAT; - break; + texwrap = TEXWRAP_REGION_REPEAT; break; } } - else if (clamp.wms == 3 || clamp.wmt == 3) + else if( clamp.wms==3||clamp.wmt==3) texwrap = TEXWRAP_REGION_REPEAT; else texwrap = TEXWRAP_REPEAT_CLAMP; int index = GET_SHADER_INDEX(type, texfilter, texwrap, fog, s_bWriteDepth, testaem, exactcolor, context, 0); - if (pbFailed != NULL) *pbFailed = false; + if( pbFailed != NULL ) *pbFailed = false; - FRAGMENTSHADER* pf = ppsTexture + index; - - if (pf->prog != NULL) return pf; + FRAGMENTSHADER* pf = ppsTexture+index; + if( pf->prog != NULL ) + return pf; + pf->prog = LoadShaderFromType(EFFECT_DIR, EFFECT_NAME, type, texfilter, texwrap, fog, s_bWriteDepth, testaem, exactcolor, g_nPixelShaderVer, context); - if (pf->prog != NULL) - { + if( pf->prog != NULL ) { #ifdef _DEBUG char str[255]; - sprintf(str, "Texture%s%d_%sPS", fog ? "Fog" : "", texfilter, g_pTexTypes[type]); + sprintf(str, "Texture%s%d_%sPS", fog?"Fog":"", texfilter, g_pTexTypes[type]); pf->filename = str; #endif SetupFragmentProgramParameters(pf, context, type); cgGLLoadProgram(pf->prog); - - if (cgGetError() != CG_NO_ERROR) - { + if( cgGetError() != CG_NO_ERROR ) { // try again // cgGLLoadProgram(pf->prog); // if( cgGetError() != CG_NO_ERROR ) { - ZZLog::Error_Log("Failed to load shader %d,%d,%d,%d.", type, fog, texfilter, 4*clamp.wms + clamp.wmt); - - if (pbFailed != NULL) *pbFailed = true; - - //assert(0); - // NULL makes things crash - return pf; - + ZZLog::Error_Log("Failed to load shader %d,%d,%d,%d", type, fog, texfilter, 4*clamp.wms+clamp.wmt); + if( pbFailed != NULL ) *pbFailed = true; + //assert(0); + // NULL makes things crash + return pf; // } } - return pf; } - ZZLog::Error_Log("Failed to create shader %d,%d,%d,%d.", type, fog, texfilter, 4*clamp.wms + clamp.wmt); - - if (pbFailed != NULL) *pbFailed = true; + ZZLog::Error_Log("Failed to create shader %d,%d,%d,%d", type, fog, texfilter, 4*clamp.wms+clamp.wmt); + if( pbFailed != NULL ) *pbFailed = true; return NULL; } -#endif // !defined(ZEROGS_DEVBUILD) +#endif // RELEASE_TO_PUBLIC +//#endif // NVIDIA_CG_API diff --git a/plugins/zzogl-pg/opengl/ZZoglShaders.h b/plugins/zzogl-pg/opengl/ZZoglShaders.h index 41b7511e51..a941702c5a 100644 --- a/plugins/zzogl-pg/opengl/ZZoglShaders.h +++ b/plugins/zzogl-pg/opengl/ZZoglShaders.h @@ -55,16 +55,16 @@ inline bool ZZshActiveParameter(ZZshParameter param) {return (param !=NULL); } #endif // end NVIDIA cg-toolkit API const static char* g_pPsTexWrap[] = { "-DREPEAT", "-DCLAMP", "-DREGION_REPEAT", NULL }; -const static char* g_pTexTypes[] = { "32", "tex32", "clut32", "tex32to16", "tex16to8h" }; enum ZZshShaderType {ZZ_SH_ZERO, ZZ_SH_REGULAR, ZZ_SH_REGULAR_FOG, ZZ_SH_TEXTURE, ZZ_SH_TEXTURE_FOG, ZZ_SH_CRTC}; -// We have "compatible" shaders, as RegularFogVS and RegularFogPS, if we don't need to worry about incompatible shaders. -// It's used only in GLSL mode. +// We have "compatible" shaders, as RegularFogVS and RegularFogPS. if don't need to wory about incompatible shaders +// It used only in GLSL mode. // ------------------------- Variables ------------------------------- -extern int g_nPixelShaderVer; -extern ZZshShaderLink pvs[16], g_vsprog, g_psprog; -extern ZZshParameter g_vparamPosXY[2], g_fparamFogColor; + +extern int g_nPixelShaderVer; +extern ZZshShaderLink pvs[16], g_vsprog, g_psprog; +extern ZZshParameter g_vparamPosXY[2], g_fparamFogColor; #define MAX_ACTIVE_UNIFORMS 600 #define MAX_ACTIVE_SHADERS 400 @@ -73,18 +73,18 @@ struct FRAGMENTSHADER { FRAGMENTSHADER() : prog(sZero), Shader(0), sMemory(pZero), sFinal(pZero), sBitwiseANDX(pZero), sBitwiseANDY(pZero), sInterlace(pZero), sCLUT(pZero), sOneColor(pZero), sBitBltZ(pZero), fTexAlpha2(pZero), fTexOffset(pZero), fTexDims(pZero), fTexBlock(pZero), fClampExts(pZero), fTexWrapMode(pZero), - fRealTexDims(pZero), fTestBlack(pZero), fPageOffset(pZero), fTexAlpha(pZero) {} - - ZZshShaderLink prog; // it links to the FRAGMENTSHADER structure, for compatibility between GLSL and CG. - ZZshShader Shader; // GLSL store shaders not as ready programs, but as shader compiled objects. VS and PS should be linked together to - // make a program. + fRealTexDims(pZero), fTestBlack(pZero), fPageOffset(pZero), fTexAlpha(pZero) {} + + ZZshShaderLink prog; // it link to FRAGMENTSHADER structure, for compability between GLSL and CG + ZZshShader Shader; // GLSL store shader's not as ready programs, but as shaders compilated object. VS and PS should be linked together to + // made a program. ZZshShaderType ShaderType; // Not every PS and VS are used together, only compatible ones. ZZshParameter sMemory, sFinal, sBitwiseANDX, sBitwiseANDY, sInterlace, sCLUT; ZZshParameter sOneColor, sBitBltZ, sInvTexDims; ZZshParameter fTexAlpha2, fTexOffset, fTexDims, fTexBlock, fClampExts, fTexWrapMode, fRealTexDims, fTestBlack, fPageOffset, fTexAlpha; - int ParametersStart, ParametersFinish; // this is part of UniformsIndex array in which parameters of this shader asre stored. The last one is ParametersFinish-1 + int ParametersStart, ParametersFinish; // this is part of UniformsIndex array in which parameters of this shader stored. Last one is ParametersFinish-1 #ifdef _DEBUG string filename; @@ -145,7 +145,7 @@ struct FRAGMENTSHADER return false; } - bool set_shader_const(Vector v, const char *name) + bool set_shader_const(float4 v, const char *name) { ZZshParameter p; @@ -174,29 +174,17 @@ struct VERTEXSHADER int ParametersStart, ParametersFinish; }; -namespace ZeroGS { - // Shaders variables - extern Vector g_vdepth; - extern Vector vlogz; +namespace ZeroGS { + extern float4 g_vdepth; + extern float4 vlogz; extern VERTEXSHADER pvsBitBlt; extern FRAGMENTSHADER ppsBitBlt[2], ppsBitBltDepth, ppsOne; // ppsOne used to stop using shaders for draw extern FRAGMENTSHADER ppsBaseTexture, ppsConvert16to32, ppsConvert32to16; - bool LoadEffects(); - bool LoadExtraEffects(); - FRAGMENTSHADER* LoadShadeEffect(int type, int texfilter, int fog, int testaem, int exactcolor, const clampInfo& clamp, int context, bool* pbFailed); - // only sets a limited amount of state (for Update) - void SetTexClamping(int context, FRAGMENTSHADER* pfragment); - void SetTexVariablesInt(int context, int bilinear, const tex0Info& tex0, bool CheckVB, FRAGMENTSHADER* pfragment, int force); + extern FRAGMENTSHADER ppsRegular[4], ppsTexture[NUM_SHADERS]; + extern FRAGMENTSHADER ppsCRTC[2], ppsCRTC24[2], ppsCRTCTarg[2]; } -// ------------------------- Variables ------------------------------- - -extern u8* s_lpShaderResources; -extern ZZshProfile cgvProf, cgfProf; -extern FRAGMENTSHADER ppsRegular[4], ppsTexture[NUM_SHADERS]; -extern FRAGMENTSHADER ppsCRTC[2], ppsCRTC24[2], ppsCRTCTarg[2]; - // ------------------------- Functions ------------------------------- #ifdef NVIDIA_CG_API @@ -208,7 +196,7 @@ inline bool ZZshExistProgram(ZZshShaderLink prog) {return (prog != NULL); }; extern const char* ShaderCallerName; extern const char* ShaderHandleName; -inline void SetShaderCaller(const char* Name) { +inline void SetShaderCaller(const char* Name) { ShaderCallerName = Name; } @@ -222,22 +210,23 @@ inline void ResetShaderCounters() { extern bool ZZshCheckProfilesSupport(); extern bool ZZshStartUsingShaders(); +extern bool ZZshCreateOpenShadersFile(); extern void ZZshGLDisableProfile(); extern void ZZshGLEnableProfile(); +extern void ZZshSetParameter4fv(ZZshShaderLink prog, ZZshParameter param, const float* v, const char* name); extern void ZZshSetParameter4fv(ZZshParameter param, const float* v, const char* name); +extern void ZZshSetParameter4fvWithRetry(ZZshParameter* param, ZZshShaderLink prog, const float* v, const char* name); +extern void ZZshGLSetTextureParameter(ZZshShaderLink prog, ZZshParameter param, GLuint texobj, const char* name); extern void ZZshGLSetTextureParameter(ZZshParameter param, GLuint texobj, const char* name); extern void ZZshDefaultOneColor( FRAGMENTSHADER ptr ); -extern void ZZshSetVertexShader(ZZshShader prog); -extern void ZZshSetPixelShader(ZZshShader prog); +extern void ZZshSetVertexShader(ZZshShaderLink prog); +extern void ZZshSetPixelShader(ZZshShaderLink prog); +extern bool ZZshLoadExtraEffects(); -inline int GET_SHADER_INDEX(int type, int texfilter, int texwrap, int fog, int writedepth, int testaem, int exactcolor, int context, int ps) -{ - return type + texfilter*NUM_TYPES + NUM_FILTERS*NUM_TYPES*texwrap + NUM_TEXWRAPS*NUM_FILTERS*NUM_TYPES*(fog+2*writedepth+4*testaem+8*exactcolor+16*context+32*ps); +extern FRAGMENTSHADER* ZZshLoadShadeEffect(int type, int texfilter, int fog, int testaem, int exactcolor, const clampInfo& clamp, int context, bool* pbFailed); + +namespace ZeroGS { + // only sets a limited amount of state (for Update) + void SetTexVariablesInt(int context, int bilinear, const tex0Info& tex0, bool CheckVB, FRAGMENTSHADER* pfragment, int force); } - -struct SHADERHEADER -{ - unsigned int index, offset, size; // if highest bit of index is set, pixel shader -}; - #endif diff --git a/plugins/zzogl-pg/opengl/targets.cpp b/plugins/zzogl-pg/opengl/targets.cpp index 2624ac145f..65b7d2f243 100644 --- a/plugins/zzogl-pg/opengl/targets.cpp +++ b/plugins/zzogl-pg/opengl/targets.cpp @@ -122,22 +122,22 @@ inline void FillOnlyStencilBuffer() // used for transformation from vertex position in GS window.coords (I hope) // to view coordinates (in range 0, 1). -inline Vector ZeroGS::CRenderTarget::DefaultBitBltPos() +inline float4 ZeroGS::CRenderTarget::DefaultBitBltPos() { - Vector v = Vector(1, -1, 0.5f / (float)RW(fbw), 0.5f / (float)RH(fbh)); + float4 v = float4(1, -1, 0.5f / (float)RW(fbw), 0.5f / (float)RH(fbh)); v *= 1.0f / 32767.0f; - ZZshSetParameter4fv(pvsBitBlt.sBitBltPos, v, "g_sBitBltPos"); + ZZshSetParameter4fv(pvsBitBlt.prog, pvsBitBlt.sBitBltPos, v, "g_sBitBltPos"); return v; } // Used to transform texture coordinates from GS (when 0,0 is upper left) to // OpenGL (0,0 - lower left). -inline Vector ZeroGS::CRenderTarget::DefaultBitBltTex() +inline float4 ZeroGS::CRenderTarget::DefaultBitBltTex() { // I really sure that -0.5 is correct, because OpenGL have no half-offset // issue, DirectX known for. - Vector v = Vector(1, -1, 0.5f / (float)RW(fbw), -0.5f / (float)RH(fbh)); - ZZshSetParameter4fv(pvsBitBlt.sBitBltTex, v, "g_sBitBltTex"); + float4 v = float4(1, -1, 0.5f / (float)RW(fbw), -0.5f / (float)RH(fbh)); + ZZshSetParameter4fv(pvsBitBlt.prog, pvsBitBlt.sBitBltTex, v, "g_sBitBltTex"); return v; } @@ -222,7 +222,7 @@ void ZeroGS::CRenderTarget::SetTarget(int fbplocal, const Rect2& scissor, int co if (fbplocal != fbp) { - Vector v; + float4 v; // will be rendering to a subregion u32 bpp = PSMT_ISHALF(psm) ? 2 : 4; @@ -401,7 +401,7 @@ void ZeroGS::CRenderTarget::Update(int context, ZeroGS::CRenderTarget* pdepth) ((CDepthTarget*)pdepth)->SetDepthStencilSurface(); SetShaderCaller("CRenderTarget::Update"); - Vector v = DefaultBitBltPos(); + float4 v = DefaultBitBltPos(); CRenderTargetMngr::MAPTARGETS::iterator ittarg; @@ -432,7 +432,7 @@ void ZeroGS::CRenderTarget::Update(int context, ZeroGS::CRenderTarget* pdepth) if (nUpdateTarg) { - ZZshGLSetTextureParameter(ppsBaseTexture.sFinal, ittarg->second->ptex, "BaseTexture.final"); + ZZshGLSetTextureParameter(ppsBaseTexture.prog, ppsBaseTexture.sFinal, ittarg->second->ptex, "BaseTexture.final"); //assert( ittarg->second->fbw == fbw ); int offset = (fbp - ittarg->second->fbp) * 64 / fbw; @@ -445,7 +445,7 @@ void ZeroGS::CRenderTarget::Update(int context, ZeroGS::CRenderTarget* pdepth) v.z = 0.25f; v.w = (float)RH(offset) + 0.25f; - ZZshSetParameter4fv(pvsBitBlt.sBitBltTex, v, "g_fBitBltTex"); + ZZshSetParameter4fv(pvsBitBlt.prog, pvsBitBlt.sBitBltTex, v, "g_fBitBltTex"); // v = DefaultBitBltTex(); Maybe? ZZshDefaultOneColor ( ppsBaseTexture ); @@ -472,14 +472,14 @@ void ZeroGS::CRenderTarget::Update(int context, ZeroGS::CRenderTarget* pdepth) // Fix in r133 -- FFX movies and Gust backgrounds! //SetTexVariablesInt(0, 0*(AA.x || AA.y) ? 2 : 0, texframe, false, &ppsBitBlt[!!s_AAx], 1); SetTexVariablesInt(0, 0, texframe, false, &ppsBitBlt[bit_idx], 1); - ZZshGLSetTextureParameter(ppsBitBlt[bit_idx].sMemory, vb[0].pmemtarg->ptex->tex, "BitBlt.memory"); + ZZshGLSetTextureParameter(ppsBitBlt[bit_idx].prog, ppsBitBlt[bit_idx].sMemory, vb[0].pmemtarg->ptex->tex, "BitBlt.memory"); - v = Vector(1, 1, 0.0f, 0.0f); - ZZshSetParameter4fv(pvsBitBlt.sBitBltTex, v, "g_fBitBltTex"); + v = float4(1, 1, 0.0f, 0.0f); + ZZshSetParameter4fv(pvsBitBlt.prog, pvsBitBlt.sBitBltTex, v, "g_fBitBltTex"); v.x = 1; v.y = 2; - ZZshSetParameter4fv(ppsBitBlt[bit_idx].sOneColor, v, "g_fOneColor"); + ZZshSetParameter4fv(ppsBitBlt[bit_idx].prog, ppsBitBlt[bit_idx].sOneColor, v, "g_fOneColor"); assert(ptex != 0); @@ -536,26 +536,26 @@ void ZeroGS::CRenderTarget::ConvertTo32() SetShaderCaller("CRenderTarget::ConvertTo32"); // tex coords, test ffx bikanel island when changing these - Vector v = DefaultBitBltPos(); + float4 v = DefaultBitBltPos(); v = DefaultBitBltTex(); v.x = (float)RW(16); v.y = (float)RH(16); v.z = -(float)RW(fbw); v.w = (float)RH(8); - ZZshSetParameter4fv(ppsConvert16to32.fTexOffset, v, "g_fTexOffset"); + ZZshSetParameter4fv(ppsConvert16to32.prog, ppsConvert16to32.fTexOffset, v, "g_fTexOffset"); v.x = (float)RW(8); v.y = 0; v.z = 0; v.w = 0.25f; - ZZshSetParameter4fv(ppsConvert16to32.fPageOffset, v, "g_fPageOffset"); + ZZshSetParameter4fv(ppsConvert16to32.prog, ppsConvert16to32.fPageOffset, v, "g_fPageOffset"); v.x = (float)RW(2 * fbw); v.y = (float)RH(fbh); v.z = 0; v.w = 0.0001f * (float)RH(fbh); - ZZshSetParameter4fv(ppsConvert16to32.fTexDims, v, "g_fTexDims"); + ZZshSetParameter4fv(ppsConvert16to32.prog, ppsConvert16to32.fTexDims, v, "g_fTexDims"); // v.x = 0; // ZZshSetParameter4fv(ppsConvert16to32.fTexBlock, v, "g_fTexBlock"); @@ -568,7 +568,7 @@ void ZeroGS::CRenderTarget::ConvertTo32() ZeroGS::ResetRenderTarget(1); BindToSample(&ptex); - ZZshGLSetTextureParameter(ppsConvert16to32.sFinal, ptex, "Convert 16 to 32.Final"); + ZZshGLSetTextureParameter(ppsConvert16to32.prog, ppsConvert16to32.sFinal, ptex, "Convert 16 to 32.Final"); fbh /= 2; // have 16 bit surfaces are usually 2x higher SetViewport(); @@ -640,26 +640,26 @@ void ZeroGS::CRenderTarget::ConvertTo16() SetShaderCaller("CRenderTarget::ConvertTo16"); // tex coords, test ffx bikanel island when changing these - Vector v = DefaultBitBltPos(); + float4 v = DefaultBitBltPos(); v = DefaultBitBltTex(); v.x = 16.0f / (float)fbw; v.y = 8.0f / (float)fbh; v.z = 0.5f * v.x; v.w = 0.5f * v.y; - ZZshSetParameter4fv(ppsConvert32to16.fTexOffset, v, "g_fTexOffset"); + ZZshSetParameter4fv(ppsConvert32to16.prog, ppsConvert32to16.fTexOffset, v, "g_fTexOffset"); v.x = 256.0f / 255.0f; v.y = 256.0f / 255.0f; v.z = 0.05f / 256.0f; v.w = -0.001f / 256.0f; - ZZshSetParameter4fv(ppsConvert32to16.fPageOffset, v, "g_fPageOffset"); + ZZshSetParameter4fv(ppsConvert32to16.prog, ppsConvert32to16.fPageOffset, v, "g_fPageOffset"); v.x = (float)RW(fbw); v.y = (float)RH(2 * fbh); v.z = 0; v.w = -0.1f / RH(fbh); - ZZshSetParameter4fv(ppsConvert32to16.fTexDims, v, "g_fTexDims"); + ZZshSetParameter4fv(ppsConvert32to16.prog, ppsConvert32to16.fTexDims, v, "g_fTexDims"); glBindBuffer(GL_ARRAY_BUFFER, vboRect); SET_STREAM(); @@ -671,7 +671,7 @@ void ZeroGS::CRenderTarget::ConvertTo16() BindToSample(&ptex); - ZZshGLSetTextureParameter(ppsConvert32to16.sFinal, ptex, "Convert 32 to 16"); + ZZshGLSetTextureParameter(ppsConvert32to16.prog, ppsConvert32to16.sFinal, ptex, "Convert 32 to 16"); // fbh *= 2; // have 16 bit surfaces are usually 2x higher @@ -748,22 +748,22 @@ void ZeroGS::CRenderTarget::_CreateFeedback() ResetRenderTarget(1); // tex coords, test ffx bikanel island when changing these - /* Vector v = DefaultBitBltPos(); - v = Vector ((float)(RW(fbw+4)), (float)(RH(fbh+4)), +0.25f, -0.25f); - ZZshSetParameter4fv(pvsBitBlt.sBitBltTex, v, "BitBltTex");*/ + /* float4 v = DefaultBitBltPos(); + v = float4 ((float)(RW(fbw+4)), (float)(RH(fbh+4)), +0.25f, -0.25f); + ZZshSetParameter4fv(pvsBitBlt.prog, pvsBitBlt.sBitBltTex, v, "BitBltTex");*/ // tex coords, test ffx bikanel island when changing these -// Vector v = Vector(1, -1, 0.5f / (fbw << AA.x), 0.5f / (fbh << AA.y)); +// float4 v = float4(1, -1, 0.5f / (fbw << AA.x), 0.5f / (fbh << AA.y)); // v *= 1/32767.0f; // cgGLSetParameter4fv(pvsBitBlt.sBitBltPos, v); - Vector v = DefaultBitBltPos(); + float4 v = DefaultBitBltPos(); v.x = (float)(RW(fbw)); v.y = (float)(RH(fbh)); v.z = 0.0f; v.w = 0.0f; - ZZshSetParameter4fv(pvsBitBlt.sBitBltTex, v, "BitBlt.Feedback"); + ZZshSetParameter4fv(pvsBitBlt.prog, pvsBitBlt.sBitBltTex, v, "BitBlt.Feedback"); ZZshDefaultOneColor(ppsBaseTexture); glBindBuffer(GL_ARRAY_BUFFER, vboRect); @@ -773,7 +773,7 @@ void ZeroGS::CRenderTarget::_CreateFeedback() glBindTexture(GL_TEXTURE_RECTANGLE_NV, ptex); GL_REPORT_ERRORD(); - ZZshGLSetTextureParameter(ppsBaseTexture.sFinal, ptex, "BaseTexture.Feedback"); + ZZshGLSetTextureParameter(ppsBaseTexture.prog, ppsBaseTexture.sFinal, ptex, "BaseTexture.Feedback"); SetViewport(); @@ -976,9 +976,9 @@ void ZeroGS::CDepthTarget::Update(int context, ZeroGS::CRenderTarget* prndr) // write color and zero out stencil buf, always 0 context! SetTexVariablesInt(0, 0, texframe, false, &ppsBitBltDepth, 1); - ZZshGLSetTextureParameter(ppsBitBltDepth.sMemory, vb[0].pmemtarg->ptex->tex, "BitBltDepth"); + ZZshGLSetTextureParameter(ppsBitBltDepth.prog, ppsBitBltDepth.sMemory, vb[0].pmemtarg->ptex->tex, "BitBltDepth"); - Vector v = DefaultBitBltPos(); + float4 v = DefaultBitBltPos(); v = DefaultBitBltTex(); @@ -986,9 +986,9 @@ void ZeroGS::CDepthTarget::Update(int context, ZeroGS::CRenderTarget* prndr) v.y = 2; v.z = PSMT_IS16Z(psm) ? 1.0f : 0.0f; v.w = g_filog32; - ZZshSetParameter4fv(ppsBitBltDepth.sOneColor, v, "g_fOneColor"); + ZZshSetParameter4fv(ppsBitBltDepth.prog, ppsBitBltDepth.sOneColor, v, "g_fOneColor"); - Vector vdepth = g_vdepth; + float4 vdepth = g_vdepth; if (psm == PSMT24Z) { @@ -1001,7 +1001,7 @@ void ZeroGS::CDepthTarget::Update(int context, ZeroGS::CRenderTarget* prndr) assert(ppsBitBltDepth.sBitBltZ != 0); - ZZshSetParameter4fv(ppsBitBltDepth.sBitBltZ, ((255.0f / 256.0f)*vdepth), "g_fBitBltZ"); + ZZshSetParameter4fv(ppsBitBltDepth.prog, ppsBitBltDepth.sBitBltZ, ((255.0f / 256.0f)*vdepth), "g_fBitBltZ"); assert(pdepth != 0); //GLint w1 = 0; diff --git a/plugins/zzogl-pg/opengl/targets.h b/plugins/zzogl-pg/opengl/targets.h index e9ecc8dafb..ee65bfbce2 100644 --- a/plugins/zzogl-pg/opengl/targets.h +++ b/plugins/zzogl-pg/opengl/targets.h @@ -228,7 +228,6 @@ inline list CreateTargetsList(int start, int end) return listTargs; } -extern Vector g_vdepth; extern int icurctx; extern GLuint vboRect; diff --git a/plugins/zzogl-pg/opengl/zerogs.cpp b/plugins/zzogl-pg/opengl/zerogs.cpp index bae3235064..88bda55557 100644 --- a/plugins/zzogl-pg/opengl/zerogs.cpp +++ b/plugins/zzogl-pg/opengl/zerogs.cpp @@ -29,7 +29,6 @@ #include "Mem.h" #include "x86.h" #include "zerogs.h" -#include "zpipe.h" #include "targets.h" #include "GLWin.h" #include "ZZoglShaders.h" @@ -51,7 +50,6 @@ extern int g_nFrame, g_nRealFrame; //-------------------------- Variables primInfo *prim; -ZZshProgram g_vsprog = 0, g_psprog = 0; // 2 -- ZZ inline u32 FtoDW(float f) { return (*((u32*)&f)); } @@ -82,7 +80,6 @@ PFNGLDRAWBUFFERSPROC glDrawBuffers = NULL; ///////////////////// // graphics resources -ZZshParameter g_vparamPosXY[2] = {0}, g_fparamFogColor = 0; bool s_bTexFlush = false; int s_nLastResolveReset = 0; @@ -94,10 +91,8 @@ int nBackbufferWidth, nBackbufferHeight; // ZZ namespace ZeroGS { -Vector g_vdepth, vlogz; - -// = Vector( 255.0 /256.0f, 255.0/65536.0f, 255.0f/(65535.0f*256.0f), 1.0f/(65536.0f*65536.0f)); -// Vector g_vdepth = Vector( 65536.0f*65536.0f, 256.0f*65536.0f, 65536.0f, 256.0f); +// = float4( 255.0 /256.0f, 255.0/65536.0f, 255.0f/(65535.0f*256.0f), 1.0f/(65536.0f*65536.0f)); +// float4 g_vdepth = float4( 65536.0f*65536.0f, 256.0f*65536.0f, 65536.0f, 256.0f); extern CRangeManager s_RangeMngr; // manages overwritten memory @@ -341,7 +336,7 @@ void ZeroGS::DrawText(const char* pstr, int left, int top, u32 color) FUNCLOG ZZshGLDisableProfile(); - Vector v; + float4 v; v.SetColor(color); glColor3f(v.z, v.y, v.x); //glColor3f(((color >> 16) & 0xff) / 255.0f, ((color >> 8) & 0xff)/ 255.0f, (color & 0xff) / 255.0f); @@ -490,19 +485,19 @@ void ZeroGS::RenderCustom(float fAlpha) glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); // tex coords - Vector v = Vector(1 / 32767.0f, 1 / 32767.0f, 0, 0); - ZZshSetParameter4fv(pvsBitBlt.sBitBltPos, v, "g_fBitBltPos"); + float4 v = float4(1 / 32767.0f, 1 / 32767.0f, 0, 0); + ZZshSetParameter4fv(pvsBitBlt.prog, pvsBitBlt.sBitBltPos, v, "g_fBitBltPos"); v.x = (float)nLogoWidth; v.y = (float)nLogoHeight; - ZZshSetParameter4fv(pvsBitBlt.sBitBltTex, v, "g_fBitBltTex"); + ZZshSetParameter4fv(pvsBitBlt.prog, pvsBitBlt.sBitBltTex, v, "g_fBitBltTex"); v.x = v.y = v.z = v.w = fAlpha; - ZZshSetParameter4fv(ppsBaseTexture.sOneColor, v, "g_fOneColor"); + ZZshSetParameter4fv(ppsBaseTexture.prog, ppsBaseTexture.sOneColor, v, "g_fOneColor"); if (conf.wireframe()) glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); // inside vhDCb[0]'s target area, so render that region only - ZZshGLSetTextureParameter(ppsBaseTexture.sFinal, ptexLogo, "Logo"); + ZZshGLSetTextureParameter(ppsBaseTexture.prog, ppsBaseTexture.sFinal, ptexLogo, "Logo"); glBindBuffer(GL_ARRAY_BUFFER, vboRect); SET_STREAM(); @@ -781,7 +776,7 @@ void ZeroGS::SetFogColor(u32 fog) ZeroGS::FlushBoth(); SetShaderCaller("SetFogColor"); - Vector v; + float4 v; // set it immediately v.SetColor(gs.fogcol); @@ -795,7 +790,7 @@ void ZeroGS::SetFogColor(GIFRegFOGCOL* fog) FUNCLOG SetShaderCaller("SetFogColor"); - Vector v; + float4 v; v.x = fog->FCR / 255.0f; v.y = fog->FCG / 255.0f; diff --git a/plugins/zzogl-pg/opengl/zerogs.h b/plugins/zzogl-pg/opengl/zerogs.h index 6883704dcb..c9365593be 100644 --- a/plugins/zzogl-pg/opengl/zerogs.h +++ b/plugins/zzogl-pg/opengl/zerogs.h @@ -66,7 +66,10 @@ extern float g_fiGPU_TEXWIDTH; #define MASKDIVISOR 0 // Used for decrement bitwise mask texture size if 1024 is too big #define GPU_TEXMASKWIDTH (1024 >> MASKDIVISOR) // bitwise mask width for region repeat mode +extern u32 ptexBlocks; // holds information on block tiling. It's texture number in OpenGL -- if 0 than such texture +extern u32 ptexConv16to32; // does not exists. This textures should be created on start and released on finish. extern u32 ptexBilinearBlocks; +extern u32 ptexConv32to16; // this is currently *not* used as a bool, in spite of its moniker --air // Actually, the only thing written to it is 1 or 0, which makes the (g_bSaveFlushedFrame & 0x80000000) check rather bizzare. @@ -136,7 +139,7 @@ class CRenderTarget int fbp, fbw, fbh, fbhCalc; // if fbp is negative, virtual target (not mapped to any real addr) int start, end; // in bytes u32 lastused; // time stamp since last used - Vector vposxy; + float4 vposxy; u32 fbm; u16 status; @@ -161,8 +164,8 @@ class CRenderTarget TS_NeedConvert32 = 16, TS_NeedConvert16 = 32, }; - inline Vector DefaultBitBltPos() ; - inline Vector DefaultBitBltTex() ; + inline float4 DefaultBitBltPos(); + inline float4 DefaultBitBltTex(); private: void _CreateFeedback();