pcsx2/plugins/zzogl-pg/opengl/x86.h

121 lines
5.0 KiB
C++

/* ZZ Open GL graphics plugin
* Copyright (c)2009-2010 zeydlitz@gmail.com, arcum42@gmail.com
* Based on Zerofrog's ZeroGS KOSMOS (c)2005-2008
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#ifndef ZEROGS_X86
#define ZEROGS_X86
#include "GS.h"
#ifndef ZZNORMAL_MEMORY
// StarOcean use 24 in logo and 4HH and 4HL in menu subfont
// Tony hawk use 16, but have a lot of trouble
// This function move one blockwidth * blockheigh data block from src to dst, in assumption, that in dst we store swizzled data,
template <int psm>
inline void __fastcall SwizzleBlock(u32* dst, u32* src, int pitch, u32 WriteMask = 0xffffffff) {
u8 B = (PSM_PIXELS_PER_WORD<psm>() > 2)? 4 : 2;
assert ((pitch & 3) == 0 );
u32* src1 = src;
u32* src2 = src + pitch / 4;
for(int j = 0; j < 4 ; j++, src1 += B * pitch / 4, src2 += B * pitch / 4)
for(int i = 0; i < 8; i++) {
fillPixelsFromMemory<psm>(dst, src1, i, B * j, pitch /4, 0, 0, WriteMask);
fillPixelsFromMemory<psm>(dst, src2, i, B * j + 1, pitch / 4 , 0, 0, WriteMask);
}
}
// Simply AA multiplication. We does not use src[j << AA], but prefer to keep more central pixel in data.
// We does not use mixing of neighbour pixels, because it does not give any noticiable bonus, but speed penalty is big.
template <u8 AA>
inline u32 mixed_pixel(u32* src, int j) {
if (AA == 0)
return src[j] ;
if (AA == 1)
return src[(j << 1) + 1];
if (AA == 2)
return src[(j << 2) + 2];
}
// We fill destination word for pixel number j (j < 8). For 16-bit storage upper size of this word is pixel of j + 8,
// and RGBA data should be convert to ARGB16.
// WARNING: floating storage is never be testing
template <int psm, bool is_float, u8 AA>
inline u32 convert_pixel(u32* src, int j) {
if (is_float) {
Vector_16F* fsrc = (Vector_16F*)src; // We use simplified code for float, it seems not
// to be used anyway.
if (PSM_ISHALF<psm>()) {
return Float16ToARGB16 ( fsrc[j << AA]) + (Float16ToARGB16(fsrc[(j + 8) << AA]) << 16);
}
else {
return Float16ToARGB ( fsrc[j << AA] );
}
}
else {
if (PSM_ISHALF<psm>()) {
return RGBA32to16(mixed_pixel<AA>(src, j)) + (RGBA32to16(mixed_pixel<AA>(src, j + 8)) << 16);
}
else {
return mixed_pixel<AA>(src, j);
}
}
}
// put data in u32 destination word for pixel x, y < 8 in swizzled block. Note, that in 16-bit target we put 2 pixels (x,y
// and x+8, y) in the same word.
template <int pix, int x, int y, int psm, bool is_float, u8 AA>
inline void SettleSwizzlePixel(u32* dst, u32* src, int srcpitch, u32 mask) {
u32 tmp = convert_pixel<psm, is_float, AA>(src + y * srcpitch, x);
MaskedOR (dst + pix, tmp, mask); // Don't forget to use mask.
}
// Put in dst memory location swizzled block for src. We does not calculate pixel address there at all.
template <int psm, bool is_float, u8 AA>
void __fastcall FrameSwizzleBlock(u32* dst, int sj, int si, u32* src, int srcpitch, u32 WriteMask) {
u32 mask = HandleWritemask<psm>(WriteMask); // This function made correct mask for 32, 24 and 16 target's
for (int i = 0; i < 4; i++) {
SettleSwizzlePixel<0, 0, 0, psm, is_float, AA>(dst, src, srcpitch, mask); // it's possible to put one for here, but I don't know, what's faster
SettleSwizzlePixel<1, 1, 0, psm, is_float, AA>(dst, src, srcpitch, mask);
SettleSwizzlePixel<2, 0, 1, psm, is_float, AA>(dst, src, srcpitch, mask);
SettleSwizzlePixel<3, 1, 1, psm, is_float, AA>(dst, src, srcpitch, mask);
SettleSwizzlePixel<4, 2, 0, psm, is_float, AA>(dst, src, srcpitch, mask);
SettleSwizzlePixel<5, 3, 0, psm, is_float, AA>(dst, src, srcpitch, mask);
SettleSwizzlePixel<6, 2, 1, psm, is_float, AA>(dst, src, srcpitch, mask);
SettleSwizzlePixel<7, 3, 1, psm, is_float, AA>(dst, src, srcpitch, mask);
SettleSwizzlePixel<8, 4, 0, psm, is_float, AA>(dst, src, srcpitch, mask);
SettleSwizzlePixel<9, 5, 0, psm, is_float, AA>(dst, src, srcpitch, mask);
SettleSwizzlePixel<10, 4, 1, psm, is_float, AA>(dst, src, srcpitch, mask);
SettleSwizzlePixel<11, 5, 1, psm, is_float, AA>(dst, src, srcpitch, mask);
SettleSwizzlePixel<12, 6, 0, psm, is_float, AA>(dst, src, srcpitch, mask);
SettleSwizzlePixel<13, 7, 0, psm, is_float, AA>(dst, src, srcpitch, mask);
SettleSwizzlePixel<14, 6, 1, psm, is_float, AA>(dst, src, srcpitch, mask);
SettleSwizzlePixel<15, 7, 1, psm, is_float, AA>(dst, src, srcpitch, mask);
src += 2 * srcpitch;
dst += 16;
}
}
#endif
#endif