/* ZZ Open GL graphics plugin * Copyright (c)2009-2010 zeydlitz@gmail.com, arcum42@gmail.com * Based on Zerofrog's ZeroGS KOSMOS (c)2005-2008 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ #ifndef ZEROGS_X86 #define ZEROGS_X86 #include "GS.h" #ifndef ZZNORMAL_MEMORY // StarOcean use 24 in logo and 4HH and 4HL in menu subfont // Tony hawk use 16, but have a lot of trouble // This function move one blockwidth * blockheigh data block from src to dst, in assumption, that in dst we store swizzled data, template inline void __fastcall SwizzleBlock(u32* dst, u32* src, int pitch, u32 WriteMask = 0xffffffff) { u8 B = (PSM_PIXELS_PER_WORD() > 2)? 4 : 2; assert ((pitch & 3) == 0 ); u32* src1 = src; u32* src2 = src + pitch / 4; for(int j = 0; j < 4 ; j++, src1 += B * pitch / 4, src2 += B * pitch / 4) for(int i = 0; i < 8; i++) { fillPixelsFromMemory(dst, src1, i, B * j, pitch /4, 0, 0, WriteMask); fillPixelsFromMemory(dst, src2, i, B * j + 1, pitch / 4 , 0, 0, WriteMask); } } // Simply AA multiplication. We does not use src[j << AA], but prefer to keep more central pixel in data. // We does not use mixing of neighbour pixels, because it does not give any noticiable bonus, but speed penalty is big. template inline u32 mixed_pixel(u32* src, int j) { if (AA == 0) return src[j] ; if (AA == 1) return src[(j << 1) + 1]; if (AA == 2) return src[(j << 2) + 2]; } // We fill destination word for pixel number j (j < 8). For 16-bit storage upper size of this word is pixel of j + 8, // and RGBA data should be convert to ARGB16. // WARNING: floating storage is never be testing template inline u32 convert_pixel(u32* src, int j) { if (is_float) { Vector_16F* fsrc = (Vector_16F*)src; // We use simplified code for float, it seems not // to be used anyway. if (PSM_ISHALF()) { return Float16ToARGB16 ( fsrc[j << AA]) + (Float16ToARGB16(fsrc[(j + 8) << AA]) << 16); } else { return Float16ToARGB ( fsrc[j << AA] ); } } else { if (PSM_ISHALF()) { return RGBA32to16(mixed_pixel(src, j)) + (RGBA32to16(mixed_pixel(src, j + 8)) << 16); } else { return mixed_pixel(src, j); } } } // put data in u32 destination word for pixel x, y < 8 in swizzled block. Note, that in 16-bit target we put 2 pixels (x,y // and x+8, y) in the same word. template inline void SettleSwizzlePixel(u32* dst, u32* src, int srcpitch, u32 mask) { u32 tmp = convert_pixel(src + y * srcpitch, x); MaskedOR (dst + pix, tmp, mask); // Don't forget to use mask. } // Put in dst memory location swizzled block for src. We does not calculate pixel address there at all. template void __fastcall FrameSwizzleBlock(u32* dst, int sj, int si, u32* src, int srcpitch, u32 WriteMask) { u32 mask = HandleWritemask(WriteMask); // This function made correct mask for 32, 24 and 16 target's for (int i = 0; i < 4; i++) { SettleSwizzlePixel<0, 0, 0, psm, is_float, AA>(dst, src, srcpitch, mask); // it's possible to put one for here, but I don't know, what's faster SettleSwizzlePixel<1, 1, 0, psm, is_float, AA>(dst, src, srcpitch, mask); SettleSwizzlePixel<2, 0, 1, psm, is_float, AA>(dst, src, srcpitch, mask); SettleSwizzlePixel<3, 1, 1, psm, is_float, AA>(dst, src, srcpitch, mask); SettleSwizzlePixel<4, 2, 0, psm, is_float, AA>(dst, src, srcpitch, mask); SettleSwizzlePixel<5, 3, 0, psm, is_float, AA>(dst, src, srcpitch, mask); SettleSwizzlePixel<6, 2, 1, psm, is_float, AA>(dst, src, srcpitch, mask); SettleSwizzlePixel<7, 3, 1, psm, is_float, AA>(dst, src, srcpitch, mask); SettleSwizzlePixel<8, 4, 0, psm, is_float, AA>(dst, src, srcpitch, mask); SettleSwizzlePixel<9, 5, 0, psm, is_float, AA>(dst, src, srcpitch, mask); SettleSwizzlePixel<10, 4, 1, psm, is_float, AA>(dst, src, srcpitch, mask); SettleSwizzlePixel<11, 5, 1, psm, is_float, AA>(dst, src, srcpitch, mask); SettleSwizzlePixel<12, 6, 0, psm, is_float, AA>(dst, src, srcpitch, mask); SettleSwizzlePixel<13, 7, 0, psm, is_float, AA>(dst, src, srcpitch, mask); SettleSwizzlePixel<14, 6, 1, psm, is_float, AA>(dst, src, srcpitch, mask); SettleSwizzlePixel<15, 7, 1, psm, is_float, AA>(dst, src, srcpitch, mask); src += 2 * srcpitch; dst += 16; } } #endif #endif