Merge the changes in the GregsMisc branch back into trunk.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4049 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
arcum42 2010-11-24 02:49:23 +00:00
commit bd7fee3be3
69 changed files with 6606 additions and 5892 deletions

View File

@ -39,7 +39,7 @@ set(SoundTouchSources
RateTransposer.cpp
SoundTouch.cpp
TDStretch.cpp
WavFile.cpp
# WavFile.cpp # directly included in spu2x
cpu_detect_x86_gcc.cpp
mmx_optimized.cpp
sse_optimized.cpp)
@ -55,7 +55,7 @@ set(SoundTouchHeaders
STTypes.h
SoundTouch.h
TDStretch.h
WavFile.h
# WavFile.h # directly included in spu2x
cpu_detect.h)
# add library

View File

@ -23,9 +23,6 @@ if(Linux)
endif(GTK2_FOUND)
find_package(X11)
# Manually find Xxf86vm because it is not done in the module...
FIND_LIBRARY(X11_Xxf86vm_LIB Xxf86vm ${X11_LIB_SEARCH_PATH})
MARK_AS_ADVANCED(X11_Xxf86vm_LIB)
endif(Linux)
## Use cmake package to find module

View File

@ -183,11 +183,11 @@ __fi void memcpy_vibes(void * dest, const void * src, int size) {
__asm__ __volatile__
(
".intel_syntax noprefix\n"
"mov eax, %[qwc]\n" // keep a copy of count for looping
"shr eax, 1\n"
"jz memcpy_qwc_1_%=\n" // only one 16 byte block to copy?
"sub %[qwc], 1\n" // dec the counter to ease the count of 16bytes block later (optimization)
// Note after this line, real value of the counter is %[qwc] + 1
"jle memcpy_qwc_1_%=\n" // only one 16 byte block to copy? Or nothing.
"cmp eax, 64\n" // "IN_CACHE_COPY/32"
"cmp %[qwc], 127\n" // "IN_CACHE_COPY/16"
"jb memcpy_qwc_loop1_%=\n" // small copies should be cached (definite speedup --air)
"memcpy_qwc_loop2_%=:\n" // 32-byte blocks, uncached copy
@ -204,8 +204,8 @@ __fi void memcpy_vibes(void * dest, const void * src, int size) {
"add %[src],32\n" // update source pointer
"add %[dest],32\n" // update destination pointer
"sub eax,1\n"
"jnz memcpy_qwc_loop2_%=\n" // last 64-byte block?
"sub %[qwc],2\n"
"jg memcpy_qwc_loop2_%=\n" // last 64-byte block?
"sfence\n" // flush the write buffer
"jmp memcpy_qwc_1_%=\n"
@ -227,12 +227,12 @@ __fi void memcpy_vibes(void * dest, const void * src, int size) {
"add %[src],32\n" // update source pointer
"add %[dest],32\n" // update destination pointer
"sub eax,1\n"
"jnz memcpy_qwc_loop1_%=\n" // last 64-byte block?
"sub %[qwc],2\n"
"jg memcpy_qwc_loop1_%=\n" // more 32-byte blocks left? stay in the cached-copy loop (not the uncached loop2)
"memcpy_qwc_1_%=:\n"
"test %[qwc],1\n"
"jz memcpy_qwc_final_%=\n"
"cmp %[qwc],0\n"
"jne memcpy_qwc_final_%=\n"
"movq mm0,[%[src]]\n"
"movq mm1,[%[src]+8]\n"
"movq [%[dest]], mm0\n"
@ -243,7 +243,7 @@ __fi void memcpy_vibes(void * dest, const void * src, int size) {
".att_syntax\n"
: "=&r"(dest), "=&r"(src), "=&r"(qwc)
: [dest]"0"(dest), [src]"1"(src), [qwc]"2"(qwc)
: "memory", "eax", "mm0", "mm1", "mm2", "mm3"
: "memory", "mm0", "mm1", "mm2", "mm3"
);
}
#endif

View File

@ -16,6 +16,7 @@
#include "PrecompiledHeader.h"
#include "cpudetect_internal.h"
#include <wx/thread.h>
// Note: Apparently this solution is Linux/Solaris only.
// FreeBSD/OsX need something far more complicated (apparently)

View File

@ -16,7 +16,7 @@
</Compiler>
</Target>
<Target title="Release">
<Option output="../../../bin/patches/libFWnull.so.0.5.0" prefix_auto="0" extension_auto="0" />
<Option output="../../../bin/plugins/libFWnull.so.0.5.0" prefix_auto="0" extension_auto="0" />
<Option object_output="obj/Release/" />
<Option type="3" />
<Option compiler="gcc" />

View File

@ -57,8 +57,9 @@ set(spu2xSources
Spu2replay.cpp
spu2sys.cpp
Timestretcher.cpp
#utf8.cpp
Wavedump_wav.cpp)
Wavedump_wav.cpp
WavFile.cpp
)
# spu2x headers
set(spu2xHeaders
@ -75,7 +76,7 @@ set(spu2xHeaders
SndOut.h
spdif.h
Spu2replay.h
#utf8.h
WavFile.h
)

View File

@ -0,0 +1,149 @@
/* SPU2-X, A plugin for Emulating the Sound Processing Unit of the Playstation 2
* Developed and maintained by the Pcsx2 Development Team.
*
* The file is based on WavFile.h from SoundTouch library.
* Original portions are (c) 2009 by Olli Parviainen (oparviai 'at' iki.fi)
*
* SPU2-X is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* SPU2-X is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with SPU2-X. If not, see <http://www.gnu.org/licenses/>.
*/
// Note: this file is mostly a copy-paste of WavFile.cpp from the SoundTouch library.
// It was shrunk to support only writing 16-bit WAV files.
#include <stdio.h>
#include <stdexcept>
#include <string>
#include <cstring>
#include <assert.h>
#include <limits.h>
#include "WavFile.h"
using namespace std;
static const char riffStr[] = "RIFF";
static const char waveStr[] = "WAVE";
static const char fmtStr[] = "fmt ";
static const char dataStr[] = "data";
//////////////////////////////////////////////////////////////////////////////
//
// Class WavOutFile
//
/// Opens `fileName` for binary writing and emits a provisional WAV header.
///
/// The length fields of the header are written as zero here and are patched
/// by finishHeader() when the object is destroyed.
///
/// Throws runtime_error if the file cannot be opened or the header cannot be
/// written. In both cases no FILE handle is left open.
WavOutFile::WavOutFile(const char *fileName, int sampleRate, int bits, int channels)
{
    bytesWritten = 0;
    fptr = fopen(fileName, "wb");
    if (fptr == NULL)
    {
        string msg = "Error : Unable to open file \"";
        msg += fileName;
        msg += "\" for writing.";
        throw runtime_error(msg);
    }

    fillInHeader(sampleRate, bits, channels);

    try
    {
        writeHeader();
    }
    catch (...)
    {
        // The destructor does not run for a partially constructed object, so
        // the stream has to be released here before the error propagates.
        fclose(fptr);
        fptr = NULL;
        throw;
    }
}
/// Patches the final lengths into the header and closes the file.
///
/// A destructor must not let an exception escape (that would terminate the
/// program during stack unwinding), so a failure to rewrite the header is
/// reported on stderr instead of being thrown.
WavOutFile::~WavOutFile()
{
    if (fptr == NULL) return;

    try
    {
        finishHeader();
    }
    catch (const std::exception &e)
    {
        fprintf(stderr, "%s\n", e.what());
    }
    catch (...)
    {
        // Nothing sensible can be reported; still fall through and close the file.
    }

    fclose(fptr);
    fptr = NULL;
}
/// Populates the in-memory WAV header for the given stream parameters.
///
/// The two length fields (riff.package_len and data.data_len) are set to 0
/// here; finishHeader() fills them in once the amount of written data is known.
void WavOutFile::fillInHeader(uint sampleRate, uint bits, uint channels)
{
    // --- 'riff' part ---
    memcpy(&(header.riff.riff_char), riffStr, 4);   // tag "RIFF"
    header.riff.package_len = 0;                    // unknown so far
    memcpy(&(header.riff.wave), waveStr, 4);        // tag "WAVE"

    // --- 'format' part ---
    memcpy(&(header.format.fmt), fmtStr, 4);        // tag "fmt "
    header.format.format_len = 0x10;
    header.format.fixed = 1;                        // NOTE(review): format tag 1 is PCM in the RIFF/WAVE spec
    header.format.channel_number = (short)channels;
    header.format.sample_rate = (int)sampleRate;
    header.format.bits_per_sample = (short)bits;
    // bytes occupied by one sample across all channels
    header.format.byte_per_sample = (short)(bits * channels / 8);
    header.format.byte_rate = header.format.byte_per_sample * (int)sampleRate;

    // --- 'data' part ---
    memcpy(&(header.data.data_field), dataStr, 4);  // tag "data"
    header.data.data_len = 0;                       // unknown so far
}
/// Stores the final sizes in the header and rewrites it at the start of the file.
void WavOutFile::finishHeader()
{
    // Payload size of the 'data' chunk.
    header.data.data_len = bytesWritten;

    // RIFF package length: payload plus 36 bytes
    // (presumably the header bytes that follow the package_len field).
    header.riff.package_len = bytesWritten + 36;

    writeHeader();
}
/// Writes the current header at offset 0 and moves the file position back to
/// the end of the file. Throws runtime_error if the header cannot be written.
void WavOutFile::writeHeader()
{
    // The header always lives at the very beginning of the file.
    fseek(fptr, 0, SEEK_SET);

    if (fwrite(&header, sizeof(header), 1, fptr) != 1)
    {
        throw runtime_error("Error while writing to a wav file.");
    }

    // Continue appending sample data after whatever has been written so far.
    fseek(fptr, 0, SEEK_END);
}
/// Appends `numElems` 16-bit samples from `buffer` to the file.
/// Does nothing when `numElems` is below 1. Throws runtime_error on a short write.
void WavOutFile::write(const short *buffer, int numElems)
{
    // Only the 16-bit sample format is supported by this writer.
    assert(header.format.bits_per_sample == 16);

    if (numElems >= 1)
    {
        const int written = fwrite(buffer, 2, numElems, fptr);
        if (written != numElems)
        {
            throw runtime_error("Error while writing to a wav file.");
        }

        // Two bytes per sample.
        bytesWritten += 2 * numElems;
    }
}

View File

@ -0,0 +1,113 @@
/* SPU2-X, A plugin for Emulating the Sound Processing Unit of the Playstation 2
* Developed and maintained by the Pcsx2 Development Team.
*
* The file is based on WavFile.h from SoundTouch library.
* Original portions are (c) 2009 by Olli Parviainen (oparviai 'at' iki.fi)
*
* SPU2-X is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* SPU2-X is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with SPU2-X. If not, see <http://www.gnu.org/licenses/>.
*/
// Note: this file is mostly a copy-paste of WavFile.h from the SoundTouch library.
// It was shrunk to support only writing 16-bit WAV files.
#ifndef WAVFILE_H
#define WAVFILE_H
#include <stdio.h>
#ifndef uint
typedef unsigned int uint;
#endif

/// 'riff' section of a WAV file header: tag, package length, 'WAVE' tag.
struct WavRiff
{
    char riff_char[4];
    int package_len;
    char wave[4];
};

/// 'format' section of a WAV file header: describes the sample layout.
struct WavFormat
{
    char fmt[4];
    int format_len;
    short fixed;
    short channel_number;
    int sample_rate;
    int byte_rate;
    short byte_per_sample;
    short bits_per_sample;
};

/// 'data' section header of a WAV file: tag followed by the payload length.
struct WavData
{
    char data_field[4];
    uint data_len;
};

/// Complete WAV file header, laid out in the order it is written to disk.
struct WavHeader
{
    WavRiff riff;
    WavFormat format;
    WavData data;
};
/// Class for writing WAV audio files.
///
/// An instance owns the underlying FILE handle and closes it on destruction.
/// Copying is therefore disabled: a copy would share the handle and close it twice.
class WavOutFile
{
private:
    /// Pointer to the WAV file
    FILE *fptr;

    /// WAV file header data.
    WavHeader header;

    /// Counter of how many bytes have been written to the file so far.
    int bytesWritten;

    /// Fills in WAV file header information.
    void fillInHeader(const uint sampleRate, const uint bits, const uint channels);

    /// Finishes the WAV file header by supplementing information of amount of
    /// data written to file etc
    void finishHeader();

    /// Writes the WAV file header.
    void writeHeader();

    /// Not copyable: declared private and intentionally left undefined.
    WavOutFile(const WavOutFile &);
    WavOutFile &operator=(const WavOutFile &);

public:
    /// Constructor: Creates a new WAV file. Throws a 'runtime_error' exception
    /// if file creation fails.
    WavOutFile(const char *fileName,    ///< Filename
               int sampleRate,          ///< Sample rate (e.g. 44100 etc)
               int bits,                ///< Bits per sample (write() only accepts 16)
               int channels             ///< Number of channels (1=mono, 2=stereo)
              );

    /// Destructor: Finalizes & closes the WAV file.
    ~WavOutFile();

    /// Write data to WAV file. Throws a 'runtime_error' exception if writing to
    /// file fails.
    void write(const short *buffer,     ///< Pointer to sample data buffer.
               int numElems             ///< How many array items are to be written to file.
              );
};
#endif

View File

@ -16,7 +16,11 @@
*/
#include "Global.h"
#ifdef __LINUX__
#include "WavFile.h"
#else
#include "soundtouch/WavFile.h"
#endif
static WavOutFile* _new_WavOutFile( const char* destfile )
{

View File

@ -855,4 +855,5 @@ public:
#endif
#endif

View File

@ -63,6 +63,11 @@ set(zzoglSources
x86.cpp
zerogs.cpp
zpipe.cpp
ZZClut.cpp
ZZHacks.cpp
ZZKeyboard.cpp
ZZoglDrawing.cpp
ZZLog.cpp
ZZoglCreate.cpp
ZZoglCRTC.cpp
ZZoglFlush.cpp
@ -71,8 +76,7 @@ set(zzoglSources
ZZoglShaders.cpp
ZZoglShoots.cpp
ZZoglVB.cpp
ZZKeyboard.cpp
ZZLog.cpp)
)
# zzogl headers
set(zzoglHeaders
@ -81,6 +85,7 @@ set(zzoglHeaders
GifTransfer.h
# glprocs.h
GS.h
HostMemory.h
Mem.h
Mem_Swizzle.h
Mem_Transmit.h
@ -93,11 +98,17 @@ set(zzoglHeaders
x86.h
zerogs.h
zpipe.h
ZZClut.h
ZZGl.h
ZZHacks.h
ZZoglDrawing.h
ZZLog.h
ZZoglCRTC.h
ZZoglMath.h
ZZoglShaders.h
ZZGl.h
ZZLog.h)
ZZoglShoots.h
ZZoglVB.h
)
# zzogl S sources
set(zzoglSSources

View File

@ -20,44 +20,7 @@
#ifndef CRC_H_INCLUDED
#define CRC_H_INCLUDED
// Bit flags identifying the individual per-game hacks; each enumerator occupies
// exactly one bit so the values can be OR-ed together into a single mask.
// don't change these values!
enum GAME_HACK_OPTIONS
{
GAME_TEXTURETARGS = 0x00000001,
GAME_AUTORESET = 0x00000002,
GAME_INTERLACE2X = 0x00000004,
GAME_TEXAHACK = 0x00000008, // apply texa to non textured polys
GAME_NOTARGETRESOLVE = 0x00000010,
GAME_EXACTCOLOR = 0x00000020,
GAME_NOCOLORCLAMP = 0x00000040,
GAME_FFXHACK = 0x00000080,
GAME_NOALPHAFAIL = 0x00000100,
GAME_NODEPTHUPDATE = 0x00000200,
GAME_QUICKRESOLVE1 = 0x00000400,
GAME_NOQUICKRESOLVE = 0x00000800,
GAME_NOTARGETCLUT = 0x00001000, // full 16 bit resolution
GAME_NOSTENCIL = 0x00002000,
GAME_VSSHACKOFF = 0x00004000, // vertical stripe syndrome
GAME_NODEPTHRESOLVE = 0x00008000,
GAME_FULL16BITRES = 0x00010000,
GAME_RESOLVEPROMOTED = 0x00020000,
GAME_FASTUPDATE = 0x00040000,
GAME_NOALPHATEST = 0x00080000,
GAME_DISABLEMRTDEPTH = 0x00100000,
GAME_32BITTARGS = 0x00200000,
GAME_PATH3HACK = 0x00400000,
GAME_DOPARALLELCTX = 0x00800000, // tries to parallelize both contexts so that render calls are reduced (xenosaga)
// makes the game faster, but can be buggy
GAME_XENOSPECHACK = 0x01000000, // xenosaga specularity hack (ignore any zmask=1 draws)
GAME_PARTIALPOINTERS = 0x02000000, // whenever the texture or render target is small, tries to look for bigger ones to read from
GAME_PARTIALDEPTH = 0x04000000, // tries to save depth targets as much as possible across height changes
GAME_REGETHACK = 0x08000000, // some sort of weirdness in ReGet() code
GAME_GUSTHACK = 0x10000000, // Needed for Gustgames fast update.
GAME_NOLOGZ = 0x20000000, // Intended for linux -- not logarithmic Z.
GAME_AUTOSKIPDRAW = 0x40000000 // Remove blur effect on some games
};
#define USEALPHATESTING (!(conf.settings().no_alpha_test))
#include "ZZHacks.h"
// CRC Information
enum Title_Info
@ -374,8 +337,10 @@ static const Game_Info crc_game_list[] =
//{0x4437F4B1, ArTonelico1, US, GAME_GUSTHACK, -1, -1},
{0xF95F37EE, ArTonelico2, US, GAME_GUSTHACK, -1, -1},
{0xF46142D3, ArTonelico2, JPUNDUB, GAME_GUSTHACK, -1, -1},
{0x77b0236f, ManaKhemia1, US, GAME_GUSTHACK , -1, -1},
{0x433951e7, ManaKhemia2, US, GAME_GUSTHACK, -1, -1},
// According to Zeydlitz, Mana Khemia no longer needs the Gust Hack.
//{0x77b0236f, ManaKhemia1, US, GAME_GUSTHACK, -1, -1},
//{0x433951e7, ManaKhemia2, US, GAME_GUSTHACK, -1, -1},
//{0xda11c6d4, AtelierJudie, JP, GAME_GUSTHACK, -1, -1},
//{0x3e72c085, AtelierLilie, JP, GAME_GUSTHACK, -1, -1},
//{0x6eac076b, AtelierViorate, JP, GAME_GUSTHACK, -1, -1},

View File

@ -29,6 +29,9 @@
#undef CreateWindow // Undo Windows.h global namespace pollution
extern void SetDeviceSize(int nNewWidth, int nNewHeight);
extern void OnFKey(int key, int shift);
class GLWindow
{
private:
@ -51,6 +54,9 @@ class GLWindow
u32 width, height, depth;
public:
char title[256];
Size backbuffer;
void SwapGLBuffers();
bool ReleaseContext();
@ -63,8 +69,21 @@ class GLWindow
bool DisplayWindow(int _width, int _height);
void SetTitle(char *strtitle);
void ResizeCheck();
};
void ProcessEvents();
// Records a new window size: the backbuffer dimensions are updated (never
// below 16x16) and, when not in fullscreen, the requested size is also stored
// in the configuration.
void UpdateWindowSize(int nNewWidth, int nNewHeight)
{
    FUNCLOG

    backbuffer.w = std::max(nNewWidth, 16);
    backbuffer.h = std::max(nNewHeight, 16);

    // Fullscreen dimensions are not persisted to the configuration.
    if (conf.fullscreen()) return;

    conf.width = nNewWidth;
    conf.height = nNewHeight;
}
};
extern GLWindow GLWin;

View File

@ -18,11 +18,11 @@
*/
#include "GS.h"
#include "zerogs.h"
#include "GLWin.h"
#ifdef GL_WIN32_WINDOW
HWND GShwnd = NULL;
HDC hDC = NULL; // Private GDI Device Context
HGLRC hRC = NULL; // Permanent Rendering Context
@ -47,14 +47,14 @@ LRESULT WINAPI MsgProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam)
case WM_SIZE:
nWindowWidth = lParam & 0xffff;
nWindowHeight = lParam >> 16;
ZeroGS::ChangeWindowSize(nWindowWidth, nWindowHeight);
GLWin.UpdateWindowSize(nWindowWidth, nWindowHeight);
break;
case WM_SIZING:
// if button is 0, then just released so can resize
if (GetSystemMetrics(SM_SWAPBUTTON) ? !GetAsyncKeyState(VK_RBUTTON) : !GetAsyncKeyState(VK_LBUTTON))
{
ZeroGS::SetChangeDeviceSize(nWindowWidth, nWindowHeight);
SetDeviceSize(nWindowWidth, nWindowHeight);
}
break;
@ -122,7 +122,11 @@ bool GLWindow::CreateWindow(void *pDisplay)
hInstance, // Instance
NULL); // Don't Pass Anything To WM_CREATE
if (GShwnd == NULL) return false;
if (GShwnd == NULL)
{
ZZLog::Error_Log("Failed to create window. Exiting...");
return false;
}
if (pDisplay != NULL) *(HWND*)pDisplay = GShwnd;
@ -135,6 +139,7 @@ bool GLWindow::CreateWindow(void *pDisplay)
SetFocus(GShwnd);
if (pDisplay == NULL) ZZLog::Error_Log("Failed to create window. Exiting...");
return (pDisplay != NULL);
}
@ -184,8 +189,8 @@ bool GLWindow::DisplayWindow(int _width, int _height)
if (conf.fullscreen())
{
nBackbufferWidth = rcdesktop.right - rcdesktop.left;
nBackbufferHeight = rcdesktop.bottom - rcdesktop.top;
backbuffer.w = rcdesktop.right - rcdesktop.left;
backbuffer.h = rcdesktop.bottom - rcdesktop.top;
dwExStyle = WS_EX_APPWINDOW;
dwStyle = WS_POPUP;
@ -195,6 +200,8 @@ bool GLWindow::DisplayWindow(int _width, int _height)
{
dwExStyle = WS_EX_APPWINDOW | WS_EX_WINDOWEDGE;
dwStyle = WS_OVERLAPPEDWINDOW;
backbuffer.w = _width;
backbuffer.h = _height;
}
dwStyle |= WS_CLIPSIBLINGS | WS_CLIPCHILDREN;
@ -202,8 +209,8 @@ bool GLWindow::DisplayWindow(int _width, int _height)
rc.left = 0;
rc.top = 0;
rc.right = nBackbufferWidth;
rc.bottom = nBackbufferHeight;
rc.right = backbuffer.w; // client-area width (was mistakenly taken from backbuffer.h)
rc.bottom = backbuffer.h; // client-area height
AdjustWindowRectEx(&rc, dwStyle, false, dwExStyle);
int X = (rcdesktop.right - rcdesktop.left) / 2 - (rc.right - rc.left) / 2;
int Y = (rcdesktop.bottom - rcdesktop.top) / 2 - (rc.bottom - rc.top) / 2;
@ -218,8 +225,8 @@ bool GLWindow::DisplayWindow(int _width, int _height)
DEVMODE dmScreenSettings;
memset(&dmScreenSettings, 0, sizeof(dmScreenSettings));
dmScreenSettings.dmSize = sizeof(dmScreenSettings);
dmScreenSettings.dmPelsWidth = nBackbufferWidth;
dmScreenSettings.dmPelsHeight = nBackbufferHeight;
dmScreenSettings.dmPelsWidth = backbuffer.w;
dmScreenSettings.dmPelsHeight = backbuffer.h;
dmScreenSettings.dmBitsPerPel = 32;
dmScreenSettings.dmFields = DM_BITSPERPEL | DM_PELSWIDTH | DM_PELSHEIGHT;
@ -300,8 +307,10 @@ bool GLWindow::DisplayWindow(int _width, int _height)
// Presents the back buffer on the window's device context. Any GL error that
// is still pending before the swap is reported to the debug log first.
void GLWindow::SwapGLBuffers()
{
    if (glGetError() != GL_NO_ERROR) ZZLog::Debug_Log("glError before swap!");

    SwapBuffers(hDC);
    //glClear(GL_COLOR_BUFFER_BIT);

    // The former function-local static "lastswaptime" was only ever written,
    // never read, so it has been dropped together with its timeGetTime() call.
}
@ -315,4 +324,74 @@ void GLWindow::ResizeCheck()
}
extern void ChangeDeviceSize(int nNewWidth, int nNewHeight);
// Drains the pending Win32 messages and handles the plugin's hotkeys:
//  - F5/F6/F7/F9 are forwarded to OnFKey() together with the shift state;
//  - Escape leaves fullscreen, or sends WM_DESTROY to the window when windowed;
//  - Alt+Enter (polled after the queue is empty) toggles fullscreen.
// All other messages are translated and dispatched normally.
void GLWindow::ProcessEvents()
{
    MSG msg;
    ZeroMemory(&msg, sizeof(msg));

    while (PeekMessage(&msg, NULL, 0U, 0U, PM_REMOVE))
    {
        if (msg.message == WM_KEYDOWN)
        {
            switch (msg.wParam)
            {
                case VK_F5:
                case VK_F6:
                case VK_F7:
                case VK_F9:
                {
                    const bool shiftHeld = !!(GetKeyState(VK_SHIFT) & 0x8000);
                    OnFKey(msg.wParam - VK_F1 + 1, shiftHeld);
                    break;
                }

                case VK_ESCAPE:
                    if (conf.fullscreen())
                    {
                        // destroy that msg
                        conf.setFullscreen(false);
                        ChangeDeviceSize(conf.width, conf.height);
                        UpdateWindow(GShwnd);
                        continue; // so that msg doesn't get sent
                    }

                    SendMessage(GShwnd, WM_DESTROY, 0, 0);
                    return;
            }
        }

        TranslateMessage(&msg);
        DispatchMessage(&msg);
    }

    if ((GetKeyState(VK_MENU) & 0x8000) && (GetKeyState(VK_RETURN) & 0x8000))
    {
        conf.zz_options.fullscreen = !conf.zz_options.fullscreen;

        SetDeviceSize(
            (conf.fullscreen()) ? 1280 : conf.width,
            (conf.fullscreen()) ? 960 : conf.height);
    }
}
#endif

View File

@ -19,7 +19,6 @@
#include "Util.h"
#include "GLWin.h"
#include "zerogs.h"
#ifdef GL_X11_WINDOW
@ -35,7 +34,11 @@ bool GLWindow::CreateWindow(void *pDisplay)
glDisplay = XOpenDisplay(0);
glScreen = DefaultScreen(glDisplay);
if (pDisplay == NULL) return false;
if (pDisplay == NULL)
{
ZZLog::Error_Log("Failed to create window. Exiting...");
return false;
}
*(Display**)pDisplay = glDisplay;
@ -135,9 +138,9 @@ void GLWindow::GetWindowSize()
XUnlockDisplay(glDisplay);
// update the gl buffer size
ZeroGS::ChangeWindowSize(width, height);
UpdateWindowSize(width, height);
ZZLog::Error_Log("Resolution %dx%d. Depth %d bpp. Position (%d,%d)", width, height, depth, conf.x, conf.y);
ZZLog::Dev_Log("Resolution %dx%d. Depth %d bpp. Position (%d,%d)", width, height, depth, conf.x, conf.y);
}
void GLWindow::GetGLXVersion()
@ -252,6 +255,9 @@ void GLWindow::ToggleFullscreen()
bool GLWindow::DisplayWindow(int _width, int _height)
{
backbuffer.w = _width;
backbuffer.h = _height;
if (!CreateVisual()) return false;
/* create a GLX context */
@ -301,8 +307,8 @@ bool GLWindow::DisplayWindow(int _width, int _height)
// Presents the back buffer of the GLX window, logging first if a GL error is
// still pending from earlier calls.
void GLWindow::SwapGLBuffers()
{
    const bool errorPending = (glGetError() != GL_NO_ERROR);
    if (errorPending)
    {
        ZZLog::Debug_Log("glError before swap!");
    }

    glXSwapBuffers(glDisplay, glWindow);
    //glClear(GL_COLOR_BUFFER_BIT);
}
void GLWindow::SetTitle(char *strtitle)
@ -336,7 +342,7 @@ void GLWindow::ResizeCheck()
width = event.xconfigure.width;
height = event.xconfigure.height;
Force43Ratio();
ZeroGS::ChangeWindowSize(width, height);
UpdateWindowSize(width, height);
}
if (!fullScreen) {
@ -352,4 +358,32 @@ void GLWindow::ResizeCheck()
XUnlockDisplay(glDisplay);
}
u32 THR_KeyEvent = 0; // Value for key event processing between threads
bool THR_bShift = false;
void GLWindow::ProcessEvents()
{
FUNCLOG
// check resizing
ResizeCheck();
if (THR_KeyEvent) // This value was passed from GSKeyEvents which could be in another thread
{
int my_KeyEvent = THR_KeyEvent;
bool my_bShift = THR_bShift;
THR_KeyEvent = 0;
switch (my_KeyEvent)
{
case XK_F5:
case XK_F6:
case XK_F7:
case XK_F9:
OnFKey(my_KeyEvent - XK_F1 + 1, my_bShift);
break;
}
}
}
#endif

View File

@ -25,39 +25,13 @@
#include "Util.h"
#include "GifTransfer.h"
#include "HostMemory.h"
using namespace std;
extern float fFPS;
#define MEMORY_END 0x00400000
extern int g_LastCRC;
extern u8* g_pBasePS2Mem;
extern u8* g_pbyGSMemory;
class GSMemory
{
public:
void init();
void destroy();
u8* get();
u8* get(u32 addr);
u8* get_raw(u32 addr);
};
extern u8* g_pbyGSClut; // the temporary clut buffer
class GSClut
{
public:
void init();
void destroy();
u8* get();
u8* get(u32 addr);
u8* get_raw(u32 addr);
};
struct Vector_16F
{
@ -66,22 +40,7 @@ struct Vector_16F
// PS2 vertex
struct VertexGPU
{
// gained from XYZ2, XYZ3, XYZF2, XYZF3,
// X -- bits 0-15, Y-16-31. Z - 32-63 if no F used, 32-55 otherwise, F (fog) - 56-63
// X, Y stored in 12d3 format,
s16 x, y, f, resv0; // note: xy is 12d3
// Vertex color settings. RGB -- luminance of red/green/blue, A -- alpha. 1.0 == 0x80.
// Goes grom RGBAQ register, bits 0-7, 8-15, 16-23 and 24-31 accordingly
u32 rgba;
u32 z;
// Texture coordinates. S & T going from ST register (bits 0-31, and 32-63).
// Q goes from RGBAQ register, bits 32-63
float s, t, q;
};
// Almost same as previous, controlled by prim.fst flags
// Almost same as VertexGPU, controlled by prim.fst flags
struct Vertex
{
@ -94,6 +53,75 @@ struct Vertex
u16 u, v;
};
struct VertexGPU
{
// gained from XYZ2, XYZ3, XYZF2, XYZF3,
// X -- bits 0-15, Y-16-31. Z - 32-63 if no F used, 32-55 otherwise, F (fog) - 56-63
// X, Y stored in 12d3 format,
s16 x, y;
s16 f, resv0;
// Vertex color settings. RGB -- luminance of red/green/blue, A -- alpha. 1.0 == 0x80.
// Goes grom RGBAQ register, bits 0-7, 8-15, 16-23 and 24-31 accordingly
u32 rgba;
u32 z;
// Texture coordinates. S & T going from ST register (bits 0-31, and 32-63).
// Q goes from RGBAQ register, bits 32-63
float s, t, q;
void move_x(Vertex v, int offset)
{
x = ((((int)v.x - offset) >> 1) & 0xffff);
}
void move_y(Vertex v, int offset)
{
y = ((((int)v.y - offset) >> 1) & 0xffff);
}
void move_z(Vertex v, int mask)
{
z = (mask == 0xffff) ? min((u32)0xffff, v.z) : v.z;
}
void move_fog(Vertex v)
{
f = ((s16)(v).f << 7) | 0x7f;
}
void set_xy(s16 x1, s16 y1)
{
x = x1;
y = y1;
}
void set_xyz(s16 x1, s16 y1, u32 z1)
{
x = x1;
y = y1;
z = z1;
}
void set_st(float s1, float t1)
{
s = s1;
t = t1;
}
void set_stq(float s1, float t1, float q1)
{
s = s1;
t = t1;
q = q1;
}
void set_xyzst(s16 x1, s16 y1, u32 z1, float s1, float t1)
{
set_xyz(x1, y1, z1);
set_st(s1, t1);
}
};
extern GSconf conf;
// PSM values
@ -346,7 +374,7 @@ union tex_0_info
u32 psm_fix()
{
// printf ("psm %d\n", psm);
// ZZLog::Debug_Log("psm %d\n", psm);
if (psm == 9) return 1;
return psm;
@ -385,6 +413,10 @@ union tex_0_info
#define TEX_HIGHLIGHT 2
#define TEX_HIGHLIGHT2 3
bool SaveTexture(const char* filename, u32 textarget, u32 tex, int width, int height);
extern void SaveTex(tex0Info* ptex, int usevid);
extern char* NamedSaveTex(tex0Info* ptex, int usevid);
typedef struct
{
int lcm;
@ -471,14 +503,16 @@ typedef struct
typedef struct
{
Vertex gsvertex[3];
u32 rgba;
Vertex gsvertex[4]; // circular buffer that contains the vertex
Vertex gsTriFanVertex; // Base of triangle fan primitive vertex
u32 rgba; // global color for flat shading texture
float q;
Vertex vertexregs;
Vertex vertexregs; // accumulation buffer that collect current vertex data
int primC; // number of verts current storing
int primIndex; // current prim index
int nTriFanVert;
int nTriFanVert; // remember the index of the base of triangle fan
int new_tri_fan; // 1 if we process a new triangle fan primitive. 0 otherwise
int prac;
int dthe;
@ -512,9 +546,17 @@ typedef struct
GSClut clut_buffer;
int primNext(int inc = 1)
{
return ((primIndex + inc) % ARRAY_SIZE(gsvertex));
// Note: ArraySize(gsvertex) == 2^n => modulo is replaced by an and instruction
return ((primIndex + inc) % ArraySize(gsvertex));
}
// Returns the index `dec` slots before primIndex in the circular gsvertex
// buffer, wrapping around the start. The array size is added before the
// subtraction so the intermediate value stays non-negative while dec does not
// exceed the array size.
int primPrev(int dec = 1)
{
// Note: assert( dec <= ArraySize(gsvertex) );
// Note: ArraySize(gsvertex) == 2^n => modulo is replaced by an and instruction
return ((primIndex + (ArraySize(gsvertex) - dec)) % ArraySize(gsvertex));
}
void setRGBA(u32 r, u32 g, u32 b, u32 a)
{
rgba = (r & 0xff) |
@ -523,29 +565,39 @@ typedef struct
((a & 0xff) << 24);
}
void add_vertex(u16 x, u16 y, u32 z, u16 f)
inline void add_vertex(u16 x, u16 y, u32 z, u16 f)
{
vertexregs.x = x;
vertexregs.y = y;
vertexregs.z = z;
vertexregs.f = f;
gsvertex[primIndex] = vertexregs;
primIndex = primNext();
if (likely(!new_tri_fan)) {
gsvertex[primIndex] = vertexregs;
} else {
gsTriFanVertex = vertexregs;
new_tri_fan = false;
}
}
void add_vertex(u16 x, u16 y, u32 z)
inline void add_vertex(u16 x, u16 y, u32 z)
{
vertexregs.x = x;
vertexregs.y = y;
vertexregs.z = z;
gsvertex[primIndex] = vertexregs;
primIndex = primNext();
if (likely(!new_tri_fan)) {
gsvertex[primIndex] = vertexregs;
} else {
gsTriFanVertex = vertexregs;
new_tri_fan = false;
}
}
} GSinternal;
extern GSinternal gs;
static __forceinline u16 RGBA32to16(u32 c)
// Note the function is used in a template parameter so it must be declared extern
// Note2: In this case extern is not compatible with __forceinline so just inline it...
extern inline u16 RGBA32to16(u32 c)
{
return (u16)((((c) & 0x000000f8) >> 3) |
(((c) & 0x0000f800) >> 6) |
@ -673,7 +725,7 @@ static __forceinline int ZZOglGet_psm_TexBitsFix(u32 data)
{
//return tex_0_info(data).psm_fix();
int result = ZZOglGet_psm_TexBits(data) ;
// printf ("result %d\n", result);
// ZZLog::Debug_Log("result %d", result);
if (result == 9) result = 1;
@ -910,6 +962,21 @@ inline bool ZZOglClutStorageUnchanged(const u32* oldtex, const u32* newtex)
return ((oldtex[1] & 0x1ff10000) == (newtex[1] & 0x1ff10000));
}
// call to load CLUT data (depending on CLD)
void texClutWrite(int ctx);
// Perform clutting for flushed texture. Better check if it needs a prior call.
//
// Refreshes the CLUT-related fields of `tex0` (cbp, cpsm, csm, csa, cld) from
// the packed value `Data`, then triggers a CLUT write for context `ictx`.
inline void CluttingForFlushedTex(tex0Info* tex0, u32 Data, int ictx)
{
    tex0->cbp = ZZOglGet_cbp_TexBits(Data);
    tex0->cpsm = ZZOglGet_cpsm_TexBits(Data);
    tex0->csm = ZZOglGet_csm_TexBits(Data);
    tex0->csa = ZZOglGet_csa_TexBits(Data);
    tex0->cld = ZZOglGet_cld_TexBits(Data);

    texClutWrite(ictx);
}
// CSA and CPSM bitmask 0001 1111 0111 1000 ...
// 60 56 52
#define CPSM_CSA_BITMASK 0x1f780000

View File

@ -17,28 +17,16 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#if defined(_WIN32)
#include <windows.h>
#include "Win32.h"
#include <io.h>
#endif
#include "Util.h"
#include "GS.h"
#include "Profile.h"
#include "GLWin.h"
#include "ZZoglFlushHack.h"
#include <stdlib.h>
#include <string>
using namespace std;
#include "GS.h"
#include "Mem.h"
#include "Regs.h"
#include "Profile.h"
#include "GLWin.h"
#include "zerogs.h"
#include "targets.h"
#include "ZZoglShaders.h"
#include "ZZoglFlushHack.h"
#include "ZZoglFlushHack.h"
extern void SaveSnapshot(const char* filename);
#ifdef _MSC_VER
#pragma warning(disable:4244)
@ -46,7 +34,6 @@ using namespace std;
GLWindow GLWin;
GSinternal gs;
char GStitle[256];
GSconf conf;
int ppf, g_GSMultiThreaded, CurrentSavestate = 0;
@ -59,7 +46,7 @@ float fFPS = 0;
void (*GSirq)();
u8* g_pBasePS2Mem = NULL;
std::string s_strIniPath("inis/"); // Air's new ini path (r2361)
string s_strIniPath("inis/"); // Air's new ini path (r2361)
bool SaveStateExists = true; // We could not know save slot status before first change occured
const char* SaveStateFile = NULL; // Name of SaveFile for access check.
@ -85,21 +72,24 @@ char *libraryName = "ZZ Ogl PG ";
extern int g_nPixelShaderVer, g_nFrameRender, g_nFramesSkipped;
extern void ProcessEvents();
extern void WriteAA();
extern void WriteBilinear();
extern void ZZDestroy();
extern bool ZZCreate(int width, int height);
extern void ZZGSStateReset();
extern int ZZSave(s8* pbydata);
extern bool ZZLoad(s8* pbydata);
// switches the render target to the real target, flushes the current render targets and renders the real image
extern void RenderCRTC(int interlace);
#if defined(_WIN32) && defined(_DEBUG)
HANDLE g_hCurrentThread = NULL;
#endif
extern int VALIDATE_THRESH;
extern u32 TEXDESTROY_THRESH;
#ifdef _WIN32
HWND GShwnd = NULL;
#endif
u32 THR_KeyEvent = 0; // Value for key event processing between threads
bool THR_bShift = false;
u32 CALLBACK PS2EgetLibType()
{
return PS2E_LT_GS;
@ -130,55 +120,6 @@ void CALLBACK GSsetLogDir(const char* dir)
ZZLog::SetDir(dir);
}
// Writes one log line (via ZZLog::WriteLn) for every flag that is set in
// `hacks`. Flags that are not set produce no output.
void ReportHacks(gameHacks hacks)
{
if (hacks.texture_targs) ZZLog::WriteLn("'Texture targs' hack enabled.");
if (hacks.auto_reset) ZZLog::WriteLn("'Auto reset' hack enabled.");
if (hacks.interlace_2x) ZZLog::WriteLn("'Interlace 2x' hack enabled.");
if (hacks.texa) ZZLog::WriteLn("'Texa' hack enabled.");
if (hacks.no_target_resolve) ZZLog::WriteLn("'No target resolve' hack enabled.");
if (hacks.exact_color) ZZLog::WriteLn("Exact color hack enabled.");
if (hacks.no_color_clamp) ZZLog::WriteLn("'No color clamp' hack enabled.");
if (hacks.no_alpha_fail) ZZLog::WriteLn("'No alpha fail' hack enabled.");
if (hacks.no_depth_update) ZZLog::WriteLn("'No depth update' hack enabled.");
if (hacks.quick_resolve_1) ZZLog::WriteLn("'Quick resolve 1' enabled.");
if (hacks.no_quick_resolve) ZZLog::WriteLn("'No Quick resolve' hack enabled.");
if (hacks.no_target_clut) ZZLog::WriteLn("'No target clut' hack enabled.");
if (hacks.no_stencil) ZZLog::WriteLn("'No stencil' hack enabled.");
// NOTE(review): the flag is named vss_hack_off but the message says "enabled" -- confirm the wording.
if (hacks.vss_hack_off) ZZLog::WriteLn("VSS hack enabled.");
if (hacks.no_depth_resolve) ZZLog::WriteLn("'No depth resolve' hack enabled.");
if (hacks.full_16_bit_res) ZZLog::WriteLn("'Full 16 bit resolution' hack enabled.");
if (hacks.resolve_promoted) ZZLog::WriteLn("'Resolve promoted' hack enabled.");
if (hacks.fast_update) ZZLog::WriteLn("'Fast update' hack enabled.");
if (hacks.no_alpha_test) ZZLog::WriteLn("'No alpha test' hack enabled.");
if (hacks.disable_mrt_depth) ZZLog::WriteLn("'Disable mrt depth' hack enabled.");
if (hacks.args_32_bit) ZZLog::WriteLn("'Args 32 bit' hack enabled.");
// The path3 flag is deliberately not reported (left commented out).
//if (hacks.path3) ZZLog::WriteLn("'Path3' hack enabled.");
if (hacks.parallel_context) ZZLog::WriteLn("'Parallel context' hack enabled.");
if (hacks.xenosaga_spec) ZZLog::WriteLn("'Xenosaga spec' hack enabled.");
if (hacks.partial_pointers) ZZLog::WriteLn("'Partial pointers' hack enabled.");
if (hacks.partial_depth) ZZLog::WriteLn("'Partial depth' hack enabled.");
if (hacks.reget) ZZLog::WriteLn("Reget hack enabled.");
if (hacks.gust) ZZLog::WriteLn("Gust hack enabled.");
if (hacks.no_logz) ZZLog::WriteLn("'No logz' hack enabled.");
if (hacks.automatic_skip_draw) ZZLog::WriteLn("'Automatic skip draw' hack enabled.");
}
// Logs the hacks currently in effect: first the default set (conf.def_hacks),
// unless hacks are disabled or that set is empty, then the manually enabled
// set (conf.hacks) if it is non-empty.
void ListHacks()
{
    const bool autoHacksActive = !conf.disableHacks && (conf.def_hacks._u32 != 0);
    if (autoHacksActive)
    {
        ZZLog::WriteLn("AutoEnabling these hacks:");
        ReportHacks(conf.def_hacks);
    }

    const bool manualHacksActive = (conf.hacks._u32 != 0);
    if (manualHacksActive)
    {
        ZZLog::WriteLn("You've manually enabled these hacks:");
        ReportHacks(conf.hacks);
    }
}
void CALLBACK GSsetGameCRC(int crc, int options)
{
// build a list of function pointer for GetSkipCount (SkipDraw)
@ -190,10 +131,6 @@ void CALLBACK GSsetGameCRC(int crc, int options)
inited = true;
memset(GSC_list, 0, sizeof(GSC_list));
// for(int i = 0; i < NUMBER_OF_TITLES; i++)
// {
// GSC_list[i] = GSC_Null;
// }
GSC_list[Okami] = GSC_Okami;
GSC_list[MetalGearSolid3] = GSC_MetalGearSolid3;
@ -207,7 +144,7 @@ void CALLBACK GSsetGameCRC(int crc, int options)
GSC_list[OnePieceGrandBattle] = GSC_OnePieceGrandBattle;
GSC_list[ICO] = GSC_ICO;
GSC_list[GT4] = GSC_GT4;
//FIXME GSC_list[WildArms4] = GSC_WildArms4;
GSC_list[WildArms4] = GSC_WildArms4;
GSC_list[WildArms5] = GSC_WildArms5;
GSC_list[Manhunt2] = GSC_Manhunt2;
GSC_list[CrashBandicootWoC] = GSC_CrashBandicootWoC;
@ -296,12 +233,28 @@ void CALLBACK GSsetFrameSkip(int frameskip)
// Plugin entry point: resets the GS state.
// NOTE(review): this body contains both a call to ZeroGS::GSReset() and an
// inline reset sequence; it looks like an older and a newer implementation
// side by side -- confirm which one is meant to remain.
void CALLBACK GSreset()
{
ZeroGS::GSReset();
FUNCLOG
// Wipe the whole gs structure, then let ZZGSStateReset() do its part.
memset(&gs, 0, sizeof(gs));
ZZGSStateReset();
// Restore the fields that must not stay zero after the memset.
gs.prac = 1;
// Point the global prim pointer at the first entry of gs._prim.
prim = &gs._prim[0];
// -1 is also what TerminateHostLocal() writes; presumably "no image transfer in progress".
gs.imageTransfer = -1;
gs.q = 1;
}
// Plugin entry point: soft-resets the GIF paths selected by `mask`.
//   mask bit 0 (value 1) -> clears gs.path[0]
//   mask bit 1 (value 2) -> clears gs.path[1]
//   mask bit 2 (value 4) -> clears gs.path[2]
// NOTE(review): the body both calls ZeroGS::GSSoftReset(mask) and performs the
// reset inline; this looks like an older and a newer implementation side by
// side -- confirm which one is meant to remain.
void CALLBACK GSgifSoftReset(u32 mask)
{
ZeroGS::GSSoftReset(mask);
FUNCLOG
if (mask & 1) memset(&gs.path[0], 0, sizeof(gs.path[0]));
if (mask & 2) memset(&gs.path[1], 0, sizeof(gs.path[1]));
if (mask & 4) memset(&gs.path[2], 0, sizeof(gs.path[2]));
// Done regardless of mask: -1 is presumably "no image transfer in progress".
gs.imageTransfer = -1;
gs.q = 1;
}
s32 CALLBACK GSinit()
@ -318,60 +271,73 @@ s32 CALLBACK GSinit()
return 0;
}
#ifdef _WIN32
#ifdef _DEBUG
HANDLE g_hCurrentThread = NULL;
#endif
extern LRESULT WINAPI MsgProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam);
extern HINSTANCE hInst;
#endif
s32 CALLBACK GSopen(void *pDsp, char *Title, int multithread)
__forceinline void InitMisc()
{
FUNCLOG
bool err;
g_GSMultiThreaded = multithread;
ZZLog::WriteLn("Calling GSopen.");
#ifdef _WIN32
#ifdef _DEBUG
g_hCurrentThread = GetCurrentThread();
#endif
#endif
LoadConfig();
strcpy(GStitle, Title);
err = GLWin.CreateWindow(pDsp);
if (!err)
{
ZZLog::Error_Log("Failed to create window. Exiting...");
return -1;
}
ZZLog::GS_Log("Using %s:%d.%d.%d.", libraryName, zgsrevision, zgsbuild, zgsminor);
ZZLog::WriteLn("Creating ZZOgl window.");
if (!ZeroGS::Create(conf.width, conf.height)) return -1;
ZZLog::WriteLn("Initialization successful.");
WriteBilinear();
WriteAA();
InitProfile();
InitPath();
ResetRegs();
}
// Plugin entry point: opens the GS plugin with a window it creates itself.
//   pDsp        - passed through to GLWin.CreateWindow().
//   Title       - window title, copied into GLWin.title.
//   multithread - stored in g_GSMultiThreaded.
// Returns 0 on success, -1 if the window or the renderer could not be created.
s32 CALLBACK GSopen(void *pDsp, char *Title, int multithread)
{
	FUNCLOG

	g_GSMultiThreaded = multithread;

	ZZLog::WriteLn("Calling GSopen.");

#if defined(_WIN32) && defined(_DEBUG)
	g_hCurrentThread = GetCurrentThread();
#endif

	LoadConfig();
	strcpy(GLWin.title, Title);

	ZZLog::GS_Log("Using %s:%d.%d.%d.", libraryName, zgsrevision, zgsbuild, zgsminor);
	ZZLog::WriteLn("Creating ZZOgl window.");

	// The window comes first; the renderer is only attempted once it exists.
	if (!GLWin.CreateWindow(pDsp)) return -1;
	if (!ZZCreate(conf.width, conf.height)) return -1;

	ZZLog::WriteLn("Initialization successful.");

	InitMisc();

	ZZLog::GS_Log("GSopen finished.");
	return 0;
}
#ifdef USE_GOPEN2
// Plugin entry point: opens the GS plugin on a window obtained through
// GLWin.GetWindow() instead of creating one (compare GSopen()).
//   pDsp  - passed through to GLWin.GetWindow().
//   flags - not used by this function.
// Always sets g_GSMultiThreaded to true.
// Returns 0 on success, -1 if the window could not be obtained or the
// renderer could not be created.
s32 CALLBACK GSopen2( void* pDsp, INT32 flags )
{
	FUNCLOG

	g_GSMultiThreaded = true;

	ZZLog::WriteLn("Calling GSopen2.");

#if defined(_WIN32) && defined(_DEBUG)
	g_hCurrentThread = GetCurrentThread();
#endif

	LoadConfig();

	ZZLog::GS_Log("Using %s:%d.%d.%d.", libraryName, zgsrevision, zgsbuild, zgsminor);
	ZZLog::WriteLn("Capturing ZZOgl window.");

	if ((!GLWin.GetWindow(pDsp)) || (!ZZCreate2(conf.width, conf.height))) return -1;// Needs to be added.

	ZZLog::WriteLn("Initialization successful.");

	InitMisc();

	ZZLog::GS_Log("GSopen2 finished.");
	return 0;
}
#endif
void CALLBACK GSshutdown()
{
FUNCLOG
@ -382,7 +348,7 @@ void CALLBACK GSclose()
{
FUNCLOG
ZeroGS::Destroy(1);
ZZDestroy();
GLWin.CloseWindow();
SaveStateFile = NULL;
@ -414,7 +380,7 @@ void CALLBACK GSchangeSaveState(int newstate, const char* filename)
char str[255];
sprintf(str, "save state %d", newstate);
ZeroGS::AddMessage(str);
ZZAddMessage(str);
CurrentSavestate = newstate;
SaveStateFile = filename;
@ -448,13 +414,12 @@ void CALLBACK GSmakeSnapshot(char *path)
if ((bmpfile = fopen(filename, "wb")) == NULL)
{
char strdir[255];
sprintf(strdir, "%s", path);
#ifdef _WIN32
sprintf(strdir, "%s", path);
CreateDirectory(strdir, NULL);
#else
sprintf(strdir, "mkdir %s", path);
system(strdir);
mkdir(path, 0777);
#endif
if ((bmpfile = fopen(filename, "wb")) == NULL) return;
@ -463,7 +428,7 @@ void CALLBACK GSmakeSnapshot(char *path)
fclose(bmpfile);
// get the bits
ZeroGS::SaveSnapshot(filename);
SaveSnapshot(filename);
}
// I'll probably move this somewhere else later, but it's got a ton of dependencies.
@ -491,7 +456,7 @@ static __forceinline void SetGSTitle()
100*g_nFramesSkipped / g_nFrame,
g_nGenVars / (float)UPDATE_FRAMES, g_nTexVars / (float)UPDATE_FRAMES, g_nAlphaVars / (float)UPDATE_FRAMES,
g_nResolve / (float)UPDATE_FRAMES, (ppf&0xfffff) / (float)UPDATE_FRAMES,
ZeroGS::g_MemTargs.listTargets.size(), ZeroGS::g_MemTargs.listClearedTargets.size(), g_TransferredToGPU >> 10);
g_MemTargs.listTargets.size(), g_MemTargs.listClearedTargets.size(), g_TransferredToGPU >> 10);
//_snprintf(strtitle, 512, "%x %x", *(int*)(g_pbyGSMemory + 256 * 0x3e0c + 4), *(int*)(g_pbyGSMemory + 256 * 0x3e04 + 4));
#endif
@ -517,14 +482,14 @@ void CALLBACK GSvsync(int interlace)
g_nRealFrame++;
// !interlace? Hmmm... Fixme.
ZeroGS::RenderCRTC(!interlace);
RenderCRTC(!interlace);
ProcessEvents();
GLWin.ProcessEvents();
if (--nToNextUpdate <= 0)
{
u32 d = timeGetTime();
fFPS = UPDATE_FRAMES * 1000.0f / (float)max(d - dwTime, 1);
fFPS = UPDATE_FRAMES * 1000.0f / (float)max(d - dwTime, (u32)1);
dwTime = d;
g_nFrame += UPDATE_FRAMES;
SetGSTitle();
@ -571,7 +536,7 @@ void CALLBACK GSreadFIFO(u64 *pMem)
//ZZLog::GS_Log("Calling GSreadFIFO.");
ZeroGS::TransferLocalHost((u32*)pMem, 1);
TransferLocalHost((u32*)pMem, 1);
}
void CALLBACK GSreadFIFO2(u64 *pMem, int qwc)
@ -580,7 +545,7 @@ void CALLBACK GSreadFIFO2(u64 *pMem, int qwc)
//ZZLog::GS_Log("Calling GSreadFIFO2.");
ZeroGS::TransferLocalHost((u32*)pMem, qwc);
TransferLocalHost((u32*)pMem, qwc);
}
int CALLBACK GSsetupRecording(int start, void* pData)
@ -588,9 +553,9 @@ int CALLBACK GSsetupRecording(int start, void* pData)
FUNCLOG
if (start)
ZeroGS::StartCapture();
StartCapture();
else
ZeroGS::StopCapture();
StopCapture();
return 1;
}
@ -602,16 +567,16 @@ s32 CALLBACK GSfreeze(int mode, freezeData *data)
switch (mode)
{
case FREEZE_LOAD:
if (!ZeroGS::Load(data->data)) ZZLog::Error_Log("GS: Bad load format!");
if (!ZZLoad(data->data)) ZZLog::Error_Log("GS: Bad load format!");
g_nRealFrame += 100;
break;
case FREEZE_SAVE:
ZeroGS::Save(data->data);
ZZSave(data->data);
break;
case FREEZE_SIZE:
data->size = ZeroGS::Save(NULL);
data->size = ZZSave(NULL);
break;
default:

View File

@ -19,7 +19,6 @@
#include "GS.h"
#include "Mem.h"
#include "zerogs.h"
#include "GifTransfer.h"
#ifdef _DEBUG
@ -168,7 +167,7 @@ template<int index> void _GSgifTransfer(const u32 *pMem, u32 size)
switch (gs.imageTransfer)
{
case 0:
ZeroGS::TransferHostLocal(pMem, len * 4);
TransferHostLocal(pMem, len * 4);
break;
case 1:
@ -176,11 +175,11 @@ template<int index> void _GSgifTransfer(const u32 *pMem, u32 size)
// a GIFtag operation. They're an entirely separate process that can only be
// done through the ReverseFIFO transfer (aka ReadFIFO). --air
assert(0);
//ZeroGS::TransferLocalHost(pMem, len);
//TransferLocalHost(pMem, len);
break;
case 2:
//ZeroGS::TransferLocalLocal();
//TransferLocalLocal();
break;
case 3:

View File

@ -20,9 +20,9 @@
#ifndef GIFTRANSFER_H_INCLUDED
#define GIFTRANSFER_H_INCLUDED
#include "Util.h"
#include "GS.h"
#include "Regs.h"
#include "Util.h"
enum GIF_FLG
{

View File

@ -24,509 +24,555 @@
#include <stdlib.h>
#include "Mem.h"
#include "x86.h"
#include "zerogs.h"
#include "targets.h"
#include "ZZoglVB.h"
// flush current vertices, call before setting new registers (the main render method)
extern void Flush(int context);
u8* g_pbyGSMemory = NULL; // 4Mb GS system mem
u8* g_pbyGSMemory = NULL; // 4Mb GS system mem
void GSMemory::init()
{
const u32 mem_size = MEMORY_END + 0x10000; // leave some room for out of range accesses (saves on the checks)
void GSMemory::init()
{
const u32 mem_size = MEMORY_END + 0x10000; // leave some room for out of range accesses (saves on the checks)
// clear
g_pbyGSMemory = (u8*)_aligned_malloc(mem_size, 1024);
memset(g_pbyGSMemory, 0, mem_size);
}
// clear
g_pbyGSMemory = (u8*)_aligned_malloc(mem_size, 1024);
memset(g_pbyGSMemory, 0, mem_size);
}
void GSMemory::destroy()
{
_aligned_free(g_pbyGSMemory);
g_pbyGSMemory = NULL;
}
void GSMemory::destroy()
{
_aligned_free(g_pbyGSMemory);
g_pbyGSMemory = NULL;
}
u8* GSMemory::get() { return g_pbyGSMemory; }
// Returns the base pointer of the GS memory buffer (g_pbyGSMemory).
u8* GSMemory::get() { return g_pbyGSMemory; }
u8* GSMemory::get(u32 addr) { return &g_pbyGSMemory[addr*8]; }
u8* GSMemory::get_raw(u32 addr) { return &g_pbyGSMemory[addr]; }
// Returns a pointer into the GS memory buffer at byte offset addr * 8.
// No bounds check is performed here.
u8* GSMemory::get(u32 addr)
{
	u8* const base = g_pbyGSMemory;
	return base + addr * 8;
}
// Returns a pointer into the GS memory buffer at byte offset addr (unscaled).
// No bounds check is performed here.
u8* GSMemory::get_raw(u32 addr)
{
	u8* const base = g_pbyGSMemory;
	return base + addr;
}
u8* g_pbyGSClut = NULL; // ZZ
u8* g_pbyGSClut = NULL; // ZZ
void GSClut::init()
{
g_pbyGSClut = (u8*)_aligned_malloc(256 * 8, 1024); // need 512 alignment!
memset(g_pbyGSClut, 0, 256*8);
}
void GSClut::init()
{
g_pbyGSClut = (u8*)_aligned_malloc(256 * 8, 1024); // need 512 alignment!
memset(g_pbyGSClut, 0, 256*8);
}
void GSClut::destroy()
{
_aligned_free(g_pbyGSClut);
g_pbyGSClut = NULL;
}
void GSClut::destroy()
{
_aligned_free(g_pbyGSClut);
g_pbyGSClut = NULL;
}
u8* GSClut::get() { return g_pbyGSClut; }
// Returns the base pointer of the CLUT buffer (g_pbyGSClut).
u8* GSClut::get() { return g_pbyGSClut; }
u8* GSClut::get(u32 addr) { return &g_pbyGSClut[addr*8]; }
u8* GSClut::get_raw(u32 addr) { return &g_pbyGSClut[addr]; }
// Returns a pointer into the CLUT buffer at byte offset addr * 8.
// No bounds check is performed here.
u8* GSClut::get(u32 addr)
{
	u8* const base = g_pbyGSClut;
	return base + addr * 8;
}
// Returns a pointer into the CLUT buffer at byte offset addr (unscaled).
// No bounds check is performed here.
u8* GSClut::get_raw(u32 addr)
{
	u8* const base = g_pbyGSClut;
	return base + addr;
}
extern _getPixelAddress getPixelFun[64];
extern _getPixelAddress getPixelFun[64];
namespace ZeroGS
{
extern CRangeManager s_RangeMngr; // manages overwritten memory
extern void ResolveInRange(int start, int end);
extern CRangeManager s_RangeMngr; // manages overwritten memory
extern void ResolveInRange(int start, int end);
static vector<u8> s_vTempBuffer, s_vTransferCache;
static int gs_imageEnd = 0;
static vector<u8> s_vTempBuffer, s_vTransferCache;
static int gs_imageEnd = 0;
// From the start of monster labs. In all 3 cases, psm == 0.
// ZZogl-PG: GetRectMemAddress(0x3f4000, 0x404000, 0x0, 0x0, 0x0, 0x100, 0x40, 0x3f40, 0x100);
// ZZogl-PG: GetRectMemAddress(0x3f8000, 0x408000, 0x0, 0x0, 0x0, 0x100, 0x40, 0x3f80, 0x100);
// ZZogl-PG: GetRectMemAddress(0x3fc000, 0x40c000, 0x0, 0x0, 0x0, 0x100, 0x40, 0x3fc0, 0x100);
void GetRectMemAddress(int& start, int& end, int psm, int x, int y, int w, int h, int bp, int bw)
{
FUNCLOG
u32 bits = 0;
void GetRectMemAddress(int& start, int& end, int psm, int x, int y, int w, int h, int bp, int bw)
{
FUNCLOG
u32 bits = 0;
if (m_Blocks[psm].bpp == 0)
{
ZZLog::Error_Log("ZeroGS: Bad psm 0x%x.", psm);
start = 0;
end = MEMORY_END;
return;
}
if (m_Blocks[psm].bpp == 0)
{
ZZLog::Error_Log("ZeroGS: Bad psm 0x%x.", psm);
start = 0;
end = MEMORY_END;
return;
}
if (PSMT_ISZTEX(psm))
{
// Somehow, I doubt this code is right. I'll have to look into it. For the moment, I'm keeping it the
// way it was. --arcum42
if (PSMT_ISZTEX(psm))
{
// Somehow, I doubt this code is right. I'll have to look into it. For the moment, I'm keeping it the
// way it was. --arcum42
const BLOCK& b = m_Blocks[psm];
const BLOCK& b = m_Blocks[psm];
bw = (bw + b.width - 1) / b.width;
start = bp * 256 + ((y / b.height) * bw + (x / b.width)) * 0x2000;
end = bp * 256 + (((y + h - 1) / b.height) * bw + (x + w + b.width - 1) / b.width) * 0x2000;
return;
}
bw = (bw + b.width - 1) / b.width;
start = bp * 256 + ((y / b.height) * bw + (x / b.width)) * 0x2000;
end = bp * 256 + (((y + h - 1) / b.height) * bw + (x + w + b.width - 1) / b.width) * 0x2000;
return;
}
bits = PSMT_BITS_NUM(psm);
start = getPixelFun[psm](x, y, bp, bw);
end = getPixelFun[psm](x + w - 1, y + h - 1, bp, bw) + 1;
bits = PSMT_BITS_NUM(psm);
start = getPixelFun[psm](x, y, bp, bw);
end = getPixelFun[psm](x + w - 1, y + h - 1, bp, bw) + 1;
if (bits > 0)
{
start *= bits;
end *= bits;
}
else
{
// This is what it used to do, which doesn't seem right.
// Keeping it for reference, in case removing it breaks anything.
if (bits > 0)
{
start *= bits;
end *= bits;
}
else
{
// This is what it used to do, which doesn't seem right.
// Keeping it for reference, in case removing it breaks anything.
//int newx = ((x + w - 1 + 31) & ~31) - 1;
//int newy = ((y + h - 1 + 15) & ~15) - 1;
//start = getPixelAddress4(x, y, bp, bw) / 2;
//end = (getPixelAddress4(max(newx, x), max(newy, y), bp, bw) + 2) / 2;
//int newx = ((x + w - 1 + 31) & ~31) - 1;
//int newy = ((y + h - 1 + 15) & ~15) - 1;
//start = getPixelAddress4(x, y, bp, bw) / 2;
//end = (getPixelAddress4(max(newx, x), max(newy, y), bp, bw) + 2) / 2;
start /= 2;
end /= 2;
}
}
start /= 2;
end /= 2;
}
}
void InitTransferHostLocal()
{
FUNCLOG
void InitTransferHostLocal()
{
FUNCLOG
#if defined(ZEROGS_DEVBUILD)
if (gs.trxpos.dx + gs.imageWnew > gs.dstbuf.bw)
ZZLog::Debug_Log("Transfer error, width exceeded.");
#endif
#if defined(_DEBUG)
// Xenosaga 1.
if (gs.trxpos.dx + gs.imageWnew > gs.dstbuf.bw)
ZZLog::Debug_Log("Transfer error, width exceeded. (0x%x > 0X%x)", gs.trxpos.dx + gs.imageWnew, gs.dstbuf.bw);
#endif
//bool bHasFlushed = false;
//bool bHasFlushed = false;
gs.imageX = gs.trxpos.dx;
gs.imageY = gs.trxpos.dy;
gs.imageX = gs.trxpos.dx;
gs.imageY = gs.trxpos.dy;
gs.imageEndX = gs.imageX + gs.imageWnew;
gs.imageEndY = gs.imageY + gs.imageHnew;
gs.imageEndX = gs.imageX + gs.imageWnew;
gs.imageEndY = gs.imageY + gs.imageHnew;
assert(gs.imageEndX < 2048 && gs.imageEndY < 2048);
assert(gs.imageEndX < 2048 && gs.imageEndY < 2048);
// This needs to be looked into, since psm should *not* be 63.
// hack! Viewtiful Joe
if (gs.dstbuf.psm == 63) gs.dstbuf.psm = 0;
// This needs to be looked into, since psm should *not* be 63.
// hack! Viewtiful Joe
if (gs.dstbuf.psm == 63) gs.dstbuf.psm = 0;
int start, end;
int start, end;
GetRectMemAddress(start, end, gs.dstbuf.psm, gs.trxpos.dx, gs.trxpos.dy, gs.imageWnew, gs.imageHnew, gs.dstbuf.bp, gs.dstbuf.bw);
GetRectMemAddress(start, end, gs.dstbuf.psm, gs.trxpos.dx, gs.trxpos.dy, gs.imageWnew, gs.imageHnew, gs.dstbuf.bp, gs.dstbuf.bw);
if (end > MEMORY_END)
{
ZZLog::Warn_Log("Init host local out of bounds! (end == 0x%x)", end);
//gs.imageTransfer = -1;
end = MEMORY_END;
}
if (end > MEMORY_END)
{
// Monster Lab - the screwed up title screen
// Init host local out of bounds! (end == 0x404000)
// Init host local out of bounds! (end == 0x408000)
// Init host local out of bounds! (end == 0x40c000)
// MEMORY_END is 0x400000...
gs_imageEnd = end;
ZZLog::Warn_Log("Init host local out of bounds! (end == 0x%x)", end);
//gs.imageTransfer = -1;
end = MEMORY_END;
}
if (vb[0].nCount > 0) Flush(0);
if (vb[1].nCount > 0) Flush(1);
gs_imageEnd = end;
//ZZLog::Prim_Log("trans: bp:%x x:%x y:%x w:%x h:%x\n", gs.dstbuf.bp, gs.trxpos.dx, gs.trxpos.dy, gs.imageWnew, gs.imageHnew);
}
if (vb[0].nCount > 0) Flush(0);
if (vb[1].nCount > 0) Flush(1);
void TransferHostLocal(const void* pbyMem, u32 nQWordSize)
{
FUNCLOG
//ZZLog::Prim_Log("trans: bp:%x x:%x y:%x w:%x h:%x\n", gs.dstbuf.bp, gs.trxpos.dx, gs.trxpos.dy, gs.imageWnew, gs.imageHnew);
}
int start, end;
void TransferHostLocal(const void* pbyMem, u32 nQWordSize)
{
FUNCLOG
GetRectMemAddress(start, end, gs.dstbuf.psm, gs.imageX, gs.imageY, gs.imageWnew, gs.imageHnew, gs.dstbuf.bp, gs.dstbuf.bw);
int start, end;
assert(start < gs_imageEnd);
end = gs_imageEnd;
GetRectMemAddress(start, end, gs.dstbuf.psm, gs.imageX, gs.imageY, gs.imageWnew, gs.imageHnew, gs.dstbuf.bp, gs.dstbuf.bw);
// sometimes games can decompress to alpha channel of render target only, in this case
// do a resolve right away. wolverine x2
if (((gs.dstbuf.psm == PSMT8H) || (gs.dstbuf.psm == PSMT4HL) || (gs.dstbuf.psm == PSMT4HH)) && !(conf.settings().gust))
{
list<CRenderTarget*> listTransmissionUpdateTargs;
s_RTs.GetTargs(start, end, listTransmissionUpdateTargs);
assert(start < gs_imageEnd);
end = gs_imageEnd;
for (list<CRenderTarget*>::iterator it = listTransmissionUpdateTargs.begin(); it != listTransmissionUpdateTargs.end(); ++it)
{
CRenderTarget* ptarg = *it;
// sometimes games can decompress to alpha channel of render target only, in this case
// do a resolve right away. wolverine x2
if (((gs.dstbuf.psm == PSMT8H) || (gs.dstbuf.psm == PSMT4HL) || (gs.dstbuf.psm == PSMT4HH)) && !(conf.settings().gust))
{
list<CRenderTarget*> listTransmissionUpdateTargs;
s_RTs.GetTargs(start, end, listTransmissionUpdateTargs);
if ((ptarg->status & CRenderTarget::TS_Virtual)) continue;
for (list<CRenderTarget*>::iterator it = listTransmissionUpdateTargs.begin(); it != listTransmissionUpdateTargs.end(); ++it)
{
CRenderTarget* ptarg = *it;
//ZZLog::Error_Log("Resolving to alpha channel.");
ptarg->Resolve();
}
}
if ((ptarg->status & CRenderTarget::TS_Virtual)) continue;
s_RangeMngr.Insert(start, min(end, start + (int)nQWordSize*16));
//ZZLog::Error_Log("Resolving to alpha channel.");
ptarg->Resolve();
}
}
const u8* porgend = (const u8*)pbyMem + 4 * nQWordSize;
s_RangeMngr.Insert(start, min(end, start + (int)nQWordSize*16));
if (s_vTransferCache.size() > 0)
{
const u8* porgend = (const u8*)pbyMem + 4 * nQWordSize;
int imagecache = s_vTransferCache.size();
s_vTempBuffer.resize(imagecache + nQWordSize*4);
memcpy(&s_vTempBuffer[0], &s_vTransferCache[0], imagecache);
memcpy(&s_vTempBuffer[imagecache], pbyMem, nQWordSize*4);
if (s_vTransferCache.size() > 0)
{
pbyMem = (const void*) & s_vTempBuffer[0];
porgend = &s_vTempBuffer[0] + s_vTempBuffer.size();
int imagecache = s_vTransferCache.size();
s_vTempBuffer.resize(imagecache + nQWordSize*4);
memcpy(&s_vTempBuffer[0], &s_vTransferCache[0], imagecache);
memcpy(&s_vTempBuffer[imagecache], pbyMem, nQWordSize*4);
int wordinc = imagecache / 4;
pbyMem = (const void*) & s_vTempBuffer[0];
porgend = &s_vTempBuffer[0] + s_vTempBuffer.size();
if ((nQWordSize * 4 + imagecache) / 3 == ((nQWordSize + wordinc) * 4) / 3)
{
// can use the data
nQWordSize += wordinc;
}
}
int wordinc = imagecache / 4;
int leftover = m_Blocks[gs.dstbuf.psm].TransferHostLocal(pbyMem, nQWordSize);
if ((nQWordSize * 4 + imagecache) / 3 == ((nQWordSize + wordinc) * 4) / 3)
{
// can use the data
nQWordSize += wordinc;
}
}
if (leftover > 0)
{
// copy the last gs.image24bitOffset to the cache
s_vTransferCache.resize(leftover);
memcpy(&s_vTransferCache[0], porgend - leftover, leftover);
}
else
{
s_vTransferCache.resize(0);
}
int leftover = m_Blocks[gs.dstbuf.psm].TransferHostLocal(pbyMem, nQWordSize);
#if defined(_DEBUG)
if (g_bSaveTrans)
{
tex0Info t;
t.tbp0 = gs.dstbuf.bp;
t.tw = gs.imageWnew;
t.th = gs.imageHnew;
t.tbw = gs.dstbuf.bw;
t.psm = gs.dstbuf.psm;
SaveTex(&t, 0);
}
if (leftover > 0)
{
// copy the last gs.image24bitOffset to the cache
s_vTransferCache.resize(leftover);
memcpy(&s_vTransferCache[0], porgend - leftover, leftover);
}
else
{
s_vTransferCache.resize(0);
}
#endif
}
#if defined(_DEBUG)
if (g_bSaveTrans)
{
tex0Info t;
t.tbp0 = gs.dstbuf.bp;
t.tw = gs.imageWnew;
t.th = gs.imageHnew;
t.tbw = gs.dstbuf.bw;
t.psm = gs.dstbuf.psm;
SaveTex(&t, 0);
}
void InitTransferLocalHost()
{
FUNCLOG
assert(gs.trxpos.sx + gs.imageWnew <= 2048 && gs.trxpos.sy + gs.imageHnew <= 2048);
#endif
}
#if defined(ZEROGS_DEVBUILD)
if (gs.trxpos.sx + gs.imageWnew > gs.srcbuf.bw)
ZZLog::Debug_Log("Transfer error, width exceeded.");
#endif
void InitTransferLocalHost()
{
FUNCLOG
assert(gs.trxpos.sx + gs.imageWnew <= 2048 && gs.trxpos.sy + gs.imageHnew <= 2048);
gs.imageX = gs.trxpos.sx;
gs.imageY = gs.trxpos.sy;
#if defined(_DEBUG)
if (gs.trxpos.sx + gs.imageWnew > gs.srcbuf.bw)
ZZLog::Debug_Log("Transfer error, width exceeded. (0x%x > 0x%x)", gs.trxpos.sx + gs.imageWnew, gs.srcbuf.bw);
#endif
gs.imageEndX = gs.imageX + gs.imageWnew;
gs.imageEndY = gs.imageY + gs.imageHnew;
gs.imageX = gs.trxpos.sx;
gs.imageY = gs.trxpos.sy;
s_vTransferCache.resize(0);
gs.imageEndX = gs.imageX + gs.imageWnew;
gs.imageEndY = gs.imageY + gs.imageHnew;
int start, end;
s_vTransferCache.resize(0);
GetRectMemAddress(start, end, gs.srcbuf.psm, gs.trxpos.sx, gs.trxpos.sy, gs.imageWnew, gs.imageHnew, gs.srcbuf.bp, gs.srcbuf.bw);
int start, end;
ResolveInRange(start, end);
}
GetRectMemAddress(start, end, gs.srcbuf.psm, gs.trxpos.sx, gs.trxpos.sy, gs.imageWnew, gs.imageHnew, gs.srcbuf.bp, gs.srcbuf.bw);
template <class T>
void TransferLocalHost(void* pbyMem, u32 nQWordSize, int& x, int& y, u8 *pstart)
{
_readPixel_0 rp = readPixelFun_0[gs.srcbuf.psm];
ResolveInRange(start, end);
}
int i = x, j = y;
T* pbuf = (T*)pbyMem;
u32 nSize = nQWordSize * 16 / sizeof(T);
template <class T>
void TransferLocalHost(void* pbyMem, u32 nQWordSize, int& x, int& y, u8 *pstart)
{
_readPixel_0 rp = readPixelFun_0[gs.srcbuf.psm];
for (; i < gs.imageEndY; ++i)
{
for (; j < gs.imageEndX && nSize > 0; ++j, --nSize)
{
*pbuf++ = rp(pstart, j % 2048, i % 2048, gs.srcbuf.bw);
}
int i = x, j = y;
T* pbuf = (T*)pbyMem;
u32 nSize = nQWordSize * 16 / sizeof(T);
if (j >= gs.imageEndX)
{
assert(j == gs.imageEndX);
j = gs.trxpos.sx;
}
else
{
assert(nSize == 0);
break;
}
}
}
for (; i < gs.imageEndY; ++i)
{
for (; j < gs.imageEndX && nSize > 0; ++j, --nSize)
{
*pbuf++ = rp(pstart, j % 2048, i % 2048, gs.srcbuf.bw);
}
void TransferLocalHost_24(void* pbyMem, u32 nQWordSize, int& x, int& y, u8 *pstart)
{
_readPixel_0 rp = readPixelFun_0[gs.srcbuf.psm];
if (j >= gs.imageEndX)
{
assert(j == gs.imageEndX);
j = gs.trxpos.sx;
}
else
{
assert(nSize == 0);
break;
}
}
}
int i = x, j = y;
u8* pbuf = (u8*)pbyMem;
u32 nSize = nQWordSize * 16 / 3;
void TransferLocalHost_24(void* pbyMem, u32 nQWordSize, int& x, int& y, u8 *pstart)
{
_readPixel_0 rp = readPixelFun_0[gs.srcbuf.psm];
for (; i < gs.imageEndY; ++i)
{
for (; j < gs.imageEndX && nSize > 0; ++j, --nSize)
{
u32 p = rp(pstart, j % 2048, i % 2048, gs.srcbuf.bw);
pbuf[0] = (u8)p;
pbuf[1] = (u8)(p >> 8);
pbuf[2] = (u8)(p >> 16);
pbuf += 3;
}
int i = x, j = y;
u8* pbuf = (u8*)pbyMem;
u32 nSize = nQWordSize * 16 / 3;
if (j >= gs.imageEndX)
{
assert(j == gs.imageEndX);
j = gs.trxpos.sx;
}
else
{
assert(nSize == 0);
break;
}
}
}
for (; i < gs.imageEndY; ++i)
{
for (; j < gs.imageEndX && nSize > 0; ++j, --nSize)
{
u32 p = rp(pstart, j % 2048, i % 2048, gs.srcbuf.bw);
pbuf[0] = (u8)p;
pbuf[1] = (u8)(p >> 8);
pbuf[2] = (u8)(p >> 16);
pbuf += 3;
}
// left/right, top/down
void TransferLocalHost(void* pbyMem, u32 nQWordSize)
{
FUNCLOG
assert(gs.imageTransfer == 1);
if (j >= gs.imageEndX)
{
assert(j == gs.imageEndX);
j = gs.trxpos.sx;
}
else
{
assert(nSize == 0);
break;
}
}
}
u8* pstart = g_pbyGSMemory + 256 * gs.srcbuf.bp;
// left/right, top/down
void TransferLocalHost(void* pbyMem, u32 nQWordSize)
{
FUNCLOG
assert(gs.imageTransfer == 1);
switch(PSMT_BITMODE(gs.srcbuf.psm))
{
case 0: TransferLocalHost<u32>(pbyMem, nQWordSize, gs.imageY, gs.imageX, pstart); break;
case 1: TransferLocalHost_24(pbyMem, nQWordSize, gs.imageY, gs.imageX, pstart); break;
case 2: TransferLocalHost<u16>(pbyMem, nQWordSize, gs.imageY, gs.imageX, pstart); break;
case 3: TransferLocalHost<u8>(pbyMem, nQWordSize, gs.imageY, gs.imageX, pstart); break;
default: assert(0); break;
}
u8* pstart = g_pbyGSMemory + 256 * gs.srcbuf.bp;
if (gs.imageY >= gs.imageEndY)
{
ZZLog::Error_Log("gs.imageY >= gs.imageEndY!");
assert(gs.imageY == gs.imageEndY);
gs.imageTransfer = -1;
}
}
switch(PSMT_BITMODE(gs.srcbuf.psm))
{
case 0:
TransferLocalHost<u32>(pbyMem, nQWordSize, gs.imageY, gs.imageX, pstart);
break;
case 1:
TransferLocalHost_24(pbyMem, nQWordSize, gs.imageY, gs.imageX, pstart);
break;
case 2:
TransferLocalHost<u16>(pbyMem, nQWordSize, gs.imageY, gs.imageX, pstart);
break;
case 3:
TransferLocalHost<u8>(pbyMem, nQWordSize, gs.imageY, gs.imageX, pstart);
break;
default:
assert(0);
break;
}
if (gs.imageY >= gs.imageEndY)
{
ZZLog::Error_Log("gs.imageY >= gs.imageEndY!");
assert(gs.imageY == gs.imageEndY);
gs.imageTransfer = -1;
}
}
__forceinline void _TransferLocalLocal()
{
//ZZLog::Error_Log("TransferLocalLocal(0x%x, 0x%x)", gs.srcbuf.psm, gs.dstbuf.psm);
_writePixel_0 wp = writePixelFun_0[gs.srcbuf.psm];
_readPixel_0 rp = readPixelFun_0[gs.dstbuf.psm];
u8* pSrcBuf = g_pbyGSMemory + gs.srcbuf.bp * 256;
u8* pDstBuf = g_pbyGSMemory + gs.dstbuf.bp * 256;
u32 widthlimit = 4;
u32 maxX = gs.trxpos.sx + gs.imageWnew;
u32 maxY = gs.trxpos.sy + gs.imageHnew;
//ZZLog::Error_Log("TransferLocalLocal(0x%x, 0x%x)", gs.srcbuf.psm, gs.dstbuf.psm);
_writePixel_0 wp = writePixelFun_0[gs.srcbuf.psm];
_readPixel_0 rp = readPixelFun_0[gs.dstbuf.psm];
u8* pSrcBuf = g_pbyGSMemory + gs.srcbuf.bp * 256;
u8* pDstBuf = g_pbyGSMemory + gs.dstbuf.bp * 256;
u32 widthlimit = 4;
u32 maxX = gs.trxpos.sx + gs.imageWnew;
u32 maxY = gs.trxpos.sy + gs.imageHnew;
if (PSMT_BITMODE(gs.srcbuf.psm) == 0) widthlimit = 2;
if ((gs.imageWnew & widthlimit) != 0) return;
if (PSMT_BITMODE(gs.srcbuf.psm) == 0) widthlimit = 2;
if ((gs.imageWnew & widthlimit) != 0) return;
for(int i = gs.trxpos.sy, i2 = gs.trxpos.dy; i < maxY; i++, i2++)
{
for(int j = gs.trxpos.sx, j2 = gs.trxpos.dx; j < maxX; j += widthlimit, j2 += widthlimit)
{
wp(pDstBuf, j2%2048, i2%2048,
rp(pSrcBuf, j%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw);
for(int i = gs.trxpos.sy, i2 = gs.trxpos.dy; i < maxY; i++, i2++)
{
for(int j = gs.trxpos.sx, j2 = gs.trxpos.dx; j < maxX; j += widthlimit, j2 += widthlimit)
{
wp(pDstBuf, j2%2048, i2%2048,
rp(pSrcBuf, j%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw);
wp(pDstBuf, (j2+1)%2048, i2%2048,
rp(pSrcBuf, (j+1)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw);
wp(pDstBuf, (j2+1)%2048, i2%2048,
rp(pSrcBuf, (j+1)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw);
if (widthlimit > 2)
{
// Then widthlimit == 4.
wp(pDstBuf, (j2+2)%2048, i2%2048,
rp(pSrcBuf, (j+2)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw);
if (widthlimit > 2)
{
// Then widthlimit == 4.
wp(pDstBuf, (j2+2)%2048, i2%2048,
rp(pSrcBuf, (j+2)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw);
wp(pDstBuf, (j2+3)%2048, i2%2048,
rp(pSrcBuf, (j+3)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw);
}
}
}
wp(pDstBuf, (j2+3)%2048, i2%2048,
rp(pSrcBuf, (j+3)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw);
}
}
}
}
__forceinline void _TransferLocalLocal_4()
{
//ZZLog::Error_Log("TransferLocalLocal_4(0x%x, 0x%x)", gs.srcbuf.psm, gs.dstbuf.psm);
_getPixelAddress_0 gsp = getPixelFun_0[gs.srcbuf.psm];
_getPixelAddress_0 gdp = getPixelFun_0[gs.dstbuf.psm];
u8* pSrcBuf = g_pbyGSMemory + gs.srcbuf.bp * 256;
u8* pDstBuf = g_pbyGSMemory + gs.dstbuf.bp * 256;
u32 maxX = gs.trxpos.sx + gs.imageWnew;
u32 maxY = gs.trxpos.sy + gs.imageHnew;
//ZZLog::Error_Log("TransferLocalLocal_4(0x%x, 0x%x)", gs.srcbuf.psm, gs.dstbuf.psm);
_getPixelAddress_0 gsp = getPixelFun_0[gs.srcbuf.psm];
_getPixelAddress_0 gdp = getPixelFun_0[gs.dstbuf.psm];
u8* pSrcBuf = g_pbyGSMemory + gs.srcbuf.bp * 256;
u8* pDstBuf = g_pbyGSMemory + gs.dstbuf.bp * 256;
u32 maxX = gs.trxpos.sx + gs.imageWnew;
u32 maxY = gs.trxpos.sy + gs.imageHnew;
assert((gs.imageWnew % 8) == 0);
assert((gs.imageWnew % 8) == 0);
for(int i = gs.trxpos.sy, i2 = gs.trxpos.dy; i < maxY; ++i, ++i2)
{
for(int j = gs.trxpos.sx, j2 = gs.trxpos.dx; j < maxX; j += 8, j2 += 8)
{
/* NOTE: the 2 consecutive 4-bit values are NOT in the same byte */
u32 read = gsp(j%2048, i%2048, gs.srcbuf.bw);
u32 write = gdp(j2%2048, i2%2048, gs.dstbuf.bw);
pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f);
for(int i = gs.trxpos.sy, i2 = gs.trxpos.dy; i < maxY; ++i, ++i2)
{
for(int j = gs.trxpos.sx, j2 = gs.trxpos.dx; j < maxX; j += 8, j2 += 8)
{
/* NOTE: the 2 consecutive 4-bit values are NOT in the same byte */
u32 read = gsp(j%2048, i%2048, gs.srcbuf.bw);
u32 write = gdp(j2%2048, i2%2048, gs.dstbuf.bw);
pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f);
read = gsp((j+1)%2048, i%2048, gs.srcbuf.bw);
write = gdp((j2+1)%2048, i2%2048, gs.dstbuf.bw);
pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0);
read = gsp((j+1)%2048, i%2048, gs.srcbuf.bw);
write = gdp((j2+1)%2048, i2%2048, gs.dstbuf.bw);
pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0);
read = gsp((j+2)%2048, i%2048, gs.srcbuf.bw);
write = gdp((j2+2)%2048, i2%2048, gs.dstbuf.bw);
pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f);
read = gsp((j+2)%2048, i%2048, gs.srcbuf.bw);
write = gdp((j2+2)%2048, i2%2048, gs.dstbuf.bw);
pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f);
read = gsp((j+3)%2048, i%2048, gs.srcbuf.bw);
write = gdp((j2+3)%2048, i2%2048, gs.dstbuf.bw);
pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0);
read = gsp((j+3)%2048, i%2048, gs.srcbuf.bw);
write = gdp((j2+3)%2048, i2%2048, gs.dstbuf.bw);
pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0);
read = gsp((j+4)%2048, i%2048, gs.srcbuf.bw);
write = gdp((j2+4)%2048, i2%2048, gs.dstbuf.bw);
pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f);
read = gsp((j+4)%2048, i%2048, gs.srcbuf.bw);
write = gdp((j2+4)%2048, i2%2048, gs.dstbuf.bw);
pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f);
read = gsp((j+5)%2048, i%2048, gs.srcbuf.bw);
write = gdp((j2+5)%2048, i2%2048, gs.dstbuf.bw);
pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0);
read = gsp((j+5)%2048, i%2048, gs.srcbuf.bw);
write = gdp((j2+5)%2048, i2%2048, gs.dstbuf.bw);
pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0);
read = gsp((j+6)%2048, i%2048, gs.srcbuf.bw);
write = gdp((j2+6)%2048, i2%2048, gs.dstbuf.bw);
pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f);
read = gsp((j+6)%2048, i%2048, gs.srcbuf.bw);
write = gdp((j2+6)%2048, i2%2048, gs.dstbuf.bw);
pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f);
read = gsp((j+7)%2048, i%2048, gs.srcbuf.bw);
write = gdp((j2+7)%2048, i2%2048, gs.dstbuf.bw);
pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0);
}
}
read = gsp((j+7)%2048, i%2048, gs.srcbuf.bw);
write = gdp((j2+7)%2048, i2%2048, gs.dstbuf.bw);
pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0);
}
}
}
// dir depends on trxpos.dirx & trxpos.diry
void TransferLocalLocal()
{
FUNCLOG
// dir depends on trxpos.dirx & trxpos.diry
void TransferLocalLocal()
{
FUNCLOG
//ZZLog::Error_Log("I'z in your code, transferring your memory...");
assert(gs.imageTransfer == 2);
assert(gs.trxpos.sx + gs.imageWnew < 2048 && gs.trxpos.sy + gs.imageHnew < 2048);
assert(gs.trxpos.dx + gs.imageWnew < 2048 && gs.trxpos.dy + gs.imageHnew < 2048);
assert((gs.srcbuf.psm&0x7) == (gs.dstbuf.psm&0x7));
//ZZLog::Error_Log("I'z in your code, transferring your memory...");
assert(gs.imageTransfer == 2);
assert(gs.trxpos.sx + gs.imageWnew < 2048 && gs.trxpos.sy + gs.imageHnew < 2048);
assert(gs.trxpos.dx + gs.imageWnew < 2048 && gs.trxpos.dy + gs.imageHnew < 2048);
assert((gs.srcbuf.psm&0x7) == (gs.dstbuf.psm&0x7));
if (gs.trxpos.sx + gs.imageWnew > gs.srcbuf.bw)
ZZLog::Debug_Log("Transfer error, src width exceeded.");
if (gs.trxpos.sx + gs.imageWnew > gs.srcbuf.bw)
ZZLog::Debug_Log("Transfer error, src width exceeded.(0x%x > 0x%x)", gs.trxpos.sx + gs.imageWnew, gs.srcbuf.bw);
if (gs.trxpos.dx + gs.imageWnew > gs.dstbuf.bw)
ZZLog::Debug_Log("Transfer error, dst width exceeded.");
if (gs.trxpos.dx + gs.imageWnew > gs.dstbuf.bw)
ZZLog::Debug_Log("Transfer error, dst width exceeded.(0x%x > 0x%x)", gs.trxpos.dx + gs.imageWnew, gs.dstbuf.bw);
int srcstart, srcend, dststart, dstend;
int srcstart, srcend, dststart, dstend;
GetRectMemAddress(srcstart, srcend, gs.srcbuf.psm, gs.trxpos.sx, gs.trxpos.sy, gs.imageWnew, gs.imageHnew, gs.srcbuf.bp, gs.srcbuf.bw);
GetRectMemAddress(dststart, dstend, gs.dstbuf.psm, gs.trxpos.dx, gs.trxpos.dy, gs.imageWnew, gs.imageHnew, gs.dstbuf.bp, gs.dstbuf.bw);
GetRectMemAddress(srcstart, srcend, gs.srcbuf.psm, gs.trxpos.sx, gs.trxpos.sy, gs.imageWnew, gs.imageHnew, gs.srcbuf.bp, gs.srcbuf.bw);
GetRectMemAddress(dststart, dstend, gs.dstbuf.psm, gs.trxpos.dx, gs.trxpos.dy, gs.imageWnew, gs.imageHnew, gs.dstbuf.bp, gs.dstbuf.bw);
// resolve the targs
ResolveInRange(srcstart, srcend);
// resolve the targs
ResolveInRange(srcstart, srcend);
list<CRenderTarget*> listTargs;
list<CRenderTarget*> listTargs;
s_RTs.GetTargs(dststart, dstend, listTargs);
s_RTs.GetTargs(dststart, dstend, listTargs);
for (list<CRenderTarget*>::iterator it = listTargs.begin(); it != listTargs.end(); ++it)
{
if (!((*it)->status & CRenderTarget::TS_Virtual))
{
(*it)->Resolve();
//(*it)->status |= CRenderTarget::TS_NeedUpdate;
}
}
for (list<CRenderTarget*>::iterator it = listTargs.begin(); it != listTargs.end(); ++it)
{
if (!((*it)->status & CRenderTarget::TS_Virtual))
{
(*it)->Resolve();
//(*it)->status |= CRenderTarget::TS_NeedUpdate;
}
}
if (PSMT_BITMODE(gs.srcbuf.psm) != 4)
{
_TransferLocalLocal();
}
else
{
_TransferLocalLocal_4();
}
if (PSMT_BITMODE(gs.srcbuf.psm) != 4)
{
_TransferLocalLocal();
}
else
{
_TransferLocalLocal_4();
}
g_MemTargs.ClearRange(dststart, dstend);
g_MemTargs.ClearRange(dststart, dstend);
#ifdef ZEROGS_DEVBUILD
#ifdef ZEROGS_DEVBUILD
if (g_bSaveTrans)
{
tex0Info t;
t.tbp0 = gs.dstbuf.bp;
t.tw = gs.imageWnew;
t.th = gs.imageHnew;
t.tbw = gs.dstbuf.bw;
t.psm = gs.dstbuf.psm;
SaveTex(&t, 0);
if (g_bSaveTrans)
{
tex0Info t;
t.tbp0 = gs.dstbuf.bp;
t.tw = gs.imageWnew;
t.th = gs.imageHnew;
t.tbw = gs.dstbuf.bw;
t.psm = gs.dstbuf.psm;
SaveTex(&t, 0);
t.tbp0 = gs.srcbuf.bp;
t.tw = gs.imageWnew;
t.th = gs.imageHnew;
t.tbw = gs.srcbuf.bw;
t.psm = gs.srcbuf.psm;
SaveTex(&t, 0);
}
t.tbp0 = gs.srcbuf.bp;
t.tw = gs.imageWnew;
t.th = gs.imageHnew;
t.tbw = gs.srcbuf.bw;
t.psm = gs.srcbuf.psm;
SaveTex(&t, 0);
}
#endif
}
#endif
}
// Hook invoked when a local->host transfer ends (direction per the function name).
// Apart from the FUNCLOG tracing macro it currently does nothing; the log call
// below is intentionally left disabled.
__forceinline void TerminateLocalHost()
{
FUNCLOG
//ZZLog::Error_Log("Terminate Local Host!");
}
// Hook invoked when a host->local transfer ends (direction per the function name).
// Resets gs.imageTransfer to -1.
// NOTE(review): -1 looks like the "no transfer in progress" value -- confirm against
// the code that reads gs.imageTransfer.
__forceinline void TerminateHostLocal()
{
FUNCLOG
gs.imageTransfer = -1;
}
}

View File

@ -0,0 +1,114 @@
/* ZZ Open GL graphics plugin
* Copyright (c)2009-2010 zeydlitz@gmail.com, arcum42@gmail.com
* Based on Zerofrog's ZeroGS KOSMOS (c)2005-2008
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#ifndef HOSTMEMORY_H_INCLUDED
#define HOSTMEMORY_H_INCLUDED
#include "GLWin.h"
#define MEMORY_END 0x00400000
extern int GPU_TEXWIDTH;
extern u8* g_pBasePS2Mem;
extern u8* g_pbyGSMemory;
// Accessor interface for the GS memory buffer. Only the declarations live in this
// header; the implementations are not visible here.
// NOTE(review): presumably a wrapper around g_pbyGSMemory -- confirm in the .cpp.
class GSMemory
{
public:
// Set up / tear down the underlying storage (exact behaviour defined elsewhere).
void init();
void destroy();
// Pointer to the buffer; the overloads take an offset.
// NOTE(review): the difference between get(addr) and get_raw(addr) is not
// visible from this header -- document it next to the implementation.
u8* get();
u8* get(u32 addr);
u8* get_raw(u32 addr);
};
extern u8* g_pbyGSClut; // the temporary clut buffer
// Accessor interface for the CLUT buffer. Only the declarations live in this
// header; the implementations are not visible here.
// NOTE(review): presumably a wrapper around g_pbyGSClut -- confirm in the .cpp.
class GSClut
{
public:
// Set up / tear down the underlying storage (exact behaviour defined elsewhere).
void init();
void destroy();
// Pointer to the buffer; the overloads take an offset.
// NOTE(review): the difference between get(addr) and get_raw(addr) is not
// visible from this header -- document it next to the implementation.
u8* get();
u8* get(u32 addr);
u8* get_raw(u32 addr);
};
class ZeroGSInit
{
public:
ZeroGSInit()
{
const u32 mem_size = MEMORY_END + 0x10000; // leave some room for out of range accesses (saves on the checks)
// clear
g_pbyGSMemory = (u8*)_aligned_malloc(mem_size, 1024);
memset(g_pbyGSMemory, 0, mem_size);
g_pbyGSClut = (u8*)_aligned_malloc(256 * 8, 1024); // need 512 alignment!
memset(g_pbyGSClut, 0, 256*8);
memset(&GLWin, 0, sizeof(GLWin));
}
~ZeroGSInit()
{
_aligned_free(g_pbyGSMemory);
g_pbyGSMemory = NULL;
_aligned_free(g_pbyGSClut);
g_pbyGSClut = NULL;
}
};
// Size in bytes of x texture rows ("strings" in the original wording):
// each row takes 4 * GPU_TEXWIDTH bytes.
inline int MemorySize(int x)
{
	const int bytes_per_row = 4 * GPU_TEXWIDTH;
	return bytes_per_row * x;
}
// Address inside the GS memory buffer where the data for row ("string") x begins,
// i.e. g_pbyGSMemory advanced by MemorySize(x) bytes.
inline u8* MemoryAddress(int x)
{
	const int byte_offset = MemorySize(x);
	return &g_pbyGSMemory[byte_offset];
}
// Address inside the GS memory buffer at a byte offset of mult * x, where the
// per-unit stride `mult` is fixed at compile time.
template <u32 mult>
inline u8* _MemoryAddress(int x)
{
	u8* const base = g_pbyGSMemory;
	return base + mult * x;
}
extern void GetRectMemAddress(int& start, int& end, int psm, int x, int y, int w, int h, int bp, int bw);
// called when trxdir is accessed. If host is involved, transfers memory to temp buffer byTransferBuf.
// Otherwise performs the transfer. TODO: Perhaps divide the transfers into chunks?
extern void InitTransferHostLocal();
extern void TransferHostLocal(const void* pbyMem, u32 nQWordSize);
extern void InitTransferLocalHost();
extern void TransferLocalHost(void* pbyMem, u32 nQWordSize);
extern void TransferLocalLocal();
extern void TerminateLocalHost();
extern void TerminateHostLocal();
#endif // HOSTMEMORY_H_INCLUDED

View File

@ -40,7 +40,7 @@ void SaveConfig()
fprintf(f, "mrtdepth = %hhx\n", conf.mrtdepth);
fprintf(f, "zzoptions = %x\n", conf.zz_options._u32);
fprintf(f, "options = %x\n", conf.hacks);
fprintf(f, "options = %x\n", conf.hacks._u32);
fprintf(f, "bilinear = %hhx\n", conf.bilinear);
fprintf(f, "aliasing = %hhx\n", conf.aa);
fprintf(f, "width = %x\n", conf.width);

View File

@ -24,7 +24,6 @@
#include "GS.h"
#include "Linux.h"
#include "zerogs.h"
#include "GLWin.h"
#include <map>
@ -495,7 +494,7 @@ void *SysLoadSym(void *lib, char *sym)
{
void *ret = dlsym(lib, sym);
if (ret == NULL) printf("null: %s\n", sym);
if (ret == NULL) ZZLog::Debug_Log("null: %s", sym);
return dlsym(lib, sym);
}

View File

@ -62,6 +62,7 @@
</Build>
<Compiler>
<Add option="`pkg-config gtk+-2.0 --cflags`" />
<Add option="`wx-config --version=2.8 --static=no --unicode=yes --cflags`" />
<Add option="-Wno-format" />
<Add option="-Wno-unused-parameter" />
<Add option="-Wno-unused-value" />
@ -106,6 +107,7 @@
<Unit filename="../../GifTransfer.cpp" />
<Unit filename="../../GifTransfer.h" />
<Unit filename="../../HostMemory.cpp" />
<Unit filename="../../HostMemory.h" />
<Unit filename="../Conf.cpp" />
<Unit filename="../Linux.cpp" />
<Unit filename="../Linux.h" />
@ -139,8 +141,14 @@
<Option compile="0" />
<Option link="0" />
</Unit>
<Unit filename="../../ZZClut.cpp" />
<Unit filename="../../ZZClut.h" />
<Unit filename="../../ZZGl.h" />
<Unit filename="../../ZZHacks.cpp" />
<Unit filename="../../ZZHacks.h" />
<Unit filename="../../ZZKeyboard.cpp" />
<Unit filename="../../ZZoglDrawing.cpp" />
<Unit filename="../../ZZoglDrawing.h" />
<Unit filename="../../ZZLog.cpp" />
<Unit filename="../../ZZLog.h" />
<Unit filename="../../ZZoglCRTC.cpp" />
@ -154,7 +162,9 @@
<Unit filename="../../ZZoglShaders.cpp" />
<Unit filename="../../ZZoglShaders.h" />
<Unit filename="../../ZZoglShoots.cpp" />
<Unit filename="../../ZZoglShoots.h" />
<Unit filename="../../ZZoglVB.cpp" />
<Unit filename="../../ZZoglVB.h" />
<Unit filename="../../common.h" />
<Unit filename="../../glprocs.c">
<Option compilerVar="CC" />

View File

@ -19,12 +19,14 @@
#include "GS.h"
#include "Mem.h"
#include "zerogs.h"
#include "targets.h"
#include "x86.h"
#include "Mem_Transmit.h"
#include "Mem_Swizzle.h"
#ifdef ZEROGS_SSE2
#include <emmintrin.h>
#endif
BLOCK m_Blocks[0x40]; // do so blocks are indexable
@ -128,8 +130,14 @@ static __forceinline const T* TransferAligningToBlocks(TransferData data, Transf
for (int tempj = gs.trxpos.dx; tempj < alignedPt.x; tempj += data.blockwidth, pbuf += TransPitch(data.blockwidth, data.transfersize) / TSize)
{
u8 *temp = pstart + fun.gp(tempj, tempY, gs.dstbuf.bw) * data.blockbits / 8;
swizzle(temp, (u8*)pbuf, TransPitch(pitch, data.transfersize), 0xffffffff);
swizzle(temp, (u8*)pbuf, TransPitch(pitch, data.transfersize));
}
#ifdef ZEROGS_SSE2
// Note: the swizzle functions use non-temporal stores (_mm_stream_si128).
// The store fence ensures those stores are globally visible before any later store is executed.
_mm_sfence();
#endif
/* transfer the rest */
if (alignedPt.x < gs.imageEndX)
@ -158,8 +166,8 @@ static __forceinline int FinishTransfer(TransferData data, int nLeftOver)
assert(gs.imageTransfer == -1 || tempY == gs.imageEndY);
gs.imageTransfer = -1;
/*int start, end;
ZeroGS::GetRectMemAddress(start, end, gs.dstbuf.psm, gs.trxpos.dx, gs.trxpos.dy, gs.imageWnew, gs.imageHnew, gs.dstbuf.bp, gs.dstbuf.bw);
ZeroGS::g_MemTargs.ClearRange(start, end);*/
GetRectMemAddress(start, end, gs.dstbuf.psm, gs.trxpos.dx, gs.trxpos.dy, gs.imageWnew, gs.imageHnew, gs.dstbuf.bp, gs.dstbuf.bw);
g_MemTargs.ClearRange(start, end);*/
}
else
{

View File

@ -42,7 +42,7 @@ typedef u32(*_readPixel)(const void* pmem, int x, int y, u32 bp, u32 bw);
typedef u32(*_readPixel_0)(const void* pmem, int x, int y, u32 bw);
typedef int (*_TransferHostLocal)(const void* pbyMem, u32 nQWordSize);
typedef void (*_TransferLocalHost)(void* pbyMem, u32 nQWordSize);
typedef void (*_SwizzleBlock)(u8 *dst, u8 *src, int pitch, u32 WriteMask);
typedef void (*_SwizzleBlock)(u8 *dst, u8 *src, int pitch);
extern _getPixelAddress_0 getPixelFun_0[64];
extern _writePixel_0 writePixelFun_0[64];

View File

@ -24,109 +24,54 @@
#include <emmintrin.h>
#endif
// WARNING: an sfence instruction must be issued after calling the SwizzleBlock SSE2 functions.
// Current port of the ASM function to intrinsic
#define INTRINSIC_PORT_32
#define INTRINSIC_PORT_16
#define INTRINSIC_PORT_8
#define INTRINSIC_PORT_4
#ifdef ZEROGS_SSE2
static const __aligned16 u32 mask_24b_H[4] = {0xFF000000, 0x0000FFFF, 0xFF000000, 0x0000FFFF};
static const __aligned16 u32 mask_24b_L[4] = {0x00FFFFFF, 0x00000000, 0x00FFFFFF, 0x00000000};
template<bool aligned>
__forceinline void SwizzleBlock32_sse2_I(u8 *dst, u8 *src, int pitch, u32 WriteMask)
__forceinline void SwizzleBlock32_sse2_I(u8 *dst, u8 *src, int pitch)
{
__m128i src_0;
__m128i src_1;
__m128i src_2;
__m128i src_3;
if (WriteMask == 0xffffffff) {
for (int i=3 ; i >= 0 ; --i) {
// load
if (aligned) {
src_0 = _mm_load_si128((__m128i*)src); // 5 4 1 0
src_1 = _mm_load_si128((__m128i*)(src+16)); // 13 12 9 8
src_2 = _mm_load_si128((__m128i*)(src+pitch)); // 7 6 3 2
src_3 = _mm_load_si128((__m128i*)(src+16+pitch)); // 15 14 11 10
} else {
src_0 = _mm_loadu_si128((__m128i*)src); // 5 4 1 0
src_1 = _mm_loadu_si128((__m128i*)(src+16)); // 13 12 9 8
src_2 = _mm_loadu_si128((__m128i*)(src+pitch)); // 7 6 3 2
src_3 = _mm_loadu_si128((__m128i*)(src+16+pitch)); // 15 14 11 10
}
// Reorder
__m128i dst_0 = _mm_unpacklo_epi64(src_0, src_2); // 3 2 1 0
__m128i dst_1 = _mm_unpackhi_epi64(src_0, src_2); // 7 6 5 4
__m128i dst_2 = _mm_unpacklo_epi64(src_1, src_3); // 11 10 9 8
__m128i dst_3 = _mm_unpackhi_epi64(src_1, src_3); // 15 14 13 12
// store
_mm_stream_si128((__m128i*)dst, dst_0);
_mm_stream_si128(((__m128i*)dst)+1, dst_1);
_mm_stream_si128(((__m128i*)dst)+2, dst_2);
_mm_stream_si128(((__m128i*)dst)+3, dst_3);
// update the pointer
dst += 64;
src += 2*pitch;
for (int i=3 ; i >= 0 ; --i) {
// load
if (aligned) {
src_0 = _mm_load_si128((__m128i*)src); // 5 4 1 0
src_1 = _mm_load_si128((__m128i*)(src+16)); // 13 12 9 8
src_2 = _mm_load_si128((__m128i*)(src+pitch)); // 7 6 3 2
src_3 = _mm_load_si128((__m128i*)(src+16+pitch)); // 15 14 11 10
} else {
src_0 = _mm_loadu_si128((__m128i*)src); // 5 4 1 0
src_1 = _mm_loadu_si128((__m128i*)(src+16)); // 13 12 9 8
src_2 = _mm_loadu_si128((__m128i*)(src+pitch)); // 7 6 3 2
src_3 = _mm_loadu_si128((__m128i*)(src+16+pitch)); // 15 14 11 10
}
}
else
{
// Build the mask (tranform a u32 to a 4 packets u32)
__m128i mask = _mm_cvtsi32_si128(WriteMask);
mask = _mm_shuffle_epi32(mask, 0);
for (int i=3 ; i >= 0 ; --i) {
// load
if (aligned) {
src_0 = _mm_load_si128((__m128i*)src); // 5 4 1 0
src_1 = _mm_load_si128((__m128i*)(src+16)); // 13 12 9 8
src_2 = _mm_load_si128((__m128i*)(src+pitch)); // 7 6 3 2
src_3 = _mm_load_si128((__m128i*)(src+16+pitch)); // 15 14 11 10
} else {
src_0 = _mm_loadu_si128((__m128i*)src); // 5 4 1 0
src_1 = _mm_loadu_si128((__m128i*)(src+16)); // 13 12 9 8
src_2 = _mm_loadu_si128((__m128i*)(src+pitch)); // 7 6 3 2
src_3 = _mm_loadu_si128((__m128i*)(src+16+pitch)); // 15 14 11 10
}
// Reorder
__m128i dst_0 = _mm_unpacklo_epi64(src_0, src_2); // 3 2 1 0
__m128i dst_1 = _mm_unpackhi_epi64(src_0, src_2); // 7 6 5 4
__m128i dst_2 = _mm_unpacklo_epi64(src_1, src_3); // 11 10 9 8
__m128i dst_3 = _mm_unpackhi_epi64(src_1, src_3); // 15 14 13 12
// Apply the WriteMask before reordering
src_0 = _mm_and_si128(src_0, mask);
src_1 = _mm_and_si128(src_1, mask);
src_2 = _mm_and_si128(src_2, mask);
src_3 = _mm_and_si128(src_3, mask);
// store
_mm_stream_si128((__m128i*)dst, dst_0);
_mm_stream_si128(((__m128i*)dst)+1, dst_1);
_mm_stream_si128(((__m128i*)dst)+2, dst_2);
_mm_stream_si128(((__m128i*)dst)+3, dst_3);
// Reorder
__m128i dst_0 = _mm_unpacklo_epi64(src_0, src_2); // 3 2 1 0
__m128i dst_1 = _mm_unpackhi_epi64(src_0, src_2); // 7 6 5 4
__m128i dst_2 = _mm_unpacklo_epi64(src_1, src_3); // 11 10 9 8
__m128i dst_3 = _mm_unpackhi_epi64(src_1, src_3); // 15 14 13 12
// Load previous value and apply the ~mask
__m128i old_dst_0 = _mm_andnot_si128(mask, _mm_load_si128((__m128i*)dst));
__m128i old_dst_1 = _mm_andnot_si128(mask, _mm_load_si128(((__m128i*)dst)+1));
__m128i old_dst_2 = _mm_andnot_si128(mask, _mm_load_si128(((__m128i*)dst)+2));
__m128i old_dst_3 = _mm_andnot_si128(mask, _mm_load_si128(((__m128i*)dst)+3));
// Build the final value
dst_0 = _mm_or_si128(dst_0, old_dst_0);
dst_1 = _mm_or_si128(dst_1, old_dst_1);
dst_2 = _mm_or_si128(dst_2, old_dst_2);
dst_3 = _mm_or_si128(dst_3, old_dst_3);
// store
_mm_stream_si128((__m128i*)dst, dst_0);
_mm_stream_si128(((__m128i*)dst)+1, dst_1);
_mm_stream_si128(((__m128i*)dst)+2, dst_2);
_mm_stream_si128(((__m128i*)dst)+3, dst_3);
// update the pointer
dst += 64;
src += 2*pitch;
}
}
// FIXME normally you must use a sfence but it would impact perf to do here
// the function is in a loop and it would have a better place after the loop...
// update the pointer
dst += 64;
src += 2*pitch;
}
}
template<bool aligned>
@ -173,8 +118,6 @@ __forceinline void SwizzleBlock16_sse2_I(u8 *dst, u8 *src, int pitch)
dst += 64;
src += 2*pitch;
}
// FIXME normally you must use a sfence but it would impact perf to do here
// the function is in a loop and it would have a better place after the loop...
}
// Template the code to improve reuse of code
@ -256,9 +199,6 @@ __forceinline void SwizzleBlock8_sse2_I(u8 *dst, u8 *src, int pitch)
dst += 64;
src += 4*pitch;
SwizzleColumn8_sse2_I<aligned, 3>(dst, src, pitch);
// FIXME normally you must use a sfence but it would impact perf to do here
// the function is in a loop and it would have a better place after the loop...
}
// Template the code to improve reuse of code
@ -372,130 +312,310 @@ __forceinline void SwizzleBlock4_sse2_I(u8 *dst, u8 *src, int pitch)
dst += 64;
src += 4*pitch;
SwizzleColumn4_sse2_I<aligned, 3>(dst, src, pitch);
// FIXME normally you must use a sfence but it would impact perf to do here
// the function is in a loop and it would have a better place after the loop...
}
#endif
// Swizzles a block of 8-bit (FOUR_BIT == false) or 4-bit (FOUR_BIT == true) source
// values into the upper byte of 32-bit destination words, leaving the other bits of
// each destination word untouched.
//
//   FOUR_BIT == false          : each source byte lands in bits 24-31 (mask 0xFF000000)
//   FOUR_BIT == true,  UPPER   : each source nibble lands in bits 28-31 (mask 0xF0000000)
//   FOUR_BIT == true, !UPPER   : each source nibble lands in bits 24-27 (mask 0x0F000000)
//
// dst   : destination block, 4 * 64 bytes are read-modify-written. It is accessed with
//         _mm_load_si128 / _mm_stream_si128, so it must be 16-byte aligned.
// src   : source rows; 8 rows are consumed (two per iteration), 8 values per row
//         (4 bytes per row when FOUR_BIT, 8 bytes otherwise).
// pitch : distance in bytes between two consecutive source rows.
//
// The stores are non-temporal (_mm_stream_si128); no fence is issued here.
template<bool FOUR_BIT, bool UPPER>
__forceinline void SwizzleBlock8H_4H(u8 *dst, u8 *src, int pitch)
{
__m128i zero_128 = _mm_setzero_si128();
__m128i src_0;
__m128i src_1;
__m128i src_2;
__m128i src_3;
__m128i src_0_init_H;
__m128i src_0_init_L;
__m128i src_2_init_H;
__m128i src_2_init_L;
__m128i src_0_init;
__m128i src_2_init;
// Selects the high nibble of each of the 4 low bytes (only the low 32 bits are set,
// which matches the 32 bits loaded per row in the 4-bit case).
__m128i upper_mask = _mm_cvtsi32_si128(0xF0F0F0F0);
// Build the write_mask: put the 32-bit mask in the low lane, then broadcast it
// to all four 32-bit lanes.
__m128i write_mask;
if (FOUR_BIT) {
if (UPPER) write_mask = _mm_cvtsi32_si128(0xF0000000);
else write_mask = _mm_cvtsi32_si128(0x0F000000);
} else {
write_mask = _mm_cvtsi32_si128(0xFF000000);
}
write_mask = _mm_shuffle_epi32(write_mask, 0);
// 4 iterations, each handling two source rows and producing 64 destination bytes.
for (int i=3 ; i >= 0 ; --i) {
// Load 8 values from each of the two rows: 32 bits (8 nibbles) or 64 bits (8 bytes).
if (FOUR_BIT) {
src_0_init = _mm_cvtsi32_si128(*(u32*)src);
src_2_init = _mm_cvtsi32_si128(*(u32*)(src + pitch));
} else {
src_0_init = _mm_loadl_epi64((__m128i*)src);
src_2_init = _mm_loadl_epi64((__m128i*)(src + pitch));
}
// Convert to 8 bits: expand every nibble into its own byte.
if (FOUR_BIT) {
// Split each source byte into its high and low nibble.
src_0_init_H = _mm_and_si128(upper_mask, src_0_init);
src_0_init_L = _mm_andnot_si128(upper_mask, src_0_init);
src_2_init_H = _mm_and_si128(upper_mask, src_2_init);
src_2_init_L = _mm_andnot_si128(upper_mask, src_2_init);
// Bring both nibbles to the same position inside their byte: the high nibble
// position when UPPER, the low nibble position otherwise.
if (UPPER) {
src_0_init_L = _mm_slli_epi32(src_0_init_L, 4);
src_2_init_L = _mm_slli_epi32(src_2_init_L, 4);
} else {
src_0_init_H = _mm_srli_epi32(src_0_init_H, 4);
src_2_init_H = _mm_srli_epi32(src_2_init_H, 4);
}
// Repack the src to keep the original order: low nibble first, then high nibble,
// for each source byte.
src_0_init = _mm_unpacklo_epi8(src_0_init_L, src_0_init_H);
src_2_init = _mm_unpacklo_epi8(src_2_init_L, src_2_init_H);
}
// transform to 16 bits (zero goes in the low byte, data in the high byte)
src_0_init = _mm_unpacklo_epi8(zero_128, src_0_init);
src_2_init = _mm_unpacklo_epi8(zero_128, src_2_init);
// transform to 32 bits (zero goes in the low 16 bits, so the data byte ends up in bits 24-31)
src_0 = _mm_unpacklo_epi16(zero_128, src_0_init);
src_1 = _mm_unpackhi_epi16(zero_128, src_0_init);
src_2 = _mm_unpacklo_epi16(zero_128, src_2_init);
src_3 = _mm_unpackhi_epi16(zero_128, src_2_init);
// Reorder the data (same as 32 bits format): interleave the 64-bit halves of the two rows.
__m128i dst_0 = _mm_unpacklo_epi64(src_0, src_2);
__m128i dst_1 = _mm_unpackhi_epi64(src_0, src_2);
__m128i dst_2 = _mm_unpacklo_epi64(src_1, src_3);
__m128i dst_3 = _mm_unpackhi_epi64(src_1, src_3);
// Load the previous destination value, keep only the bits outside write_mask,
// and merge them with the new data (which is zero outside write_mask).
__m128i old_dst_0 = _mm_andnot_si128(write_mask, _mm_load_si128((__m128i*)dst));
dst_0 = _mm_or_si128(dst_0, old_dst_0);
__m128i old_dst_1 = _mm_andnot_si128(write_mask, _mm_load_si128(((__m128i*)dst)+1));
dst_1 = _mm_or_si128(dst_1, old_dst_1);
__m128i old_dst_2 = _mm_andnot_si128(write_mask, _mm_load_si128(((__m128i*)dst)+2));
dst_2 = _mm_or_si128(dst_2, old_dst_2);
__m128i old_dst_3 = _mm_andnot_si128(write_mask, _mm_load_si128(((__m128i*)dst)+3));
dst_3 = _mm_or_si128(dst_3, old_dst_3);
// store (non-temporal)
_mm_stream_si128((__m128i*)dst, dst_0);
_mm_stream_si128(((__m128i*)dst)+1, dst_1);
_mm_stream_si128(((__m128i*)dst)+2, dst_2);
_mm_stream_si128(((__m128i*)dst)+3, dst_3);
// update the pointers: next 64 destination bytes, next pair of source rows
dst += 64;
src += 2*pitch;
}
}
// special swizzle macros - which I converted to functions.
#ifdef ZEROGS_SSE2
__forceinline void SwizzleBlock32(u8 *dst, u8 *src, int pitch, u32 WriteMask)
__forceinline void SwizzleBlock32(u8 *dst, u8 *src, int pitch)
{
#ifdef INTRINSIC_PORT_32
SwizzleBlock32_sse2_I<true>(dst, src, pitch, WriteMask);
#else
SwizzleBlock32_sse2(dst, src, pitch, WriteMask);
#endif
SwizzleBlock32_sse2_I<true>(dst, src, pitch);
}
__forceinline void SwizzleBlock16(u8 *dst, u8 *src, int pitch, u32 WriteMask)
// Swizzles a block of packed 24-bit source pixels into the low 24 bits of 32-bit
// destination words, preserving bits 24-31 of every destination word.
//
// dst   : destination block, 4 * 64 bytes are read-modify-written. It is accessed with
//         _mm_load_si128 / _mm_stream_si128, so it must be 16-byte aligned.
// src   : source rows of packed 3-byte pixels; 8 rows are consumed (two per iteration),
//         8 pixels (24 bytes) per row. Read with unaligned loads.
// pitch : distance in bytes between two consecutive source rows.
//
// The stores are non-temporal (_mm_stream_si128); no fence is issued here.
__forceinline void SwizzleBlock24(u8 *dst, u8 *src, int pitch)
{
__m128i mask_H = _mm_load_si128((__m128i*)mask_24b_H);
__m128i mask_L = _mm_load_si128((__m128i*)mask_24b_L);
// Build the write_mask: put the 32-bit mask in the low lane, then broadcast it
// to all four 32-bit lanes.
__m128i write_mask = _mm_cvtsi32_si128(0x00FFFFFF);
write_mask = _mm_shuffle_epi32(write_mask, 0);
// 4 iterations, each handling two source rows and producing 64 destination bytes.
for (int i=3 ; i >= 0 ; --i) {
// Each 16-byte load covers 4 pixels (12 bytes) plus 4 extra bytes, so the loads at
// +12 read 4 bytes past the 24 bytes of the row. Original note: src can be out of
// bounds of GS memory, but there is some spare allocation to avoid that tricky
// corner case.
__m128i src_0 = _mm_loadu_si128((__m128i*)src);
__m128i src_1 = _mm_loadu_si128((__m128i*)(src+12));
__m128i src_2 = _mm_loadu_si128((__m128i*)(src+pitch));
__m128i src_3 = _mm_loadu_si128((__m128i*)(src+pitch+12));
// transform the 24-bit values into 32-bit ones
// 1/ Pre-align the data: shift the register left by 2 bytes, then rotate the four low
//    16-bit words (0x39). The low 64 bits then hold source bytes 0-5 followed by two
//    zero bytes, and the high 64 bits hold source bytes 6-13.
src_0 = _mm_slli_si128(src_0, 2);
src_0 = _mm_shufflelo_epi16(src_0, 0x39);
src_1 = _mm_slli_si128(src_1, 2);
src_1 = _mm_shufflelo_epi16(src_1, 0x39);
src_2 = _mm_slli_si128(src_2, 2);
src_2 = _mm_shufflelo_epi16(src_2, 0x39);
src_3 = _mm_slli_si128(src_3, 2);
src_3 = _mm_shufflelo_epi16(src_3, 0x39);
// 2/ Filter the 24-bit pixels & do the conversion: mask_L keeps pixels 0 and 2, which
//    already sit in the low 3 bytes of lanes 0 and 2; mask_H keeps pixels 1 and 3, which
//    a 1-byte left shift moves into the low 3 bytes of lanes 1 and 3.
__m128i src_0_H = _mm_and_si128(src_0, mask_H);
__m128i src_0_L = _mm_and_si128(src_0, mask_L);
src_0_H = _mm_slli_si128(src_0_H, 1);
src_0 = _mm_or_si128(src_0_H, src_0_L);
__m128i src_1_H = _mm_and_si128(src_1, mask_H);
__m128i src_1_L = _mm_and_si128(src_1, mask_L);
src_1_H = _mm_slli_si128(src_1_H, 1);
src_1 = _mm_or_si128(src_1_H, src_1_L);
__m128i src_2_H = _mm_and_si128(src_2, mask_H);
__m128i src_2_L = _mm_and_si128(src_2, mask_L);
src_2_H = _mm_slli_si128(src_2_H, 1);
src_2 = _mm_or_si128(src_2_H, src_2_L);
__m128i src_3_H = _mm_and_si128(src_3, mask_H);
__m128i src_3_L = _mm_and_si128(src_3, mask_L);
src_3_H = _mm_slli_si128(src_3_H, 1);
src_3 = _mm_or_si128(src_3_H, src_3_L);
// Reorder the data (same as 32 bits format): interleave the 64-bit halves of the two rows.
__m128i dst_0 = _mm_unpacklo_epi64(src_0, src_2);
__m128i dst_1 = _mm_unpackhi_epi64(src_0, src_2);
__m128i dst_2 = _mm_unpacklo_epi64(src_1, src_3);
__m128i dst_3 = _mm_unpackhi_epi64(src_1, src_3);
// Load the previous destination value, keep only the bits outside write_mask
// (the top byte of each word), and merge them with the new 24-bit data.
__m128i old_dst_0 = _mm_andnot_si128(write_mask, _mm_load_si128((__m128i*)dst));
dst_0 = _mm_or_si128(dst_0, old_dst_0);
__m128i old_dst_1 = _mm_andnot_si128(write_mask, _mm_load_si128(((__m128i*)dst)+1));
dst_1 = _mm_or_si128(dst_1, old_dst_1);
__m128i old_dst_2 = _mm_andnot_si128(write_mask, _mm_load_si128(((__m128i*)dst)+2));
dst_2 = _mm_or_si128(dst_2, old_dst_2);
__m128i old_dst_3 = _mm_andnot_si128(write_mask, _mm_load_si128(((__m128i*)dst)+3));
dst_3 = _mm_or_si128(dst_3, old_dst_3);
// store (non-temporal)
_mm_stream_si128((__m128i*)dst, dst_0);
_mm_stream_si128(((__m128i*)dst)+1, dst_1);
_mm_stream_si128(((__m128i*)dst)+2, dst_2);
_mm_stream_si128(((__m128i*)dst)+3, dst_3);
// update the pointers: next 64 destination bytes, next pair of source rows
dst += 64;
src += 2*pitch;
}
}
__forceinline void SwizzleBlock16(u8 *dst, u8 *src, int pitch)
{
#ifdef INTRINSIC_PORT_16
SwizzleBlock16_sse2_I<true>(dst, src, pitch/*, WriteMask*/);
SwizzleBlock16_sse2_I<true>(dst, src, pitch);
#else
SwizzleBlock16_sse2(dst, src, pitch/*, WriteMask*/);
SwizzleBlock16_sse2(dst, src, pitch);
#endif
}
__forceinline void SwizzleBlock8(u8 *dst, u8 *src, int pitch, u32 WriteMask)
__forceinline void SwizzleBlock8(u8 *dst, u8 *src, int pitch)
{
#ifdef INTRINSIC_PORT_8
SwizzleBlock8_sse2_I<true>(dst, src, pitch/*, WriteMask*/);
SwizzleBlock8_sse2_I<true>(dst, src, pitch);
#else
SwizzleBlock8_sse2(dst, src, pitch/*, WriteMask*/);
SwizzleBlock8_sse2(dst, src, pitch);
#endif
}
__forceinline void SwizzleBlock4(u8 *dst, u8 *src, int pitch, u32 WriteMask)
__forceinline void SwizzleBlock4(u8 *dst, u8 *src, int pitch)
{
#ifdef INTRINSIC_PORT_4
SwizzleBlock4_sse2_I<true>(dst, src, pitch/*, WriteMask*/);
SwizzleBlock4_sse2_I<true>(dst, src, pitch);
#else
SwizzleBlock4_sse2(dst, src, pitch/*, WriteMask*/);
SwizzleBlock4_sse2(dst, src, pitch);
#endif
}
__forceinline void SwizzleBlock32u(u8 *dst, u8 *src, int pitch, u32 WriteMask)
__forceinline void SwizzleBlock32u(u8 *dst, u8 *src, int pitch)
{
#ifdef INTRINSIC_PORT_32
SwizzleBlock32_sse2_I<false>(dst, src, pitch, WriteMask);
#else
SwizzleBlock32u_sse2(dst, src, pitch, WriteMask);
#endif
SwizzleBlock32_sse2_I<false>(dst, src, pitch);
}
__forceinline void SwizzleBlock16u(u8 *dst, u8 *src, int pitch, u32 WriteMask)
__forceinline void SwizzleBlock16u(u8 *dst, u8 *src, int pitch)
{
#ifdef INTRINSIC_PORT_16
SwizzleBlock16_sse2_I<false>(dst, src, pitch/*, WriteMask*/);
SwizzleBlock16_sse2_I<false>(dst, src, pitch);
#else
SwizzleBlock16u_sse2(dst, src, pitch/*, WriteMask*/);
SwizzleBlock16u_sse2(dst, src, pitch);
#endif
}
__forceinline void SwizzleBlock8u(u8 *dst, u8 *src, int pitch, u32 WriteMask)
__forceinline void SwizzleBlock8u(u8 *dst, u8 *src, int pitch)
{
#ifdef INTRINSIC_PORT_8
SwizzleBlock8_sse2_I<false>(dst, src, pitch/*, WriteMask*/);
SwizzleBlock8_sse2_I<false>(dst, src, pitch);
#else
SwizzleBlock8u_sse2(dst, src, pitch/*, WriteMask*/);
SwizzleBlock8u_sse2(dst, src, pitch);
#endif
}
__forceinline void SwizzleBlock4u(u8 *dst, u8 *src, int pitch, u32 WriteMask)
__forceinline void SwizzleBlock4u(u8 *dst, u8 *src, int pitch)
{
#ifdef INTRINSIC_PORT_4
SwizzleBlock4_sse2_I<false>(dst, src, pitch/*, WriteMask*/);
SwizzleBlock4_sse2_I<false>(dst, src, pitch);
#else
SwizzleBlock4u_sse2(dst, src, pitch/*, WriteMask*/);
SwizzleBlock4u_sse2(dst, src, pitch);
#endif
}
// 8-bit variant: forwards to SwizzleBlock8H_4H with FOUR_BIT = false
// (UPPER is not used in that case).
__forceinline void SwizzleBlock8H(u8 *dst, u8 *src, int pitch)
{
SwizzleBlock8H_4H<false, false>(dst, src, pitch);
}
// 4-bit variant targeting the high nibble: forwards to SwizzleBlock8H_4H with
// FOUR_BIT = true, UPPER = true.
__forceinline void SwizzleBlock4HH(u8 *dst, u8 *src, int pitch)
{
SwizzleBlock8H_4H<true, true>(dst, src, pitch);
}
// 4-bit variant targeting the low nibble: forwards to SwizzleBlock8H_4H with
// FOUR_BIT = true, UPPER = false.
__forceinline void SwizzleBlock4HL(u8 *dst, u8 *src, int pitch)
{
SwizzleBlock8H_4H<true, false>(dst, src, pitch);
}
#else
__forceinline void SwizzleBlock32(u8 *dst, u8 *src, int pitch, u32 WriteMask)
__forceinline void SwizzleBlock32(u8 *dst, u8 *src, int pitch)
{
SwizzleBlock32_c(dst, src, pitch, WriteMask);
SwizzleBlock32_c(dst, src, pitch);
}
__forceinline void SwizzleBlock16(u8 *dst, u8 *src, int pitch, u32 WriteMask)
__forceinline void SwizzleBlock16(u8 *dst, u8 *src, int pitch)
{
SwizzleBlock16_c(dst, src, pitch/*, WriteMask*/);
SwizzleBlock16_c(dst, src, pitch);
}
__forceinline void SwizzleBlock8(u8 *dst, u8 *src, int pitch, u32 WriteMask)
__forceinline void SwizzleBlock8(u8 *dst, u8 *src, int pitch)
{
SwizzleBlock8_c(dst, src, pitch/*, WriteMask*/);
SwizzleBlock8_c(dst, src, pitch);
}
__forceinline void SwizzleBlock4(u8 *dst, u8 *src, int pitch, u32 WriteMask)
__forceinline void SwizzleBlock4(u8 *dst, u8 *src, int pitch)
{
SwizzleBlock4_c(dst, src, pitch/*, WriteMask*/);
SwizzleBlock4_c(dst, src, pitch);
}
__forceinline void SwizzleBlock32u(u8 *dst, u8 *src, int pitch, u32 WriteMask)
__forceinline void SwizzleBlock32u(u8 *dst, u8 *src, int pitch)
{
SwizzleBlock32_c(dst, src, pitch, WriteMask);
SwizzleBlock32_c(dst, src, pitch);
}
__forceinline void SwizzleBlock16u(u8 *dst, u8 *src, int pitch, u32 WriteMask)
__forceinline void SwizzleBlock16u(u8 *dst, u8 *src, int pitch)
{
SwizzleBlock16_c(dst, src, pitch/*, WriteMask*/);
SwizzleBlock16_c(dst, src, pitch);
}
__forceinline void SwizzleBlock8u(u8 *dst, u8 *src, int pitch, u32 WriteMask)
__forceinline void SwizzleBlock8u(u8 *dst, u8 *src, int pitch)
{
SwizzleBlock8_c(dst, src, pitch/*, WriteMask*/);
SwizzleBlock8_c(dst, src, pitch);
}
__forceinline void SwizzleBlock4u(u8 *dst, u8 *src, int pitch, u32 WriteMask)
__forceinline void SwizzleBlock4u(u8 *dst, u8 *src, int pitch)
{
SwizzleBlock4_c(dst, src, pitch/*, WriteMask*/);
SwizzleBlock4_c(dst, src, pitch);
}
__forceinline void __fastcall SwizzleBlock32_c(u8* dst, u8* src, int srcpitch, u32 WriteMask)
__forceinline void __fastcall SwizzleBlock32_mask(u8* dst, u8* src, int srcpitch, u32 WriteMask)
{
u32* d = &g_columnTable32[0][0];
@ -513,26 +633,12 @@ __forceinline void __fastcall SwizzleBlock32_c(u8* dst, u8* src, int srcpitch, u
}
}
__forceinline void __fastcall SwizzleBlock24_c(u8* dst, u8* src, int srcpitch, u32 WriteMask)
__forceinline void __fastcall SwizzleBlock32_c(u8* dst, u8* src, int srcpitch)
{
u32* d = &g_columnTable32[0][0];
if (WriteMask == 0x00ffffff)
{
for (int j = 0; j < 8; j++, d += 8, src += srcpitch)
for (int i = 0; i < 8; i++)
((u32*)dst)[d[i]] = ((u32*)src)[i];
}
else
{
for (int j = 0; j < 8; j++, d += 8, src += srcpitch)
for (int i = 0; i < 8; i++)
((u32*)dst)[d[i]] = (((u32*)dst)[d[i]] & ~WriteMask) | (((u32*)src)[i] & WriteMask);
}
SwizzleBlock32_mask(dst, src, srcpitch, 0xffffffff);
}
__forceinline void __fastcall SwizzleBlock16_c(u8* dst, u8* src, int srcpitch, u32 WriteMask)
__forceinline void __fastcall SwizzleBlock16_c(u8* dst, u8* src, int srcpitch)
{
u32* d = &g_columnTable16[0][0];
@ -541,7 +647,7 @@ __forceinline void __fastcall SwizzleBlock16_c(u8* dst, u8* src, int srcpitch, u
((u16*)dst)[d[i]] = ((u16*)src)[i];
}
__forceinline void __fastcall SwizzleBlock8_c(u8* dst, u8* src, int srcpitch, u32 WriteMask)
__forceinline void __fastcall SwizzleBlock8_c(u8* dst, u8* src, int srcpitch)
{
u32* d = &g_columnTable8[0][0];
@ -550,7 +656,7 @@ __forceinline void __fastcall SwizzleBlock8_c(u8* dst, u8* src, int srcpitch, u3
dst[d[i]] = src[i];
}
__forceinline void __fastcall SwizzleBlock4_c(u8* dst, u8* src, int srcpitch, u32 WriteMask)
__forceinline void __fastcall SwizzleBlock4_c(u8* dst, u8* src, int srcpitch)
{
u32* d = &g_columnTable4[0][0];
@ -566,13 +672,14 @@ __forceinline void __fastcall SwizzleBlock4_c(u8* dst, u8* src, int srcpitch, u3
}
}
#endif
__forceinline void SwizzleBlock24(u8 *dst, u8 *src, int pitch, u32 WriteMask)
__forceinline void SwizzleBlock24(u8 *dst, u8 *src, int pitch)
{
u8* pnewsrc = src;
u32* pblock = tempblock;
for (int by = 0; by < 7; ++by, pblock += 8, pnewsrc += pitch - 24)
// Note src can be out of bound of GS memory (but there is some spare allocation
// to avoid a tricky corner case)
for (int by = 0; by < 8; ++by, pblock += 8, pnewsrc += pitch - 24)
{
for (int bx = 0; bx < 8; ++bx, pnewsrc += 3)
{
@ -580,23 +687,10 @@ __forceinline void SwizzleBlock24(u8 *dst, u8 *src, int pitch, u32 WriteMask)
}
}
for (int bx = 0; bx < 7; ++bx, pnewsrc += 3)
{
/* might be 1 byte out of bounds of GS memory */
pblock[bx] = *(u32*)pnewsrc;
}
/* do 3 bytes for the last copy */
*((u8*)pblock + 28) = pnewsrc[0];
*((u8*)pblock + 29) = pnewsrc[1];
*((u8*)pblock + 30) = pnewsrc[2];
SwizzleBlock32((u8*)dst, (u8*)tempblock, 32, 0x00ffffff);
SwizzleBlock32_mask((u8*)dst, (u8*)tempblock, 32, 0x00ffffff);
}
__forceinline void SwizzleBlock8H(u8 *dst, u8 *src, int pitch, u32 WriteMask)
__forceinline void SwizzleBlock8H(u8 *dst, u8 *src, int pitch)
{
u8* pnewsrc = src;
u32* pblock = tempblock;
@ -615,10 +709,10 @@ __forceinline void SwizzleBlock8H(u8 *dst, u8 *src, int pitch, u32 WriteMask)
pblock[7] = u;
}
SwizzleBlock32((u8*)dst, (u8*)tempblock, 32, 0xff000000);
SwizzleBlock32_mask((u8*)dst, (u8*)tempblock, 32, 0xff000000);
}
__forceinline void SwizzleBlock4HH(u8 *dst, u8 *src, int pitch, u32 WriteMask)
__forceinline void SwizzleBlock4HH(u8 *dst, u8 *src, int pitch)
{
u8* pnewsrc = src;
u32* pblock = tempblock;
@ -636,10 +730,10 @@ __forceinline void SwizzleBlock4HH(u8 *dst, u8 *src, int pitch, u32 WriteMask)
pblock[7] = u;
}
SwizzleBlock32((u8*)dst, (u8*)tempblock, 32, 0xf0000000);
SwizzleBlock32_mask((u8*)dst, (u8*)tempblock, 32, 0xf0000000);
}
__forceinline void SwizzleBlock4HL(u8 *dst, u8 *src, int pitch, u32 WriteMask)
__forceinline void SwizzleBlock4HL(u8 *dst, u8 *src, int pitch)
{
u8* pnewsrc = src;
u32* pblock = tempblock;
@ -657,6 +751,6 @@ __forceinline void SwizzleBlock4HL(u8 *dst, u8 *src, int pitch, u32 WriteMask)
pblock[7] = u >> 4;
}
SwizzleBlock32((u8*)dst, (u8*)tempblock, 32, 0x0f000000);
SwizzleBlock32_mask((u8*)dst, (u8*)tempblock, 32, 0x0f000000);
}
#endif

View File

@ -24,26 +24,25 @@
#include "Mem.h"
#include "x86.h"
extern __forceinline void SwizzleBlock32(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff);
extern __forceinline void SwizzleBlock16(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff);
extern __forceinline void SwizzleBlock8(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff);
extern __forceinline void SwizzleBlock4(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff);
extern __forceinline void SwizzleBlock32u(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff);
extern __forceinline void SwizzleBlock16u(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff);
extern __forceinline void SwizzleBlock8u(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff);
extern __forceinline void SwizzleBlock4u(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff);
extern __forceinline void SwizzleBlock32(u8 *dst, u8 *src, int pitch);
extern __forceinline void SwizzleBlock16(u8 *dst, u8 *src, int pitch);
extern __forceinline void SwizzleBlock8(u8 *dst, u8 *src, int pitch);
extern __forceinline void SwizzleBlock4(u8 *dst, u8 *src, int pitch);
extern __forceinline void SwizzleBlock32u(u8 *dst, u8 *src, int pitch);
extern __forceinline void SwizzleBlock16u(u8 *dst, u8 *src, int pitch);
extern __forceinline void SwizzleBlock8u(u8 *dst, u8 *src, int pitch);
extern __forceinline void SwizzleBlock4u(u8 *dst, u8 *src, int pitch);
extern __forceinline void __fastcall SwizzleBlock32_c(u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff);
extern __forceinline void __fastcall SwizzleBlock24_c(u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff);
extern __forceinline void __fastcall SwizzleBlock16_c(u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff);
extern __forceinline void __fastcall SwizzleBlock8_c(u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff);
extern __forceinline void __fastcall SwizzleBlock4_c(u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff);
extern __forceinline void __fastcall SwizzleBlock32_c(u8* dst, u8* src, int srcpitch);
extern __forceinline void __fastcall SwizzleBlock16_c(u8* dst, u8* src, int srcpitch);
extern __forceinline void __fastcall SwizzleBlock8_c(u8* dst, u8* src, int srcpitch);
extern __forceinline void __fastcall SwizzleBlock4_c(u8* dst, u8* src, int srcpitch);
// special swizzle macros - which I converted to functions.
extern __forceinline void SwizzleBlock24(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff);
extern __forceinline void SwizzleBlock8H(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff);
extern __forceinline void SwizzleBlock4HH(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff);
extern __forceinline void SwizzleBlock4HL(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff);
extern __forceinline void SwizzleBlock24(u8 *dst, u8 *src, int pitch);
extern __forceinline void SwizzleBlock8H(u8 *dst, u8 *src, int pitch);
extern __forceinline void SwizzleBlock4HH(u8 *dst, u8 *src, int pitch);
extern __forceinline void SwizzleBlock4HL(u8 *dst, u8 *src, int pitch);
#define SwizzleBlock24u SwizzleBlock24
#define SwizzleBlock8Hu SwizzleBlock8H
#define SwizzleBlock4HHu SwizzleBlock4HH

View File

@ -22,15 +22,13 @@
#include "NewRegs.h"
#include "PS2Etypes.h"
#include "zerogs.h"
#include "targets.h"
#ifdef USE_OLD_REGS
#include "Regs.h"
#else
const u32 g_primmult[8] = { 1, 2, 2, 3, 3, 3, 2, 0xff };
const u32 g_primsub[8] = { 1, 2, 1, 3, 1, 1, 2, 0 };
#include "ZZoglVB.h"
#include "ZZoglDrawing.h"
#ifdef _MSC_VER
#pragma warning(disable:4244)
@ -46,28 +44,6 @@ u32 s_uClampData[2] = {0, };
//u32 results[65535] = {0, };
// return true if triangle SHOULD be painted.
// My brain hurts. --arcum42
// return true if triangle SHOULD be painted.
// Decides whether the triangle being kicked for context i should be drawn.
// Returns true when the triangle SHOULD be painted, false when it must be
// skipped. This is hack code: the real reason why some triangles must not be
// drawn is still unknown.
inline bool NoHighlights(int i)
{
	// Fold the prim flags into one word, one weight per flag.
	// NOTE(review): if every one of these prim fields is a single bit the sum
	// cannot exceed 0xff, so the 0x310a comparison below could never match --
	// confirm the field widths before relying on the Radiata Stories case.
	u32 resultA = prim->iip + (2 * (prim->tme)) + (4 * (prim->fge)) + (8 * (prim->abe)) + (16 * (prim->aa1)) + (32 * (prim->fst)) + (64 * (prim->ctxt)) + (128 * (prim->fix));

	// Same packing for the pixel-test state of this context.
	const pixTest curtest = ZeroGS::vb[i].test;
	u32 result = curtest.ate + ((curtest.atst) << 1) +((curtest.afail) << 4) + ((curtest.date) << 6) + ((curtest.datm) << 7) + ((curtest.zte) << 8) + ((curtest.ztst)<< 9);

	if ((resultA == 0x310a) && (result == 0x0)) return false; // Radiata Stories

	// Old code: skip only when the xenosaga_spec setting is on, the context's
	// z-buffer is masked (zmsk) and iip is clear.
	return (!(conf.settings().xenosaga_spec) || !ZeroGS::vb[i].zbuf.zmsk || prim->iip) ;
}
void __gifCall GIFPackedRegHandlerNull(const u32* data)
{
FUNCLOG
@ -127,24 +103,6 @@ void __gifCall GIFPackedRegHandlerUV(const u32* data)
ZZLog::Greg_Log("Packed UV: 0x%x, 0x%x", r->U, r->V);
}
// Counts one more vertex for the current primitive and, once enough vertices
// have been collected, draws it (unless adc is set or NoHighlights() vetoes
// it) and rewinds the vertex counter for the next primitive.
void __gifCall KickVertex(bool adc)
{
	FUNCLOG

	// Not enough vertices yet for this primitive type: nothing to draw.
	if (++gs.primC < (int)g_primmult[prim->prim]) return;

	// NoHighlights() is only consulted when adc is clear.
	const bool paint = !adc && NoHighlights(prim->ctxt);
	if (paint) (*ZeroGS::drawfn[prim->prim])();

	gs.primC -= g_primsub[prim->prim];

	// Tri fans (prim type 5) need special processing when adc is set.
	const bool fan_with_adc = adc && (prim->prim == 5);
	if (fan_with_adc && gs.nTriFanVert == gs.primIndex)
	{
		gs.primIndex = gs.primNext();
	}
}
void __gifCall GIFPackedRegHandlerXYZF2(const u32* data)
{
FUNCLOG
@ -152,7 +110,7 @@ void __gifCall GIFPackedRegHandlerXYZF2(const u32* data)
gs.add_vertex(r->X, r->Y,r->Z, r->F);
// Fix Vertexes up later.
KickVertex(!!(r->ADC));
ZZKick->KickVertex(!!(r->ADC));
ZZLog::Greg_Log("Packed XYZF2: 0x%x, 0x%x, 0x%x, %f", r->X, r->Y, r->Z, r->F);
}
@ -163,7 +121,7 @@ void __gifCall GIFPackedRegHandlerXYZ2(const u32* data)
gs.add_vertex(r->X, r->Y,r->Z);
// Fix Vertexes up later.
KickVertex(!!(r->ADC));
ZZKick->KickVertex(!!(r->ADC));
ZZLog::Greg_Log("Packed XYZ2: 0x%x, 0x%x, 0x%x", r->X, r->Y, r->Z);
}
@ -240,7 +198,7 @@ void __gifCall GIFRegHandlerXYZF2(const u32* data)
GIFRegXYZF* r = (GIFRegXYZF*)(data);
gs.add_vertex(r->X, r->Y,r->Z, r->F);
KickVertex(false);
ZZKick->KickVertex(false);
ZZLog::Greg_Log("XYZF2: 0x%x, 0x%x, 0x%x, %f", r->X, r->Y, r->Z, r->F);
}
@ -250,7 +208,7 @@ void __gifCall GIFRegHandlerXYZ2(const u32* data)
GIFRegXYZ* r = (GIFRegXYZ*)(data);
gs.add_vertex(r->X, r->Y,r->Z);
KickVertex(false);
ZZKick->KickVertex(false);
ZZLog::Greg_Log("XYZF2: 0x%x, 0x%x, 0x%x", r->X, r->Y, r->Z);
}
@ -275,22 +233,22 @@ void __gifCall GIFRegHandlerTEX0(const u32* data)
}
// Order is important.
ZeroGS::vb[i].uNextTex0Data[0] = r->ai32[0];
ZeroGS::vb[i].uNextTex0Data[1] = r->ai32[1];
ZeroGS::vb[i].bNeedTexCheck = 1;
vb[i].uNextTex0Data[0] = r->ai32[0];
vb[i].uNextTex0Data[1] = r->ai32[1];
vb[i].bNeedTexCheck = 1;
// don't update unless necessary
if (PSMT_ISCLUT(psm))
{
if (ZeroGS::CheckChangeInClut(data[1], psm))
if (CheckChangeInClut(data[1], psm))
{
// loading clut, so flush whole texture
ZeroGS::vb[i].FlushTexData();
vb[i].FlushTexData();
}
else if (r->CSA != (ZeroGS::vb[i].uCurTex0.CSA))
else if (r->CSA != (vb[i].uCurTex0.CSA))
{
// check if csa is the same!! (ffx bisaid island, grass)
ZeroGS::Flush(i); // flush any previous entries
Flush(i); // flush any previous entries
}
}
}
@ -299,7 +257,7 @@ template <u32 i>
void __gifCall GIFRegHandlerCLAMP(const u32* data)
{
FUNCLOG
clampInfo& clamp = ZeroGS::vb[i].clamp;
clampInfo& clamp = vb[i].clamp;
GIFRegCLAMP* r = (GIFRegCLAMP*)(data);
// Worry about this later.
@ -307,9 +265,9 @@ void __gifCall GIFRegHandlerCLAMP(const u32* data)
if ((s_uClampData[i] != data[0]) || (((clamp.minv >> 8) | (clamp.maxv << 2)) != (data[1]&0x0fff)))
{
ZeroGS::Flush(i);
Flush(i);
ZeroGS::vb[i].bTexConstsSync = false;
vb[i].bTexConstsSync = false;
}
s_uClampData[i] = data[0];
@ -337,7 +295,7 @@ void __gifCall GIFRegHandlerXYZF3(const u32* data)
GIFRegXYZF* r = (GIFRegXYZF*)(data);
gs.add_vertex(r->X, r->Y,r->Z, r->F);
KickVertex(true);
ZZKick->KickVertex(true);
ZZLog::Greg_Log("XYZF3: 0x%x, 0x%x, 0x%x, %f", r->X, r->Y, r->Z, r->F);
}
@ -347,7 +305,7 @@ void __gifCall GIFRegHandlerXYZ3(const u32* data)
GIFRegXYZ* r = (GIFRegXYZ*)(data);
gs.add_vertex(r->X, r->Y,r->Z);
KickVertex(true);
ZZKick->KickVertex(true);
ZZLog::Greg_Log("XYZ3: 0x%x, 0x%x, 0x%x", r->X, r->Y, r->Z);
}
@ -361,15 +319,15 @@ void __fastcall GIFRegHandlerTEX1(const u32* data)
{
FUNCLOG
GIFRegTEX1* r = (GIFRegTEX1*)(data);
tex1Info& tex1 = ZeroGS::vb[i].tex1;
tex1Info& tex1 = vb[i].tex1;
// Worry about this later.
if (!NoHighlights(i)) return;
if (conf.bilinear == 1 && (tex1.mmag != r->MMAG || tex1.mmin != r->MMIN))
{
ZeroGS::Flush(i);
ZeroGS::vb[i].bVarsTexSync = false;
Flush(i);
vb[i].bVarsTexSync = false;
}
tex1.lcm = r->LCM;
@ -387,13 +345,13 @@ template <u32 i>
void __gifCall GIFRegHandlerTEX2(const u32* data)
{
FUNCLOG
tex0Info& tex0 = ZeroGS::vb[i].tex0;
tex0Info& tex0 = vb[i].tex0;
ZeroGS::vb[i].FlushTexData();
vb[i].FlushTexData();
u32 psm = ZZOglGet_psm_TexBitsFix(data[0]);
u32* s_uTex0Data = ZeroGS::vb[i].uCurTex0Data;
u32* s_uTex0Data = vb[i].uCurTex0Data;
// don't update unless necessary
// if( ZZOglGet_psm_TexBitsFix(*s_uTex0Data) == ZZOglGet_psm_TexBitsFix(data[0]) ) { // psm is the same
@ -408,26 +366,26 @@ void __gifCall GIFRegHandlerTEX2(const u32* data)
if (tex0.cld != 0)
{
ZeroGS::texClutWrite(i);
texClutWrite(i);
// invalidate to make sure target didn't change!
ZeroGS::vb[i].bVarsTexSync = false;
vb[i].bVarsTexSync = false;
}
return;
}
}
ZeroGS::Flush(i);
Flush(i);
ZeroGS::vb[i].bVarsTexSync = false;
ZeroGS::vb[i].bTexConstsSync = false;
vb[i].bVarsTexSync = false;
vb[i].bTexConstsSync = false;
s_uTex0Data[0] = (s_uTex0Data[0] & ~0x03f00000) | (psm << 20);
s_uTex0Data[1] = (s_uTex0Data[1] & 0x1f) | (data[1] & ~0x1f);
tex0.psm = ZZOglGet_psm_TexBitsFix(data[0]);
if (PSMT_ISCLUT(tex0.psm)) ZeroGS::CluttingForFlushedTex(&tex0, data[1], i);
if (PSMT_ISCLUT(tex0.psm)) CluttingForFlushedTex(&tex0, data[1], i);
ZZLog::Greg_Log("TEX2_%d: 0x%x", i, data);
}
@ -437,8 +395,8 @@ void __gifCall GIFRegHandlerXYOFFSET(const u32* data)
FUNCLOG
// Affects that Mana Khemia opening dialog (when i == 0).
GIFRegXYOFFSET* r = (GIFRegXYOFFSET*)(data);
ZeroGS::vb[i].offset.x = r->OFX;
ZeroGS::vb[i].offset.y = r->OFY;
vb[i].offset.x = r->OFX;
vb[i].offset.y = r->OFY;
ZZLog::Greg_Log("XYOFFSET_%d: 0x%x, 0x%x", i, r->OFX, r->OFY);
}
@ -453,16 +411,16 @@ void __gifCall GIFRegHandlerPRIM(const u32 *data)
//ZZLog::Warn_Log("Warning: unknown bits in prim %8.8lx_%8.8lx", data[1], data[0]);
//}
// Come back to this one...
gs.nTriFanVert = gs.primIndex;
gs.primC = 0;
prim->prim = r->PRIM;
gs._prim[0].prim = r->PRIM;
gs._prim[1].prim = r->PRIM;
gs._prim[1]._val = (data[0] >> 3) & 0xff; // Setting the next 8 flags after prim at once.
ZeroGS::Prim();
gs.new_tri_fan = !(r->PRIM ^ PRIM_TRIANGLE_FAN);
ZZKick->DirtyValidPrevPrim();
Prim();
ZZLog::Greg_Log("PRIM");
}
@ -474,7 +432,7 @@ void __gifCall GIFRegHandlerPRMODE(const u32* data)
// Re-examine all code dealing with PRIMs in a bit.
gs._prim[0]._val = (data[0] >> 3) & 0xff;
if (gs.prac == 0) ZeroGS::Prim();
if (gs.prac == 0) Prim();
ZZLog::Greg_Log("PRMODE");
}
@ -487,7 +445,7 @@ void __gifCall GIFRegHandlerPRMODECONT(const u32* data)
gs.prac = r->AC;
prim = &gs._prim[gs.prac];
ZeroGS::Prim();
Prim();
ZZLog::Greg_Log("PRMODECONT");
}
@ -497,8 +455,8 @@ void __gifCall GIFRegHandlerTEXCLUT(const u32* data)
// Affects background coloration of initial Mana Khemia dialog.
GIFRegTEXCLUT* r = (GIFRegTEXCLUT*)(data);
ZeroGS::vb[0].FlushTexData();
ZeroGS::vb[1].FlushTexData();
vb[0].FlushTexData();
vb[1].FlushTexData();
// Fixme.
gs.clut.cbw = r->CBW << 6;
@ -514,9 +472,9 @@ void __gifCall GIFRegHandlerSCANMSK(const u32* data)
if(r->MSK != gs.smask)
{
ZeroGS::FlushBoth();
// ZeroGS::ResolveC(&ZeroGS::vb[0]);
// ZeroGS::ResolveZ(&ZeroGS::vb[0]);
FlushBoth();
// ResolveC(&vb[0]);
// ResolveZ(&vb[0]);
}
gs.smask = r->MSK;
@ -533,7 +491,7 @@ void __gifCall GIFRegHandlerMIPTBP1(const u32* data)
Flush();
}*/
miptbpInfo& miptbp0 = ZeroGS::vb[i].miptbp0;
miptbpInfo& miptbp0 = vb[i].miptbp0;
miptbp0.tbp[0] = r->TBP1;
miptbp0.tbw[0] = r->TBW1;
miptbp0.tbp[1] = r->TBP2;
@ -550,7 +508,7 @@ void __gifCall GIFRegHandlerMIPTBP2(const u32* data)
GIFRegMIPTBP2* r = (GIFRegMIPTBP2*)(data);
// Yep.
miptbpInfo& miptbp1 = ZeroGS::vb[i].miptbp1;
miptbpInfo& miptbp1 = vb[i].miptbp1;
miptbp1.tbp[0] = r->TBP4;
miptbp1.tbw[0] = r->TBW4;
miptbp1.tbp[1] = r->TBP5;
@ -568,10 +526,10 @@ void __gifCall GIFRegHandlerTEXA(const u32* data)
if ((r->AEM != gs.texa.aem) || (r->TA0 != gs.texa.ta[0]) || (r->TA1 != gs.texa.ta[1]))
{
ZeroGS::FlushBoth();
FlushBoth();
ZeroGS::vb[0].bTexConstsSync = false;
ZeroGS::vb[1].bTexConstsSync = false;
vb[0].bTexConstsSync = false;
vb[1].bTexConstsSync = false;
}
gs.texa.aem = r->AEM;
@ -589,10 +547,10 @@ void __gifCall GIFRegHandlerFOGCOL(const u32* data)
if (gs.fogcol != r->ai32[0])
{
ZeroGS::FlushBoth();
FlushBoth();
}
ZeroGS::SetFogColor(r);
SetFogColor(r);
gs.fogcol = r->ai32[0];
ZZLog::Greg_Log("FOGCOL: 0x%x", r->ai32[0]);
}
@ -601,7 +559,7 @@ void __gifCall GIFRegHandlerTEXFLUSH(const u32* data)
{
FUNCLOG
// GSdx doesn't even do anything here.
ZeroGS::SetTexFlush();
SetTexFlush();
ZZLog::Greg_Log("TEXFLUSH");
}
@ -610,7 +568,7 @@ void __gifCall GIFRegHandlerSCISSOR(const u32* data)
{
FUNCLOG
GIFRegSCISSOR* r = (GIFRegSCISSOR*)(data);
Rect2& scissor = ZeroGS::vb[i].scissor;
Rect2& scissor = vb[i].scissor;
Rect2 newscissor;
@ -623,10 +581,10 @@ void __gifCall GIFRegHandlerSCISSOR(const u32* data)
if (newscissor.x1 != scissor.x1 || newscissor.y1 != scissor.y1 ||
newscissor.x0 != scissor.x0 || newscissor.y0 != scissor.y0)
{
ZeroGS::Flush(i);
Flush(i);
// flush everything
ZeroGS::vb[i].bNeedFrameCheck = 1;
vb[i].bNeedFrameCheck = 1;
}
scissor = newscissor;
@ -663,12 +621,12 @@ void __gifCall GIFRegHandlerALPHA(const u32* data)
if (newalpha.c == 3) newalpha.c = 0;
if (newalpha.d == 3) newalpha.d = 0;
if ((newalpha.abcd != ZeroGS::vb[i].alpha.abcd) || (newalpha.fix != ZeroGS::vb[i].alpha.fix))
if ((newalpha.abcd != vb[i].alpha.abcd) || (newalpha.fix != vb[i].alpha.fix))
{
ZeroGS::Flush(i);
Flush(i);
}
ZeroGS::vb[i].alpha = newalpha;
vb[i].alpha = newalpha;
ZZLog::Greg_Log("ALPHA%d: A:0x%x B:0x%x C:0x%x D:0x%x FIX:0x%x ", i, r->A, r->B, r->C, r->D, r->FIX);
}
@ -682,7 +640,7 @@ void __gifCall GIFRegHandlerDIMX(const u32* data)
if (r->i64 != gs.dimx.i64)
{
ZeroGS::FlushBoth();
FlushBoth();
update = true;
}
@ -703,7 +661,7 @@ void __gifCall GIFRegHandlerDTHE(const u32* data)
if (r->DTHE != gs.dthe)
{
ZeroGS::FlushBoth();
FlushBoth();
}
gs.dthe = r->DTHE;
@ -717,7 +675,7 @@ void __gifCall GIFRegHandlerCOLCLAMP(const u32* data)
if (r->CLAMP != gs.colclamp)
{
ZeroGS::FlushBoth();
FlushBoth();
}
gs.colclamp = r->CLAMP;
@ -728,12 +686,12 @@ template <u32 i>
void __gifCall GIFRegHandlerTEST(const u32* data)
{
FUNCLOG
pixTest* test = &ZeroGS::vb[i].test;
pixTest* test = &vb[i].test;
GIFRegTEST* r = (GIFRegTEST*)(data);
if (test->_val != r->ai32[0])
{
ZeroGS::Flush(i);
Flush(i);
}
test->_val = r->ai32[0];
@ -747,9 +705,9 @@ void __gifCall GIFRegHandlerPABE(const u32* data)
if (gs.pabe != r->PABE)
{
ZeroGS::FlushBoth();
// ZeroGS::SetAlphaChanged(0, GPUREG_PABE);
// ZeroGS::SetAlphaChanged(1, GPUREG_PABE);
FlushBoth();
// SetAlphaChanged(0, GPUREG_PABE);
// SetAlphaChanged(1, GPUREG_PABE);
}
gs.pabe = r->PABE;
@ -762,12 +720,12 @@ void __gifCall GIFRegHandlerFBA(const u32* data)
FUNCLOG
GIFRegFBA* r = (GIFRegFBA*)(data);
if (r->FBA != ZeroGS::vb[i].fba.fba)
if (r->FBA != vb[i].fba.fba)
{
ZeroGS::FlushBoth();
FlushBoth();
}
ZeroGS::vb[i].fba.fba = r->FBA;
vb[i].fba.fba = r->FBA;
ZZLog::Greg_Log("FBA%d: 0x%x ", i, r->FBA);
}
@ -778,7 +736,7 @@ void __gifCall GIFRegHandlerFRAME(const u32* data)
// Affects opening dialogs, movie, and menu on Mana Khemia.
GIFRegFRAME* r = (GIFRegFRAME*)(data);
frameInfo& gsfb = ZeroGS::vb[i].gsfb;
frameInfo& gsfb = vb[i].gsfb;
int fbw = r->FBW * 64;
int fbp = r->FBP * 32;
@ -798,7 +756,7 @@ void __gifCall GIFRegHandlerFRAME(const u32* data)
return;
}
ZeroGS::FlushBoth();
FlushBoth();
if (r->FBW > 0) fbh = ZZOgl_fbh_Calc(r->FBP, r->FBW, r->PSM);
gsfb.fbp = fbp;
@ -808,7 +766,7 @@ void __gifCall GIFRegHandlerFRAME(const u32* data)
gsfb.fbm = ZZOglGet_fbm_FrameBitsFix(data[0], data[1]);
ZeroGS::vb[i].bNeedFrameCheck = 1;
vb[i].bNeedFrameCheck = 1;
ZZLog::Greg_Log("FRAME_%d", i);
}
@ -820,7 +778,7 @@ void __gifCall GIFRegHandlerZBUF(const u32* data)
GIFRegZBUF* r = (GIFRegZBUF*)(data);
ZZLog::Greg_Log("ZBUF_1");
zbufInfo& zbuf = ZeroGS::vb[i].zbuf;
zbufInfo& zbuf = vb[i].zbuf;
int psm = (0x30 | r->PSM);
int zbp = r->ZBP * 32;
@ -834,17 +792,17 @@ void __gifCall GIFRegHandlerZBUF(const u32* data)
// error detection
if (m_Blocks[psm].bpp == 0) return;
ZeroGS::FlushBoth();
FlushBoth();
zbuf.zbp = zbp;
zbuf.psm = psm;
zbuf.zmsk = r->ZMSK;
ZeroGS::vb[i].zprimmask = 0xffffffff;
vb[i].zprimmask = 0xffffffff;
if (zbuf.psm > 0x31) ZeroGS::vb[i].zprimmask = 0xffff;
if (zbuf.psm > 0x31) vb[i].zprimmask = 0xffff;
ZeroGS::vb[i].bNeedZCheck = 1;
vb[i].bNeedZCheck = 1;
}
void __gifCall GIFRegHandlerBITBLTBUF(const u32* data)
@ -911,11 +869,11 @@ void __gifCall GIFRegHandlerTRXDIR(const u32* data)
switch (gs.imageTransfer)
{
case 0: // host->loc
gs.imageTransfer = -1;
TerminateHostLocal();
break;
case 1: // loc->host
ZeroGS::TerminateLocalHost();
TerminateLocalHost();
break;
}
@ -932,15 +890,15 @@ void __gifCall GIFRegHandlerTRXDIR(const u32* data)
switch (gs.imageTransfer)
{
case 0: // host->loc
ZeroGS::InitTransferHostLocal();
InitTransferHostLocal();
break;
case 1: // loc->host
ZeroGS::InitTransferLocalHost();
InitTransferLocalHost();
break;
case 2:
ZeroGS::TransferLocalLocal();
TransferLocalLocal();
break;
case 3:
@ -967,7 +925,7 @@ void __gifCall GIFRegHandlerHWREG(const u32* data)
if (gs.imageTransfer == 0)
{
ZeroGS::TransferHostLocal(data, 2);
TransferHostLocal(data, 2);
}
else
{

View File

@ -23,6 +23,7 @@
#ifdef USE_OLD_REGS
#include "Regs.h"
#else
enum GIF_REG
{
GIF_REG_PRIM = 0x00,
@ -773,9 +774,200 @@ REG128_SET(GIFPackedReg)
GIFPackedNOP NOP;
REG_SET_END
// GSReg BGCOLOR layout (64 bits): R, G and B take 8 bits each, followed by
// 8 + 32 padding bits. The BGCOLOR accessor macro in this header maps this
// layout at g_pBasePS2Mem+0x00e0.
REG64_(GSReg, BGCOLOR)
u32 R:8;
u32 G:8;
u32 B:8;
u32 _PAD1:8;
u32 _PAD2:32;
REG_END
// GSReg BUSDIR layout (64 bits): a single DIR bit followed by 31 + 32 padding
// bits. The BUSDIR accessor macro in this header maps it at
// g_pBasePS2Mem+0x1040.
REG64_(GSReg, BUSDIR)
u32 DIR:1;
u32 _PAD1:31;
u32 _PAD2:32;
REG_END
// GSReg CSR layout (64 bits). The fields up to and including ID add up to
// exactly 32 bits; _PAD3 fills the remaining 32. The CSR accessor macro in
// this header maps it at g_pBasePS2Mem+0x1000.
REG64_(GSReg, CSR)
u32 SIGNAL:1;
u32 FINISH:1;
u32 HSINT:1;
u32 VSINT:1;
u32 EDWINT:1;
u32 ZERO1:1;
u32 ZERO2:1;
u32 _PAD1:1;
u32 FLUSH:1;
u32 RESET:1;
u32 _PAD2:2;
u32 NFIELD:1;
u32 FIELD:1;
u32 FIFO:2;
u32 REV:8;
u32 ID:8;
u32 _PAD3:32;
REG_END
// GSReg DISPFB layout (64 bits): FBP/FBW/PSM plus padding fill the first 32
// bits, DBX/DBY plus padding the second 32. One layout serves both instances:
// the DISPFB1 and DISPFB2 accessor macros in this header map it at
// g_pBasePS2Mem+0x0070 and +0x0090.
REG64_(GSReg, DISPFB) // (-1/2)
u32 FBP:9;
u32 FBW:6;
u32 PSM:5;
u32 _PAD:12;
u32 DBX:11;
u32 DBY:11;
u32 _PAD2:10;
REG_END
// GSReg DISPLAY layout (64 bits): DX/DY/MAGH/MAGV plus padding fill the first
// 32 bits, DW/DH plus padding the second 32. One layout serves both instances:
// the DISPLAY1 and DISPLAY2 accessor macros in this header map it at
// g_pBasePS2Mem+0x0080 and +0x00a0.
REG64_(GSReg, DISPLAY) // (-1/2)
u32 DX:12;
u32 DY:11;
u32 MAGH:4;
u32 MAGV:2;
u32 _PAD:3;
u32 DW:12;
u32 DH:11;
u32 _PAD2:9;
REG_END
// GSReg EXTBUF layout (64 bits): EXBP through _PAD1 fill the first 32 bits,
// WDX/WDY plus padding the second 32. The EXTBUF accessor macro in this
// header maps it at g_pBasePS2Mem+0x00b0.
REG64_(GSReg, EXTBUF)
u32 EXBP:14;
u32 EXBW:6;
u32 FBIN:2;
u32 WFFMD:1;
u32 EMODA:2;
u32 EMODC:2;
u32 _PAD1:5;
u32 WDX:11;
u32 WDY:11;
u32 _PAD2:10;
REG_END
// GSReg EXTDATA layout (64 bits): SX/SY/SMPH/SMPV plus padding fill the first
// 32 bits, WW/WH plus padding the second 32. The EXTDATA accessor macro in
// this header maps it at g_pBasePS2Mem+0x00c0.
REG64_(GSReg, EXTDATA)
u32 SX:12;
u32 SY:11;
u32 SMPH:4;
u32 SMPV:2;
u32 _PAD1:3;
u32 WW:12;
u32 WH:11;
u32 _PAD2:9;
REG_END
// GSReg EXTWRITE layout (64 bits): WRITE is a plain (non-bitfield) u32,
// followed by 32 padding bits. The EXTWRITE accessor macro in this header
// maps it at g_pBasePS2Mem+0x00d0.
REG64_(GSReg, EXTWRITE)
u32 WRITE;
u32 _PAD2:32;
REG_END
// GSReg IMR layout (64 bits): 8 padding bits, then five one-bit *MSK flags,
// then 19 + 32 padding bits. The IMR accessor macro in this header maps it at
// g_pBasePS2Mem+0x1010.
REG64_(GSReg, IMR)
u32 _PAD1:8;
u32 SIGMSK:1;
u32 FINISHMSK:1;
u32 HSMSK:1;
u32 VSMSK:1;
u32 EDWMSK:1;
u32 _PAD2:19;
u32 _PAD3:32;
REG_END
// GSReg PMODE layout (64 bits): EN1 through ALP use 16 bits, the remaining
// 16 + 32 bits are padding. The PMODE accessor macro in this header maps it
// at g_pBasePS2Mem+0x0000.
REG64_(GSReg, PMODE)
u32 EN1:1;
u32 EN2:1;
u32 CRTMD:3;
u32 MMOD:1;
u32 AMOD:1;
u32 SLBG:1;
u32 ALP:8;
u32 _PAD:16;
u32 _PAD1:32;
REG_END
// GSReg SIGLBLID layout (64 bits): two full 32-bit fields, SIGID then LBLID.
// NOTE(review): the SIGLBLID accessor macro in this header casts to
// GSRegSIGBLID (declared further down with the same two fields) rather than
// to this type -- presumably one of the two declarations is redundant; confirm.
REG64_(GSReg, SIGLBLID)
u32 SIGID:32;
u32 LBLID:32;
REG_END
// GSReg SMODE1 layout (64 bits). RC through CLKSEL add up to exactly 32 bits;
// NVCK through VHP use 5 more and _PAD1 fills the remaining 27. The SMODE1
// accessor macro in this header maps it at g_pBasePS2Mem+0x0010, and
// GET_GSFPS reads bit 0 of CMOD from it.
REG64_(GSReg, SMODE1)
u32 RC:3;
u32 LC:7;
u32 T1248:2;
u32 SLCK:1;
u32 CMOD:2;
u32 EX:1;
u32 PRST:1;
u32 SINT:1;
u32 XPCK:1;
u32 PCK2:2;
u32 SPML:4;
u32 GCONT:1;
u32 PHS:1;
u32 PVS:1;
u32 PEHS:1;
u32 PEVS:1;
u32 CLKSEL:2;
u32 NVCK:1;
u32 SLCK2:1;
u32 VCKSEL:2;
u32 VHP:1;
u32 _PAD1:27;
REG_END
// GSReg SMODE2 layout (64 bits): INT, FFMD and DPMS use 4 bits, the remaining
// 28 + 32 bits are padding. The SMODE2 accessor macro in this header maps it
// at g_pBasePS2Mem+0x0020, and GET_GSFPS reads INT from it.
REG64_(GSReg, SMODE2)
u32 INT:1;
u32 FFMD:1;
u32 DPMS:2;
u32 _PAD2:28;
u32 _PAD3:32;
REG_END
// GSReg SIGBLID layout (64 bits): two plain u32 members, SIGID then LBLID.
// This is the type the SIGLBLID accessor macro in this header casts to
// (g_pBasePS2Mem+0x1080).
// NOTE(review): GSReg SIGLBLID, declared earlier in this header, carries the
// same two fields as 32-bit bitfields -- presumably a duplicate; confirm.
REG64_(GSReg, SIGBLID)
u32 SIGID;
u32 LBLID;
REG_END
// Typed accessors for the GSReg layouts: each macro casts a fixed byte offset
// from g_pBasePS2Mem to a pointer to the matching register type, so fields
// are read as e.g. SMODE2->INT.
// NOTE(review): the GSRegSYNCH1 / GSRegSYNCH2 / GSRegSYNCV types used below
// are not declared in the part of this header shown here; using those three
// macros only compiles if the types exist elsewhere -- confirm.
// NOTE(review): SIGLBLID casts to GSRegSIGBLID, not GSRegSIGLBLID; both types
// are declared in this header with the same two 32-bit fields.
#define PMODE ((GSRegPMODE*)(g_pBasePS2Mem+0x0000))
#define SMODE1 ((GSRegSMODE1*)(g_pBasePS2Mem+0x0010))
#define SMODE2 ((GSRegSMODE2*)(g_pBasePS2Mem+0x0020))
// SRFSH: no accessor is defined (presumably the register in the 0x0030 gap).
#define SYNCH1 ((GSRegSYNCH1*)(g_pBasePS2Mem+0x0040))
#define SYNCH2 ((GSRegSYNCH2*)(g_pBasePS2Mem+0x0050))
#define SYNCV ((GSRegSYNCV*)(g_pBasePS2Mem+0x0060))
#define DISPFB1 ((GSRegDISPFB*)(g_pBasePS2Mem+0x0070))
#define DISPLAY1 ((GSRegDISPLAY*)(g_pBasePS2Mem+0x0080))
#define DISPFB2 ((GSRegDISPFB*)(g_pBasePS2Mem+0x0090))
#define DISPLAY2 ((GSRegDISPLAY*)(g_pBasePS2Mem+0x00a0))
#define EXTBUF ((GSRegEXTBUF*)(g_pBasePS2Mem+0x00b0))
#define EXTDATA ((GSRegEXTDATA*)(g_pBasePS2Mem+0x00c0))
#define EXTWRITE ((GSRegEXTWRITE*)(g_pBasePS2Mem+0x00d0))
#define BGCOLOR ((GSRegBGCOLOR*)(g_pBasePS2Mem+0x00e0))
#define CSR ((GSRegCSR*)(g_pBasePS2Mem+0x1000))
#define IMR ((GSRegIMR*)(g_pBasePS2Mem+0x1010))
#define BUSDIR ((GSRegBUSDIR*)(g_pBasePS2Mem+0x1040))
#define SIGLBLID ((GSRegSIGBLID*)(g_pBasePS2Mem+0x1080))
//
// sps2tags.h
//
// GET_GIF_REG(tag, reg): returns the 4-bit value number `reg` stored in the
// tag's ai32[2] / ai32[3] words. reg >> 3 selects the word (eight 4-bit slots
// per 32-bit word) and (reg & 7) << 2 is the bit shift inside that word.
#define GET_GIF_REG(tag, reg) \
(((tag).ai32[2 + ((reg) >> 3)] >> (((reg) & 7) << 2)) & 0xf)
// GET_GSFPS: 50 when bit 0 of SMODE1->CMOD is set, otherwise 60; the result
// is halved (integer division) when SMODE2->INT is clear.
#define GET_GSFPS (((SMODE1->CMOD&1) ? 50 : 60) / (SMODE2->INT ? 1 : 2))
extern void WriteTempRegs();
extern void SetFrameSkip(bool skip);
extern void ResetRegs();
extern void SetTexFlush();
extern void SetFogColor(u32 fog);
extern void SetFogColor(GIFRegFOGCOL* fog);
extern bool CheckChangeInClut(u32 highdword, u32 psm); // returns true if clut will change after this tex0 op
// flush current vertices, call before setting new registers (the main render method)
void Flush(int context);
void FlushBoth();
// called on a primitive switch
void Prim();
#endif
#endif // NEWREGS_H_INCLUDED

View File

@ -25,6 +25,7 @@
#include <string>
#include <map>
#include "Profile.h"
#include "Util.h"
using namespace std;
@ -59,7 +60,7 @@ struct DVPROFSTRUCT
while (it != listpChild.end())
{
SAFE_DELETE(*it);
safe_delete(*it);
++it;
}
}

View File

@ -20,7 +20,7 @@
#ifndef PROFILE_H_INCLUDED
#define PROFILE_H_INCLUDED
#include "zerogs.h"
#include "Util.h"
#if !defined(ZEROGS_DEVBUILD)
#define g_bWriteProfile 0
@ -35,6 +35,23 @@ extern u64 luPerfFreq;
#ifdef __LINUX__
#include <sys/time.h>
#include <sys/timeb.h> // ftime(), struct timeb
// Linux stand-in for timeGetTime(): returns the current wall-clock time in
// milliseconds, truncated to unsigned long.
inline unsigned long timeGetTime()
{
	// ftime() is obsolete (dropped from POSIX.1-2008 and deprecated in glibc);
	// gettimeofday() from <sys/time.h>, which this header already includes,
	// yields the same wall-clock reading.
	timeval now;
	gettimeofday(&now, 0);
	return (unsigned long)(now.tv_sec * 1000 + now.tv_usec / 1000);
}
// Returns the CPU time consumed by this process, in nanoseconds, truncated to
// unsigned long. Differences between two readings stay valid across second
// boundaries because whole seconds are folded in (the previous version
// returned only tv_nsec, which drops back to 0 every second).
inline unsigned long timeGetPreciseTime()
{
	timespec t;
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &t);

	// Unsigned arithmetic: where unsigned long is 32 bits the total wraps
	// modulo 2^32, which still gives correct differences for short intervals.
	return (unsigned long)t.tv_sec * 1000000000UL + (unsigned long)t.tv_nsec;
}
static __forceinline void InitCPUTicks()
{
@ -56,6 +73,12 @@ static __forceinline u64 GetCPUTicks()
#else
static __aligned16 LARGE_INTEGER lfreq;
// Placeholder for the non-__LINUX__ build: no precise timer is implemented
// here yet, so every call returns 0.
// NOTE(review): the __LINUX__ branch of this header defines a function with
// the same name on top of clock_gettime(); callers presumably expect the same
// units from both -- confirm when implementing.
inline unsigned long timeGetPreciseTime()
{
// TODO: implement later.
return 0;
}
static __forceinline void InitCPUTicks()
{
QueryPerformanceFrequency(&lfreq);

File diff suppressed because it is too large Load Diff

View File

@ -116,13 +116,7 @@ extern FnType_GIFRegHandler GIFPackedRegHandlerXYZ2;
extern FnType_GIFRegHandler GIFPackedRegHandlerFOG;
extern FnType_GIFRegHandler GIFPackedRegHandlerA_D;
extern FnType_GIFRegHandler GIFPackedRegHandlerNOP;
// These are unimplemented, and fall back on the non-packed versions.
extern FnType_GIFRegHandler GIFPackedRegHandlerPRIM;
extern FnType_GIFRegHandler GIFPackedRegHandlerTEX0_1;
extern FnType_GIFRegHandler GIFPackedRegHandlerTEX0_2;
extern FnType_GIFRegHandler GIFPackedRegHandlerCLAMP_1;
extern FnType_GIFRegHandler GIFPackedRegHandlerCLAMP_2;
extern FnType_GIFRegHandler GIFPackedRegHandlerXYZF3;
extern FnType_GIFRegHandler GIFPackedRegHandlerXYZ3;
@ -133,47 +127,21 @@ extern FnType_GIFRegHandler GIFRegHandlerST;
extern FnType_GIFRegHandler GIFRegHandlerUV;
extern FnType_GIFRegHandler GIFRegHandlerXYZF2;
extern FnType_GIFRegHandler GIFRegHandlerXYZ2;
extern FnType_GIFRegHandler GIFRegHandlerTEX0_1;
extern FnType_GIFRegHandler GIFRegHandlerTEX0_2;
extern FnType_GIFRegHandler GIFRegHandlerCLAMP_1;
extern FnType_GIFRegHandler GIFRegHandlerCLAMP_2;
extern FnType_GIFRegHandler GIFRegHandlerFOG;
extern FnType_GIFRegHandler GIFRegHandlerXYZF3;
extern FnType_GIFRegHandler GIFRegHandlerXYZ3;
extern FnType_GIFRegHandler GIFRegHandlerNOP;
extern FnType_GIFRegHandler GIFRegHandlerTEX1_1;
extern FnType_GIFRegHandler GIFRegHandlerTEX1_2;
extern FnType_GIFRegHandler GIFRegHandlerTEX2_1;
extern FnType_GIFRegHandler GIFRegHandlerTEX2_2;
extern FnType_GIFRegHandler GIFRegHandlerXYOFFSET_1;
extern FnType_GIFRegHandler GIFRegHandlerXYOFFSET_2;
extern FnType_GIFRegHandler GIFRegHandlerPRMODECONT;
extern FnType_GIFRegHandler GIFRegHandlerPRMODE;
extern FnType_GIFRegHandler GIFRegHandlerTEXCLUT;
extern FnType_GIFRegHandler GIFRegHandlerSCANMSK;
extern FnType_GIFRegHandler GIFRegHandlerMIPTBP1_1;
extern FnType_GIFRegHandler GIFRegHandlerMIPTBP1_2;
extern FnType_GIFRegHandler GIFRegHandlerMIPTBP2_1;
extern FnType_GIFRegHandler GIFRegHandlerMIPTBP2_2;
extern FnType_GIFRegHandler GIFRegHandlerTEXA;
extern FnType_GIFRegHandler GIFRegHandlerFOGCOL;
extern FnType_GIFRegHandler GIFRegHandlerTEXFLUSH;
extern FnType_GIFRegHandler GIFRegHandlerSCISSOR_1;
extern FnType_GIFRegHandler GIFRegHandlerSCISSOR_2;
extern FnType_GIFRegHandler GIFRegHandlerALPHA_1;
extern FnType_GIFRegHandler GIFRegHandlerALPHA_2;
extern FnType_GIFRegHandler GIFRegHandlerDIMX;
extern FnType_GIFRegHandler GIFRegHandlerDTHE;
extern FnType_GIFRegHandler GIFRegHandlerCOLCLAMP;
extern FnType_GIFRegHandler GIFRegHandlerTEST_1;
extern FnType_GIFRegHandler GIFRegHandlerTEST_2;
extern FnType_GIFRegHandler GIFRegHandlerPABE;
extern FnType_GIFRegHandler GIFRegHandlerFBA_1;
extern FnType_GIFRegHandler GIFRegHandlerFBA_2;
extern FnType_GIFRegHandler GIFRegHandlerFRAME_1;
extern FnType_GIFRegHandler GIFRegHandlerFRAME_2;
extern FnType_GIFRegHandler GIFRegHandlerZBUF_1;
extern FnType_GIFRegHandler GIFRegHandlerZBUF_2;
extern FnType_GIFRegHandler GIFRegHandlerBITBLTBUF;
extern FnType_GIFRegHandler GIFRegHandlerTRXPOS;
extern FnType_GIFRegHandler GIFRegHandlerTRXREG;
@ -183,8 +151,39 @@ extern FnType_GIFRegHandler GIFRegHandlerSIGNAL;
extern FnType_GIFRegHandler GIFRegHandlerFINISH;
extern FnType_GIFRegHandler GIFRegHandlerLABEL;
// GifReg & GifPackedReg structs from GSdx, slightly modified
template<u32 ctxt>
extern FnType_GIFRegHandler GIFPackedRegHandlerTEX0;
template<u32 ctxt>
extern FnType_GIFRegHandler GIFPackedRegHandlerCLAMP;
template<u32 ctxt>
extern FnType_GIFRegHandler GIFRegHandlerTEX0;
template<u32 ctxt>
extern FnType_GIFRegHandler GIFRegHandlerCLAMP;
template<u32 ctxt>
extern FnType_GIFRegHandler GIFRegHandlerTEX1;
template<u32 ctxt>
extern FnType_GIFRegHandler GIFRegHandlerTEX2;
template<u32 ctxt>
extern FnType_GIFRegHandler GIFRegHandlerXYOFFSET;
template<u32 ctxt>
extern FnType_GIFRegHandler GIFRegHandlerMIPTBP1;
template<u32 ctxt>
extern FnType_GIFRegHandler GIFRegHandlerMIPTBP2;
template<u32 ctxt>
extern FnType_GIFRegHandler GIFRegHandlerSCISSOR;
template<u32 ctxt>
extern FnType_GIFRegHandler GIFRegHandlerALPHA;
template<u32 ctxt>
extern FnType_GIFRegHandler GIFRegHandlerTEST;
template<u32 ctxt>
extern FnType_GIFRegHandler GIFRegHandlerFBA;
template<u32 ctxt>
extern FnType_GIFRegHandler GIFRegHandlerFRAME;
template<u32 ctxt>
extern FnType_GIFRegHandler GIFRegHandlerZBUF;
// GifReg & GifPackedReg structs from GSdx, slightly modified.
enum GS_ATST
{
ATST_NEVER = 0,
@ -946,6 +945,19 @@ REG_END
extern void WriteTempRegs();
extern void SetFrameSkip(bool skip);
extern void ResetRegs();
extern void SetTexFlush();
extern void SetFogColor(u32 fog);
extern void SetFogColor(GIFRegFOGCOL* fog);
extern bool CheckChangeInClut(u32 highdword, u32 psm); // returns true if clut will change after this tex0 op
// flush current vertices, call before setting new registers (the main render method)
void Flush(int context);
void FlushBoth();
// called on a primitive switch
void Prim();
#else
#include "NewRegs.h"
#endif

View File

@ -23,16 +23,20 @@
#ifdef _WIN32
#include <io.h>
#include "Utilities/RedtapeWindows.h"
#include <windows.h>
#include <windowsx.h>
#include <GL/gl.h>
#include <GL/glext.h>
#include "glprocs.h"
extern HWND GShwnd;
#else // linux basic definitions
#include <sys/stat.h>
#include <GL/glew.h>
#include <GL/gl.h>
#include <GL/glext.h>
@ -41,9 +45,11 @@ extern HWND GShwnd;
#endif
#define GSdefs
#include "PS2Edefs.h"
//Pcsx2Defs is included in Dependencies.h.
#include "Utilities/Dependencies.h"
#include "CRC.h"
#include "ZZLog.h"
@ -53,75 +59,13 @@ extern "C" u32 CALLBACK PS2EgetLibVersion2(u32 type);
extern "C" char* CALLBACK PS2EgetLibName(void);
#include "ZZoglMath.h"
#include "Profile.h"
#include <vector>
#include <string>
#include <cstring>
#include "Utilities/MemcpyFast.h"
#define memcpy_amd memcpy_fast
extern std::string s_strIniPath; // Air's new (r2361) new constant for ini file path
#if !defined(_MSC_VER) && !defined(HAVE_ALIGNED_MALLOC)
#include <malloc.h>
// declare linux equivalents
// Allocates `size` bytes whose address is a multiple of `align` and returns
// the aligned pointer, or NULL on failure (align == 0, request too large, or
// malloc() failing). The distance back to the raw malloc() block is stored as
// a u16 in the two bytes just below the returned pointer, which is what
// pcsx2_aligned_free() reads to recover the block.
static __forceinline void* pcsx2_aligned_malloc(size_t size, size_t align)
{
	assert(align < 0x10000);
	if (align == 0) return NULL; // the modulo below would divide by zero

	// The payload starts between 2 and align + 1 bytes into the raw block
	// (2 bytes of header plus up to align - 1 bytes of padding), so reserve
	// that much on top of the requested size.
	const size_t slack = align + 1;
	if (size > (size_t)-1 - slack) return NULL; // size + slack would wrap

	char* raw = (char*)malloc(size + slack);
	if (raw == NULL) return NULL;

	// Use the full-width address: truncating it to int can yield a negative
	// remainder on 64-bit targets and push the payload past the allocation.
	const uptr first_usable = (uptr)(raw + 2);
	const size_t off = 2 + (align - (size_t)(first_usable % align)) % align;

	// The header can only hold 16 bits of offset.
	if (off > 0xffff)
	{
		free(raw);
		return NULL;
	}

	char* aligned = raw + off;
	*(u16*)(aligned - 2) = (u16)off;
	return aligned;
}
// Releases a block obtained from pcsx2_aligned_malloc(). Passing NULL is a
// no-op.
static __forceinline void pcsx2_aligned_free(void* pmem)
{
	if (pmem == NULL) return;

	// The u16 stored in the two bytes just below the payload is the distance
	// back to the start of the raw malloc() block.
	char* payload = (char*)pmem;
	const int back = (int)*(u16*)(payload - 2);
	free(payload - back);
}
#define _aligned_malloc pcsx2_aligned_malloc
#define _aligned_free pcsx2_aligned_free
#endif
#ifdef __LINUX__
#include <sys/timeb.h> // ftime(), struct timeb
// Linux stand-in for timeGetTime(): wall-clock time from ftime(), flattened
// into milliseconds and truncated to unsigned long.
inline unsigned long timeGetTime()
{
	timeb stamp;
	ftime(&stamp);

	// Whole seconds scaled to milliseconds plus the millisecond remainder.
	return (unsigned long)(stamp.time * 1000 + stamp.millitm);
}
#include <time.h>
// Returns the CPU time consumed by this process, in nanoseconds, truncated to
// unsigned long. Differences between two readings stay valid across second
// boundaries because whole seconds are folded in (the previous version
// returned only tv_nsec, which drops back to 0 every second).
inline unsigned long timeGetPreciseTime()
{
	timespec t;
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &t);

	// Unsigned arithmetic: where unsigned long is 32 bits the total wraps
	// modulo 2^32, which still gives correct differences for short intervals.
	return (unsigned long)t.tv_sec * 1000000000UL + (unsigned long)t.tv_nsec;
}
// Rectangle described by four int edges. Defined only inside the __LINUX__
// block of this header (presumably because the Windows headers supply their
// own RECT on the other platform -- confirm).
struct RECT
{
int left, top;
int right, bottom;
};
#endif
#define max(a,b) (((a) > (b)) ? (a) : (b))
#define min(a,b) (((a) < (b)) ? (a) : (b))
typedef struct
{
int x, y, w, h;
@ -132,6 +76,11 @@ typedef struct
int x, y;
} Point;
// Pair of int extents, w and h (presumably width and height; units are not
// stated here).
typedef struct
{
int w, h;
} Size;
typedef struct
{
int x0, y0;
@ -152,47 +101,6 @@ enum GSWindowDim
GSDim_1280,
};
// Per-game hack switches: 32 one-bit flags packed into a single 32-bit word.
// _u32 aliases the whole set so all flags can be read or written at once.
typedef union
{
struct
{
u32 texture_targs : 1;
u32 auto_reset : 1;
u32 interlace_2x : 1;
u32 texa : 1; // apply texa to non textured polys
u32 no_target_resolve : 1;
u32 exact_color : 1;
u32 no_color_clamp : 1;
u32 ffx : 1;
u32 no_alpha_fail : 1;
u32 no_depth_update : 1;
u32 quick_resolve_1 : 1;
u32 no_quick_resolve : 1;
u32 no_target_clut : 1; // full 16 bit resolution
u32 no_stencil : 1;
u32 vss_hack_off : 1; // vertical stripe syndrome
u32 no_depth_resolve : 1;
u32 full_16_bit_res : 1;
u32 resolve_promoted : 1;
u32 fast_update : 1;
u32 no_alpha_test : 1;
u32 disable_mrt_depth : 1;
u32 args_32_bit : 1;
u32 path3 : 1;
u32 parallel_context : 1; // tries to parallelize both contexts so that render calls are reduced (xenosaga)
// makes the game faster, but can be buggy
u32 xenosaga_spec : 1; // xenosaga specularity hack (ignore any zmask=1 draws)
u32 partial_pointers : 1; // whenever the texture or render target are small, tries to look for bigger ones to read from
u32 partial_depth : 1; // tries to save depth targets as much as possible across height changes
u32 reget : 1; // some sort of weirdness in ReGet() code
u32 gust : 1; // Needed for Gustgames fast update.
u32 no_logz : 1; // Intended for linux -- not logarithmic Z.
u32 automatic_skip_draw :1; // allow debug of the automatic skip draw option
u32 reserved2 :1; // unused; brings the flag count to 32
};
u32 _u32;
} gameHacks;
typedef union
{
struct
@ -339,22 +247,8 @@ union name \
#define REG_SET_END };
#ifndef SAFE_DELETE
# define SAFE_DELETE(x) if( (x) != NULL ) { delete (x); (x) = NULL; }
#endif
#ifndef SAFE_DELETE_ARRAY
# define SAFE_DELETE_ARRAY(x) if( (x) != NULL ) { delete[] (x); (x) = NULL; }
#endif
#ifndef SAFE_RELEASE
# define SAFE_RELEASE(x) if( (x) != NULL ) { (x)->Release(); (x) = NULL; }
#endif
#define FORIT(it, v) for(it = (v).begin(); it != (v).end(); ++(it))
#ifndef ARRAY_SIZE
# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
extern void LoadConfig();
extern void SaveConfig();
@ -366,12 +260,11 @@ extern char *SysLibError(); // Gets previous error loading sysbols
extern void SysCloseLibrary(void *lib); // Closes Library
extern void SysMessage(const char *fmt, ...);
#ifdef __LINUX__
#include "Utilities/MemcpyFast.h"
#define memcpy_amd memcpy_fast
#else
extern "C" void * memcpy_amd(void *dest, const void *src, size_t n);
extern "C" u8 memcmp_mmx(const void *dest, const void *src, int n);
#ifdef ZEROGS_DEVBUILD
extern char* EFFECT_NAME;
extern char* EFFECT_DIR;
extern u32 g_nGenVars, g_nTexVars, g_nAlphaVars, g_nResolve;
extern bool g_bSaveTrans, g_bUpdateEffect, g_bSaveTex, g_bSaveResolved;
#endif
extern bool g_bDisplayFPS; // should we display FPS on screen?

View File

@ -351,7 +351,7 @@ BOOL AVI_Exit()
static PAVIFILE pfile = NULL;
static PAVISTREAM ps = NULL;
static PAVISTREAM psCompressed = NULL;
static int count = 0;
static int avi_count = 0;
// Initialization...
@ -374,7 +374,7 @@ bool START_AVI(const char* file_name)
bool ADD_FRAME_FROM_DIB_TO_AVI(const char* _compressor, int _frameRate, int width, int height, int bits, void* pdata)
{
if(count == 0)
if(avi_count == 0)
{
if(! AVI_CreateStream(pfile, &ps, _frameRate,
width*height/bits,
@ -401,7 +401,7 @@ bool ADD_FRAME_FROM_DIB_TO_AVI(const char* _compressor, int _frameRate, int widt
}
HRESULT hr = AVIStreamWrite(psCompressed, // stream pointer
count, // time of this frame
avi_count, // time of this frame
1, // number to write
pdata,
width*height/8, // lpbi->biSizeImage, // size of this frame
@ -416,7 +416,7 @@ bool ADD_FRAME_FROM_DIB_TO_AVI(const char* _compressor, int _frameRate, int widt
return FALSE;
}
count++;
avi_count++;
return true;
}
@ -425,7 +425,7 @@ bool ADD_FRAME_FROM_DIB_TO_AVI(const char* _compressor, int _frameRate, int widt
bool ADD_FRAME_FROM_DIB_TO_AVI(HANDLE dib, const char* _compressor, int _frameRate)
{
LPBITMAPINFOHEADER lpbi;
if(count == 0)
if(avi_count == 0)
{
lpbi = (LPBITMAPINFOHEADER)GlobalLock(dib);
if(! AVI_CreateStream(pfile, &ps, _frameRate,
@ -449,7 +449,7 @@ bool ADD_FRAME_FROM_DIB_TO_AVI(HANDLE dib, const char* _compressor, int _frameRa
}
lpbi = (LPBITMAPINFOHEADER)GlobalLock(dib);
if(! AVI_AddFrame(psCompressed, count * 1, lpbi))
if(! AVI_AddFrame(psCompressed, avi_count * 1, lpbi))
{
//printf("Error - AVI_AddFrame()\n");
GlobalUnlock(lpbi);
@ -457,7 +457,7 @@ bool ADD_FRAME_FROM_DIB_TO_AVI(HANDLE dib, const char* _compressor, int _frameRa
}
GlobalUnlock(lpbi);
count++;
avi_count++;
return true;
}

View File

@ -155,6 +155,7 @@
<ClCompile Include="..\x86.cpp" />
<ClCompile Include="..\zerogs.cpp" />
<ClCompile Include="..\zpipe.cpp" />
<ClCompile Include="..\ZZHacks.cpp" />
<ClCompile Include="..\ZZKeyboard.cpp" />
<ClCompile Include="..\ZZLog.cpp" />
<ClCompile Include="..\ZZoglCreate.cpp" />
@ -203,6 +204,7 @@
<ClInclude Include="Win32.h" />
<ClInclude Include="..\x86.h" />
<ClInclude Include="..\ZZGl.h" />
<ClInclude Include="..\ZZHacks.h" />
<ClInclude Include="..\ZZLog.h" />
<ClInclude Include="..\zerogs.h" />
<ClInclude Include="..\zerogsmath.h" />

View File

@ -285,10 +285,6 @@
RelativePath="..\Mem_Tables.cpp"
>
</File>
<File
RelativePath="..\memcpy_amd.cpp"
>
</File>
<File
RelativePath="..\NewRegs.cpp"
>
@ -356,10 +352,22 @@
RelativePath="..\zpipe.cpp"
>
</File>
<File
RelativePath="..\ZZClut.cpp"
>
</File>
<File
RelativePath="..\ZZKeyboard.cpp"
>
</File>
<File
RelativePath="..\ZZHacks.cpp"
>
</File>
<File
RelativePath="..\ZZoglDrawing.cpp"
>
</File>
<File
RelativePath="..\ZZLog.cpp"
>
@ -413,10 +421,18 @@
RelativePath="..\GifTransfer.h"
>
</File>
<File
RelativePath="..\GLWin.h"
>
</File>
<File
RelativePath="..\GS.h"
>
</File>
<File
RelativePath="..\HostMemory.h"
>
</File>
<File
RelativePath="..\Mem.h"
>
@ -481,14 +497,34 @@
RelativePath="..\zerogs.h"
>
</File>
<File
RelativePath="..\zpipe.h"
>
</File>
<File
RelativePath="..\ZZClut.h"
>
</File>
<File
RelativePath="..\ZZGl.h"
>
</File>
<File
RelativePath="..\ZZHacks.h"
>
</File>
<File
RelativePath="..\ZZoglDrawing.h"
>
</File>
<File
RelativePath="..\ZZLog.h"
>
</File>
<File
RelativePath="..\ZZoglCRTC.h"
>
</File>
<File
RelativePath="..\ZZoglFlushHack.h"
>
@ -501,6 +537,14 @@
RelativePath="..\ZZoglShaders.h"
>
</File>
<File
RelativePath="..\ZZoglShoots.h"
>
</File>
<File
RelativePath="..\ZZoglVB.h"
>
</File>
</Filter>
<Filter
Name="Resource Files"

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,30 @@
/* ZZ Open GL graphics plugin
* Copyright (c)2009-2010 zeydlitz@gmail.com, arcum42@gmail.com
* Based on Zerofrog's ZeroGS KOSMOS (c)2005-2008
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#ifndef CLUT_H_INCLUDED
#define CLUT_H_INCLUDED
// Declarations of the CLUT helpers. Only the prototypes live here; the
// definitions are elsewhere (presumably ZZClut.cpp -- TODO confirm).
// NOTE(review): this header includes nothing itself, so tex0Info, u32 and u8
// must already be declared by whoever includes it.
// NOTE(review): the templates are only declared here; every instantiation that
// callers use has to be provided by the translation unit that defines them.
extern void GSMem_to_ClutBuffer(tex0Info &tex0);
template <class T> extern void ClutBuffer_to_Array(T* dst, u32 csa, u32 clutsize);
template <class T> extern void Build_Clut_Texture(u32 psm, u32 height, T* pclut, u8* psrc, T* pdst);
template <class T> extern bool Cmp_ClutBuffer_GSMem(T* GSmem, u32 csa, u32 clutsize);
template <class T> extern bool Cmp_ClutBuffer_SavedClut(T* saved_clut, u32 csa, u32 clutsize);
#endif // CLUT_H_INCLUDED

View File

@ -78,8 +78,6 @@ extern u32 s_stencilfunc, s_stencilref, s_stencilmask;
// global alpha blending settings
extern GLenum g_internalRGBAFloat16Fmt;
extern const GLenum primtype[8];
#define SAFE_RELEASE_TEX(x) { if( (x) != 0 ) { glDeleteTextures(1, &(x)); x = 0; } }
// inline for an extremely often used sequence

View File

@ -0,0 +1,186 @@
/* ZZ Open GL graphics plugin
* Copyright (c)2009-2010 zeydlitz@gmail.com, arcum42@gmail.com
* Based on Zerofrog's ZeroGS KOSMOS (c)2005-2008
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#include "Util.h"
#include "ZZHacks.h"
#include "ZZLog.h"
int CurrentHack = 0;
// The hacks that can be cycled through at runtime, in cycling order.
//
// Each value is the 1-based position of the hack's bit inside gameHacks::_u32
// (bit mask == 1 << (value - 1)); 0 means "no hack". Because hackDesc[0] is
// "No hack" and hackDesc[n] describes bit n-1, the value is also the index of
// the hack's description in hackDesc.
//
// Entries from GAME_NODEPTHRESOLVE onwards used to be one too small (the
// skipped VSS slot was not accounted for), so they pointed at the description
// of the previous bit.
u32 hackList[HACK_NUMBER] =
{
	0,  // No hack
	1,  // GAME_TEXTURETARGS     0x00000001
	2,  // GAME_AUTORESET        0x00000002
	3,  // GAME_INTERLACE2X      0x00000004
	4,  // GAME_TEXAHACK         0x00000008
	5,  // GAME_NOTARGETRESOLVE  0x00000010
	6,  // GAME_EXACTCOLOR       0x00000020
	//7    GAME_NOCOLORCLAMP     0x00000040 (not selectable)
	//8    GAME_FFXHACK          0x00000080 (not selectable)
	9,  // GAME_NOALPHAFAIL      0x00000100
	10, // GAME_NODEPTHUPDATE    0x00000200
	11, // GAME_QUICKRESOLVE1    0x00000400
	12, // GAME_NOQUICKRESOLVE   0x00000800
	13, // GAME_NOTARGETCLUT     0x00001000
	14, // GAME_NOSTENCIL        0x00002000
	//15   GAME_VSSHACKOFF       0x00004000 (not selectable)
	16, // GAME_NODEPTHRESOLVE   0x00008000
	17, // GAME_FULL16BITRES     0x00010000
	18, // GAME_RESOLVEPROMOTED  0x00020000
	19, // GAME_FASTUPDATE       0x00040000
	20, // GAME_NOALPHATEST      0x00080000
	21, // GAME_DISABLEMRTDEPTH  0x00100000
	//22   GAME_32BITTARGS       0x00200000 (not selectable)
	//23   GAME_PATH3HACK        0x00400000 (not selectable)
	//24   GAME_DOPARALLELCTX    0x00800000 (not selectable)
	25, // GAME_XENOSPECHACK     0x01000000
	//26   GAME_PARTIALPOINTERS  0x02000000 (not selectable)
	27, // GAME_PARTIALDEPTH     0x04000000
	28, // GAME_REGETHACK        0x08000000
	29, // GAME_GUSTHACK         0x10000000
	30, // GAME_NOLOGZ           0x20000000
	31  // GAME_AUTOSKIPDRAW     0x40000000
};
// Short, human readable name of every hack bit.
// Row 0 is the "no hack" placeholder; row n describes bit n-1 of
// gameHacks::_u32 (ReportHacks prints hackDesc[bit + 1]).
// Row 23 (path3) used to be an empty string, which made the log print
// "'' hack enabled."; it now carries the same name as in hack_list.
char hackDesc[32][64] =
{
	"No hack",                 //  0
	"Texture targs",           //  1  bit 0
	"Auto reset",              //  2  bit 1
	"Interlace 2x",            //  3  bit 2
	"Texa",                    //  4  bit 3
	"No target resolve",       //  5  bit 4
	"Exact color",             //  6  bit 5
	"No color clamp",          //  7  bit 6
	"Final Fantasy X",         //  8  bit 7
	"No alpha fail",           //  9  bit 8
	"No depth update",         // 10  bit 9
	"Quick resolve 1",         // 11  bit 10
	"No Quick resolve",        // 12  bit 11
	"No target clut",          // 13  bit 12
	"No stencil",              // 14  bit 13
	"VSS",                     // 15  bit 14
	"No depth resolve",        // 16  bit 15
	"Full 16 bit resolution",  // 17  bit 16
	"Resolve promoted",        // 18  bit 17
	"Fast update",             // 19  bit 18
	"No alpha test",           // 20  bit 19
	"Disable mrt depth",       // 21  bit 20
	"Args 32 bit",             // 22  bit 21
	"Path3",                   // 23  bit 22
	"Parallel context",        // 24  bit 23
	"Xenosaga spec",           // 25  bit 24
	"Partial pointers",        // 26  bit 25
	"Partial depth",           // 27  bit 26
	"Reget",                   // 28  bit 27
	"Gust",                    // 29  bit 28
	"No logz",                 // 30  bit 29
	"Automatic skip draw"      // 31  bit 30
};
// One row of the hack_list table below: a flag plus a short and a long
// description of a single hack.
struct hacks
{
bool enabled; // NOTE(review): not read anywhere in the visible code; presumably "may be offered to the user" -- confirm
char shortDesc[64]; // short name
char longDesc[256]; // longer, multi-line description ("\n" separated)
};
// Per-hack description table. Row order follows hackDesc: row 0 is the
// "no hack" placeholder and row n describes bit n-1 of gameHacks::_u32
// (the hexadecimal number inside each long description is that bit's mask).
// NOTE(review): nothing in the visible code reads this table yet.
// NOTE(review): the "Disable mrt depth" row carries the long text
// "Enable Multiple RTs" -- the two descriptions contradict each other; confirm
// which one is intended.
hacks hack_list[32] =
{
{ true, "No hack", "No hack" },
{ true, "Texture targs", "Tex Target checking - 00000001\nLego Racers" },
{ true, "Auto reset", "Auto reset targs - 00000002\nUse when game is slow and toggling AA fixes it. Samurai Warriors. (Automatically on for Shadow Hearts)" },
{ true, "Interlace 2x", "Interlace 2X - 00000004\nFixes 2x bigger screen. Gradius 3." },
{ false, "Texa", "" },
{ true, "No target resolve", "No target resolves - 00000010\nStops all resolving of targets. Try this first for really slow games. (Automatically on for Dark Cloud 1.)" },
{ true, "Exact color", "Exact color testing - 00000020\nFixes overbright or shadow/black artifacts. Crash 'n Burn." },
{ false, "No color clamp", "No color clamping - 00000040\nSpeeds up games, but might be too bright or too dim." },
{ false, "Final Fantasy X", "" },
{ false, "No alpha fail", "Alpha Fail hack - 00000100\nRemove vertical stripes or other coloring artifacts. Breaks Persona 4 and MGS3. (Automatically on for Sonic Unleashed, Shadow the Hedgehog, & Ghost in the Shell.)" },
{ true, "No depth update", "Disable depth updates - 00000200" },
{ true, "Quick resolve 1", "Resolve Hack #1 - 00000400\n Speeds some games. Kingdom Hearts."},
{ true, "No Quick resolve", "Resolve Hack #2 - 00000800\nShadow Hearts, Urbz. Destroys FFX."},
{ true, "No target clut", "No target CLUT - 00001000\nResident Evil 4, or foggy scenes." },
{ true, "No stencil", "Disable stencil buffer - 00002000\nUsually safe to do for simple scenes. Harvest Moon." },
{ false, "VSS", "" },
{ true, "No depth resolve", "No depth resolve - 00008000\nMight give z buffer artifacts." },
{ true, "Full 16 bit resolution", "Full 16 bit resolution - 00010000\nUse when half the screen is missing." },
{ true, "Resolve promoted", "Resolve Hack #3 - 00020000\nNeopets" },
{ true, "Fast update", "Fast Update - 00040000\n Speeds some games. Needed for Sonic Unleashed. Okami." },
{ true, "No alpha test", "Disable alpha testing - 00080000" },
{ true, "Disable mrt depth", "Enable Multiple RTs - 00100000" },
{ false, "Args 32 bit", "" },
{ false, "Path3", "" },
{ false, "Parallel context", "" },
{ true, "Xenosaga spec", "Specular Highlights - 01000000\nMakes graphics faster by removing highlights. (Automatically on for Xenosaga, Okami, & Okage.)" },
{ false, "Partial pointers", "Partial targets - 02000000" },
{ true, "Partial depth", "Partial depth - 04000000" },
{ false, "Reget", "" },
{ true, "Gust", "Gust fix - 10000000. Makes gust games cleaner and faster. (Automatically on for most Gust games)" },
{ true, "No logz", "No logarithmic Z - 20000000. Could decrease number of Z-artifacts." },
{ true, "Automatic skip draw", "Remove blur effect on some games\nSlow games." }
};
// Writes one log line for every hack bit that is set in `hacks`.
//
// hackDesc[0] is the "No hack" placeholder, so bit n is described by
// hackDesc[n + 1]. The table has 32 rows, which leaves the top (reserved) bit
// without a description: it is reported by number instead of reading past the
// end of the table.
void ReportHacks(gameHacks hacks)
{
	const int descCount = (int)(sizeof(hackDesc) / sizeof(hackDesc[0]));

	for (int bit = 0; bit < 32; bit++)
	{
		// Unsigned shift: 1 << 31 on a signed int is not portable.
		if ((hacks._u32 & (1u << bit)) == 0) continue;

		const int desc = bit + 1;

		if (desc < descCount)
			ZZLog::WriteLn("'%s' hack enabled.", hackDesc[desc]);
		else
			ZZLog::WriteLn("Unknown hack (bit %d) enabled.", bit);
	}
}
// Logs the hacks that are currently in effect: first the automatic defaults
// (only when hacks are not globally disabled), then the ones set by hand.
void ListHacks()
{
	const bool autoHacksActive = !conf.disableHacks && conf.def_hacks._u32 != 0;

	if (autoHacksActive)
	{
		ZZLog::WriteLn("Auto-enabling these hacks:");
		ReportHacks(conf.def_hacks);
	}

	const bool manualHacksActive = conf.hacks._u32 != 0;

	if (manualHacksActive)
	{
		ZZLog::WriteLn("You've manually enabled these hacks:");
		ReportHacks(conf.hacks);
	}
}
void DisplayHack(int hack)
{
ZZLog::WriteToScreen2("***%d %s", hack, hackDesc[hackList[hack]]);
}
void ChangeCurrentHack(int hack)
{
FUNCLOG
conf.hacks._u32 &= !(hackList[CurrentHack]);
conf.hacks._u32 |= hackList[hack];
DisplayHack(hack);
CurrentHack = hack;
SaveConfig();
}

View File

@ -0,0 +1,99 @@
#ifndef ZZHACKS_H_INCLUDED
#define ZZHACKS_H_INCLUDED
#include "PS2Edefs.h"
// This is a list of the various hacks, and what bit controls them.
// Changing these is not advised unless you know what you are doing.
// Each enumerator is a single-bit mask; the masks are listed in ascending bit
// order, the same order in which the gameHacks bit-fields are declared.
enum GAME_HACK_OPTIONS
{
GAME_TEXTURETARGS = 0x00000001,
GAME_AUTORESET = 0x00000002,
GAME_INTERLACE2X = 0x00000004,
GAME_TEXAHACK = 0x00000008, // apply texa to non textured polys
GAME_NOTARGETRESOLVE = 0x00000010,
GAME_EXACTCOLOR = 0x00000020,
GAME_NOCOLORCLAMP = 0x00000040,
GAME_FFXHACK = 0x00000080,
GAME_NOALPHAFAIL = 0x00000100,
GAME_NODEPTHUPDATE = 0x00000200,
GAME_QUICKRESOLVE1 = 0x00000400,
GAME_NOQUICKRESOLVE = 0x00000800,
GAME_NOTARGETCLUT = 0x00001000, // no target CLUT
GAME_NOSTENCIL = 0x00002000,
GAME_VSSHACKOFF = 0x00004000, // vertical stripe syndrome
GAME_NODEPTHRESOLVE = 0x00008000,
GAME_FULL16BITRES = 0x00010000, // full 16 bit resolution
GAME_RESOLVEPROMOTED = 0x00020000,
GAME_FASTUPDATE = 0x00040000,
GAME_NOALPHATEST = 0x00080000,
GAME_DISABLEMRTDEPTH = 0x00100000,
GAME_32BITTARGS = 0x00200000,
GAME_PATH3HACK = 0x00400000,
GAME_DOPARALLELCTX = 0x00800000, // tries to parallelize both contexts so that render calls are reduced (xenosaga)
// makes the game faster, but can be buggy
GAME_XENOSPECHACK = 0x01000000, // xenosaga specularity hack (ignore any zmask=1 draws)
GAME_PARTIALPOINTERS = 0x02000000, // whenever the texture or render target are small, tries to look for bigger ones to read from
GAME_PARTIALDEPTH = 0x04000000, // tries to save depth targets as much as possible across height changes
GAME_REGETHACK = 0x08000000, // some sort of weirdness in ReGet() code
GAME_GUSTHACK = 0x10000000, // Needed for Gustgames fast update.
GAME_NOLOGZ = 0x20000000, // Intended for linux -- not logarithmic Z.
GAME_AUTOSKIPDRAW = 0x40000000, // Remove blur effect on some games
GAME_RESERVED_HACK = 0x80000000 // top bit, reserved
};
#define USEALPHATESTING (!(conf.settings().no_alpha_test))
// The set of hacks, accessible either as individual one-bit flags or as the
// raw 32-bit word (_u32).
// The flags are declared in the same order as the GAME_HACK_OPTIONS masks.
// NOTE(review): matching flag N with mask 1 << N assumes the compiler
// allocates bit-fields starting at the least significant bit -- bit-field
// layout is implementation-defined, confirm for every supported compiler.
typedef union
{
struct
{
u32 texture_targs : 1;
u32 auto_reset : 1;
u32 interlace_2x : 1;
u32 texa : 1; // apply texa to non textured polys
u32 no_target_resolve : 1;
u32 exact_color : 1;
u32 no_color_clamp : 1;
u32 ffx : 1;
u32 no_alpha_fail : 1;
u32 no_depth_update : 1;
u32 quick_resolve_1 : 1;
u32 no_quick_resolve : 1;
u32 no_target_clut : 1; // no target CLUT
u32 no_stencil : 1;
u32 vss_hack_off : 1; // vertical stripe syndrome
u32 no_depth_resolve : 1;
u32 full_16_bit_res : 1; // full 16 bit resolution
u32 resolve_promoted : 1;
u32 fast_update : 1;
u32 no_alpha_test : 1;
u32 disable_mrt_depth : 1;
u32 args_32_bit : 1;
u32 path3 : 1;
u32 parallel_context : 1; // tries to parallelize both contexts so that render calls are reduced (xenosaga)
// makes the game faster, but can be buggy
u32 xenosaga_spec : 1; // xenosaga specularity hack (ignore any zmask=1 draws)
u32 partial_pointers : 1; // whenever the texture or render target are small, tries to look for bigger ones to read from
u32 partial_depth : 1; // tries to save depth targets as much as possible across height changes
u32 reget : 1; // some sort of weirdness in ReGet() code
u32 gust : 1; // Needed for Gustgames fast update.
u32 no_logz : 1; // Intended for linux -- not logarithmic Z.
u32 automatic_skip_draw :1; // allow debug of the automatic skip draw option
u32 reserved2 :1; // top bit, unused
};
u32 _u32; // all 32 flags as one word
} gameHacks;
// Number of entries in hackList, i.e. how many hacks can be cycled through at
// runtime (this includes the leading "no hack" entry).
#define HACK_NUMBER 25
// Selectable hacks, in cycling order.
extern u32 hackList[HACK_NUMBER];
// Short description per hack bit; row 0 is the "no hack" placeholder.
extern char hackDesc[32][64];
// Index into hackList of the hack selected last through ChangeCurrentHack.
extern int CurrentHack;
// Logs the description of every bit set in `hacks`.
extern void ReportHacks(gameHacks hacks);
// Logs the automatically and the manually enabled hacks.
extern void ListHacks();
// Shows entry `hack` of hackList on screen.
extern void DisplayHack(int hack);
// Selects entry `hack` of hackList and saves the configuration.
extern void ChangeCurrentHack(int hack);
#endif // ZZHACKS_H_INCLUDED

View File

@ -30,12 +30,12 @@ extern char *libraryName;
extern const unsigned char zgsversion;
extern unsigned char zgsrevision, zgsbuild, zgsminor;
extern u32 THR_KeyEvent; // value for passing out key events between threads
extern bool THR_bShift, SaveStateExists;
extern bool SaveStateExists;
const char* s_aa[5] = { "AA none |", "AA 2x |", "AA 4x |", "AA 8x |", "AA 16x |" };
const char* pbilinear[] = { "off", "normal", "forced" };
extern void SetAA(int mode);
void ProcessBilinear()
{
@ -86,7 +86,7 @@ void ProcessAASetting(bool reverse)
conf.incAA();
sprintf(strtitle, "anti-aliasing - %s", s_aa[conf.aa]);
ZeroGS::SetAA(conf.aa);
SetAA(conf.aa);
ZZLog::WriteToScreen(strtitle);
SaveConfig();
@ -110,75 +110,26 @@ void ProcessWireFrame()
ZZLog::WriteToScreen(strtitle);
}
// One selectable hack: the text shown on screen and the GAME_* bit mask that
// is OR'ed into conf.hacks._u32 when the entry is selected.
typedef struct GameHackStruct
{
const char HackName[40];
u32 HackMask;
} GameHack;
// Number of rows in HackinshTable.
#define HACK_NUMBER 25
// The hacks that can be cycled through with ProcessHackSetting(), in cycling
// order. The number inside each name is the row index; rows that are commented
// out cannot be selected.
GameHack HackinshTable[HACK_NUMBER] =
{
{"*** 0 No Hack", 0},
{"*** 1 TexTargets Check", GAME_TEXTURETARGS},
{"*** 2 Autoreset Targets", GAME_AUTORESET},
{"*** 3 Interlace 2x", GAME_INTERLACE2X},
{"*** 4 TexA hack", GAME_TEXAHACK},
{"*** 5 No Target Resolve", GAME_NOTARGETRESOLVE},
{"*** 6 Exact color", GAME_EXACTCOLOR},
//{"***xx No color clamp", GAME_NOCOLORCLAMP},
//{"***xx FFX hack", GAME_FFXHACK},
{"*** 7 No Alpha Fail", GAME_NOALPHAFAIL},
{"*** 8 No Depth Update", GAME_NODEPTHUPDATE},
{"*** 9 Quick Resolve 1", GAME_QUICKRESOLVE1},
{"***10 No quick resolve", GAME_NOQUICKRESOLVE},
{"***11 Notaget clut", GAME_NOTARGETCLUT},
{"***12 No Stencil", GAME_NOSTENCIL},
{"***13 No Depth resolve", GAME_NODEPTHRESOLVE},
{"***14 Full 16 bit", GAME_FULL16BITRES},
{"***15 Resolve promoted", GAME_RESOLVEPROMOTED},
{"***16 Fast Update", GAME_FASTUPDATE},
{"***17 No Alpha Test", GAME_NOALPHATEST},
{"***18 Disable MRT depth", GAME_DISABLEMRTDEPTH},
//{"***xx 32 bit targs", GAME_32BITTARGS},
//{"***xx Path 3 hack", GAME_PATH3HACK},
//{"***xx Parallel calls", GAME_DOPARALLELCTX},
{"***19 Specular highlights", GAME_XENOSPECHACK},
//{"***xx Partial pointers", GAME_PARTIALPOINTERS},
{"***20 Partial depth", GAME_PARTIALDEPTH},
{"***21 Reget hack", GAME_REGETHACK},
{"***22 Gust hack", GAME_GUSTHACK},
{"***23 Log-Z", GAME_NOLOGZ},
{"***24 Auto skipdraw", GAME_AUTOSKIPDRAW}
};
int CurrentHackSetting = 0;
void ProcessHackSetting(bool reverse)
{
FUNCLOG
// printf ("A %d\n", HackinshTable[CurrentHackSetting].HackMask);
conf.hacks._u32 &= !(HackinshTable[CurrentHackSetting].HackMask);
int hack = CurrentHack;
if (reverse)
{
CurrentHackSetting--;
hack--;
if (CurrentHackSetting == -1) CurrentHackSetting = HACK_NUMBER - 1;
if (hack < 0) hack = HACK_NUMBER - 1;
}
else
{
CurrentHackSetting++;
hack++;
if (CurrentHackSetting >= HACK_NUMBER) CurrentHackSetting = 0;
if (hack >= HACK_NUMBER) hack = 0;
}
ChangeCurrentHack(hack);
conf.hacks._u32 |= HackinshTable[CurrentHackSetting].HackMask;
ZZLog::WriteToScreen(HackinshTable[CurrentHackSetting].HackName);
SaveConfig();
}
@ -188,7 +139,7 @@ void ProcessSaveState()
char strtitle[256];
sprintf(strtitle, "Saving in savestate %d", CurrentSavestate);
SaveStateExists = true;
ZZLog::WriteToScreen(HackinshTable[CurrentHackSetting].HackName);
if (CurrentHack != 0) DisplayHack(CurrentHack);
}
void OnFKey(int key, int shift)
@ -253,99 +204,3 @@ void WriteBilinear()
break;
}
}
#ifdef _WIN32
// Windows event pump: drains the thread's message queue, handles the plugin's
// own hot keys (F5, F6, F7, F9, Escape) and finally polls Alt+Enter to toggle
// fullscreen.
void ProcessEvents()
{
MSG msg;
ZeroMemory(&msg, sizeof(msg));
// Handle every message that is currently queued, then fall out of the loop.
while (1)
{
if (PeekMessage(&msg, NULL, 0U, 0U, PM_REMOVE))
{
switch (msg.message)
{
case WM_KEYDOWN :
// NOTE(review): my_KeyEvent is assigned but never read.
int my_KeyEvent = msg.wParam;
// High bit of GetKeyState() is tested to detect a held Shift key.
bool my_bShift = !!(GetKeyState(VK_SHIFT) & 0x8000);
switch (msg.wParam)
{
case VK_F5:
case VK_F6:
case VK_F7:
case VK_F9:
// Convert the virtual key code into the function key number (F5 -> 5, ...).
OnFKey(msg.wParam - VK_F1 + 1, my_bShift);
break;
case VK_ESCAPE:
// Escape leaves fullscreen first; when already windowed it closes the window.
if (conf.fullscreen())
{
// destroy that msg
conf.setFullscreen(false);
ZeroGS::ChangeDeviceSize(conf.width, conf.height);
UpdateWindow(GShwnd);
continue; // so that msg doesn't get sent
}
else
{
SendMessage(GShwnd, WM_DESTROY, 0, 0);
return;
}
break;
}
break;
}
// Everything that was not swallowed above goes to the window procedure.
TranslateMessage(&msg);
DispatchMessage(&msg);
}
else
{
break;
}
}
// Alt+Enter is polled (not taken from the message queue): toggle fullscreen
// and request 1280x960 when entering it, the configured size when leaving it.
if ((GetKeyState(VK_MENU) & 0x8000) && (GetKeyState(VK_RETURN) & 0x8000))
{
conf.zz_options.fullscreen = !conf.zz_options.fullscreen;
ZeroGS::SetChangeDeviceSize(
(conf.fullscreen()) ? 1280 : conf.width,
(conf.fullscreen()) ? 960 : conf.height);
}
}
#else // linux
// Linux event handling: checks for a window resize and processes the key that
// was handed over through THR_KeyEvent / THR_bShift, if any.
void ProcessEvents()
{
FUNCLOG
// check resizing
GLWin.ResizeCheck();
if (THR_KeyEvent) // This value was passed from GSKeyEvents which could be in another thread
{
// Take local copies, then clear the shared value so the key is handled once.
// NOTE(review): no synchronisation is visible around these shared variables.
int my_KeyEvent = THR_KeyEvent;
bool my_bShift = THR_bShift;
THR_KeyEvent = 0;
switch (my_KeyEvent)
{
case XK_F5:
case XK_F6:
case XK_F7:
case XK_F9:
// Convert the X11 keysym into the function key number (F5 -> 5, ...).
OnFKey(my_KeyEvent - XK_F1 + 1, my_bShift);
break;
}
}
}
#endif // linux

View File

@ -19,9 +19,44 @@
#include <stdio.h>
#include "ZZLog.h"
#include <list>
#include <cstring>
extern GSconf conf;
using namespace std;
static list<MESSAGE> listMsgs;
void ProcessMessages()
{
FUNCLOG
if (listMsgs.size() > 0)
{
int left = 25, top = 15;
list<MESSAGE>::iterator it = listMsgs.begin();
while (it != listMsgs.end())
{
DrawText(it->str, left + 1, top + 1, 0xff000000);
DrawText(it->str, left, top, 0xffffff30);
top += 15;
if ((int)(it->dwTimeStamp - timeGetTime()) < 0)
it = listMsgs.erase(it);
else ++it;
}
}
}
void ZZAddMessage(const char* pstr, u32 ms)
{
FUNCLOG
listMsgs.push_back(MESSAGE(pstr, timeGetTime() + ms));
ZZLog::Log("%s\n", pstr);
}
namespace ZZLog
{
std::string s_strLogPath("logs/");
@ -68,7 +103,19 @@ void SetDir(const char* dir)
void WriteToScreen(const char* pstr, u32 ms)
{
ZeroGS::AddMessage(pstr, ms);
ZZAddMessage(pstr, ms);
}
void WriteToScreen2(const char* fmt, ...)
{
va_list list;
char tmp[512];
va_start(list, fmt);
vsprintf(tmp, fmt, list);
va_end(list);
ZZAddMessage(tmp, 5000);
}
void _Message(const char *str)
@ -267,7 +314,7 @@ void Dev_Log(const char *fmt, ...)
void Debug_Log(const char *fmt, ...)
{
#if _DEBUG
#ifdef _DEBUG
va_list list;
va_start(list, fmt);

View File

@ -21,7 +21,9 @@
#define ZZLOG_H_INCLUDED
#include "Util.h"
#include <cstring>
extern void HandleGLError();
//Logging for errors that are called often should have a time counter.
#ifdef __LINUX__
@ -99,7 +101,7 @@ static bool SPAM_PASS;
if( err != GL_NO_ERROR ) \
{ \
ZZLog::Error_Log("%s:%d: gl error %s(0x%x)", __FILE__, (int)__LINE__, error_name(err), err); \
ZeroGS::HandleGLError(); \
HandleGLError(); \
} \
}
@ -110,7 +112,7 @@ static bool SPAM_PASS;
if( err != GL_NO_ERROR ) \
{ \
ZZLog::Error_Log("%s:%d: gl error %s (0x%x)", __FILE__, (int)__LINE__, error_name(err), err); \
ZeroGS::HandleGLError(); \
/* HandleGLError();*/ \
} \
}
#else
@ -154,18 +156,23 @@ inline const char *error_name(int err)
}
}
struct MESSAGE
{
MESSAGE() {}
MESSAGE(const char* p, u32 dw) { strcpy(str, p); dwTimeStamp = dw; }
char str[255];
u32 dwTimeStamp;
};
extern void DrawText(const char* pstr, int left, int top, u32 color);
extern void __LogToConsole(const char *fmt, ...);
// Subset of zerogs, to avoid that whole huge header.
namespace ZeroGS
{
extern void AddMessage(const char* pstr, u32 ms);
extern void SetAA(int mode);
extern bool Create(int width, int height);
extern void Destroy(bool bD3D);
extern void ZZAddMessage(const char* pstr, u32 ms = 5000);
extern void StartCapture();
extern void StopCapture();
}
namespace ZZLog
{
@ -176,6 +183,7 @@ extern void Close();
extern void Message(const char *fmt, ...);
extern void Log(const char *fmt, ...);
void WriteToScreen(const char* pstr, u32 ms = 5000);
void WriteToScreen2(const char* pstr, ...);
extern void WriteToConsole(const char *fmt, ...);
extern void Print(const char *fmt, ...);
extern void WriteLn(const char *fmt, ...);

View File

@ -21,11 +21,15 @@
// It draw picture direct on screen, so here we have interlacing and frame skipping.
//------------------ Includes
#include "Util.h"
#include "ZZoglCRTC.h"
#include "GLWin.h"
#include "ZZoglShaders.h"
using namespace ZeroGS;
#include "ZZoglShoots.h"
#include "ZZoglDrawing.h"
#include "rasterfont.h" // simple font
#include <math.h>
#include "ZZoglVB.h"
//------------------ Defines
#if !defined(ZEROGS_DEVBUILD)
@ -49,28 +53,33 @@ vector<u32> s_vecTempTextures; // temporary textures, released at the end of
extern bool g_bMakeSnapshot;
extern string strSnapshot;
extern void ExtWrite();
extern void ZZDestroy();
extern void ChangeDeviceSize(int nNewWidth, int nNewHeight);
extern GLuint vboRect;
// Adjusts vertex shader BitBltPos vector v to preserve aspect ratio. It used to emulate 4:3 or 16:9.
void ZeroGS::AdjustTransToAspect(float4& v)
void AdjustTransToAspect(float4& v)
{
double temp;
float f;
const float mult = 1 / 32767.0f;
if (conf.width * nBackbufferHeight > conf.height * nBackbufferWidth) // limited by width
if (conf.width * GLWin.backbuffer.h > conf.height * GLWin.backbuffer.w) // limited by width
{
// change in ratio
f = ((float)nBackbufferWidth / (float)conf.width) / ((float)nBackbufferHeight / (float)conf.height);
f = ((float)GLWin.backbuffer.w / (float)conf.width) / ((float)GLWin.backbuffer.h / (float)conf.height);
v.y *= f;
v.w *= f;
// scanlines mess up when not aligned right
v.y += (1 - (float)modf(v.y * (float)nBackbufferHeight * 0.5f + 0.05f, &temp)) * 2.0f / (float)nBackbufferHeight;
v.w += (1 - (float)modf(v.w * (float)nBackbufferHeight * 0.5f + 0.05f, &temp)) * 2.0f / (float)nBackbufferHeight;
v.y += (1 - (float)modf(v.y * (float)GLWin.backbuffer.h * 0.5f + 0.05f, &temp)) * 2.0f / (float)GLWin.backbuffer.h;
v.w += (1 - (float)modf(v.w * (float)GLWin.backbuffer.h * 0.5f + 0.05f, &temp)) * 2.0f / (float)GLWin.backbuffer.h;
}
else // limited by height
{
f = ((float)nBackbufferHeight / (float)conf.height) / ((float)nBackbufferWidth / (float)conf.width);
f -= (float)modf(f * nBackbufferWidth, &temp) / (float)nBackbufferWidth;
f = ((float)GLWin.backbuffer.h / (float)conf.height) / ((float)GLWin.backbuffer.w / (float)conf.width);
f -= (float)modf(f * GLWin.backbuffer.w, &temp) / (float)GLWin.backbuffer.w;
v.x *= f;
v.z *= f;
}
@ -139,9 +148,6 @@ inline void FrameSavingHelper()
}
#endif
}
// g_SaveFrameNum = 0;
// g_bSaveFlushedFrame = 1;
}
// Function populated tex0Info[2] array
@ -178,20 +184,11 @@ inline void FrameObtainDispinfo(u32 bInterlace, tex0Info* dispinfo)
}
}
extern bool s_bWriteDepth;
// Something should be done before Renderering the picture.
inline void RenderStartHelper(u32 bInterlace)
{
// Crashes Final Fantasy X at startup if uncommented. --arcum42
//#ifdef !defined(ZEROGS_DEVBUILD)
// if(g_nRealFrame < 80 ) {
// RenderCustom( min(1.0f, 2.0f - (float)g_nRealFrame / 40.0f) );
//
// if( g_nRealFrame == 79 )
// SAFE_RELEASE_TEX(ptexLogo);
// return;
// }
//#endif
if (conf.mrtdepth && pvs[8] == NULL)
{
conf.mrtdepth = 0;
@ -214,7 +211,7 @@ inline void RenderStartHelper(u32 bInterlace)
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0); // switch to the backbuffer
glViewport(0, 0, nBackbufferWidth, nBackbufferHeight);
glViewport(0, 0, GLWin.backbuffer.w, GLWin.backbuffer.h);
// if interlace, only clear every other vsync
if (!bInterlace)
@ -634,8 +631,23 @@ inline void RenderCheckForMemory(tex0Info& texframe, list<CRenderTarget*>& listT
DrawTriangleArray();
}
extern RasterFont* font_p;
// Draws `pstr` with the raster font at window position (left, top), given in
// back buffer pixels measured from the top-left corner, in colour `color`.
void DrawText(const char* pstr, int left, int top, u32 color)
{
FUNCLOG
// The shader profile is switched off while the text is drawn and back on afterwards.
ZZshGLDisableProfile();
float4 v;
v.SetColor(color);
// Note the component order: z, y, x are passed as red, green, blue.
glColor3f(v.z, v.y, v.x);
// Convert pixels to the -1..1 range: x grows to the right, y is flipped so that top == 0 maps to +1.
// NOTE(review): font_p is dereferenced without a null check.
font_p->printString(pstr, left * 2.0f / (float)GLWin.backbuffer.w - 1, 1 - top * 2.0f / (float)GLWin.backbuffer.h, 0);
ZZshGLEnableProfile();
}
// Put FPS counter on screen (not in window title)
inline void AfterRenderDisplayFPS()
inline void DisplayFPS()
{
char str[64];
int left = 10, top = 15;
@ -645,16 +657,8 @@ inline void AfterRenderDisplayFPS()
DrawText(str, left, top, 0xffc0ffff);
}
// Swapping buffers, so we could use another window
inline void AfterRenderSwapBuffers()
{
if (glGetError() != GL_NO_ERROR) ZZLog::Debug_Log("glError before swap!");
GLWin.SwapGLBuffers();
}
// SnapeShoot helper
inline void AfterRenderMadeSnapshoot()
inline void MakeSnapshot()
{
if (!g_bMakeSnapshot) return;
@ -666,26 +670,40 @@ inline void AfterRenderMadeSnapshoot()
DrawText(str, left + 1, top + 1, 0xff000000);
DrawText(str, left, top, 0xffc0ffff);
if (SaveRenderTarget(strSnapshot != "" ? strSnapshot.c_str() : "temp.jpg", nBackbufferWidth, -nBackbufferHeight, 0)) //(conf.options.tga_snap)?0:1) ) {
if (SaveRenderTarget(strSnapshot != "" ? strSnapshot.c_str() : "temp.jpg", GLWin.backbuffer.w, -GLWin.backbuffer.h, 0)) //(conf.options.tga_snap)?0:1) ) {
{
char str[255];
sprintf(str, "saved %s\n", strSnapshot.c_str());
AddMessage(str, 500);
ZZAddMessage(str, 500);
}
g_bMakeSnapshot = false;
g_bMakeSnapshot = false;
}
// If needed reset
inline void AfterRendererResizeWindow()
// call to destroy video resources
void ZZReset()
{
Reset();
ChangeDeviceSize(s_nNewWidth, s_nNewHeight);
s_nNewWidth = s_nNewHeight = -1;
FUNCLOG
s_RTs.ResolveAll();
s_DepthRTs.ResolveAll();
vb[0].nCount = 0;
vb[1].nCount = 0;
memset(s_nResolveCounts, 0, sizeof(s_nResolveCounts));
s_nLastResolveReset = 0;
icurctx = -1;
g_vsprog = g_psprog = 0;
ZZGSStateReset();
ZZDestroy();
//clear_drawfn();
if (ZZKick != NULL) delete ZZKick;
}
// Put new values on statistic variable
inline void AfterRenderCountStatistics()
inline void CountStatistics()
{
if (s_nWriteDepthCount > 0)
{
@ -708,7 +726,6 @@ inline void AfterRenderCountStatistics()
if (g_nDepthUsed > 0) --g_nDepthUsed;
s_ClutResolve = 0;
g_nDepthUpdateCount = 0;
}
@ -717,32 +734,33 @@ inline void AfterRendererUnimportantJob()
{
ProcessMessages();
if (g_bDisplayFPS) AfterRenderDisplayFPS();
if (g_bDisplayFPS) DisplayFPS();
AfterRenderSwapBuffers();
// Swapping buffers, so we could use another window
GLWin.SwapGLBuffers();
if (conf.wireframe())
{
// clear all targets
s_nWireframeCount = 1;
}
// clear all targets
if (conf.wireframe()) s_nWireframeCount = 1;
if (g_bMakeSnapshot)
{
AfterRenderMadeSnapshoot();
g_bMakeSnapshot = false;
}
if (g_bMakeSnapshot) MakeSnapshot();
CaptureFrame();
AfterRenderCountStatistics();
CountStatistics();
if (s_nNewWidth >= 0 && s_nNewHeight >= 0)
AfterRendererResizeWindow();
{
// If needed reset
ZZReset();
ChangeDeviceSize(s_nNewWidth, s_nNewHeight);
s_nNewWidth = s_nNewHeight = -1;
}
maxmin = 608;
}
extern u32 s_uFramebuffer;
// Swich Framebuffers
inline void AfterRendererSwitchBackToTextures()
{
@ -780,13 +798,13 @@ inline void AfterRendererAutoresetTargets()
if (conf.settings().auto_reset)
{
s_nResolveCounts[s_nCurResolveIndex] = s_nResolved;
s_nCurResolveIndex = (s_nCurResolveIndex + 1) % ARRAY_SIZE(s_nResolveCounts);
s_nCurResolveIndex = (s_nCurResolveIndex + 1) % ArraySize(s_nResolveCounts);
int total = 0;
for (int i = 0; i < ARRAY_SIZE(s_nResolveCounts); ++i) total += s_nResolveCounts[i];
for (int i = 0; i < ArraySize(s_nResolveCounts); ++i) total += s_nResolveCounts[i];
if (total / ARRAY_SIZE(s_nResolveCounts) > 3)
if (total / ArraySize(s_nResolveCounts) > 3)
{
if (s_nLastResolveReset > (int)(fFPS * 8))
{
@ -823,7 +841,7 @@ inline void AfterRendererAutoresetTargets()
int count = 0;
// The main renderer function
void ZeroGS::RenderCRTC(int interlace)
void RenderCRTC(int interlace)
{
if (FrameSkippingHelper()) return;

View File

@ -22,7 +22,6 @@
#include <stdlib.h>
#include "zerogs.h"
#include "targets.h"
#define INTERLACE_COUNT (bInterlace && interlace == (conf.interlace))
@ -36,7 +35,6 @@ extern int s_frameskipping;
extern float fFPS;
extern unsigned char zgsrevision, zgsbuild, zgsminor;
//extern u32 g_SaveFrameNum;
extern int s_nWriteDepthCount;
extern int s_nWireframeCount;
extern int s_nWriteDestAlphaTest;
@ -56,8 +54,6 @@ extern int g_nDepthUsed; // ffx2 pal movies
extern u32 s_ptexInterlace; // holds interlace fields
namespace ZeroGS
{
extern int s_nNewWidth, s_nNewHeight;
extern CRangeManager s_RangeMngr; // manages overwritten memory
@ -65,6 +61,8 @@ extern void FlushTransferRanges(const tex0Info* ptex);
extern void ProcessMessages();
void AdjustTransToAspect(float4& v);
void ZZGSStateReset();
// Interlace texture is lazy 1*(height) array of 1 and 0.
// If its height (named s_nInterlaceTexWidth here) is hanging we must redo
// the texture.
@ -95,6 +93,5 @@ inline u32 CreateInterlaceTex(int width)
return s_ptexInterlace;
}
}
#endif // ZZOGLCRTC_H_INCLUDED

View File

@ -22,11 +22,14 @@
//------------------ Includes
#include "GS.h"
#include "Mem.h"
#include "zerogs.h"
#include "GLWin.h"
#include "ZZoglShaders.h"
#include "targets.h"
#include "rasterfont.h" // simple font
#include "ZZoglDrawing.h"
#include "ZZoglVB.h"
// This include for windows resource file with Shaders
#ifdef _WIN32
# include "Win32.h"
@ -73,14 +76,6 @@ typedef void (APIENTRYP _PFNSWAPINTERVAL)(int);
map<string, GLbyte> mapGLExtensions;
namespace ZeroGS
{
extern void KickPoint();
extern void KickLine();
extern void KickTriangle();
extern void KickTriangleFan();
extern void KickSprite();
extern void KickDummy();
extern bool LoadEffects();
extern bool ZZshLoadExtraEffects();
extern FRAGMENTSHADER* ZZshLoadShadeEffect(int type, int texfilter, int fog, int testaem, int exactcolor, const clampInfo& clamp, int context, bool* pbFailed);
@ -92,7 +87,8 @@ int g_nCurVBOIndex = 0;
inline bool CreateImportantCheck();
inline void CreateOtherCheck();
inline bool CreateOpenShadersFile();
}
void ZZGSStateReset();
//------------------ Dummies
#ifdef _WIN32
@ -129,15 +125,13 @@ void (APIENTRY *zgsBlendFuncSeparateEXT)(GLenum, GLenum, GLenum, GLenum) = NULL;
extern u8* s_lpShaderResources;
// String's for shader file in developer mode
#ifdef DEVBUILD
#ifdef ZEROGS_DEVBUILD
char* EFFECT_NAME = "";
char* EFFECT_DIR = "";
#endif
/////////////////////
// graphics resources
FRAGMENTSHADER ppsRegular[4], ppsTexture[NUM_SHADERS];
FRAGMENTSHADER ppsCRTC[2], ppsCRTC24[2], ppsCRTCTarg[2];
GLenum s_srcrgb, s_dstrgb, s_srcalpha, s_dstalpha; // set by zgsBlendFuncSeparateEXT
u32 s_stencilfunc, s_stencilref, s_stencilmask;
GLenum s_drawbuffers[] = { GL_COLOR_ATTACHMENT0_EXT, GL_COLOR_ATTACHMENT1_EXT };
@ -164,31 +158,21 @@ u32 ptexBlocks = 0, ptexConv16to32 = 0; // holds information on block tiling
u32 ptexBilinearBlocks = 0;
u32 ptexConv32to16 = 0;
int g_nDepthBias = 0;
//u32 g_bSaveFlushedFrame = 0;
extern void Delete_Avi_Capture();
extern void ZZDestroy();
extern void SetAA(int mode);
//------------------ Code
bool ZeroGS::IsGLExt(const char* szTargetExtension)
///< returns true if the the opengl extension is supported
bool IsGLExt(const char* szTargetExtension)
{
return mapGLExtensions.find(string(szTargetExtension)) != mapGLExtensions.end();
}
// Records the requested size as the back buffer size and opens the output
// window. Returns false when GLWin.DisplayWindow() fails, true otherwise.
inline bool ZeroGS::Create_Window(int _width, int _height)
{
nBackbufferWidth = _width;
nBackbufferHeight = _height;
if (!GLWin.DisplayWindow(_width, _height)) return false;
//s_nFullscreen = (conf.fullscreen()) ? 1 : 0;
// Side effect: mrtdepth is switched off in the configuration on every successful call.
conf.mrtdepth = 0; // for now
return true;
}
// Function asks about different OGL extensions, that are required to setup accordingly. Return false if checks failed
inline bool ZeroGS::CreateImportantCheck()
inline bool CreateImportantCheck()
{
bool bSuccess = true;
#ifndef _WIN32
@ -220,7 +204,7 @@ inline bool ZeroGS::CreateImportantCheck()
}
// This is a check for less important open gl extensions.
inline void ZeroGS::CreateOtherCheck()
inline void CreateOtherCheck()
{
if (!IsGLExt("GL_EXT_blend_equation_separate") || glBlendEquationSeparateEXT == NULL)
{
@ -292,18 +276,21 @@ inline void ZeroGS::CreateOtherCheck()
#endif
}
// open shader file according to build target
inline bool ZeroGS::CreateOpenShadersFile()
#ifdef _WIN32
__forceinline bool LoadShadersFromRes()
{
#ifndef DEVBUILD
# ifdef _WIN32
HRSRC hShaderSrc = FindResource(hInst, MAKEINTRESOURCE(IDR_SHADERS), RT_RCDATA);
assert(hShaderSrc != NULL);
HGLOBAL hShaderGlob = LoadResource(hInst, hShaderSrc);
assert(hShaderGlob != NULL);
s_lpShaderResources = (u8*)LockResource(hShaderGlob);
# else // not _WIN32
return true;
}
#else
__forceinline bool LoadShadersFromDat()
{
FILE* fres = fopen("ps2hw.dat", "rb");
if (fres == NULL)
@ -324,13 +311,16 @@ inline bool ZeroGS::CreateOpenShadersFile()
fseek(fres, 0, SEEK_SET);
fread(s_lpShaderResources, s, 1, fres);
s_lpShaderResources[s] = 0;
# endif // _WIN32
#else // defined(ZEROGS_DEVBUILD)
# ifndef _WIN32 // NOT WINDOWS
return true;
}
__forceinline bool LoadShadersFromFX()
{
// test if ps2hw.fx exists
char tempstr[255];
char curwd[255];
getcwd(curwd, ARRAY_SIZE(curwd));
getcwd(curwd, ArraySize(curwd));
strcpy(tempstr, "/plugins/");
sprintf(EFFECT_NAME, "%sps2hw.fx", tempstr);
@ -354,59 +344,56 @@ inline bool ZeroGS::CreateOpenShadersFile()
sprintf(EFFECT_DIR, "%s/%s", curwd, tempstr);
sprintf(EFFECT_NAME, "%sps2hw.fx", EFFECT_DIR);
return true;
}
#endif
// open shader file according to build target
//
// Dispatches to exactly one loader, chosen at compile time:
//   - DEVBUILD not defined, _WIN32 defined     -> LoadShadersFromRes()
//   - DEVBUILD not defined, _WIN32 not defined -> LoadShadersFromDat()
//   - DEVBUILD defined,     _WIN32 not defined -> LoadShadersFromFX()
//   - DEVBUILD defined,     _WIN32 defined     -> no loader is called, returns true
// Returns the selected loader's result; the trailing "return true" is only reachable
// in the last configuration.
//
// NOTE(review): the guard below tests DEVBUILD, while its own trailing comments (and other
// parts of this plugin) refer to ZEROGS_DEVBUILD -- confirm which macro the build defines.
inline bool CreateOpenShadersFile()
{
#ifndef DEVBUILD
# ifdef _WIN32
return LoadShadersFromRes();
# else // not _WIN32
return LoadShadersFromDat();
# endif // _WIN32
#else // defined(ZEROGS_DEVBUILD)
# ifndef _WIN32 // NOT WINDOWS
return LoadShadersFromFX();
// No else clause?
// (a development build on Windows falls through to the "return true" below without loading anything)
#endif
#endif // !defined(ZEROGS_DEVBUILD)
return true;
}
// Read all extensions name and fill mapGLExtensions
inline bool CreateFillExtensionsMap()
{
// fill the opengl extension map
const char* ptoken = (const char*)glGetString(GL_EXTENSIONS);
string temp("");
int max_ext = 0;
glGetIntegerv(GL_NUM_EXTENSIONS, &max_ext);
if (ptoken == NULL) return false;
PFNGLGETSTRINGIPROC glGetStringi = 0;
glGetStringi = (PFNGLGETSTRINGIPROC)wglGetProcAddress("glGetStringi");
int prevlog = conf.log;
conf.log = 1;
ZZLog::GS_Log("Supported OpenGL Extensions:\n%s\n", ptoken); // write to the log file
// Probably a better way to do it, but seems to crash.
/*int n;
glGetIntegerv(GL_NUM_EXTENSIONS, &n);
ZZLog::GS_Log("Supported OpenGL Extensions:\n");
for (int i = 0; i < n; i++)
for (GLint i = 0; i < max_ext; i++)
{
ZZLog::GS_Log("%s/n", (const char*)glGetStringi(GL_EXTENSIONS, i));
}*/
string extension((const char*)glGetStringi(GL_EXTENSIONS, i));
mapGLExtensions[extension];
conf.log = prevlog;
// insert all exts into mapGLExtensions
const char* pend = NULL;
while (ptoken != NULL)
{
pend = strchr(ptoken, ' ');
if (pend != NULL)
{
mapGLExtensions[string(ptoken, pend-ptoken)];
}
else
{
mapGLExtensions[string(ptoken)];
break;
}
ptoken = pend;
while (*ptoken == ' ') ++ptoken;
temp = temp + extension;
if (i != (max_ext - 1)) temp += ", ";
}
// Write the extension list to the log, but only write it to the screen on a debug build.
#ifndef _DEBUG
ZZLog::Log("%d supported OpenGL Extensions: %s\n", max_ext, temp.c_str());
#endif
ZZLog::Debug_Log("%d supported OpenGL Extensions: %s\n", max_ext, temp.c_str());
return true;
}
@ -445,20 +432,22 @@ inline bool TryBlinearFormat(GLint fmt32, GLint fmt16, const GLvoid* vBilinearDa
}
bool ZeroGS::Create(int _width, int _height)
bool ZZCreate(int _width, int _height)
{
GLenum err = GL_NO_ERROR;
bool bSuccess = true;
int i;
Destroy(1);
GSStateReset();
ZZDestroy();
ZZGSStateReset();
if (!GLWin.DisplayWindow(_width, _height)) return false;
conf.mrtdepth = 0; // for now
if (!Create_Window(_width, _height)) return false;
if (!CreateFillExtensionsMap()) return false;
if (!CreateImportantCheck()) return false;
ZeroGS::CreateOtherCheck();
CreateOtherCheck();
// check the max texture width and height
glGetIntegerv(GL_MAX_TEXTURE_SIZE, &g_MaxTexWidth);
@ -509,14 +498,9 @@ bool ZeroGS::Create(int _width, int _height)
if (err != GL_NO_ERROR) bSuccess = false;
// init draw fns
drawfn[0] = KickPoint;
drawfn[1] = KickLine;
drawfn[2] = KickLine;
drawfn[3] = KickTriangle;
drawfn[4] = KickTriangle;
drawfn[5] = KickTriangleFan;
drawfn[6] = KickSprite;
drawfn[7] = KickDummy;
//init_drawfn();
if (ZZKick != NULL) delete ZZKick;
ZZKick = new Kick;
SetAA(conf.aa);
@ -527,7 +511,7 @@ bool ZeroGS::Create(int _width, int _height)
//s_bWriteDepth = true;
GL_BLEND_ALL(GL_ONE, GL_ONE, GL_ONE, GL_ONE);
glViewport(0, 0, nBackbufferWidth, nBackbufferHeight); // Reset The Current Viewport
glViewport(0, 0, GLWin.backbuffer.w, GLWin.backbuffer.h); // Reset The Current Viewport
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
@ -574,7 +558,7 @@ bool ZeroGS::Create(int _width, int _height)
g_vboBuffers.resize(VB_NUMBUFFERS);
glGenBuffers((GLsizei)g_vboBuffers.size(), &g_vboBuffers[0]);
for (i = 0; i < (int)g_vboBuffers.size(); ++i)
for (int i = 0; i < (int)g_vboBuffers.size(); ++i)
{
glBindBuffer(GL_ARRAY_BUFFER, g_vboBuffers[i]);
glBufferData(GL_ARRAY_BUFFER, 0x100*sizeof(VertexGPU), NULL, GL_STREAM_DRAW);
@ -657,32 +641,16 @@ bool ZeroGS::Create(int _width, int _height)
VertexGPU* pvert = &verts[0];
pvert->x = -0x7fff;
pvert->y = 0x7fff;
pvert->z = 0;
pvert->s = 0;
pvert->t = 0;
pvert->set_xyzst(-0x7fff, 0x7fff, 0, 0, 0);
pvert++;
pvert->x = 0x7fff;
pvert->y = 0x7fff;
pvert->z = 0;
pvert->s = 1;
pvert->t = 0;
pvert->set_xyzst(0x7fff, 0x7fff, 0, 1, 0);
pvert++;
pvert->x = -0x7fff;
pvert->y = -0x7fff;
pvert->z = 0;
pvert->s = 0;
pvert->t = 1;
pvert->set_xyzst(-0x7fff, -0x7fff, 0, 0, 1);
pvert++;
pvert->x = 0x7fff;
pvert->y = -0x7fff;
pvert->z = 0;
pvert->s = 1;
pvert->t = 1;
pvert->set_xyzst(0x7fff, -0x7fff, 0, 1, 1);
pvert++;
glBufferDataARB(GL_ARRAY_BUFFER, 4*sizeof(VertexGPU), &verts[0], GL_STATIC_DRAW);
@ -706,7 +674,7 @@ bool ZeroGS::Create(int _width, int _height)
vector<u32> conv16to32data(256*256);
for (i = 0; i < 256*256; ++i)
for (int i = 0; i < 256*256; ++i)
{
u32 tempcol = RGBA16to32(i);
// have to flip r and b
@ -730,7 +698,7 @@ bool ZeroGS::Create(int _width, int _height)
u32* dst = &conv32to16data[0];
for (i = 0; i < 32; ++i)
for (int i = 0; i < 32; ++i)
{
for (int j = 0; j < 32; ++j)
{
@ -778,8 +746,6 @@ bool ZeroGS::Create(int _width, int _height)
vb[0].Init(VB_BUFFERSIZE);
vb[1].Init(VB_BUFFERSIZE);
// g_bSaveFlushedFrame = 1;
g_vsprog = g_psprog = 0;
if (glGetError() == GL_NO_ERROR)
@ -793,7 +759,7 @@ bool ZeroGS::Create(int _width, int _height)
}
}
void ZeroGS::Destroy(bool bD3D)
void ZZDestroy()
{
Delete_Avi_Capture();
@ -822,7 +788,7 @@ void ZeroGS::Destroy(bool bD3D)
if (pvs != NULL)
{
for (int i = 0; i < ARRAY_SIZE(pvs); ++i)
for (int i = 0; i < ArraySize(pvs); ++i)
{
SAFE_RELEASE_PROG(pvs[i]);
}
@ -830,7 +796,7 @@ void ZeroGS::Destroy(bool bD3D)
if (ppsRegular != NULL)
{
for (int i = 0; i < ARRAY_SIZE(ppsRegular); ++i)
for (int i = 0; i < ArraySize(ppsRegular); ++i)
{
SAFE_RELEASE_PROG(ppsRegular[i].prog);
}
@ -838,7 +804,7 @@ void ZeroGS::Destroy(bool bD3D)
if (ppsTexture != NULL)
{
for (int i = 0; i < ARRAY_SIZE(ppsTexture); ++i)
for (int i = 0; i < ArraySize(ppsTexture); ++i)
{
SAFE_RELEASE_PROG(ppsTexture[i].prog);
}
@ -857,7 +823,7 @@ void ZeroGS::Destroy(bool bD3D)
SAFE_RELEASE_PROG(ppsCRTC24[1].prog);
SAFE_RELEASE_PROG(ppsOne.prog);
SAFE_DELETE(font_p);
safe_delete(font_p);
GLWin.ReleaseContext();

View File

@ -0,0 +1,254 @@
/* ZZ Open GL graphics plugin
* Copyright (c)2009-2010 zeydlitz@gmail.com, arcum42@gmail.com
* Based on Zerofrog's ZeroGS KOSMOS (c)2005-2008
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#include "ZZoglDrawing.h"
#include "ZZoglVB.h"
// Global vertex kicker instance; this file only defines the pointer, it is allocated elsewhere.
Kick* ZZKick;
// Indexed by prim->prim. Kick::KickVertex draws a primitive once gs.primC has reached this value.
const u32 g_primmult[8] = { 1, 2, 2, 3, 3, 3, 2, 0xff };
// Indexed by prim->prim. Amount Kick::KickVertex subtracts from gs.primC after each kick.
const u32 g_primsub[8] = { 1, 2, 1, 3, 1, 1, 2, 0 };
// OpenGL primitive mode for each primitive index (not referenced by the functions in this file).
const GLenum primtype[8] = { GL_POINTS, GL_LINES, GL_LINES, GL_TRIANGLES, GL_TRIANGLES, GL_TRIANGLES, GL_TRIANGLES, 0xffffffff };
// Indexed by context (prim->ctxt); Kick::Set_Vertex multiplies the u/v coordinates by these.
extern float fiTexWidth[2], fiTexHeight[2]; // current tex width and height
// Still thinking about the best place to put this.
// called on a primitive switch
void Prim()
{
FUNCLOG
VB& curvb = vb[prim->ctxt];
if (curvb.CheckPrim()) Flush(prim->ctxt);
curvb.SetCurrentPrim();
}
// Tells whether the primitive being kicked SHOULD be painted (true) or skipped (false).
// Hackish and should be replaced.
// A primitive is skipped only when all three conditions hold: the xenosaga_spec setting
// is enabled, vb[i].zbuf.zmsk is set, and prim->iip is clear.
bool __forceinline NoHighlights(int i)
{
	//Old code
	const bool skip = conf.settings().xenosaga_spec && vb[i].zbuf.zmsk && !prim->iip;
	return !skip;
}
// Not inlining for the moment to avoid getting 'unresolved external symbol' errors in Windows.
// This could also be resolved by moving the function into the header...
void Kick::KickVertex(bool adc)
{
FUNCLOG
if (++gs.primC >= (int)g_primmult[prim->prim])
{
if (!adc && NoHighlights(prim->ctxt)) DrawPrim(prim->prim);
else DirtyValidPrevPrim();
gs.primC -= g_primsub[prim->prim];
}
gs.primIndex = gs.primNext();
}
// Fills the GPU vertex *p from the GS vertex gsvertex, using the current context's buffer.
//   DO_Z_FOG - when true, depth and fog are transferred as well; when false, they are left untouched.
// Position is always written; colour comes from the vertex itself when prim->iip is set and
// from gs.rgba otherwise; texture coordinates are written only when prim->tme is set.
template<bool DO_Z_FOG>
void Kick::Set_Vertex(VertexGPU *p, Vertex & gsvertex)
{
	VB& curvb = vb[prim->ctxt];

	// Position (and optionally depth/fog).
	p->move_x(gsvertex, curvb.offset.x);
	p->move_y(gsvertex, curvb.offset.y);

	if (DO_Z_FOG)
	{
		p->move_z(gsvertex, curvb.zprimmask);
		p->move_fog(gsvertex);
	}

	// Colour.
	p->rgba = prim->iip ? gsvertex.rgba : gs.rgba;

	if (conf.settings().texa)
	{
		// Halve the top seven alpha bits and put the context's fba flag into bit 24.
		const u32 halvedAlpha = (p->rgba & 0xfe000000) >> 1;
		const u32 fbaBit = 0x01000000 * curvb.fba.fba;
		const u32 newAlpha = halvedAlpha + fbaBit;

		p->rgba = (p->rgba & 0xffffff) + newAlpha;
	}

	// Texture coordinates are only needed for textured primitives.
	if (!prim->tme) return;

	if (prim->fst)
	{
		// u/v are scaled by the per-context texture size factors; q is fixed to 1.
		p->s = (float)gsvertex.u * fiTexWidth[prim->ctxt];
		p->t = (float)gsvertex.v * fiTexHeight[prim->ctxt];
		p->q = 1;
	}
	else
	{
		// s/t/q are copied through unchanged.
		p->s = gsvertex.s;
		p->t = gsvertex.t;
		p->q = gsvertex.q;
	}
}
// Writes one assembled vertex to the primitive log.
//   vert - the vertex to print
//   id   - index of the vertex inside its primitive; index 0 is flagged with a '*'
// The body is compiled only when WRITE_PRIM_LOGS is defined; otherwise this is a no-op.
__forceinline void Kick::Output_Vertex(VertexGPU vert, u32 id)
{
#ifdef WRITE_PRIM_LOGS
	const char marker = (id == 0) ? '*' : ' ';

	ZZLog::Prim_Log("%c%d(%d): xyzf=(%4d,%4d,0x%x,%3d), rgba=0x%8.8x, stq = (%2.5f,%2.5f,%2.5f)",
		marker, id, prim->prim,
		vert.x / 8, vert.y / 8, vert.z, vert.f / 128,
		vert.rgba,
		Clamp(vert.s, -10, 10), Clamp(vert.t, -10, 10), Clamp(vert.q, -10, 10));
#endif
}
// Appends the vertices of one primitive of type prim_type (a PRIM_TYPE value) to the vertex
// buffer of the current context (vb[prim->ctxt]) and advances that buffer's nCount.
// Strips and fans reuse vertices already stored in the buffer while ValidPrevPrim is set;
// otherwise they are rebuilt from gs.gsvertex and ValidPrevPrim is set again.
// PRIM_DUMMY and unknown values add nothing.
void Kick::DrawPrim(u32 prim_type)
{
VB& curvb = vb[prim->ctxt];
curvb.FlushTexData();
// The other context still has queued vertices and shares this context's gsfb.fbp:
// flush it before queueing anything here.
if ((vb[!prim->ctxt].nCount > 0) && (vb[prim->ctxt].gsfb.fbp == vb[!prim->ctxt].gsfb.fbp))
{
assert(vb[prim->ctxt].nCount == 0);
Flush(!prim->ctxt);
}
// Make sure enough room is left for the biggest primitive (a sprite writes 6 vertices).
// This function is unlikely to be called so do not inline it.
if (unlikely(curvb.nCount + 6 > curvb.nNumVertices))
curvb.IncreaseVertexBuffer();
// p points at the first free slot; negative indices (p[-1], p[-2]) address vertices
// written by earlier calls.
VertexGPU* p = curvb.pBufferData + curvb.nCount;
// Only used by PRIM_SPRITE; declared before the switch so no case label jumps over them.
u32 prev;
u32 last;
switch(prim_type) {
case PRIM_POINT:
Set_Vertex<true>(&p[0], gs.gsvertex[gs.primIndex]);
curvb.nCount ++;
break;
case PRIM_LINE:
Set_Vertex<true>(&p[0], gs.gsvertex[gs.primPrev()]);
Set_Vertex<true>(&p[1], gs.gsvertex[gs.primIndex]);
curvb.nCount += 2;
break;
case PRIM_LINE_STRIP:
// The line starts at the end point of the previous segment: copy it from the buffer
// when it is still valid, otherwise rebuild it.
if (likely(ValidPrevPrim)) {
assert(curvb.nCount >= 1);
p[0] = p[-1];
} else {
Set_Vertex<true>(&p[0], gs.gsvertex[gs.primPrev()]);
ValidPrevPrim = true;
}
Set_Vertex<true>(&p[1], gs.gsvertex[gs.primIndex]);
curvb.nCount += 2;
break;
case PRIM_TRIANGLE:
Set_Vertex<true>(&p[0], gs.gsvertex[gs.primPrev(2)]);
Set_Vertex<true>(&p[1], gs.gsvertex[gs.primPrev()]);
Set_Vertex<true>(&p[2], gs.gsvertex[gs.primIndex]);
curvb.nCount += 3;
break;
case PRIM_TRIANGLE_STRIP:
// The first two corners are the last two vertices of the previous triangle.
if (likely(ValidPrevPrim)) {
assert(curvb.nCount >= 2);
p[0] = p[-2];
p[1] = p[-1];
} else {
Set_Vertex<true>(&p[0], gs.gsvertex[gs.primPrev(2)]);
Set_Vertex<true>(&p[1], gs.gsvertex[gs.primPrev()]);
ValidPrevPrim = true;
}
Set_Vertex<true>(&p[2], gs.gsvertex[gs.primIndex]);
curvb.nCount += 3;
break;
case PRIM_TRIANGLE_FAN:
// Every fan triangle shares the fan's first vertex, whose buffer index is kept in
// gs.nTriFanVert, plus the last vertex of the previous triangle.
if (likely(ValidPrevPrim)) {
assert(curvb.nCount >= 2);
VertexGPU* TriFanVert = curvb.pBufferData + gs.nTriFanVert;
p[0] = TriFanVert[0];
p[1] = p[-1];
} else {
Set_Vertex<true>(&p[0], gs.gsTriFanVertex);
Set_Vertex<true>(&p[1], gs.gsvertex[gs.primPrev(1)]);
ValidPrevPrim = true;
// Remember the base for future processing
gs.nTriFanVert = curvb.nCount;
}
Set_Vertex<true>(&p[2], gs.gsvertex[gs.primIndex]);
curvb.nCount += 3;
break;
case PRIM_SPRITE:
prev = gs.primPrev();
last = gs.primIndex;
// sprite is too small and AA shows lines (tek4, Mana Khemia)
// Note: this modifies the queued GS vertex in place.
gs.gsvertex[last].x += (4 * AA.x);
gs.gsvertex[last].y += (4 * AA.y);
// might be bad sprite (KH dialog text)
//if( gs.gsvertex[prev].x == gs.gsvertex[last].x || gs.gsvertex[prev].y == gs.gsvertex[last].y )
//return;
// process sprite as 2 triangles. The common diagonal is 0,1 and 3,4
// Z and fog are not converted for the first corner (DO_Z_FOG = false); they are copied
// from the last vertex just below.
Set_Vertex<false>(&p[0], gs.gsvertex[prev]);
Set_Vertex<true>(&p[1], gs.gsvertex[last]);
// Only fog and Z of last vertex is valid
p[0].z = p[1].z;
p[0].f = p[1].f;
// Duplicate the vertex
p[3] = p[0];
p[2] = p[0];
p[4] = p[1];
p[5] = p[1];
// Move the x / s coordinates of some vertices to create the other corners of the sprite
p[2].s = p[1].s;
p[2].x = p[1].x;
p[5].s = p[0].s;
p[5].x = p[0].x;
curvb.nCount += 6;
break;
default: break;
}
// Print DEBUG info and code assertion
// There is no break between the groups below: execution falls through, so a triangle
// logs vertices 2, 1 and 0, a line or sprite logs 1 and 0, and a point logs only 0.
switch(prim_type) {
case PRIM_TRIANGLE:
case PRIM_TRIANGLE_STRIP:
case PRIM_TRIANGLE_FAN:
assert(gs.primC >= 3);
Output_Vertex(p[2],2);
case PRIM_LINE:
case PRIM_LINE_STRIP:
case PRIM_SPRITE:
assert(gs.primC >= 2);
Output_Vertex(p[1],1);
case PRIM_POINT:
assert(gs.primC >= 1);
Output_Vertex(p[0],0);
default: break;
}
}

View File

@ -0,0 +1,60 @@
/* ZZ Open GL graphics plugin
* Copyright (c)2009-2010 zeydlitz@gmail.com, arcum42@gmail.com
* Based on Zerofrog's ZeroGS KOSMOS (c)2005-2008
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#ifndef ZZOGLDRAWING_H_INCLUDED
#define ZZOGLDRAWING_H_INCLUDED
#include "Util.h"
#include "GS.h"
extern bool __forceinline NoHighlights(int i);
// Primitive kinds handled by Kick::DrawPrim.
// The numeric values matter: they are used as indices into the 8-entry per-primitive tables.
enum PRIM_TYPE {
	PRIM_POINT          = 0,
	PRIM_LINE           = 1,
	PRIM_LINE_STRIP     = 2,
	PRIM_TRIANGLE       = 3,
	PRIM_TRIANGLE_STRIP = 4,
	PRIM_TRIANGLE_FAN   = 5,
	PRIM_SPRITE         = 6,
	PRIM_DUMMY          = 7	// no vertices are emitted for this one
};
// Turns kicked GS vertices into GPU vertices and appends them to the current context's
// vertex buffer (see KickVertex / DrawPrim in ZZoglDrawing.cpp).
class Kick
{
	private:
		// template<bool DO_Z_FOG> void Set_Vertex(VertexGPU *p, int i);

		// Fills *p from gsvertex; depth and fog are transferred only when DO_Z_FOG is true.
		template<bool DO_Z_FOG> void Set_Vertex(VertexGPU *p, Vertex &gsvertex);

		// Logs one vertex of the primitive just drawn (id = its index inside the primitive).
		void Output_Vertex(VertexGPU vert, u32 id);

		// True while the vertices of the previously drawn primitive are still in the vertex
		// buffer, so that strips and fans may copy them instead of rebuilding them.
		bool ValidPrevPrim;

	public:
		// ValidPrevPrim must start out false: DrawPrim reads it for strip/fan primitives and,
		// when it is set, copies vertices from in front of the current buffer position.
		// Leaving it uninitialised made that first read undefined.
		Kick() : ValidPrevPrim(false) { }
		~Kick() { }

		// Registers one kicked vertex; draws the primitive once it is complete unless adc is set.
		void KickVertex(bool adc);

		// Emits the vertices for one primitive of type i (a PRIM_TYPE value).
		void DrawPrim(u32 i);

		// Forces the next strip/fan primitive to rebuild all of its vertices.
		inline void DirtyValidPrevPrim() {
			ValidPrevPrim = false;
		}
};
extern Kick* ZZKick;
#endif // ZZOGLDRAWING_H_INCLUDED

View File

@ -23,12 +23,10 @@
#include "GS.h"
#include "Mem.h"
#include "zerogs.h"
#include "targets.h"
#include "ZZoglFlushHack.h"
#include "ZZoglShaders.h"
using namespace ZeroGS;
#include <math.h>
//------------------ Defines
#ifndef ZEROGS_DEVBUILD
@ -122,7 +120,6 @@ void Draw(const VB& curvb)
extern int g_nDepthBias;
extern float g_fBlockMult; // used for old cards, that do not support Alpha-32float textures. We store block data in u16 and use it.
bool g_bUpdateStencil = 1;
//u32 g_SaveFrameNum = 0; // ZZ
extern ZZshProgram g_psprog; // 2 -- ZZ
@ -147,6 +144,7 @@ static u32 s_ptexNextSet[2] = {0}; // ZZ
extern vector<u32> s_vecTempTextures; // temporary textures, released at the end of every frame
extern bool s_bTexFlush;
extern int g_nCurVBOIndex;
bool s_bWriteDepth = false;
bool s_bDestAlphaTest = false;
int s_ClutResolve = 0; // ZZ
@ -193,11 +191,9 @@ int s_nWireframeCount = 0;
//------------------ Namespace
namespace ZeroGS
{
VB vb[2];
float fiTexWidth[2], fiTexHeight[2]; // current tex width and height
extern vector<GLuint> g_vboBuffers; // VBOs for all drawing commands
//u8 s_AAx = 0, s_AAy = 0; // if AAy is set, then AAx has to be set
Point AA = {0,0};
@ -211,7 +207,7 @@ void FlushTransferRanges(const tex0Info* ptex); //zz
void SetTexVariables(int context, FRAGMENTSHADER* pfragment); // zz
void SetTexInt(int context, FRAGMENTSHADER* pfragment, int settexint); // zz
void SetAlphaVariables(const alphaInfo& ainfo); // zzz
void ResetAlphaVariables();
//void ResetAlphaVariables();
inline void SetAlphaTestInt(pixTest curtest);
@ -221,8 +217,11 @@ inline void ProcessStencil(const VB& curvb);
inline void RenderFBA(const VB& curvb, ZZshParameter sOneColor);
inline void ProcessFBA(const VB& curvb, ZZshParameter sOneColor); // zz
void SetContextTarget(int context);
}
void SetWriteDepth();
bool IsWriteDepth();
void SetDestAlphaTest();
//------------------ Code
@ -279,6 +278,8 @@ inline void SwitchWireframeOn()
}
}
extern u32 ptexBilinearBlocks;
int GetTexFilter(const tex1Info& tex1)
{
// always force
@ -299,11 +300,11 @@ int GetTexFilter(const tex1Info& tex1)
return texfilter;
}
void ZeroGS::ReloadEffects()
void ReloadEffects()
{
#ifdef ZEROGS_DEVBUILD
for (int i = 0; i < ARRAY_SIZE(ppsTexture); ++i)
for (int i = 0; i < ArraySize(ppsTexture); ++i)
{
SAFE_RELEASE_PROG(ppsTexture[i].prog);
}
@ -321,7 +322,7 @@ inline void VisualBufferMessage(int context)
{
#if defined(WRITE_PRIM_LOGS) && defined(_DEBUG)
BufferNumber++;
ZeroGS::VB& curvb = vb[context];
VB& curvb = vb[context];
static const char* patst[8] = { "NEVER", "ALWAYS", "LESS", "LEQUAL", "EQUAL", "GEQUAL", "GREATER", "NOTEQUAL"};
static const char* pztst[4] = { "NEVER", "ALWAYS", "GEQUAL", "GREATER" };
static const char* pafail[4] = { "KEEP", "FB_ONLY", "ZB_ONLY", "RGB_ONLY" };
@ -344,7 +345,6 @@ inline void VisualBufferMessage(int context)
ZZLog::Error_Log("TGA name '%s'.", Name);
free(Name);
// }
// ZZLog::Debug_Log("frame: %d, buffer %ld.\n", g_SaveFrameNum, BufferNumber);
ZZLog::Debug_Log("buffer %ld.\n", BufferNumber);
#endif
}
@ -353,12 +353,10 @@ inline void SaveRendererTarget(VB& curvb)
{
#ifdef _DEBUG
// if (g_bSaveFlushedFrame & 0x80000000)
// {
// Needs a # after rndr to work...
// char str[255];
// sprintf(str, "rndr%d.tga", g_SaveFrameNum);
// sprintf(str, "rndr.tga");
// SaveRenderTarget(str, curvb.prndr->fbw, curvb.prndr->fbh, 0);
// }
#endif
}
@ -466,18 +464,18 @@ inline CRenderTarget* FlushReGetTarget(int& tbw, int& tbp0, int& tpsm, VB& curvb
if ((ptextarg == NULL) && (tpsm == PSMT8) && (conf.settings().reget))
{
// check for targets with half the width. Break Valkyrie Chronicles
ptextarg = s_RTs.GetTarg(tbp0, tbw / 2, curvb);
ptextarg = s_RTs.GetTarg(tbp0, tbw / 2);
if (ptextarg == NULL)
{
tbp0 &= ~0x7ff;
ptextarg = s_RTs.GetTarg(tbp0, tbw / 2, curvb); // mgs3 hack
ptextarg = s_RTs.GetTarg(tbp0, tbw / 2); // mgs3 hack
if (ptextarg == NULL)
{
// check the next level (mgs3)
tbp0 &= ~0xfff;
ptextarg = s_RTs.GetTarg(tbp0, tbw / 2, curvb); // mgs3 hack
ptextarg = s_RTs.GetTarg(tbp0, tbw / 2); // mgs3 hack
}
if (ptextarg != NULL && ptextarg->start > tbp0*256)
@ -492,7 +490,7 @@ inline CRenderTarget* FlushReGetTarget(int& tbw, int& tbp0, int& tpsm, VB& curvb
if (PSMT_ISZTEX(tpsm) && (ptextarg == NULL))
{
// try depth
ptextarg = s_DepthRTs.GetTarg(tbp0, tbw, curvb);
ptextarg = s_DepthRTs.GetTarg(tbp0, tbw);
}
if ((ptextarg == NULL) && (conf.settings().texture_targs))
@ -509,24 +507,24 @@ inline CRenderTarget* FlushReGetTarget(int& tbw, int& tbp0, int& tpsm, VB& curvb
{
if (ptextarg == NULL)
{
printf("Miss %x 0x%x %d\n", tbw, tbp0, tpsm);
ZZLog::Debug_Log("Miss %x 0x%x %d", tbw, tbp0, tpsm);
typedef map<u32, CRenderTarget*> MAPTARGETS;
for (MAPTARGETS::iterator itnew = s_RTs.mapTargets.begin(); itnew != s_RTs.mapTargets.end(); ++itnew)
{
printf("\tRender %x 0x%x %x\n", itnew->second->fbw, itnew->second->fbp, itnew->second->psm);
ZZLog::Debug_Log("\tRender %x 0x%x %x", itnew->second->fbw, itnew->second->fbp, itnew->second->psm);
}
for (MAPTARGETS::iterator itnew = s_DepthRTs.mapTargets.begin(); itnew != s_DepthRTs.mapTargets.end(); ++itnew)
{
printf("\tDepth %x 0x%x %x\n", itnew->second->fbw, itnew->second->fbp, itnew->second->psm);
ZZLog::Debug_Log("\tDepth %x 0x%x %x", itnew->second->fbw, itnew->second->fbp, itnew->second->psm);
}
printf("\tCurvb 0x%x 0x%x 0x%x %x\n", curvb.frame.fbp, curvb.prndr->end, curvb.prndr->fbp, curvb.prndr->fbw);
ZZLog::Debug_Log("\tCurvb 0x%x 0x%x 0x%x %x", curvb.frame.fbp, curvb.prndr->end, curvb.prndr->fbp, curvb.prndr->fbw);
}
else
printf("Hit %x 0x%x %x\n", tbw, tbp0, tpsm);
ZZLog::Debug_Log("Hit %x 0x%x %x", tbw, tbp0, tpsm);
}
#endif
@ -545,7 +543,7 @@ inline CRenderTarget* FlushGetTarget(VB& curvb)
if (curvb.bNeedTexCheck)
{
printf("How it is possible?\n");
ZZLog::Error_Log("How it is possible?");
// not yet initied, but still need to get correct target! (xeno3 ingame)
tbp0 = ZZOglGet_tbp0_TexBits(curvb.uNextTex0Data[0]);
tbw = ZZOglGet_tbw_TexBitsMult(curvb.uNextTex0Data[0]);
@ -558,7 +556,7 @@ inline CRenderTarget* FlushGetTarget(VB& curvb)
tpsm = curvb.tex0.psm;
}
ptextarg = s_RTs.GetTarg(tbp0, tbw, curvb);
ptextarg = s_RTs.GetTarg(tbp0, tbw);
if (ptextarg == NULL)
ptextarg = FlushReGetTarget(tbw, tbp0, tpsm, curvb);
@ -733,7 +731,7 @@ inline void FlushDecodeClut(VB& curvb, GLuint& ptexclut)
int entries = PSMT_IS8CLUT(curvb.tex0.psm) ? 256 : 16;
if (curvb.tex0.csm && curvb.tex0.csa)
printf("ERROR, csm1\n");
ZZLog::Debug_Log("ERROR, csm1.");
if (PSMT_IS32BIT(curvb.tex0.cpsm)) // 32 bit
{
@ -1088,6 +1086,8 @@ inline void AlphaSetStencil(bool DoIt)
else glDisable(GL_STENCIL_TEST);
}
//inline u32 FtoDW(float f) { return (*((u32*)&f)); }
inline void AlphaSetDepthTest(VB& curvb, const pixTest curtest, FRAGMENTSHADER* pfragment)
{
glDepthMask(!curvb.zbuf.zmsk && curtest.zte);
@ -1223,7 +1223,7 @@ inline void AlphaRenderStencil(VB& curvb, bool s_bDestAlphaTest, bool bCanRender
inline void AlphaTest(VB& curvb)
{
// printf ("%d %d %d %d %d\n", curvb.test.date, curvb.test.datm, gs.texa.aem, curvb.test.ate, curvb.test.atst );
// ZZLog::Debug_Log("%d %d %d %d %d", curvb.test.date, curvb.test.datm, gs.texa.aem, curvb.test.ate, curvb.test.atst );
// return;
// Zeydlitz changed this with a reason! It's an "Alpha more than 1 hack."
@ -1236,7 +1236,7 @@ inline void AlphaTest(VB& curvb)
else
{
glAlphaFunc(GL_LESS, 1.0f);
printf("%d %d %d\n", curvb.test.date, curvb.test.datm, gs.texa.aem);
ZZLog::Debug_Log("%d %d %d", curvb.test.date, curvb.test.datm, gs.texa.aem);
}
}
@ -1447,7 +1447,6 @@ inline void AlphaSaveTarget(VB& curvb)
#ifdef _DEBUG
return; // Do nothing
// if( g_bSaveFlushedFrame & 0xf ) {
//#ifdef _WIN32
// CreateDirectory("frames", NULL);
//#else
@ -1456,13 +1455,14 @@ inline void AlphaSaveTarget(VB& curvb)
// system(strdir);
//#endif
// char str[255];
// sprintf(str, "frames/frame%.4d.tga", g_SaveFrameNum++);
// Needs a # after frame to work properly.
// sprintf(str, "frames/frame.tga");
// //glBindFramebufferEXT( GL_FRAMEBUFFER_EXT, 0 ); // switch to the backbuffer
// //glFlush();
// //SaveTexture("tex.jpg", GL_TEXTURE_RECTANGLE_NV, curvb.prndr->ptex, RW(curvb.prndr->fbw), RH(curvb.prndr->fbh));
// SaveRenderTarget(str, RW(curvb.prndr->fbw), RH(curvb.prndr->fbh), 0);
// }
#endif
}
@ -1477,7 +1477,7 @@ inline void FlushUndoFiter(u32 dwFilterOpts)
}
// This is the most important function! It draws all collected info onscreen.
void ZeroGS::Flush(int context)
void Flush(int context)
{
FUNCLOG
VB& curvb = vb[context];
@ -1535,13 +1535,13 @@ void ZeroGS::Flush(int context)
GL_REPORT_ERRORD();
}
void ZeroGS::FlushBoth()
void FlushBoth()
{
Flush(0);
Flush(1);
}
inline void ZeroGS::RenderFBA(const VB& curvb, ZZshParameter sOneColor)
inline void RenderFBA(const VB& curvb, ZZshParameter sOneColor)
{
// add fba to all pixels
GL_STENCILFUNC(GL_ALWAYS, STENCIL_FBA, 0xff);
@ -1585,7 +1585,7 @@ inline void ZeroGS::RenderFBA(const VB& curvb, ZZshParameter sOneColor)
GL_ZTEST(curvb.test.zte);
}
__forceinline void ZeroGS::RenderAlphaTest(const VB& curvb, ZZshParameter sOneColor)
__forceinline void RenderAlphaTest(const VB& curvb, ZZshParameter sOneColor)
{
if (!g_bUpdateStencil) return;
@ -1653,7 +1653,7 @@ __forceinline void ZeroGS::RenderAlphaTest(const VB& curvb, ZZshParameter sOneCo
}
}
inline void ZeroGS::RenderStencil(const VB& curvb, u32 dwUsingSpecialTesting)
inline void RenderStencil(const VB& curvb, u32 dwUsingSpecialTesting)
{
//NOTE: This stencil hack for dest alpha testing ONLY works when
// the geometry in one DrawPrimitive call does not overlap
@ -1670,7 +1670,7 @@ inline void ZeroGS::RenderStencil(const VB& curvb, u32 dwUsingSpecialTesting)
GL_STENCILFUNC_SET();
}
inline void ZeroGS::ProcessStencil(const VB& curvb)
inline void ProcessStencil(const VB& curvb)
{
assert(!curvb.fba.fba);
@ -1723,7 +1723,7 @@ inline void ZeroGS::ProcessStencil(const VB& curvb)
glStencilOp(GL_KEEP, GL_KEEP, GL_KEEP);
}
__forceinline void ZeroGS::ProcessFBA(const VB& curvb, ZZshParameter sOneColor)
__forceinline void ProcessFBA(const VB& curvb, ZZshParameter sOneColor)
{
if ((curvb.frame.fbm&0x80000000)) return;
@ -1784,7 +1784,7 @@ __forceinline void ZeroGS::ProcessFBA(const VB& curvb, ZZshParameter sOneColor)
GL_ZTEST(curvb.test.zte);
}
void ZeroGS::SetContextTarget(int context)
void SetContextTarget(int context)
{
FUNCLOG
VB& curvb = vb[context];
@ -1841,7 +1841,7 @@ void ZeroGS::SetContextTarget(int context)
if (curvb.prndr->status & CRenderTarget::TS_NeedUpdate)
{
/* if(bSetTarg) {
* printf ( " Here\n ");
* ZZLog::Debug_Log( " Here ");
* if(s_bWriteDepth) {
* curvb.pdepth->SetRenderTarget(1);
* curvb.pdepth->SetDepthStencilSurface();
@ -1859,7 +1859,7 @@ void ZeroGS::SetContextTarget(int context)
//if( bSetTarg && ((vb[0].pdepth != vb[1].pdepth && vb[!context].bVarsSetTarg) || !vb[context].bVarsSetTarg) )
curvb.pdepth->SetDepthStencilSurface();
if (conf.mrtdepth && ZeroGS::IsWriteDepth()) curvb.pdepth->SetRenderTarget(1);
if (conf.mrtdepth && IsWriteDepth()) curvb.pdepth->SetRenderTarget(1);
if (s_ptexCurSet[0] == curvb.prndr->ptex) s_ptexCurSet[0] = 0;
if (s_ptexCurSet[1] == curvb.prndr->ptex) s_ptexCurSet[1] = 0;
@ -1892,7 +1892,7 @@ void ZeroGS::SetContextTarget(int context)
}
void ZeroGS::SetTexInt(int context, FRAGMENTSHADER* pfragment, int settexint)
void SetTexInt(int context, FRAGMENTSHADER* pfragment, int settexint)
{
FUNCLOG
@ -1929,14 +1929,14 @@ void SetTexClamping(int context, FRAGMENTSHADER* pfragment)
{
FUNCLOG
SetShaderCaller("SetTexClamping");
clampInfo* pclamp = &ZeroGS::vb[context].clamp;
clampInfo* pclamp = &vb[context].clamp;
float4 v, v2;
v.x = v.y = 0;
u32* ptex = ZeroGS::vb[context].ptexClamp;
u32* ptex = vb[context].ptexClamp;
ptex[0] = ptex[1] = 0;
float fw = ZeroGS::vb[context].tex0.tw ;
float fh = ZeroGS::vb[context].tex0.th ;
float fw = vb[context].tex0.tw ;
float fh = vb[context].tex0.th ;
switch (pclamp->wms)
{
@ -1966,7 +1966,7 @@ void SetTexClamping(int context, FRAGMENTSHADER* pfragment)
if (correctMinu != g_PrevBitwiseTexX)
{
g_PrevBitwiseTexX = correctMinu;
ptex[0] = ZeroGS::s_BitwiseTextures.GetTex(correctMinu, 0);
ptex[0] = s_BitwiseTextures.GetTex(correctMinu, 0);
}
break;
@ -2001,7 +2001,7 @@ void SetTexClamping(int context, FRAGMENTSHADER* pfragment)
if (correctMinv != g_PrevBitwiseTexY)
{
g_PrevBitwiseTexY = correctMinv;
ptex[1] = ZeroGS::s_BitwiseTextures.GetTex(correctMinv, ptex[0]);
ptex[1] = s_BitwiseTextures.GetTex(correctMinv, ptex[0]);
}
break;
}
@ -2015,17 +2015,8 @@ void SetTexClamping(int context, FRAGMENTSHADER* pfragment)
}
// Fixme should be in float4 lib
inline bool equal_vectors(float4 a, float4 b)
{
if (abs(a.x - b.x) + abs(a.y - b.y) + abs(a.z - b.z) + abs(a.w - b.w) < 0.01)
return true;
else
return false;
}
int CheckTexArray[4][2][2][2] = {{{{0, }}}};
void ZeroGS::SetTexVariables(int context, FRAGMENTSHADER* pfragment)
void SetTexVariables(int context, FRAGMENTSHADER* pfragment)
{
FUNCLOG
@ -2169,12 +2160,12 @@ void ZeroGS::SetTexVariables(int context, FRAGMENTSHADER* pfragment)
if ( equal_vectors(valpha, valpha3) && equal_vectors(valpha2, valpha4) ) {
if (CheckTexArray[tex0.tfx][tex0.tcc][psm!=1][PSMT_ALPHAEXP(psm)] == 0) {
printf ( "Good issue %d %d %d %d\n", tex0.tfx, tex0.tcc, psm, PSMT_ALPHAEXP(psm) );
ZZLog::Debug_Log ( "Good issue %d %d %d %d", tex0.tfx, tex0.tcc, psm, PSMT_ALPHAEXP(psm) );
CheckTexArray[tex0.tfx][tex0.tcc][psm!=1][PSMT_ALPHAEXP(psm) ] = 1;
}
}
else if (CheckTexArray[tex0.tfx][tex0.tcc][psm!=1][PSMT_ALPHAEXP(psm)] == -1) {
printf ("Bad array, %d %d %d %d\n\tolf valpha %f, %f, %f, %f : valpha2 %f %f %f %f\n\tnew valpha %f, %f, %f, %f : valpha2 %f %f %f %f\n",
ZZLog::Debug_Log ("Bad array, %d %d %d %d\n\tolf valpha %f, %f, %f, %f : valpha2 %f %f %f %f\n\tnew valpha %f, %f, %f, %f : valpha2 %f %f %f %f",
tex0.tfx, tex0.tcc, psm, PSMT_ALPHAEXP(psm),
valpha3.x, valpha3.y, valpha3.z, valpha3.w, valpha4.x, valpha4.y, valpha4.z, valpha4.w,
valpha.x, valpha.y, valpha.z, valpha.w, valpha2.x, valpha2.y, valpha2.z, valpha2.w);
@ -2203,7 +2194,7 @@ void ZeroGS::SetTexVariables(int context, FRAGMENTSHADER* pfragment)
}
}
void ZeroGS::SetTexVariablesInt(int context, int bilinear, const tex0Info& tex0, bool CheckVB, FRAGMENTSHADER* pfragment, int force)
void SetTexVariablesInt(int context, int bilinear, const tex0Info& tex0, bool CheckVB, FRAGMENTSHADER* pfragment, int force)
{
FUNCLOG
float4 v;
@ -2363,16 +2354,16 @@ void ZeroGS::SetTexVariablesInt(int context, int bilinear, const tex0Info& tex0,
//if( a.fix <= 0x80 ) { \
// dwTemp = (a.fix*2)>255?255:(a.fix*2); \
// dwTemp = dwTemp|(dwTemp<<8)|(dwTemp<<16)|0x80000000; \
// printf("bfactor: %8.8x\n", dwTemp); \
// ZZLog::Debug_Log("bfactor: %8.8x", dwTemp); \
// glBlendColorEXT(dwTemp); \
// } \
// else { \
void ZeroGS::ResetAlphaVariables() {
FUNCLOG
}
//void ResetAlphaVariables() {
// FUNCLOG
//}
inline void ZeroGS::NeedFactor(int w)
inline void NeedFactor(int w)
{
if (bDestAlphaColor == 2)
{
@ -2384,7 +2375,7 @@ inline void ZeroGS::NeedFactor(int w)
//static int CheckArray[48][2] = {{0,}};
void ZeroGS::SetAlphaVariables(const alphaInfo& a)
void SetAlphaVariables(const alphaInfo& a)
{
FUNCLOG
bool alphaenable = true;
@ -2849,8 +2840,8 @@ void ZeroGS::SetAlphaVariables(const alphaInfo& a)
if ( alphaenable && (t_rgbeq != s_rgbeq || s_srcrgb != t_srcrgb || t_dstrgb != s_dstrgb || tAlphaClamping != bAlphaClamping)) {
if (CheckArray[code][(bDestAlphaColor==2)] != -1) {
printf ( "A code %d, 0x%x, 0x%x, 0x%x, 0x%x %d\n", code, alpha, one_minus_alpha, one, zero, bDestAlphaColor );
printf ( " Difference %d %d %d %d | 0x%x 0x%x | 0x%x 0x%x | 0x%x 0x%x | %d %d\n",
ZZLog::Debug_Log( "A code %d, 0x%x, 0x%x, 0x%x, 0x%x %d", code, alpha, one_minus_alpha, one, zero, bDestAlphaColor );
ZZLog::Debug_Log( " Difference %d %d %d %d | 0x%x 0x%x | 0x%x 0x%x | 0x%x 0x%x | %d %d",
code, a.a, a.b, a.d,
t_rgbeq, s_rgbeq, t_srcrgb, s_srcrgb, t_dstrgb, s_dstrgb, tAlphaClamping, bAlphaClamping);
CheckArray[code][(bDestAlphaColor==2)] = -1;
@ -2858,7 +2849,7 @@ void ZeroGS::SetAlphaVariables(const alphaInfo& a)
}
else
if (CheckArray[code][(bDestAlphaColor==2)] == 0){
printf ( "Add good code %d %d, psm %d destA %d\n", code, a.c, vb[icurctx].prndr->psm, bDestAlphaColor);
ZZLog::Debug_Log( "Add good code %d %d, psm %d destA %d", code, a.c, vb[icurctx].prndr->psm, bDestAlphaColor);
CheckArray[code][(bDestAlphaColor==2)] = 1;
}*/
@ -2877,7 +2868,7 @@ void ZeroGS::SetAlphaVariables(const alphaInfo& a)
INC_ALPHAVARS();
}
void ZeroGS::SetWriteDepth()
void SetWriteDepth()
{
FUNCLOG
@ -2888,26 +2879,26 @@ void ZeroGS::SetWriteDepth()
}
}
bool ZeroGS::IsWriteDepth()
bool IsWriteDepth()
{
FUNCLOG
return s_bWriteDepth;
}
bool ZeroGS::IsWriteDestAlphaTest()
bool IsWriteDestAlphaTest()
{
FUNCLOG
return s_bDestAlphaTest;
}
void ZeroGS::SetDestAlphaTest()
void SetDestAlphaTest()
{
FUNCLOG
s_bDestAlphaTest = true;
s_nWriteDestAlphaTest = 4;
}
void ZeroGS::SetTexFlush()
void SetTexFlush()
{
FUNCLOG
s_bTexFlush = true;

View File

@ -464,7 +464,7 @@ static const u32 MAX_FRAMES = 500;
static GSFrameInfo FrameAppear[MAX_FRAMES];
static u32 Rec_Numbers = 0;
void RecordNewFrames(ZeroGS::VB& curvb, GSFrameInfo fi) {
void RecordNewFrames(VB& curvb, GSFrameInfo fi) {
if (Rec_Numbers >= MAX_FRAMES)
return;
@ -486,12 +486,12 @@ void RecordNewFrames(ZeroGS::VB& curvb, GSFrameInfo fi) {
// Dump a nice picture of the frame
char filename[255];
sprintf(filename, "SkipFlushFrame_%d__%d.tga", g_SkipFlushFrame, Rec_Numbers);
ZeroGS::SaveRenderTarget(filename, curvb.prndr->fbw, curvb.prndr->fbh, 0);
SaveRenderTarget(filename, curvb.prndr->fbw, curvb.prndr->fbh, 0);
}
}
#endif
__forceinline bool IsBadFrame(ZeroGS::VB& curvb)
__forceinline bool IsBadFrame(VB& curvb)
{
GSFrameInfo fi;

View File

@ -27,7 +27,8 @@
#define ZZOGL_FLUSH_HACK_H_INCLUDED
#include "GS.h"
#include "zerogs.h"
#include "targets.h"
#include "ZZoglVB.h"
extern int g_SkipFlushFrame;
@ -79,5 +80,5 @@ void GSC_StarOcean3(const GSFrameInfo& fi, int& skip);
void GSC_ValkyrieProfile2(const GSFrameInfo& fi, int& skip);
void GSC_RadiataStories(const GSFrameInfo& fi, int& skip);
extern bool IsBadFrame(ZeroGS::VB& curvb);
extern bool IsBadFrame(VB& curvb);
#endif

View File

@ -26,6 +26,7 @@
#endif
#include <assert.h>
#include "Pcsx2Defs.h"
//#define ZZ_MMATH
@ -194,6 +195,15 @@ class Vector4
y = ((color >> 8) & 0xff) / 255.0f;
z = ((color >> 16) & 0xff) / 255.0f;
}
// Approximate equality test: returns true when the sum of the absolute
// per-component differences between this vector and v is below 0.01.
//
// The absolute differences are spelled out by hand instead of calling an
// unqualified abs(): depending on which headers are in scope, abs() can bind
// to the C `int abs(int)`, which truncates every difference smaller than 1.0
// to 0 and makes clearly different vectors compare equal.
bool equal_vectors(const Vector4<T>& v)
{
	return (x < v.x ? v.x - x : x - v.x)
		 + (y < v.y ? v.y - y : y - v.y)
		 + (z < v.z ? v.z - z : z - v.z)
		 + (w < v.w ? v.w - w : w - v.w) < 0.01;
}
};
typedef Vector4<float> float4;
@ -443,6 +453,14 @@ class float4
return float4(_mm_cmple_ps(v1.m, v2.m));
}
// Approximate equality test: returns true when the sum of the absolute
// per-component differences between this vector and v is below 0.01.
//
// The absolute differences are spelled out by hand instead of calling an
// unqualified abs(): depending on which headers are in scope, abs() can bind
// to the C `int abs(int)`, which truncates every difference smaller than 1.0
// to 0 and makes clearly different vectors compare equal.
bool equal_vectors(const float4& v)
{
	return (x < v.x ? v.x - x : x - v.x)
		 + (y < v.y ? v.y - y : y - v.y)
		 + (z < v.z ? v.z - z : z - v.z)
		 + (w < v.w ? v.w - w : w - v.w) < 0.01;
}
// This looked interesting, so I thought I'd include it...
template<int i> float4 shuffle() const

View File

@ -20,9 +20,10 @@
// Save and Load.
//------------------ Includes
#include "zerogs.h"
#include "targets.h"
#include "Util.h"
#include "ZZoglVB.h"
extern void ZZGSStateReset();
//----------------------- Defines
#define VBSAVELIMIT ((u32)((u8*)&vb[0].nNextFrameHeight-(u8*)&vb[0]))
@ -44,7 +45,10 @@ char *libraryNameX = "ZeroGS Playground OpenGL ";
extern char *libraryName;
extern u32 s_uTex1Data[2][2], s_uClampData[2];
int ZeroGS::Save(s8* pbydata)
void SetFogColor(u32 fog);
void SetFogColor(GIFRegFOGCOL* fog);
int ZZSave(s8* pbydata)
{
if (pbydata == NULL)
return 40 + MEMORY_END + sizeof(gs) + 2*VBSAVELIMIT + 2*sizeof(frameInfo) + 4 + 256*4;
@ -85,7 +89,10 @@ int ZeroGS::Save(s8* pbydata)
return 0;
}
bool ZeroGS::Load(s8* pbydata)
extern u32 s_uFramebuffer;
extern int g_nCurVBOIndex;
bool ZZLoad(s8* pbydata)
{
memset(s_uTex1Data, 0, sizeof(s_uTex1Data));
memset(s_uClampData, 0, sizeof(s_uClampData));
@ -99,7 +106,7 @@ bool ZeroGS::Load(s8* pbydata)
{
g_MemTargs.Destroy();
GSStateReset();
ZZGSStateReset();
pbydata += 32;
//int context = *(int*)pbydata;

View File

@ -21,10 +21,11 @@
// ZZogl Shader manipulation functions.
//------------------- Includes
#include "zerogs.h"
#include "Util.h"
#include "ZZoglShaders.h"
#include "zpipe.h"
#include <math.h>
#include <map>
#ifdef _WIN32
# include "Win32.h"
@ -33,8 +34,6 @@ extern HINSTANCE hInst;
// ----------------- Defines
using namespace ZeroGS;
#define TEXWRAP_REPEAT 0
#define TEXWRAP_CLAMP 1
#define TEXWRAP_REGION_REPEAT 2
@ -68,7 +67,13 @@ using namespace ZeroGS;
//------------------ Constants
// Used in a logarithmic Z-test, as (1-o(1))/log(MAX_U32).
const float g_filog32 = 0.999f / (32.0f * logf(2.0f));
#ifdef _DEBUG
const static char* g_pTexTypes[] = { "32", "tex32", "clut32", "tex32to16", "tex16to8h" };
#endif
const char* g_pShaders[4] = { "full", "reduced", "accurate", "accurate-reduced" };
// ----------------- Global Variables
@ -87,16 +92,20 @@ char* EFFECT_DIR;
bool g_bCRTCBilinear = true;
namespace ZeroGS {
float4 g_vdepth, vlogz;
FRAGMENTSHADER ppsBitBlt[2], ppsBitBltDepth, ppsOne;
FRAGMENTSHADER ppsBaseTexture, ppsConvert16to32, ppsConvert32to16;
FRAGMENTSHADER ppsRegular[4], ppsTexture[NUM_SHADERS];
FRAGMENTSHADER ppsCRTC[2], ppsCRTC24[2], ppsCRTCTarg[2];
VERTEXSHADER pvsBitBlt;
float4 g_vdepth, vlogz;
FRAGMENTSHADER ppsBitBlt[2], ppsBitBltDepth, ppsOne;
FRAGMENTSHADER ppsBaseTexture, ppsConvert16to32, ppsConvert32to16;
FRAGMENTSHADER ppsRegular[4], ppsTexture[NUM_SHADERS];
FRAGMENTSHADER ppsCRTC[2], ppsCRTC24[2], ppsCRTCTarg[2];
VERTEXSHADER pvsBitBlt;
inline bool LoadEffects();
}
extern u32 ptexBlocks; // holds information on block tiling. It's texture number in OpenGL -- if 0 than such texture
extern u32 ptexConv16to32; // does not exists. This textures should be created on start and released on finish.
extern u32 ptexBilinearBlocks;
extern u32 ptexConv32to16;
inline bool LoadEffects();
extern bool s_bWriteDepth;
struct SHADERHEADER
{
@ -219,7 +228,7 @@ bool ZZshCreateOpenShadersFile() {
// test if ps2hw.fx exists
char tempstr[255];
char curwd[255];
getcwd(curwd, ARRAY_SIZE(curwd));
getcwd(curwd, ArraySize(curwd));
strcpy(tempstr, "/plugins/");
sprintf(EFFECT_NAME, "%sps2hw.fx", tempstr);
@ -506,7 +515,7 @@ void SetupVertexProgramParameters(ZZshProgram prog, int context)
SetupFragmentProgramParameters(&fragment, !!(Index&SH_CONTEXT1), 0); \
} \
inline bool ZeroGS::LoadEffects()
inline bool LoadEffects()
{
assert( s_lpShaderResources != NULL );
@ -530,7 +539,7 @@ inline bool ZeroGS::LoadEffects()
}
// clear the textures
for(u16 i = 0; i < ARRAY_SIZE(ppsTexture); ++i) {
for(u16 i = 0; i < ArraySize(ppsTexture); ++i) {
SAFE_RELEASE_PROG(ppsTexture[i].prog);
ppsTexture[i].prog = NULL;
}
@ -637,7 +646,7 @@ FRAGMENTSHADER* ZZshLoadShadeEffect(int type, int texfilter, int fog, int testae
int index = GET_SHADER_INDEX(type, texfilter, texwrap, fog, s_bWriteDepth, testaem, exactcolor, context, 0);
assert( index < ARRAY_SIZE(ppsTexture) );
assert( index < ArraySize(ppsTexture) );
FRAGMENTSHADER* pf = ppsTexture+index;
if( pbFailed != NULL ) *pbFailed = false;
@ -711,10 +720,10 @@ FRAGMENTSHADER* ZZshLoadShadeEffect(int type, int texfilter, int fog, int testae
SET_PSFILENAME(fragment, name); \
} \
inline bool ZeroGS::LoadEffects()
inline bool LoadEffects()
{
// clear the textures
for(int i = 0; i < ARRAY_SIZE(ppsTexture); ++i) {
for(int i = 0; i < ArraySize(ppsTexture); ++i) {
SAFE_RELEASE_PROG(ppsTexture[i].prog);
}

View File

@ -29,9 +29,11 @@
// Just bitmask for different type of shaders
#define SHADER_REDUCED 1 // equivalent to ps2.0
#define SHADER_ACCURATE 2 // for older cards with less accurate math (ps2.x+)
// For output
const static char* g_pShaders[] = { "full", "reduced", "accurate", "accurate-reduced" };
#include "ZZoglMath.h"
#include "GS.h"
// For output
#define NVIDIA_CG_API
// --------------------------- API abstraction level --------------------------------
@ -54,7 +56,7 @@ inline bool ZZshActiveParameter(ZZshParameter param) {return (param !=NULL); }
#endif // end NVIDIA cg-toolkit API
const static char* g_pPsTexWrap[] = { "-DREPEAT", "-DCLAMP", "-DREGION_REPEAT", NULL };
//const static char* g_pPsTexWrap[] = { "-DREPEAT", "-DCLAMP", "-DREGION_REPEAT", NULL };
enum ZZshShaderType {ZZ_SH_ZERO, ZZ_SH_REGULAR, ZZ_SH_REGULAR_FOG, ZZ_SH_TEXTURE, ZZ_SH_TEXTURE_FOG, ZZ_SH_CRTC};
// We have "compatible" shaders, as RegularFogVS and RegularFogPS. if don't need to wory about incompatible shaders
@ -174,7 +176,6 @@ struct VERTEXSHADER
int ParametersStart, ParametersFinish;
};
namespace ZeroGS {
extern float4 g_vdepth;
extern float4 vlogz;
extern VERTEXSHADER pvsBitBlt;
@ -183,7 +184,6 @@ namespace ZeroGS {
extern FRAGMENTSHADER ppsRegular[4], ppsTexture[NUM_SHADERS];
extern FRAGMENTSHADER ppsCRTC[2], ppsCRTC24[2], ppsCRTCTarg[2];
}
// ------------------------- Functions -------------------------------
@ -225,8 +225,7 @@ extern bool ZZshLoadExtraEffects();
extern FRAGMENTSHADER* ZZshLoadShadeEffect(int type, int texfilter, int fog, int testaem, int exactcolor, const clampInfo& clamp, int context, bool* pbFailed);
namespace ZeroGS {
// only sets a limited amount of state (for Update)
void SetTexVariablesInt(int context, int bilinear, const tex0Info& tex0, bool CheckVB, FRAGMENTSHADER* pfragment, int force);
}
#endif

View File

@ -20,6 +20,7 @@
// Texture and avi saving to file functions
//------------------ Includes
#include "Util.h"
#if defined(_WIN32)
# include <windows.h>
# include <aviUtil.h>
@ -27,9 +28,9 @@
#endif
#include <stdlib.h>
#include "zerogs.h"
#include "targets.h"
#include "Mem.h"
#include "ZZoglShoots.h"
// AVI Capture
int s_avicapturing = 0;
@ -43,7 +44,7 @@ extern "C"
#define HAVE_BOOLEAN
#endif
#include "jpeglib.h" // This library want to be after zerogs.h
#include "jpeglib.h"
}
//------------------ Defines
@ -65,14 +66,14 @@ string strSnapshot;
//------------------ Code
// Set the variables needed to take a snapshot as soon as it is possible
void ZeroGS::SaveSnapshot(const char* filename)
void SaveSnapshot(const char* filename)
{
g_bMakeSnapshot = true;
strSnapshot = filename;
}
// Save the current render target in JPEG or TGA format
bool ZeroGS::SaveRenderTarget(const char* filename, int width, int height, int jpeg)
bool SaveRenderTarget(const char* filename, int width, int height, int jpeg)
{
bool bflip = height < 0;
height = abs(height);
@ -100,7 +101,7 @@ bool ZeroGS::SaveRenderTarget(const char* filename, int width, int height, int j
}
// Save selected texture as TGA
bool ZeroGS::SaveTexture(const char* filename, u32 textarget, u32 tex, int width, int height)
bool SaveTexture(const char* filename, u32 textarget, u32 tex, int width, int height)
{
vector<u32> data(width*height);
glBindTexture(textarget, tex);
@ -112,7 +113,7 @@ bool ZeroGS::SaveTexture(const char* filename, u32 textarget, u32 tex, int width
}
// save image as JPEG
bool ZeroGS::SaveJPEG(const char* filename, int image_width, int image_height, const void* pdata, int quality)
bool SaveJPEG(const char* filename, int image_width, int image_height, const void* pdata, int quality)
{
u8* image_buffer = new u8[image_width * image_height * 3];
u8* psrc = (u8*)pdata;
@ -286,7 +287,7 @@ __attribute__((packed));
#endif
// Save image as TGA
bool ZeroGS::SaveTGA(const char* filename, int width, int height, void* pdata)
bool SaveTGA(const char* filename, int width, int height, void* pdata)
{
int err = 0;
TGA_HEADER hdr;
@ -314,7 +315,7 @@ bool ZeroGS::SaveTGA(const char* filename, int width, int height, void* pdata)
// AVI capture stuff
// AVI start -- set needed global variables
void ZeroGS::StartCapture()
void StartCapture()
{
if (conf.captureAvi()) return;
if (!s_aviinit)
@ -338,7 +339,7 @@ void ZeroGS::StartCapture()
}
// Stop.
void ZeroGS::StopCapture()
void StopCapture()
{
if (!conf.captureAvi()) return;
s_avicapturing = 0;
@ -347,25 +348,25 @@ void ZeroGS::StopCapture()
}
// And capture frame does not work on linux.
void ZeroGS::CaptureFrame()
void CaptureFrame()
{
if ((!s_avicapturing) || (!s_aviinit)) return;
vector<u32> data(nBackbufferWidth*nBackbufferHeight);
glReadPixels(0, 0, nBackbufferWidth, nBackbufferHeight, GL_RGBA, GL_UNSIGNED_BYTE, &data[0]);
vector<u32> data(GLWin.backbuffer.w * GLWin.backbuffer.h);
glReadPixels(0, 0, GLWin.backbuffer.w, GLWin.backbuffer.h, GL_RGBA, GL_UNSIGNED_BYTE, &data[0]);
if (glGetError() != GL_NO_ERROR) return;
#ifdef _WIN32
int fps = SMODE1->CMOD == 3 ? 50 : 60;
bool bSuccess = ADD_FRAME_FROM_DIB_TO_AVI("AAAA", fps, nBackbufferWidth, nBackbufferHeight, 32, &data[0]);
bool bSuccess = ADD_FRAME_FROM_DIB_TO_AVI("AAAA", fps, GLWin.backbuffer.w, GLWin.backbuffer.h, 32, &data[0]);
if (!bSuccess)
{
s_avicapturing = 0;
STOP_AVI();
ZeroGS::AddMessage("Failed to create avi");
ZZAddMessage("Failed to create avi");
return;
}
@ -376,7 +377,7 @@ void ZeroGS::CaptureFrame()
// It's nearly the same as save texture
void
ZeroGS::SaveTex(tex0Info* ptex, int usevid)
SaveTex(tex0Info* ptex, int usevid)
{
vector<u32> data(ptex->tw*ptex->th);
vector<u8> srcdata;
@ -617,7 +618,7 @@ ZeroGS::SaveTex(tex0Info* ptex, int usevid)
// Save the texture and return its file name.
// The name is malloc()ed: do not forget to call free() on it, otherwise it leaks!
char* ZeroGS::NamedSaveTex(tex0Info* ptex, int usevid)
char* NamedSaveTex(tex0Info* ptex, int usevid)
{
SaveTex(ptex, usevid);
char* Name = (char*)malloc(TGA_FILE_NAME_MAX_LENGTH);
@ -631,7 +632,7 @@ char* ZeroGS::NamedSaveTex(tex0Info* ptex, int usevid)
}
// Special function, which is safe to call from any other file, without aviutils problems.
void ZeroGS::Stop_Avi()
void Stop_Avi()
{
#ifdef _WIN32
STOP_AVI();
@ -640,7 +641,7 @@ void ZeroGS::Stop_Avi()
#endif
}
void ZeroGS::Delete_Avi_Capture()
void Delete_Avi_Capture()
{
if (s_aviinit)
{

View File

@ -0,0 +1,35 @@
/* ZZ Open GL graphics plugin
* Copyright (c)2009-2010 zeydlitz@gmail.com, arcum42@gmail.com
* Based on Zerofrog's ZeroGS KOSMOS (c)2005-2008
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
// Public interface of ZZoglShoots.cpp: screenshot, texture dump and AVI
// capture helpers.
#ifndef ZZOGLSHOOTS_H_INCLUDED
#define ZZOGLSHOOTS_H_INCLUDED
// Requests a snapshot: stores filename and raises the "make snapshot" flag.
// The image itself is not written by this call.
void SaveSnapshot(const char* filename);
// Saves the current render target to filename in JPEG or TGA format.
// A negative height requests a vertically flipped image; its absolute value
// is used as the height.
bool SaveRenderTarget(const char* filename, int width, int height, int jpeg);
// Saves the texture tex (bound to the GL target textarget) as a TGA file.
bool SaveTexture(const char* filename, u32 textarget, u32 tex, int width, int height);
// Saves the image in pdata as a JPEG file.
bool SaveJPEG(const char* filename, int width, int height, const void* pdata, int quality);
// Saves the image in pdata as a TGA file.
bool SaveTGA(const char* filename, int width, int height, void* pdata);
// Wrapper around STOP_AVI() (Windows only) that is safe to call from any
// other file without pulling in the aviutil header.
void Stop_Avi();
// Cleans up the AVI capture state if it was initialized.
// NOTE(review): only the `if (s_aviinit)` guard is visible here -- confirm details.
void Delete_Avi_Capture();
// Starts AVI capture; does nothing if conf.captureAvi() is already set.
void StartCapture();
// Stops AVI capture; does nothing if conf.captureAvi() is not set.
void StopCapture();
// Reads back the backbuffer and appends it to the AVI as one frame. No-op
// unless capturing is active and initialized; the AVI write is Windows only.
void CaptureFrame();
#endif // ZZOGLSHOOTS_H_INCLUDED

View File

@ -21,12 +21,13 @@
// VB stands for Visual Buffer, as I think
//------------------- Includes
#include "zerogs.h"
#include "Util.h"
#include "targets.h"
#include "ZZoglVB.h"
#include "GS.h"
#include "Mem.h"
extern float fiTexWidth[2], fiTexHeight[2]; // current tex width and height
using namespace ZeroGS;
// ----------------- Defines
#define MINMAX_SHIFT 3
@ -37,20 +38,20 @@ int maxmin = 608;
// ----------------- Code
// Constructor. Set width and height to 1
ZeroGS::VB::VB()
VB::VB()
{
memset(this, 0, sizeof(ZeroGS::VB));
memset(this, 0, sizeof(VB));
tex0.tw = 1;
tex0.th = 1;
}
// Destructor
ZeroGS::VB::~VB()
VB::~VB()
{
Destroy();
}
void ZeroGS::VB::Destroy()
void VB::Destroy()
{
_aligned_free(pBufferData);
pBufferData = NULL;
@ -65,7 +66,7 @@ int ConstraintReason;
// Return the number of 64-pixel blocks that are guaranteed to fit in memory,
// given gsfb.fbp and tbp (texture base), zbuf.zbp (Z-buffer), frame.fbp
// (previous frame).
inline int ZeroGS::VB::FindMinimalMemoryConstrain(int tbp, int maxpos)
inline int VB::FindMinimalMemoryConstrain(int tbp, int maxpos)
{
int MinConstraint = maxpos;
@ -116,7 +117,7 @@ inline int ZeroGS::VB::FindMinimalMemoryConstrain(int tbp, int maxpos)
// Return the number of 64-pixel words that can be placed in the Z-buffer.
// If no Z-buffer is present, return the old constraint.
inline int ZeroGS::VB::FindZbufferMemoryConstrain(int tbp, int maxpos)
inline int VB::FindZbufferMemoryConstrain(int tbp, int maxpos)
{
int MinConstraint = maxpos;
@ -147,7 +148,7 @@ inline int GetScissorY(int y)
// There are several reasons to limit the height of a frame: the maximum buffer size,
// the size calculated from fbw and fbh, and scissoring.
inline int ZeroGS::VB::FindMinimalHeightConstrain(int maxpos)
inline int VB::FindMinimalHeightConstrain(int maxpos)
{
int MinConstraint = maxpos;
@ -176,7 +177,7 @@ inline int ZeroGS::VB::FindMinimalHeightConstrain(int maxpos)
// 32 bit frames have additional constraints to frame
// maxpos was maximum length of frame at normal constraints
inline void ZeroGS::VB::CheckFrame32bitRes(int maxpos)
inline void VB::CheckFrame32bitRes(int maxpos)
{
int fbh = frame.fbh;
@ -215,7 +216,7 @@ inline void ZeroGS::VB::CheckFrame32bitRes(int maxpos)
// 4Mb memory in 64 bit (4 bytes) words.
// |------------------------|---------------------|----------|----------|---------------------|
// 0 gsfb.fbp zbuff.zpb tbp frame.fbp 2^20/64
inline int ZeroGS::VB::CheckFrameAddConstraints(int tbp)
inline int VB::CheckFrameAddConstraints(int tbp)
{
if (gsfb.fbw <= 0)
{
@ -263,7 +264,7 @@ inline int ZeroGS::VB::CheckFrameAddConstraints(int tbp)
// Check if after resizing new depth target is needed to be used.
// it returns 2 if a new depth target is used.
inline int ZeroGS::VB::CheckFrameResolveDepth(int tbp)
inline int VB::CheckFrameResolveDepth(int tbp)
{
int result = 0;
CDepthTarget* pprevdepth = pdepth;
@ -289,7 +290,7 @@ inline int ZeroGS::VB::CheckFrameResolveDepth(int tbp)
// Check if after resizing, a new render target is needed to be used. Also perform deptarget check.
// Returns 1 if only 1 render target is changed and 3 -- if both.
inline int ZeroGS::VB::CheckFrameResolveRender(int tbp)
inline int VB::CheckFrameResolveRender(int tbp)
{
int result = 0;
@ -335,7 +336,7 @@ inline int ZeroGS::VB::CheckFrameResolveRender(int tbp)
}
// After frame resetting, it is possible that 16 to 32 or 32 to 16 (color bits) conversion should be made.
inline void ZeroGS::VB::CheckFrame16vs32Conversion()
inline void VB::CheckFrame16vs32Conversion()
{
if (prndr->status & CRenderTarget::TS_NeedConvert32)
{
@ -355,9 +356,11 @@ inline void ZeroGS::VB::CheckFrame16vs32Conversion()
}
}
void SetContextTarget(int context);
// A lot of times, the target is too big and overwrites the texture.
// If tbp != 0, use it to bound.
void ZeroGS::VB::CheckFrame(int tbp)
void VB::CheckFrame(int tbp)
{
GL_REPORT_ERRORD();
@ -388,7 +391,7 @@ void ZeroGS::VB::CheckFrame(int tbp)
if ((prndr != NULL) && (prndr->psm != gsfb.psm))
{
// behavior for dest alpha varies
ResetAlphaVariables();
// ResetAlphaVariables();
}
bChanged = CheckFrameResolveRender(tbp);
@ -407,11 +410,11 @@ void ZeroGS::VB::CheckFrame(int tbp)
}
// This is the case, most easy to perform, when nothing was changed
inline void ZeroGS::VB::FlushTexUnchangedClutDontUpdate()
inline void VB::FlushTexUnchangedClutDontUpdate()
{
if (ZZOglGet_cld_TexBits(uNextTex0Data[1]))
{
ZeroGS::texClutWrite(ictx);
texClutWrite(ictx);
// invalidate to make sure target didn't change!
bVarsTexSync = false;
}
@ -419,9 +422,9 @@ inline void ZeroGS::VB::FlushTexUnchangedClutDontUpdate()
// The second easy branch. The storage model does not change, so we don't need to
// update anything except the texture itself.
inline void ZeroGS::VB::FlushTexClutDontUpdate()
inline void VB::FlushTexClutDontUpdate()
{
if (!ZZOglClutStorageUnchanged(uCurTex0Data, uNextTex0Data)) ZeroGS::Flush(ictx);
if (!ZZOglClutStorageUnchanged(uCurTex0Data, uNextTex0Data)) Flush(ictx);
// clut memory isn't going to be loaded so can ignore, but at least update CSA and CPSM!
uCurTex0Data[1] = (uCurTex0Data[1] & CPSM_CSA_NOTMASK) | (uNextTex0Data[1] & CPSM_CSA_BITMASK);
@ -429,14 +432,14 @@ inline void ZeroGS::VB::FlushTexClutDontUpdate()
tex0.csa = ZZOglGet_csa_TexBits(uNextTex0Data[1]);
tex0.cpsm = ZZOglGet_cpsm_TexBits(uNextTex0Data[1]);
ZeroGS::texClutWrite(ictx);
texClutWrite(ictx);
bVarsTexSync = false;
}
// Set texture variables after big change
inline void ZeroGS::VB::FlushTexSetNewVars(u32 psm)
inline void VB::FlushTexSetNewVars(u32 psm)
{
tex0.tbp0 = ZZOglGet_tbp0_TexBits(uNextTex0Data[0]);
tex0.tbw = ZZOglGet_tbw_TexBitsMult(uNextTex0Data[0]);
@ -447,13 +450,13 @@ inline void ZeroGS::VB::FlushTexSetNewVars(u32 psm)
tex0.tcc = ZZOglGet_tcc_TexBits(uNextTex0Data[1]);
tex0.tfx = ZZOglGet_tfx_TexBits(uNextTex0Data[1]);
ZeroGS::fiTexWidth[ictx] = (1 / 16.0f) / tex0.tw;
ZeroGS::fiTexHeight[ictx] = (1 / 16.0f) / tex0.th;
fiTexWidth[ictx] = (1 / 16.0f) / tex0.tw;
fiTexHeight[ictx] = (1 / 16.0f) / tex0.th;
}
// Flush == draw on screen.
// This function makes the VB state consistent before the real Flush.
void ZeroGS::VB::FlushTexData()
void VB::FlushTexData()
{
GL_REPORT_ERRORD();
@ -487,7 +490,7 @@ void ZeroGS::VB::FlushTexData()
}
// Made the full update
ZeroGS::Flush(ictx);
Flush(ictx);
bVarsTexSync = false;
bTexConstsSync = false;
@ -497,7 +500,7 @@ void ZeroGS::VB::FlushTexData()
FlushTexSetNewVars(psm);
if (PSMT_ISCLUT(psm)) ZeroGS::CluttingForFlushedTex(&tex0, uNextTex0Data[1], ictx) ;
if (PSMT_ISCLUT(psm)) CluttingForFlushedTex(&tex0, uNextTex0Data[1], ictx) ;
GL_REPORT_ERRORD();
}
}

View File

@ -0,0 +1,158 @@
/* ZZ Open GL graphics plugin
* Copyright (c)2009-2010 zeydlitz@gmail.com, arcum42@gmail.com
* Based on Zerofrog's ZeroGS KOSMOS (c)2005-2008
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
// Zerogs:VB implementation.
// VB stands for Visual Buffer, as I think
#ifndef ZZOGLVB_H_INCLUDED
#define ZZOGLVB_H_INCLUDED
#include "targets.h"
extern const GLenum primtype[8];
// Per-context drawing state of the GS plus the CPU-side vertex buffer that
// accumulates vertices for that context (two instances exist: vb[0], vb[1]).
//
// NOTE: the save-state code measures this class from its start up to
// nNextFrameHeight (see VBSAVELIMIT), so the order and types of the data
// members must not be changed.
class VB
{
	public:
		// Zero-fills the whole object, then sets tex0.tw and tex0.th to 1.
		VB();
		// Calls Destroy().
		~VB();

		// Releases pBufferData and resets the pointer to NULL.
		void Destroy();

		// Returns true when the incoming primitive (global `prim`) is not
		// compatible with the one the buffer was filled with (curprim) AND the
		// buffer already holds vertices (nCount > 0).
		inline bool CheckPrim()
		{
			static const int PRIMMASK = 0x0e; // for now ignore 0x10 (AA)

			if ((PRIMMASK & prim->_val) != (PRIMMASK & curprim._val) || primtype[prim->prim] != primtype[curprim.prim])
				return nCount > 0;

			return false;
		}

		// Remembers the incoming primitive as the one the buffer is set up for.
		void SetCurrentPrim()
		{
			curprim._val = prim->_val;
			curprim.prim = prim->prim;
		}

		// Re-validates the frame/depth targets; tbp, when non-zero, is used to
		// bound the target so it does not overwrite the texture.
		void CheckFrame(int tbp);

		// context specific state
		Point offset;
		Rect2 scissor;
		tex0Info tex0;
		tex1Info tex1;
		miptbpInfo miptbp0;
		miptbpInfo miptbp1;
		alphaInfo alpha;
		fbaInfo fba;
		clampInfo clamp;
		pixTest test;
		u32 ptexClamp[2];		// textures for x and y dir region clamping

		// Makes the texture state consistent before a real Flush.
		void FlushTexData();

		// Helpers of CheckFrame() and FlushTexData(), defined in ZZoglVB.cpp.
		inline int CheckFrameAddConstraints(int tbp);
		inline void CheckScissors(int maxpos);
		inline void CheckFrame32bitRes(int maxpos);
		inline int FindMinimalMemoryConstrain(int tbp, int maxpos);
		inline int FindZbufferMemoryConstrain(int tbp, int maxpos);
		inline int FindMinimalHeightConstrain(int maxpos);

		inline int CheckFrameResolveRender(int tbp);
		inline void CheckFrame16vs32Conversion();
		inline int CheckFrameResolveDepth(int tbp);

		inline void FlushTexUnchangedClutDontUpdate();
		inline void FlushTexClutDontUpdate();
		inline void FlushTexClutting();
		inline void FlushTexSetNewVars(u32 psm);

		// Doubles the capacity of pBufferData, preserving the first nCount
		// vertices. Init() must have allocated the buffer beforehand.
		void IncreaseVertexBuffer()
		{
			assert(pBufferData != NULL);
			nNumVertices *= 2;

			// Validate *before* copying: the copy below writes nCount vertices
			// into a buffer that holds nNumVertices of them.
			assert(nCount <= nNumVertices);

			VertexGPU* ptemp = (VertexGPU*)_aligned_malloc(sizeof(VertexGPU) * nNumVertices, 256);
			assert(ptemp != NULL);

			memcpy_amd(ptemp, pBufferData, sizeof(VertexGPU) * nCount);
			_aligned_free(pBufferData);
			pBufferData = ptemp;
		}

		// Allocates the vertex buffer for nVerts vertices if none exists yet
		// (an existing buffer is kept as is) and empties it.
		void Init(int nVerts)
		{
			if (pBufferData == NULL && nVerts > 0)
			{
				pBufferData = (VertexGPU*)_aligned_malloc(sizeof(VertexGPU) * nVerts, 256);
				nNumVertices = nVerts;
			}

			nCount = 0;
		}

		u8 bNeedFrameCheck;
		u8 bNeedZCheck;
		u8 bNeedTexCheck;
		u8 dummy0;

		union
		{
			struct
			{
				u8 bTexConstsSync; // only pixel shader constants that context owns
				u8 bVarsTexSync; // texture info
				u8 bVarsSetTarg;
				u8 dummy1;
			};

			u32 bSyncVars; // the four flags above, accessible as one word
		};

		int ictx;
		VertexGPU* pBufferData; // current allocated data

		int nNumVertices;	// size of pBufferData in terms of VertexGPU objects
		int nCount;
		primInfo curprim;	// the previous prim the current buffers are set to

		zbufInfo zbuf;
		frameInfo gsfb; // the real info set by FRAME cmd
		frameInfo frame;
		int zprimmask; // zmask for incoming points

		union
		{
			u32 uCurTex0Data[2]; // current tex0 data
			GIFRegTEX0 uCurTex0;
		};

		u32 uNextTex0Data[2]; // tex0 data that has to be applied if bNeedTexCheck is 1

		//int nFrameHeights[8];	// frame heights for the past frame changes
		int nNextFrameHeight;

		CMemoryTarget* pmemtarg; // the current mem target set
		CRenderTarget* prndr;
		CDepthTarget* pdepth;
};
// VB variables
extern VB vb[2];
#endif // ZZOGLVB_H_INCLUDED

View File

@ -1,480 +0,0 @@
/******************************************************************************
Copyright (c) 2001 Advanced Micro Devices, Inc.
LIMITATION OF LIABILITY: THE MATERIALS ARE PROVIDED *AS IS* WITHOUT ANY
EXPRESS OR IMPLIED WARRANTY OF ANY KIND INCLUDING WARRANTIES OF MERCHANTABILITY,
NONINFRINGEMENT OF THIRD-PARTY INTELLECTUAL PROPERTY, OR FITNESS FOR ANY
PARTICULAR PURPOSE. IN NO EVENT SHALL AMD OR ITS SUPPLIERS BE LIABLE FOR ANY
DAMAGES WHATSOEVER (INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF PROFITS,
BUSINESS INTERRUPTION, LOSS OF INFORMATION) ARISING OUT OF THE USE OF OR
INABILITY TO USE THE MATERIALS, EVEN IF AMD HAS BEEN ADVISED OF THE POSSIBILITY
OF SUCH DAMAGES. BECAUSE SOME JURISDICTIONS PROHIBIT THE EXCLUSION OR LIMITATION
OF LIABILITY FOR CONSEQUENTIAL OR INCIDENTAL DAMAGES, THE ABOVE LIMITATION MAY
NOT APPLY TO YOU.
AMD does not assume any responsibility for any errors which may appear in the
Materials nor any responsibility to support or update the Materials. AMD retains
the right to make changes to its test specifications at any time, without notice.
NO SUPPORT OBLIGATION: AMD is not obligated to furnish, support, or make any
further information, software, technical information, know-how, or show-how
available to you.
So that all may benefit from your experience, please report any problems
or suggestions about this software to 3dsdk.support@amd.com
AMD Developer Technologies, M/S 585
Advanced Micro Devices, Inc.
5900 E. Ben White Blvd.
Austin, TX 78741
3dsdk.support@amd.com
******************************************************************************/
#include <assert.h>
/*****************************************************************************
MEMCPY_AMD.CPP
******************************************************************************/
// Very optimized memcpy() routine for AMD Athlon and Duron family.
// This code uses any of FOUR different basic copy methods, depending
// on the transfer size.
// NOTE: Since this code uses MOVNTQ (also known as "Non-Temporal MOV" or
// "Streaming Store"), and also uses the software prefetch instructions,
// be sure you're running on Athlon/Duron or other recent CPU before calling!
#define TINY_BLOCK_COPY 64 // upper limit for movsd type copy
// The smallest copy uses the X86 "movsd" instruction, in an optimized
// form which is an "unrolled loop".
#define IN_CACHE_COPY 2 * 1024 // upper limit for movq/movq copy w/SW prefetch
// Next is a copy that uses the MMX registers to copy 8 bytes at a time,
// also using the "unrolled loop" optimization. This code uses
// the software prefetch instruction to get the data into the cache.
#define UNCACHED_COPY 4 * 1024 // upper limit for movq/movntq w/SW prefetch
// For larger blocks, which will spill beyond the cache, it's faster to
// use the Streaming Store instruction MOVNTQ. This write instruction
// bypasses the cache and writes straight to main memory. This code also
// uses the software prefetch instruction to pre-read the data.
// USE 64 * 1024 FOR THIS VALUE IF YOU'RE ALWAYS FILLING A "CLEAN CACHE"
#define BLOCK_PREFETCH_COPY infinity // no limit for movq/movntq w/block prefetch
#define CACHEBLOCK 80h // number of 64-byte blocks (cache lines) for block prefetch
// For the largest size blocks, a special technique called Block Prefetch
// can be used to accelerate the read operations. Block Prefetch reads
// one address per cache line, for a series of cache lines, in a short loop.
// This is faster than using software prefetch. The technique is great for
// getting maximum read bandwidth, especially in DDR memory systems.
//#include <stddef.h>
// Inline assembly syntax for use with Visual C++
#ifdef _WIN32
#include <windows.h>
#endif
#if defined(_MSC_VER) && !defined(__x86_64__)
#include "PS2Etypes.h"
extern "C" {
// Copies n bytes from src to dest and returns dest (MSVC 32-bit inline asm).
//
// Strategy, chosen by size:
//  * n < TINY_BLOCK_COPY: no MMX; the dwords are copied by jumping into an
//    unrolled movsd table, then the last 0-3 bytes with rep movsb.
//  * otherwise the destination is first aligned to 8 bytes with single-byte
//    moves (skipped for sizes above 32K and up to 64K), then 64-byte blocks are
//    copied with one of three loops, picked from the 64-byte block count:
//      - below IN_CACHE_COPY/64:  movq loads/stores with prefetchnta,
//      - below UNCACHED_COPY/64:  movq loads + movntq streaming stores,
//      - otherwise:               block prefetch of CACHEBLOCK cache lines,
//                                 then movntq streaming stores; leftovers fall
//                                 back to the streaming loop.
//    The remaining dwords/bytes are handled as in the tiny case.
//
// There is no C return statement: the return value is left in EAX by the last
// instruction of the asm block. The MMX state is cleared (emms) and the
// streaming stores are fenced (sfence) before returning.
void * memcpy_amd(void *dest, const void *src, size_t n)
{
__asm {
mov ecx, [n] ; number of bytes to copy
mov edi, [dest] ; destination
mov esi, [src] ; source
mov ebx, ecx ; keep a copy of count
cld
cmp ecx, TINY_BLOCK_COPY
jb $memcpy_ic_3 ; tiny? skip mmx copy
cmp ecx, 32*1024 ; don't align between 32k-64k because
jbe $memcpy_do_align ; it appears to be slower
cmp ecx, 64*1024
jbe $memcpy_align_done
$memcpy_do_align:
mov ecx, 8 ; a trick that's faster than rep movsb...
sub ecx, edi ; align destination to qword
and ecx, 111b ; get the low bits
sub ebx, ecx ; update copy count
neg ecx ; set up to jump into the array
add ecx, offset $memcpy_align_done
jmp ecx ; jump to array of movsb's
align 4
movsb
movsb
movsb
movsb
movsb
movsb
movsb
movsb
$memcpy_align_done: ; destination is dword aligned
mov ecx, ebx ; number of bytes left to copy
shr ecx, 6 ; get 64-byte block count
jz $memcpy_ic_2 ; finish the last few bytes
cmp ecx, IN_CACHE_COPY/64 ; too big 4 cache? use uncached copy
jae $memcpy_uc_test
// This is small block copy that uses the MMX registers to copy 8 bytes
// at a time. It uses the "unrolled loop" optimization, and also uses
// the software prefetch instruction to get the data into the cache.
align 16
$memcpy_ic_1: ; 64-byte block copies, in-cache copy
prefetchnta [esi + (200*64/34+192)] ; start reading ahead
movq mm0, [esi+0] ; read 64 bits
movq mm1, [esi+8]
movq [edi+0], mm0 ; write 64 bits
movq [edi+8], mm1 ; note: the normal movq writes the
movq mm2, [esi+16] ; data to cache; a cache line will be
movq mm3, [esi+24] ; allocated as needed, to store the data
movq [edi+16], mm2
movq [edi+24], mm3
movq mm0, [esi+32]
movq mm1, [esi+40]
movq [edi+32], mm0
movq [edi+40], mm1
movq mm2, [esi+48]
movq mm3, [esi+56]
movq [edi+48], mm2
movq [edi+56], mm3
add esi, 64 ; update source pointer
add edi, 64 ; update destination pointer
dec ecx ; count down
jnz $memcpy_ic_1 ; last 64-byte block?
$memcpy_ic_2:
mov ecx, ebx ; has valid low 6 bits of the byte count
$memcpy_ic_3:
shr ecx, 2 ; dword count
and ecx, 1111b ; only look at the "remainder" bits
neg ecx ; set up to jump into the array
add ecx, offset $memcpy_last_few
jmp ecx ; jump to array of movsd's
$memcpy_uc_test:
cmp ecx, UNCACHED_COPY/64 ; big enough? use block prefetch copy
jae $memcpy_bp_1
$memcpy_64_test:
or ecx, ecx ; tail end of block prefetch will jump here
jz $memcpy_ic_2 ; no more 64-byte blocks left
// For larger blocks, which will spill beyond the cache, it's faster to
// use the Streaming Store instruction MOVNTQ. This write instruction
// bypasses the cache and writes straight to main memory. This code also
// uses the software prefetch instruction to pre-read the data.
align 16
$memcpy_uc_1: ; 64-byte blocks, uncached copy
prefetchnta [esi + (200*64/34+192)] ; start reading ahead
movq mm0,[esi+0] ; read 64 bits
add edi,64 ; update destination pointer
movq mm1,[esi+8]
add esi,64 ; update source pointer
movq mm2,[esi-48]
movntq [edi-64], mm0 ; write 64 bits, bypassing the cache
movq mm0,[esi-40] ; note: movntq also prevents the CPU
movntq [edi-56], mm1 ; from READING the destination address
movq mm1,[esi-32] ; into the cache, only to be over-written
movntq [edi-48], mm2 ; so that also helps performance
movq mm2,[esi-24]
movntq [edi-40], mm0
movq mm0,[esi-16]
movntq [edi-32], mm1
movq mm1,[esi-8]
movntq [edi-24], mm2
movntq [edi-16], mm0
dec ecx
movntq [edi-8], mm1
jnz $memcpy_uc_1 ; last 64-byte block?
jmp $memcpy_ic_2 ; almost done
// For the largest size blocks, a special technique called Block Prefetch
// can be used to accelerate the read operations. Block Prefetch reads
// one address per cache line, for a series of cache lines, in a short loop.
// This is faster than using software prefetch. The technique is great for
// getting maximum read bandwidth, especially in DDR memory systems.
$memcpy_bp_1: ; large blocks, block prefetch copy
cmp ecx, CACHEBLOCK ; big enough to run another prefetch loop?
jl $memcpy_64_test ; no, back to regular uncached copy
mov eax, CACHEBLOCK / 2 ; block prefetch loop, unrolled 2X
add esi, CACHEBLOCK * 64 ; move to the top of the block
align 16
$memcpy_bp_2:
mov edx, [esi-64] ; grab one address per cache line
mov edx, [esi-128] ; grab one address per cache line
sub esi, 128 ; go reverse order to suppress HW prefetcher
dec eax ; count down the cache lines
jnz $memcpy_bp_2 ; keep grabbing more lines into cache
mov eax, CACHEBLOCK ; now that it's in cache, do the copy
align 16
$memcpy_bp_3:
movq mm0, [esi ] ; read 64 bits
movq mm1, [esi+ 8]
movq mm2, [esi+16]
movq mm3, [esi+24]
movq mm4, [esi+32]
movq mm5, [esi+40]
movq mm6, [esi+48]
movq mm7, [esi+56]
add esi, 64 ; update source pointer
movntq [edi ], mm0 ; write 64 bits, bypassing cache
movntq [edi+ 8], mm1 ; note: movntq also prevents the CPU
movntq [edi+16], mm2 ; from READING the destination address
movntq [edi+24], mm3 ; into the cache, only to be over-written,
movntq [edi+32], mm4 ; so that also helps performance
movntq [edi+40], mm5
movntq [edi+48], mm6
movntq [edi+56], mm7
add edi, 64 ; update dest pointer
dec eax ; count down
jnz $memcpy_bp_3 ; keep copying
sub ecx, CACHEBLOCK ; update the 64-byte block count
jmp $memcpy_bp_1 ; keep processing chunks
// The smallest copy uses the X86 "movsd" instruction, in an optimized
// form which is an "unrolled loop". Then it handles the last few bytes.
align 4
movsd
movsd ; perform last 1-15 dword copies
movsd
movsd
movsd
movsd
movsd
movsd
movsd
movsd ; perform last 1-7 dword copies
movsd
movsd
movsd
movsd
movsd
movsd
$memcpy_last_few: ; dword aligned from before movsd's
mov ecx, ebx ; has valid low 2 bits of the byte count
and ecx, 11b ; the last few cows must come home
jz $memcpy_final ; no more, let's leave
rep movsb ; the last 1, 2, or 3 bytes
$memcpy_final:
emms ; clean up the MMX state
sfence ; flush the write buffer
mov eax, [dest] ; ret value = destination pointer
}
}
// MMX memcmp implementation (equality test only); size has to be a multiple of 8.
// Returns 0 if the buffers are equal, a nonzero value if they are not equal.
// Unlike the standard memcmp, the result carries no ordering information.
// ~10 times faster than standard memcmp
// (zerofrog)
//
// There is no C-level return statement: the result is left in EAX/AL by the
// inline assembly below.
// Register use inside the asm block:
//   ecx = number of bytes still to compare
//   edx = read pointer into src1
//   esi = read pointer into src2 (saved and restored around the block)
//   eax = byte mask produced by pmovmskb, then the return value
u8 memcmp_mmx(const void* src1, const void* src2, int cmpsize)
{
assert( (cmpsize&7) == 0 );
__asm {
// esi is overwritten below, so preserve the incoming value
push esi
mov ecx, cmpsize
mov edx, src1
mov esi, src2
// fewer than 32 bytes: go straight to the 24/16/8 byte tail handling
cmp ecx, 32
jl Done4
// custom test first 8 to make sure things are ok
// (compares bytes 0-15 while already loading bytes 16-31 of src2)
movq mm0, [esi]
movq mm1, [esi+8]
// pcmpeqd sets every equal 32-bit lane to all ones
pcmpeqd mm0, [edx]
pcmpeqd mm1, [edx+8]
pand mm0, mm1
movq mm2, [esi+16]
// pmovmskb collects the top bit of each of the 8 bytes: 0xff means "all equal"
pmovmskb eax, mm0
movq mm3, [esi+24]
// check if eq
cmp eax, 0xff
je NextComp
mov eax, 1
jmp End
NextComp:
// bytes 16-31 of the first 32-byte chunk
pcmpeqd mm2, [edx+16]
pcmpeqd mm3, [edx+24]
pand mm2, mm3
pmovmskb eax, mm2
// the first 32 bytes are consumed either way
sub ecx, 32
add esi, 32
add edx, 32
// check if eq
cmp eax, 0xff
je ContinueTest
mov eax, 1
jmp End
// NOTE(review): the next two instructions follow an unconditional jmp and carry
// no label, so they can never execute; ContinueTest performs the same check.
cmp ecx, 64
jl Done8
// main loop: compare 64 bytes per iteration
Cmp8:
movq mm0, [esi]
movq mm1, [esi+8]
movq mm2, [esi+16]
movq mm3, [esi+24]
movq mm4, [esi+32]
movq mm5, [esi+40]
movq mm6, [esi+48]
movq mm7, [esi+56]
pcmpeqd mm0, [edx]
pcmpeqd mm1, [edx+8]
pcmpeqd mm2, [edx+16]
pcmpeqd mm3, [edx+24]
// fold all eight comparison results into mm0
pand mm0, mm1
pcmpeqd mm4, [edx+32]
pand mm0, mm2
pcmpeqd mm5, [edx+40]
pand mm0, mm3
pcmpeqd mm6, [edx+48]
pand mm0, mm4
pcmpeqd mm7, [edx+56]
pand mm0, mm5
pand mm0, mm6
pand mm0, mm7
pmovmskb eax, mm0
// check if eq
cmp eax, 0xff
je Continue
mov eax, 1
jmp End
Continue:
sub ecx, 64
add esi, 64
add edx, 64
ContinueTest:
// keep looping while at least one full 64-byte chunk is left
cmp ecx, 64
jge Cmp8
Done8:
// fewer than 64 bytes left; bit 0x20 tells whether a 32-byte chunk remains
test ecx, 0x20
jz Done4
movq mm0, [esi]
movq mm1, [esi+8]
movq mm2, [esi+16]
movq mm3, [esi+24]
pcmpeqd mm0, [edx]
pcmpeqd mm1, [edx+8]
pcmpeqd mm2, [edx+16]
pcmpeqd mm3, [edx+24]
pand mm0, mm1
pand mm0, mm2
pand mm0, mm3
pmovmskb eax, mm0
sub ecx, 32
add esi, 32
add edx, 32
// check if eq
cmp eax, 0xff
je Done4
mov eax, 1
jmp End
Done4:
// tail: for a non-negative multiple of 8, ecx is now 0, 8, 16 or 24
cmp ecx, 24
jne Done2
movq mm0, [esi]
movq mm1, [esi+8]
movq mm2, [esi+16]
pcmpeqd mm0, [edx]
pcmpeqd mm1, [edx+8]
pcmpeqd mm2, [edx+16]
pand mm0, mm1
pand mm0, mm2
pmovmskb eax, mm0
// check if eq
cmp eax, 0xff
// al = 1 if any byte differed, 0 otherwise (only the low byte is the u8 result)
setne al
jmp End
Done2:
cmp ecx, 16
jne Done1
movq mm0, [esi]
movq mm1, [esi+8]
pcmpeqd mm0, [edx]
pcmpeqd mm1, [edx+8]
pand mm0, mm1
pmovmskb eax, mm0
// check if eq
cmp eax, 0xff
setne al
jmp End
Done1:
// last 8 bytes are compared with plain integer registers instead of MMX
cmp ecx, 8
jne Done
mov eax, [esi]
// esi is reused as a data register here; it is not needed as a pointer any more
mov esi, [esi+4]
cmp eax, [edx]
je Next
mov eax, 1
jmp End
Next:
cmp esi, [edx+4]
setne al
jmp End
Done:
// nothing left to compare: the buffers are equal
xor eax, eax
End:
pop esi
// leave the MMX state so that x87 floating point code can run again
emms
}
}
}
#else // _MSC_VER
// assume gcc or mingw or win x64
#include <memory.h>
#include <string.h>
#include "PS2Etypes.h"
#include "Utilities/MemcpyFast.h"
//void * memcpy_amd(void *dest, const void *src, size_t n)
//{
// //memcpy(dest, src, n);
// memcpy_fast(dest, src, n);
// return dest;
//}
#define memcpy_amd memcpy_fast
#endif

File diff suppressed because it is too large Load Diff

View File

@ -22,18 +22,214 @@
#define TARGET_VIRTUAL_KEY 0x80000000
#include "PS2Edefs.h"
#include <list>
#include <map>
#include "GS.h"
#include "ZZGl.h"
//#include "ZZoglVB.h"
#ifndef GL_TEXTURE_RECTANGLE
#define GL_TEXTURE_RECTANGLE GL_TEXTURE_RECTANGLE_NV
#endif
namespace ZeroGS
#define VB_BUFFERSIZE 0x4000
// all textures have this width
extern int GPU_TEXWIDTH;
extern float g_fiGPU_TEXWIDTH;
#define MASKDIVISOR 0 // Used for decrement bitwise mask texture size if 1024 is too big
#define GPU_TEXMASKWIDTH (1024 >> MASKDIVISOR) // bitwise mask width for region repeat mode
// manages render-to-texture targets
// Base class of CDepthTarget. Most operations are virtual so that the depth
// variant can override them; IsDepth() tells the two apart at run time.
class CRenderTarget
{
// NOTE(review): the declaration below looks like a leftover of the old
// namespace-level forward declaration of GetFrameKey (it still takes a VB&);
// as written it declares a private member function. Confirm it is intended.
inline u32 GetFrameKey(int fbp, int fbw, VB& curvb);
public:
CRenderTarget();
virtual ~CRenderTarget();
virtual bool Create(const frameInfo& frame);
virtual void Destroy();
// set the GPU_POSXY variable, scissor rect, and current render target
void SetTarget(int fbplocal, const Rect2& scissor, int context);
void SetViewport();
// copies/creates the feedback contents
// (only does work when no feedback texture exists yet or TS_FeedbackReady is not set)
inline void CreateFeedback()
{
if (ptexFeedback == 0 || !(status&TS_FeedbackReady))
_CreateFeedback();
}
virtual void Resolve();
virtual void Resolve(int startrange, int endrange); // resolves only in the allowed range
virtual void Update(int context, CRenderTarget* pdepth);
virtual void ConvertTo32(); // converts a psm==2 target, to a psm==0
virtual void ConvertTo16(); // converts a psm==0 target, to a psm==2
// false for colour targets; CDepthTarget overrides this to return true
virtual bool IsDepth() { return false; }
void SetRenderTarget(int targ);
void* psys; // system data used for comparison
u32 ptex;
int fbp, fbw, fbh, fbhCalc; // if fbp is negative, virtual target (not mapped to any real addr)
int start, end; // in bytes
u32 lastused; // time stamp since last used
float4 vposxy;
u32 fbm;
u16 status; // bitwise OR of TargetStatus flags
u8 psm;
u8 resv0;
Rect scissorrect;
u8 created; // Check for object destruction/creating for r201.
//int startresolve, endresolve;
u32 nUpdateTarg; // use this target to update the texture if non 0 (one time only)
// this is optionally used when feedback effects are used (render target is used as a texture when rendering to itself)
u32 ptexFeedback;
// bit flags stored in 'status'
enum TargetStatus
{
TS_Resolved = 1,
TS_NeedUpdate = 2,
TS_Virtual = 4, // currently not mapped to memory
TS_FeedbackReady = 8, // feedback effect is ready and doesn't need to be updated
TS_NeedConvert32 = 16,
TS_NeedConvert16 = 32,
};
inline float4 DefaultBitBltPos();
inline float4 DefaultBitBltTex();
private:
// does the actual work for CreateFeedback()
void _CreateFeedback();
inline bool InitialiseDefaultTexture(u32 *p_ptr, int fbw, int fbh) ;
};
// manages zbuffers
// Specialisation of CRenderTarget: overrides creation, destruction, resolve and
// update, and reports itself through IsDepth().
class CDepthTarget : public CRenderTarget
{
public:
CDepthTarget();
virtual ~CDepthTarget();
virtual bool Create(const frameInfo& frame);
virtual void Destroy();
virtual void Resolve();
virtual void Resolve(int startrange, int endrange); // resolves only in the allowed range
// note: the second argument is the colour target here (the base class takes the depth target)
virtual void Update(int context, CRenderTarget* prndr);
// always true: lets callers holding a CRenderTarget* detect a depth target
virtual bool IsDepth() { return true; }
void SetDepthStencilSurface();
u32 pdepth; // 24 bit, will contain the stencil buffer if possible
u32 pstencil; // if not 0, contains the stencil buffer
int icount; // internal counter
};
// manages contiguous chunks of memory (width is always 1024)
class CMemoryTarget
{
public:
struct TEXTURE
{
inline TEXTURE() : tex(0), memptr(NULL), ref(0) {}
inline ~TEXTURE() { glDeleteTextures(1, &tex); _aligned_free(memptr); }
u32 tex;
u8* memptr; // GPU memory used for comparison
int ref;
};
inline CMemoryTarget() : ptex(NULL), starty(0), height(0), realy(0), realheight(0), usedstamp(0), psm(0), cpsm(0), channels(0), clearminy(0), clearmaxy(0), validatecount(0), clut(NULL), clutsize(0) {}
inline CMemoryTarget(const CMemoryTarget& r)
{
ptex = r.ptex;
if (ptex != NULL) ptex->ref++;
starty = r.starty;
height = r.height;
realy = r.realy;
realheight = r.realheight;
usedstamp = r.usedstamp;
psm = r.psm;
cpsm = r.cpsm;
clut = r.clut;
clearminy = r.clearminy;
clearmaxy = r.clearmaxy;
widthmult = r.widthmult;
texH = r.texH;
texW = r.texW;
channels = r.channels;
validatecount = r.validatecount;
fmt = r.fmt;
}
~CMemoryTarget() { Destroy(); }
inline void Destroy()
{
if (ptex != NULL && ptex->ref > 0)
{
if (--ptex->ref <= 0) delete ptex;
}
ptex = NULL;
_aligned_free(clut);
clut = NULL;
clutsize = 0;
}
// returns true if clut data is synced
bool ValidateClut(const tex0Info& tex0);
// returns true if tex data is synced
bool ValidateTex(const tex0Info& tex0, int starttex, int endtex, bool bDeleteBadTex);
// realy is offset in pixels from start of valid region
// so texture in memory is [realy,starty+height]
// valid texture is [starty,starty+height]
// offset in mem [starty-realy, height]
TEXTURE* ptex; // can be 16bit
int starty, height; // assert(starty >= realy)
int realy, realheight; // this is never touched once allocated
// realy is start pointer of data in 4M data block (start) and size (end-start).
u32 usedstamp;
u8 psm, cpsm; // texture and clut format. For psm, only 16bit/32bit differentiation matters
u32 fmt;
int widthmult; // Either 1 or 2.
int channels; // The number of pixels per PSM format word. channels == PIXELS_PER_WORD(psm)
// This is the real drawing size in pixels of the texture in renderbuffer.
int texW; // (realheight + widthmult - 1)/widthmult == realheight or [(realheight+1)/2]
int texH; // GPU_TEXWIDTH *widthmult * channels;
int clearminy, clearmaxy; // when maxy > 0, need to check for clearing
int validatecount; // count how many times has been validated, if too many, destroy
u8* clut; // Clut texture data. Null otherwise
int clutsize; // size of the clut array. 0 otherwise
};
inline u32 GetFrameKey(int fbp, int fbw);
// manages render targets
class CRenderTargetMngr
{
public:
@ -54,22 +250,22 @@ class CRenderTargetMngr
bool isFound(const frameInfo& frame, MAPTARGETS::iterator& it, u32 opts, u32 key, int maxposheight);
CRenderTarget* GetTarg(const frameInfo& frame, u32 Options, int maxposheight);
inline CRenderTarget* GetTarg(int fbp, int fbw, VB& curvb)
inline CRenderTarget* GetTarg(int fbp, int fbw)
{
MAPTARGETS::iterator it = mapTargets.find(GetFrameKey(fbp, fbw, curvb));
MAPTARGETS::iterator it = mapTargets.find(GetFrameKey(fbp, fbw));
/* if (fbp == 0x3600 && fbw == 0x100 && it == mapTargets.end())
{
printf("%x\n", GetFrameKey(fbp, fbw, curvb)) ;
printf("%x %x\n", fbp, fbw);
ZZLog::Debug_Log("%x", GetFrameKey(fbp, fbw)) ;
ZZLog::Debug_Log("%x %x", fbp, fbw);
for(MAPTARGETS::iterator it1 = mapTargets.begin(); it1 != mapTargets.end(); ++it1)
printf ("\t %x %x %x %x\n", it1->second->fbw, it1->second->fbh, it1->second->psm, it1->second->fbp);
ZZLog::Debug_Log("\t %x %x %x %x", it1->second->fbw, it1->second->fbh, it1->second->psm, it1->second->fbp);
}*/
return it != mapTargets.end() ? it->second : NULL;
}
// gets all targets with a range
void GetTargs(int start, int end, list<ZeroGS::CRenderTarget*>& listTargets) const;
void GetTargs(int start, int end, list<CRenderTarget*>& listTargets) const;
// resolves all targets within a range
__forceinline void Resolve(int start, int end);
@ -125,9 +321,9 @@ class CMemoryTargetMngr
CMemoryTargetMngr() : curstamp(0) {}
CMemoryTarget* GetMemoryTarget(const tex0Info& tex0, int forcevalidate); // pcbp is pointer to start of clut
CMemoryTarget* SearchExistTarget(int start, int end, int nClutOffset, int clutsize, const tex0Info& tex0, int forcevalidate);
CMemoryTarget* SearchExistTarget(int start, int end, int clutsize, const tex0Info& tex0, int forcevalidate);
CMemoryTarget* ClearedTargetsSearch(int fmt, int widthmult, int channels, int height);
int CompareTarget(list<CMemoryTarget>::iterator& it, const tex0Info& tex0, int clutsize, int nClutOffset);
int CompareTarget(list<CMemoryTarget>::iterator& it, const tex0Info& tex0, int clutsize);
void Destroy(); // destroy all targs
@ -140,7 +336,7 @@ class CMemoryTargetMngr
private:
list<CMemoryTarget>::iterator DestroyTargetIter(list<CMemoryTarget>::iterator& it);
void GetClutVariables(int& nClutOffset, int& clutsize, const tex0Info& tex0);
void GetClutVariables(int& clutsize, const tex0Info& tex0);
void GetMemAddress(int& start, int& end, const tex0Info& tex0);
};
@ -202,6 +398,7 @@ class CRangeManager
extern CRenderTargetMngr s_RTs, s_DepthRTs;
extern CBitwiseTextureMngr s_BitwiseTextures;
extern CMemoryTargetMngr g_MemTargs;
extern CRangeManager s_RangeMngr; // manages overwritten memory
//extern u8 s_AAx, s_AAy;
extern Point AA;
@ -218,15 +415,15 @@ inline int RH(int tbh)
return (tbh << AA.y);
}
/* inline void CreateTargetsList(int start, int end, list<ZeroGS::CRenderTarget*>& listTargs) {
/* inline void CreateTargetsList(int start, int end, list<CRenderTarget*>& listTargs) {
s_DepthRTs.GetTargs(start, end, listTargs);
s_RTs.GetTargs(start, end, listTargs);
}*/
// This pattern of functions is called 3 times, so I add creating Targets list into one.
inline list<ZeroGS::CRenderTarget*> CreateTargetsList(int start, int end)
inline list<CRenderTarget*> CreateTargetsList(int start, int end)
{
list<ZeroGS::CRenderTarget*> listTargs;
list<CRenderTarget*> listTargs;
s_DepthRTs.GetTargs(start, end, listTargs);
s_RTs.GetTargs(start, end, listTargs);
return listTargs;
@ -250,7 +447,7 @@ inline u32 GetFrameKey(CRenderTarget* frame)
return (((frame->fbw) << 16) | (frame->fbp));
}
inline u32 GetFrameKey(int fbp, int fbw, VB& curvb)
inline u32 GetFrameKey(int fbp, int fbw)
{
return (((fbw) << 16) | (fbp));
}
@ -290,8 +487,6 @@ inline u32 GetFrameKeyDummy(CRenderTarget* frame)
return GetFrameKeyDummy(frame->fbp, frame->fbw, frame->fbh, frame->psm);
}
} // End of namespace
#include "Mem.h"
static __forceinline void DrawTriangleArray()
@ -316,6 +511,11 @@ static __forceinline void FBTexture(int attach, int id = 0)
GL_REPORT_ERRORD();
}
// Rebinds attachment 'index' through FBTexture with texture id 0
// (FBTexture's default id; presumably this detaches whatever was bound there).
static __forceinline void ResetRenderTarget(int index)
{
	FBTexture(index, 0);
}
static __forceinline void Texture2D(GLint iFormat, GLint width, GLint height, GLenum format, GLenum type, const GLvoid* pixels)
{
glTexImage2D(GL_TEXTURE_2D, 0, iFormat, width, height, 0, format, type, pixels);
@ -389,4 +589,19 @@ static __forceinline void setRectWrap2(GLint type)
glTexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_WRAP_T, type);
}
//------------------------ Inlines -------------------------
// Calculate maximum height for target
// fbp/fbw are the frame buffer pointer and width, psm the pixel storage mode.
// Returns 0 when fbw is 0; otherwise the height rounded down to a multiple of 32,
// doubled when PSMT_ISHALF(psm) holds.
inline int get_maxheight(int fbp, int fbw, int psm)
{
	// guard the division below
	if (fbw == 0) return 0;

	// presumably 0x00100000 is the memory size and 64 the page size in words - TODO confirm
	const int rows = (0x00100000 - 64 * fbp) / fbw;
	const int aligned = rows & ~0x1f;

	return PSMT_ISHALF(psm) ? aligned * 2 : aligned;
}
#endif

View File

@ -594,6 +594,7 @@ void __fastcall Frame16SwizzleBlock16ZA4_c(u16* dst, Vector_16F* src, int srcpit
// }
//}
#if 0
extern "C" void __fastcall WriteCLUT_T32_I8_CSM1_sse2(u32* vm, u32* clut)
{
__m128i* src = (__m128i*)vm;
@ -626,6 +627,7 @@ extern "C" void __fastcall WriteCLUT_T32_I8_CSM1_sse2(u32* vm, u32* clut)
}
}
extern "C" void __fastcall WriteCLUT_T32_I4_CSM1_sse2(u32* vm, u32* clut)
{
__m128i* src = (__m128i*)vm;
@ -642,13 +644,116 @@ extern "C" void __fastcall WriteCLUT_T32_I4_CSM1_sse2(u32* vm, u32* clut)
_mm_store_si128(&dst[3], _mm_unpackhi_epi64(r2, r3));
}
static const __aligned16 int s_clut_16bits_mask[4] = { 0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff };
static const __aligned16 int s_clut16mask2[4] = { 0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff };
static const __aligned16 int s_clut16mask[8] = { 0xffff0000, 0xffff0000, 0xffff0000, 0xffff0000,
0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff
};
extern "C" void __fastcall WriteCLUT_T16_I4_CSM1_sse2(u32* vm, u32* clut)
// Copies 16 CLUT entries of 16 bits each from swizzled GS memory (vm) into the
// clut buffer, merging them with the halves of the clut words that must survive.
//
//   CSA_0_15        true  -> the new entries land in the lower 16 bits of each clut word
//                   false -> the new entries land in the upper 16 bits (CSA 16-31)
//   HIGH_16BITS_VM  true  -> take the upper 16 bits of each vm word
//                   false -> take the lower 16 bits of each vm word
//
// Both pointers are accessed with aligned 128-bit loads/stores (4 x 16 bytes each).
template<bool CSA_0_15, bool HIGH_16BITS_VM>
void __fastcall WriteCLUT_T16_I4_CSM1_core_sse2(u32* vm, u32* clut)
{
	const __m128i low16 = _mm_load_si128((__m128i*)s_clut_16bits_mask);
	__m128i* const vm_q = (__m128i*)vm;
	__m128i* const clut_q = (__m128i*)clut;

	// Step 1: read the four vm quadwords and move the wanted 16-bit half of each
	// word into the half of the clut word it is going to replace.
	// Word order in memory: q0 = 9 8 1 0, q1 = 11 10 3 2, q2 = 13 12 5 4, q3 = 15 14 7 6
	__m128i src[4];

	for (int q = 0; q < 4; ++q)
	{
		const __m128i raw = _mm_load_si128(vm_q + q);

		if (HIGH_16BITS_VM)
		{
			// high -> low needs a shift; high -> high only drops the lower half
			src[q] = CSA_0_15 ? _mm_srli_epi32(raw, 16) : _mm_andnot_si128(low16, raw);
		}
		else
		{
			// low -> low only drops the upper half; low -> high needs a shift
			src[q] = CSA_0_15 ? _mm_and_si128(low16, raw) : _mm_slli_epi32(raw, 16);
		}
	}

	// Step 2: unswizzle the data into linear entry order
	__m128i row[4];
	row[0] = _mm_unpacklo_epi32(src[0], src[1]); //  3  2  1  0
	row[1] = _mm_unpacklo_epi32(src[2], src[3]); //  7  6  5  4
	row[2] = _mm_unpackhi_epi32(src[0], src[1]); // 11 10  9  8
	row[3] = _mm_unpackhi_epi32(src[2], src[3]); // 15 14 13 12

	// Step 3: keep the untouched half of the old clut words and merge in the new half
	for (int q = 0; q < 4; ++q)
	{
		const __m128i old = _mm_load_si128(clut_q + q);
		const __m128i kept = CSA_0_15 ? _mm_andnot_si128(low16, old)  // drop lower 16 bits
		                              : _mm_and_si128(low16, old);    // drop higher 16 bits

		_mm_store_si128(clut_q + q, _mm_or_si128(kept, row[q]));
	}
}
// Writes one 16-entry, 16-bit CLUT block from vm into slot 'csa' of g_pbyGSClut.
// Slots are 64 bytes apart and indexed by (csa & 15); slots 0-15 use the lower
// 16 bits of each clut word, slots above 15 the upper 16 bits. The lower 16 bits
// of every vm word are the source in both cases.
extern "C" void __fastcall WriteCLUT_T16_I4_CSM1_sse2(u32* vm, u32 csa)
{
	u32* const dst = (u32*)(g_pbyGSClut + 64*(csa & 15));

	if (csa <= 15) {
		WriteCLUT_T16_I4_CSM1_core_sse2<true, false>(vm, dst);
		return;
	}

	WriteCLUT_T16_I4_CSM1_core_sse2<false, false>(vm, dst);
}
extern "C" void __fastcall WriteCLUT_T16_I4_CSM1_sse2_old(u32* vm, u32* clut)
{
#define YET_ANOTHER_INTRINSIC
#ifdef YET_ANOTHER_INTRINSIC
@ -677,7 +782,7 @@ extern "C" void __fastcall WriteCLUT_T16_I4_CSM1_sse2(u32* vm, u32* clut)
// Note: MSVC complains about direct c-cast...
// vm2 = (__m128i)_mm_shuffle_ps((__m128)vm2, (__m128)vm3, 0x88);
__m128 vm2_f = (_mm_shuffle_ps((__m128&)vm2, (__m128&)vm3, 0x88)); // 14 12 10 8 6 4 2 0
__m128 vm2_f = (_mm_shuffle_ps((__m128&)vm2, (__m128&)vm3, 0x88));
vm2 = (__m128i&)vm2_f;
vm2 = _mm_shuffle_epi32(vm2, 0xD8);
@ -997,8 +1102,47 @@ End:
#endif
}
// Writes a 256-entry, 16-bit CLUT from swizzled GS memory (vm) into g_pbyGSClut,
// starting at slot 'csa'.
//
// Layout handled here: one vm column (16 words) carries two clut rows, the first
// in the lower and the second in the upper 16 bits of each word. The clut buffer
// has a "right" column (lower 16 bits of each clut word, used for csa < 16) and a
// "left" column (upper 16 bits). Rows starting at 'csa' fill the right column up
// to row 15 and then continue from the base of the buffer in the left column.
__forceinline void  WriteCLUT_T16_I8_CSM1_sse2(u32* vm, u32 csa)
{
	// update the right clut column (csa < 16)
	u32* clut = (u32*)(g_pbyGSClut + 64*(csa & 15));
	u32 csa_right = (csa < 16) ? 16 - csa : 0;

	for(int i = (csa_right/2); i > 0 ; --i) {
		WriteCLUT_T16_I4_CSM1_core_sse2<true,false>(vm, clut);
		clut += 16;
		WriteCLUT_T16_I4_CSM1_core_sse2<true,true>(vm, clut);
		clut += 16;
		vm += 16; // go down one column
	}

	// update the left clut column
	u32 csa_left = (csa >= 16) ? 16 : csa;

	// In case csa_right is odd (so csa_left is also odd), we cross the clut column
	if(csa_right & 0x1) {
		// lower half of this vm column -> last row of the right column
		WriteCLUT_T16_I4_CSM1_core_sse2<true,false>(vm, clut);
		// go back to the base before processing left clut column
		clut = (u32*)(g_pbyGSClut);
		// upper half of the same vm column -> first row of the left column
		WriteCLUT_T16_I4_CSM1_core_sse2<false,true>(vm, clut);
		// This vm column and the first left row are now consumed. Without stepping
		// past them the loop below would read the same column again and overwrite
		// the row just written, shifting every following row by one.
		clut += 16;
		vm += 16; // go down one column
	} else if(csa_right != 0) {
		// go back to the base before processing left clut column
		clut = (u32*)(g_pbyGSClut);
	}

	for(int i = (csa_left/2); i > 0 ; --i) {
		WriteCLUT_T16_I4_CSM1_core_sse2<false,false>(vm, clut);
		clut += 16;
		WriteCLUT_T16_I4_CSM1_core_sse2<false,true>(vm, clut);
		clut += 16;
		vm += 16; // go down one column
	}
}
#endif
#endif // ZEROGS_SSE2
#if 0
void __fastcall WriteCLUT_T16_I8_CSM1_c(u32* _vm, u32* _clut)
{
const static u32 map[] =
@ -1110,6 +1254,8 @@ void __fastcall WriteCLUT_T32_I4_CSM1_c(u32* vm, u32* clut)
dst[7] = src[7];
}
#endif
void SSE2_UnswizzleZ16Target(u16* dst, u16* src, int iters)
{

View File

@ -96,9 +96,11 @@ extern void __fastcall SwizzleColumn16_c(int y, u8* dst, u8* src, int srcpitch);
extern void __fastcall SwizzleColumn8_c(int y, u8* dst, u8* src, int srcpitch);
extern void __fastcall SwizzleColumn4_c(int y, u8* dst, u8* src, int srcpitch);
extern "C" void __fastcall WriteCLUT_T16_I8_CSM1_sse2(u32* vm, u32* clut);
// extern "C" void __fastcall WriteCLUT_T16_I8_CSM1_sse2(u32* vm, u32* clut);
extern "C" void __fastcall WriteCLUT_T16_I8_CSM1_sse2(u32* vm, u32 csa);
extern "C" void __fastcall WriteCLUT_T32_I8_CSM1_sse2(u32* vm, u32* clut);
extern "C" void __fastcall WriteCLUT_T16_I4_CSM1_sse2(u32* vm, u32* clut);
// extern "C" void __fastcall WriteCLUT_T16_I4_CSM1_sse2(u32* vm, u32* clut);
extern "C" void __fastcall WriteCLUT_T16_I4_CSM1_sse2(u32* vm, u32 csa);
extern "C" void __fastcall WriteCLUT_T32_I4_CSM1_sse2(u32* vm, u32* clut);
extern void __fastcall WriteCLUT_T16_I8_CSM1_c(u32* vm, u32* clut);
extern void __fastcall WriteCLUT_T32_I8_CSM1_c(u32* vm, u32* clut);

File diff suppressed because it is too large Load Diff

View File

@ -25,523 +25,62 @@
#endif
// ----------------------------- Includes
#include <list>
#include <vector>
#include <map>
#include <string>
#include <math.h>
#include "ZZGl.h"
#include "GS.h"
#include "CRC.h"
#include "rasterfont.h" // simple font
using namespace std;
//------------------------ Constants ----------------------
#define VB_BUFFERSIZE 0x400
// Used in a logarithmic Z-test, as (1-o(1))/log(MAX_U32).
const float g_filog32 = 0.999f / (32.0f * logf(2.0f));
//------------------------ Inlines -------------------------
// Calculate maximum height for target
// fbp/fbw are the frame buffer pointer and width, psm the pixel storage mode.
// Returns 0 when fbw is 0; otherwise the height rounded down to a multiple of 32,
// doubled when PSMT_ISHALF(psm) holds.
inline int get_maxheight(int fbp, int fbw, int psm)
{
	// guard the division below
	if (fbw == 0) return 0;

	// presumably 0x00100000 is the memory size and 64 the page size in words - TODO confirm
	const int rows = (0x00100000 - 64 * fbp) / fbw;
	const int aligned = rows & ~0x1f;

	return PSMT_ISHALF(psm) ? aligned * 2 : aligned;
}
#include "PS2Edefs.h"
// ------------------------ Variables -------------------------
// all textures have this width
extern int GPU_TEXWIDTH;
extern float g_fiGPU_TEXWIDTH;
#define MASKDIVISOR 0 // Used for decrement bitwise mask texture size if 1024 is too big
#define GPU_TEXMASKWIDTH (1024 >> MASKDIVISOR) // bitwise mask width for region repeat mode
extern u32 ptexBlocks; // holds information on block tiling. It's texture number in OpenGL -- if 0 than such texture
extern u32 ptexConv16to32; // does not exists. This textures should be created on start and released on finish.
extern u32 ptexBilinearBlocks;
extern u32 ptexConv32to16;
// this is currently *not* used as a bool, in spite of its moniker --air
// Actually, the only thing written to it is 1 or 0, which makes the (g_bSaveFlushedFrame & 0x80000000) check rather bizzare.
//extern u32 g_bSaveFlushedFrame;
//////////////////////////
// State parameters
#ifdef ZEROGS_DEVBUILD
extern char* EFFECT_NAME;
extern char* EFFECT_DIR;
extern u32 g_nGenVars, g_nTexVars, g_nAlphaVars, g_nResolve;
extern bool g_bSaveTrans, g_bUpdateEffect, g_bSaveTex, g_bSaveResolved;
#if defined(_WIN32)
# include <windows.h>
# include "resource.h"
#endif
extern u32 s_uFramebuffer;
extern int g_nPixelShaderVer;
#include <stdlib.h>
#include "GS.h"
#include "targets.h"
#include "GLWin.h"
#include "ZZoglShaders.h"
#include "ZZClut.h"
#include "HostMemory.h"
typedef void (APIENTRYP _PFNSWAPINTERVAL)(int);
PFNGLISRENDERBUFFEREXTPROC glIsRenderbufferEXT = NULL;
PFNGLBINDRENDERBUFFEREXTPROC glBindRenderbufferEXT = NULL;
PFNGLDELETERENDERBUFFERSEXTPROC glDeleteRenderbuffersEXT = NULL;
PFNGLGENRENDERBUFFERSEXTPROC glGenRenderbuffersEXT = NULL;
PFNGLRENDERBUFFERSTORAGEEXTPROC glRenderbufferStorageEXT = NULL;
PFNGLGETRENDERBUFFERPARAMETERIVEXTPROC glGetRenderbufferParameterivEXT = NULL;
PFNGLISFRAMEBUFFEREXTPROC glIsFramebufferEXT = NULL;
PFNGLBINDFRAMEBUFFEREXTPROC glBindFramebufferEXT = NULL;
PFNGLDELETEFRAMEBUFFERSEXTPROC glDeleteFramebuffersEXT = NULL;
PFNGLGENFRAMEBUFFERSEXTPROC glGenFramebuffersEXT = NULL;
PFNGLCHECKFRAMEBUFFERSTATUSEXTPROC glCheckFramebufferStatusEXT = NULL;
PFNGLFRAMEBUFFERTEXTURE1DEXTPROC glFramebufferTexture1DEXT = NULL;
PFNGLFRAMEBUFFERTEXTURE2DEXTPROC glFramebufferTexture2DEXT = NULL;
PFNGLFRAMEBUFFERTEXTURE3DEXTPROC glFramebufferTexture3DEXT = NULL;
PFNGLFRAMEBUFFERRENDERBUFFEREXTPROC glFramebufferRenderbufferEXT = NULL;
PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVEXTPROC glGetFramebufferAttachmentParameterivEXT = NULL;
PFNGLGENERATEMIPMAPEXTPROC glGenerateMipmapEXT = NULL;
PFNGLDRAWBUFFERSPROC glDrawBuffers = NULL;
#ifndef GL_FRAMEBUFFER_INCOMPLETE_DUPLICATE_ATTACHMENT_EXT
#define GL_FRAMEBUFFER_INCOMPLETE_DUPLICATE_ATTACHMENT_EXT 0x8CD8
#endif
bool ZZCreate(int width, int height);
void ZZGSStateReset();
extern bool s_bWriteDepth;
extern u32 ptexLogo;
extern int nLogoWidth, nLogoHeight;
extern int nBackbufferWidth, nBackbufferHeight;
namespace ZeroGS
{
typedef void (*DrawFn)();
// manages render-to-texture targets
// Base class of CDepthTarget. Most operations are virtual so that the depth
// variant can override them; IsDepth() tells the two apart at run time.
class CRenderTarget
{
public:
CRenderTarget();
virtual ~CRenderTarget();
virtual bool Create(const frameInfo& frame);
virtual void Destroy();
// set the GPU_POSXY variable, scissor rect, and current render target
void SetTarget(int fbplocal, const Rect2& scissor, int context);
void SetViewport();
// copies/creates the feedback contents
// (only does work when no feedback texture exists yet or TS_FeedbackReady is not set)
inline void CreateFeedback()
{
if (ptexFeedback == 0 || !(status&TS_FeedbackReady))
_CreateFeedback();
}
virtual void Resolve();
virtual void Resolve(int startrange, int endrange); // resolves only in the allowed range
virtual void Update(int context, CRenderTarget* pdepth);
virtual void ConvertTo32(); // converts a psm==2 target, to a psm==0
virtual void ConvertTo16(); // converts a psm==0 target, to a psm==2
// false for colour targets; CDepthTarget overrides this to return true
virtual bool IsDepth() { return false; }
void SetRenderTarget(int targ);
void* psys; // system data used for comparison
u32 ptex;
int fbp, fbw, fbh, fbhCalc; // if fbp is negative, virtual target (not mapped to any real addr)
int start, end; // in bytes
u32 lastused; // time stamp since last used
float4 vposxy;
u32 fbm;
u16 status; // bitwise OR of TargetStatus flags
u8 psm;
u8 resv0;
Rect scissorrect;
u8 created; // Check for object destruction/creating for r201.
//int startresolve, endresolve;
u32 nUpdateTarg; // use this target to update the texture if non 0 (one time only)
// this is optionally used when feedback effects are used (render target is used as a texture when rendering to itself)
u32 ptexFeedback;
// bit flags stored in 'status'
enum TargetStatus
{
TS_Resolved = 1,
TS_NeedUpdate = 2,
TS_Virtual = 4, // currently not mapped to memory
TS_FeedbackReady = 8, // feedback effect is ready and doesn't need to be updated
TS_NeedConvert32 = 16,
TS_NeedConvert16 = 32,
};
inline float4 DefaultBitBltPos();
inline float4 DefaultBitBltTex();
private:
// does the actual work for CreateFeedback()
void _CreateFeedback();
inline bool InitialiseDefaultTexture(u32 *p_ptr, int fbw, int fbh) ;
};
// manages zbuffers
// Specialisation of CRenderTarget: overrides creation, destruction, resolve and
// update, and reports itself through IsDepth().
class CDepthTarget : public CRenderTarget
{
public:
CDepthTarget();
virtual ~CDepthTarget();
virtual bool Create(const frameInfo& frame);
virtual void Destroy();
virtual void Resolve();
virtual void Resolve(int startrange, int endrange); // resolves only in the allowed range
// note: the second argument is the colour target here (the base class takes the depth target)
virtual void Update(int context, CRenderTarget* prndr);
// always true: lets callers holding a CRenderTarget* detect a depth target
virtual bool IsDepth() { return true; }
void SetDepthStencilSurface();
u32 pdepth; // 24 bit, will contain the stencil buffer if possible
u32 pstencil; // if not 0, contains the stencil buffer
int icount; // internal counter
};
// manages contiguous chunks of memory (width is always 1024)
class CMemoryTarget
{
public:
struct TEXTURE
{
inline TEXTURE() : tex(0), memptr(NULL), ref(0) {}
inline ~TEXTURE() { glDeleteTextures(1, &tex); _aligned_free(memptr); }
u32 tex;
u8* memptr; // GPU memory used for comparison
int ref;
};
inline CMemoryTarget() : ptex(NULL), starty(0), height(0), realy(0), realheight(0), usedstamp(0), psm(0), cpsm(0), channels(0), clearminy(0), clearmaxy(0), validatecount(0) {}
inline CMemoryTarget(const CMemoryTarget& r)
{
ptex = r.ptex;
if (ptex != NULL) ptex->ref++;
starty = r.starty;
height = r.height;
realy = r.realy;
realheight = r.realheight;
usedstamp = r.usedstamp;
psm = r.psm;
cpsm = r.cpsm;
clut = r.clut;
clearminy = r.clearminy;
clearmaxy = r.clearmaxy;
widthmult = r.widthmult;
texH = r.texH;
texW = r.texW;
channels = r.channels;
validatecount = r.validatecount;
fmt = r.fmt;
}
~CMemoryTarget() { Destroy(); }
inline void Destroy()
{
if (ptex != NULL && ptex->ref > 0)
{
if (--ptex->ref <= 0) delete ptex;
}
ptex = NULL;
}
// returns true if clut data is synced
bool ValidateClut(const tex0Info& tex0);
// returns true if tex data is synced
bool ValidateTex(const tex0Info& tex0, int starttex, int endtex, bool bDeleteBadTex);
// realy is offset in pixels from start of valid region
// so texture in memory is [realy,starty+height]
// valid texture is [starty,starty+height]
// offset in mem [starty-realy, height]
TEXTURE* ptex; // can be 16bit
int starty, height; // assert(starty >= realy)
int realy, realheight; // this is never touched once allocated
// realy is start pointer of data in 4M data block (start) and size (end-start).
u32 usedstamp;
u8 psm, cpsm; // texture and clut format. For psm, only 16bit/32bit differentiation matters
u32 fmt;
int widthmult; // Either 1 or 2.
int channels; // The number of pixels per PSM format word. channels == PIXELS_PER_WORD(psm)
// This is the real drawing size in pixels of the texture in renderbuffer.
int texW; // (realheight + widthmult - 1)/widthmult == realheight or [(realheight+1)/2]
int texH; // GPU_TEXWIDTH *widthmult * channels;
int clearminy, clearmaxy; // when maxy > 0, need to check for clearing
int validatecount; // count how many times has been validated, if too many, destroy
vector<u8> clut; // if nonzero, texture uses CLUT
};
// Vertex buffer together with the context specific GS state that goes with it
// (two instances exist, see the 'vb[2]' declaration; 'ictx' holds the index).
struct VB
{
	VB();
	~VB();

	void Destroy();

	// Returns true when the incoming primitive differs from the one the buffered
	// vertices were built for AND there are buffered vertices (nCount > 0).
	inline bool CheckPrim()
	{
		static const int PRIMMASK = 0x0e; // for now ignore 0x10 (AA)

		if ((PRIMMASK & prim->_val) != (PRIMMASK & curprim._val) || primtype[prim->prim] != primtype[curprim.prim])
			return nCount > 0;

		return false;
	}

	void CheckFrame(int tbp);

	// context specific state
	Point offset;
	Rect2 scissor;
	tex0Info tex0;
	tex1Info tex1;
	miptbpInfo miptbp0;
	miptbpInfo miptbp1;
	alphaInfo alpha;
	fbaInfo fba;
	clampInfo clamp;
	pixTest test;
	u32 ptexClamp[2]; // textures for x and y dir region clamping

public:
	void FlushTexData();
	inline int CheckFrameAddConstraints(int tbp);
	inline void CheckScissors(int maxpos);
	inline void CheckFrame32bitRes(int maxpos);
	inline int FindMinimalMemoryConstrain(int tbp, int maxpos);
	inline int FindZbufferMemoryConstrain(int tbp, int maxpos);
	inline int FindMinimalHeightConstrain(int maxpos);

	inline int CheckFrameResolveRender(int tbp);
	inline void CheckFrame16vs32Conversion();
	inline int CheckFrameResolveDepth(int tbp);

	inline void FlushTexUnchangedClutDontUpdate() ;
	inline void FlushTexClutDontUpdate() ;
	inline void FlushTexClutting() ;
	inline void FlushTexSetNewVars(u32 psm) ;

	// notify VB that nVerts need to be written to pbuf
	// Makes sure pBufferData can hold nCount + nVerts vertices. The capacity is
	// doubled as often as needed: a single doubling is not enough when nVerts is
	// larger than the current capacity, and the old assert-only check would let
	// release builds write past the end of the buffer in that case.
	inline void NotifyWrite(int nVerts)
	{
		assert(pBufferData != NULL && nCount <= nNumVertices && nVerts > 0);

		if (nCount + nVerts > nNumVertices)
		{
			// start from at least 1 so that the doubling loop always terminates
			int newCapacity = (nNumVertices > 0) ? nNumVertices : 1;

			while (nCount + nVerts > newCapacity) newCapacity *= 2;

			// recreate except with a bigger count
			VertexGPU* ptemp = (VertexGPU*)_aligned_malloc(sizeof(VertexGPU) * newCapacity, 256);
			assert(ptemp != NULL);
			memcpy_amd(ptemp, pBufferData, sizeof(VertexGPU) * nCount);
			nNumVertices = newCapacity;
			assert(nCount + nVerts <= nNumVertices);
			_aligned_free(pBufferData);
			pBufferData = ptemp;
		}
	}

	// Allocates the vertex buffer on first use (nVerts entries) and resets the
	// vertex count. An existing buffer is kept as is, whatever nVerts says.
	void Init(int nVerts)
	{
		if (pBufferData == NULL && nVerts > 0)
		{
			pBufferData = (VertexGPU*)_aligned_malloc(sizeof(VertexGPU) * nVerts, 256);
			nNumVertices = nVerts;
		}

		nCount = 0;
	}

	u8 bNeedFrameCheck;
	u8 bNeedZCheck;
	u8 bNeedTexCheck;
	u8 dummy0;

	// the four sync flags can be read or cleared together through bSyncVars
	union
	{
		struct
		{
			u8 bTexConstsSync; // only pixel shader constants that context owns
			u8 bVarsTexSync; // texture info
			u8 bVarsSetTarg;
			u8 dummy1;
		};

		u32 bSyncVars;
	};

	int ictx;
	VertexGPU* pBufferData; // current allocated data

	int nNumVertices;	// size of pBufferData in terms of VertexGPU objects
	int nCount;         // number of vertices currently stored in pBufferData
	primInfo curprim;	// the previous prim the current buffers are set to

	zbufInfo zbuf;
	frameInfo gsfb; // the real info set by FRAME cmd
	frameInfo frame;
	int zprimmask; // zmask for incoming points

	union
	{
		u32 uCurTex0Data[2]; // current tex0 data
		GIFRegTEX0 uCurTex0;
	};
	u32 uNextTex0Data[2]; // tex0 data that has to be applied if bNeedTexCheck is 1

	//int nFrameHeights[8];	// frame heights for the past frame changes
	int nNextFrameHeight;

	CMemoryTarget* pmemtarg; // the current mem target set
	CRenderTarget* prndr;
	CDepthTarget* pdepth;

};
// visible members
// All of the following are extern declarations; the objects themselves are defined elsewhere.
extern DrawFn drawfn[8]; // NOTE(review): presumably one draw routine per primitive type — confirm
// VB variables
extern VB vb[2]; // NOTE(review): presumably one VB per GS context — confirm
extern float fiTexWidth[2], fiTexHeight[2]; // current tex width and height
extern vector<GLuint> g_vboBuffers; // VBOs for all drawing commands
extern GLuint vboRect;
extern int g_nCurVBOIndex; // NOTE(review): presumably an index into g_vboBuffers — confirm
// Messages, text output and window/device configuration (declarations only).
// ms defaults to 5000. NOTE(review): presumably a display time in milliseconds — confirm.
void AddMessage(const char* pstr, u32 ms = 5000);
void DrawText(const char* pstr, int left, int top, u32 color);
void ChangeWindowSize(int nNewWidth, int nNewHeight);
// NOTE(review): SetChangeDeviceSize presumably schedules a resize that ChangeDeviceSize performs — confirm.
void SetChangeDeviceSize(int nNewWidth, int nNewHeight);
void ChangeDeviceSize(int nNewWidth, int nNewHeight);
void SetAA(int mode);
void SetCRC(int crc);
void ReloadEffects();
// Methods //
// Creation, teardown and reset entry points (declarations only; bodies are defined elsewhere).
bool IsGLExt(const char* szTargetExtension); ///< returns true if the OpenGL extension is supported
inline bool Create_Window(int _width, int _height);
bool Create(int width, int height);
void Destroy(bool bD3D);
void Reset(); // call to destroy video resources
void GSStateReset();
void GSReset();
void GSSoftReset(u32 mask);
void HandleGLError();
// called on a primitive switch
void Prim();
void SetTexFlush();
// flush current vertices, call before setting new registers (the main render method)
void Flush(int context);
void FlushBoth(); // NOTE(review): presumably Flush() for both contexts — confirm
void ExtWrite();
// Paired setter/query declarations; the state they touch is not visible in this chunk.
void SetWriteDepth();
bool IsWriteDepth();
// Disabled declarations kept from an earlier revision:
//extern u32 ptexLogo;
//extern int nLogoWidth, nLogoHeight;
//extern GLuint vboRect;
//void ProcessMessages();
//void RenderCustom(float fAlpha); // intro anim
void SetDestAlphaTest();
bool IsWriteDestAlphaTest();
// Two overloads: one takes the fog value as a raw u32, the other a pointer to a GIFRegFOGCOL.
void SetFogColor(u32 fog);
void SetFogColor(GIFRegFOGCOL* fog);
// NOTE(review): presumably debugging helpers that dump the texture described by ptex — confirm.
void SaveTex(tex0Info* ptex, int usevid);
char* NamedSaveTex(tex0Info* ptex, int usevid);
// called when trxdir is accessed. If host is involved, transfers memory to temp buffer byTransferBuf.
// Otherwise performs the transfer. TODO: Perhaps divide the transfers into chunks?
// NOTE(review): nQWordSize is presumably a size counted in quadwords — confirm.
void InitTransferHostLocal();
void TransferHostLocal(const void* pbyMem, u32 nQWordSize);
void InitTransferLocalHost();
void TransferLocalHost(void* pbyMem, u32 nQWordSize);
// Intentionally empty: nothing is done when a local->host transfer terminates.
inline void TerminateLocalHost() {}
void TransferLocalLocal();
// switches the render target to the real target, flushes the current render targets and renders the real image
void RenderCRTC(int interlace);
void ResetRenderTarget(int index);
bool CheckChangeInClut(u32 highdword, u32 psm); // returns true if clut will change after this tex0 op
// call to load CLUT data (depending on CLD)
void texClutWrite(int ctx);
// Save-state serialization (declarations only).
// NOTE(review): the meaning of Save()'s int return value is not visible here — confirm.
int Save(s8* pbydata);
bool Load(s8* pbydata);
// Screenshot / image output helpers (declarations only).
void SaveSnapshot(const char* filename);
bool SaveRenderTarget(const char* filename, int width, int height, int jpeg);
bool SaveTexture(const char* filename, u32 textarget, u32 tex, int width, int height);
bool SaveJPEG(const char* filename, int width, int height, const void* pdata, int quality);
bool SaveTGA(const char* filename, int width, int height, void* pdata);
// AVI capture teardown (declarations only).
void Stop_Avi();
void Delete_Avi_Capture();
// private methods
// (declared at the same scope as everything else here; "private" is a convention, not an access specifier)
void FlushSysMem(const RECT* prc);
void _Resolve(const void* psrc, int fbp, int fbw, int fbh, int psm, u32 fbm, bool mode);
// returns (through start and end) the first and last addresses, aligned to a page, that cover
// the rectangle given by x, y, w, h for the buffer described by psm, bp and bw
void GetRectMemAddress(int& start, int& end, int psm, int x, int y, int w, int h, int bp, int bw);
// The next two comment lines describe GetMemRect, whose declaration is currently disabled:
// inits the smallest rectangle in ptexMem that covers this region in ptexMem
// returns the offset that needs to be added to the locked rect to get the beginning of the buffer
//void GetMemRect(RECT& rc, int psm, int x, int y, int w, int h, int bp, int bw);
void SetContextTarget(int context) ;
void NeedFactor(int w);
void ResetAlphaVariables();
// Video capture control (declarations only).
void StartCapture();
void StopCapture();
void CaptureFrame();
// Apply clutting to a texture that has already been flushed.
// The cbp, cpsm, csm, csa and cld fields are extracted from Data with the
// ZZOglGet_*_TexBits helpers and stored in *tex0; afterwards
// ZeroGS::texClutWrite() is invoked for context ictx.
// Callers should check beforehand whether this call is needed at all.
inline void CluttingForFlushedTex(tex0Info* tex0, u32 Data, int ictx)
{
    tex0Info& info = *tex0;

    info.cbp  = ZZOglGet_cbp_TexBits(Data);
    info.cpsm = ZZOglGet_cpsm_TexBits(Data);
    info.csm  = ZZOglGet_csm_TexBits(Data);
    info.csa  = ZZOglGet_csa_TexBits(Data);
    info.cld  = ZZOglGet_cld_TexBits(Data);

    ZeroGS::texClutWrite(ictx);
}
// Byte size of x texture strings: each string accounts for 4 * GPU_TEXWIDTH bytes.
inline int MemorySize(int x)
{
    const int totalBytes = 4 * GPU_TEXWIDTH * x;
    return totalBytes;
}
// Address of the data block for string x: g_pbyGSMemory advanced by MemorySize(x) bytes.
inline u8* MemoryAddress(int x)
{
    const int byteOffset = MemorySize(x);
    return g_pbyGSMemory + byteOffset;
}
// Variant of MemoryAddress() with a compile-time stride:
// yields g_pbyGSMemory advanced by mult * x bytes.
template <u32 mult>
inline u8* _MemoryAddress(int x)
{
    const u32 byteOffset = mult * x;
    return g_pbyGSMemory + byteOffset;
}
};
#endif